kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 but will turn to zero if the initialization
118  * of the tracer is successful. But that is the only place that sets
119  * this back to zero.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops.
137  * Set it to 1 to dump the buffers of all CPUs.
138  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * than "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
178
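/*
 * Pictorially (an illustrative summary, not part of the original file),
 * one saved array looks like:
 *
 *   [ head: mod, length = N ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail: next ]
 *
 * where tail.next chains to the next saved array of eval_map items, if any.
 */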
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225
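/*
 * For example (not part of the original file): booting with
 * "ftrace_dump_on_oops" or "ftrace_dump_on_oops=1" selects DUMP_ALL,
 * while "ftrace_dump_on_oops=orig_cpu" (or "=2") selects DUMP_ORIG.
 * The same values can also be set at run time through
 * /proc/sys/kernel/ftrace_dump_on_oops, as described in the comment above.
 */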
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
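/*
 * For example (not part of the original file), a kernel command line such as
 *
 *   ftrace=function trace_options=sym-addr trace_instance=foo
 *
 * selects the boot-up tracer, sets a trace option and requests an extra
 * tracing instance, using the __setup() handlers above ("sym-addr" is just
 * one possible option name here).
 */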
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
328
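/*
 * For example (not part of the original file): ns2usecs(1499) == 1 and
 * ns2usecs(1500) == 2; adding 500 before the divide rounds to the nearest
 * microsecond instead of truncating.
 */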
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are inserting the export into the list, but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer inserted into the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
467
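/*
 * Illustrative only (not part of the original file): a minimal sketch of a
 * module that hooks into the export mechanism above.  It assumes the
 * trace_export structure and TRACE_EXPORT_* flag bits declared in
 * <linux/trace.h>; the callback name and the pr_info() message are made up
 * for the example.
 */
#if 0	/* example code, kept out of the build */
#include <linux/module.h>
#include <linux/trace.h>

/* Called for every exported trace event; @buf is the raw trace entry. */
static void example_export_write(struct trace_export *export,
				 const void *buf, unsigned int len)
{
	pr_info("exported %u bytes of trace data\n", len);
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}

module_init(example_export_init);
module_exit(example_export_exit);
MODULE_LICENSE("GPL");
#endif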
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
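/*
 * In table form (an illustrative summary, not part of the original file):
 *
 *   filtered_pids set and task->pid not in it      -> ignore (return true)
 *   filtered_no_pids set and task->pid in it       -> ignore (return true)
 *   anything else                                  -> trace  (return false)
 */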
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
716
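/*
 * Illustrative only (not part of the original file): a tiny user-space model
 * of the "pid + 1" cookie used by trace_pid_start()/trace_pid_next() above.
 * seq_file stops iterating when the iterator returns NULL, so pid 0 could
 * never be displayed if the pid itself were the cookie; storing pid + 1
 * keeps the cookie non-NULL and trace_pid_show() subtracts the 1 again.
 */
#if 0	/* example code, kept out of the build */
#include <stdio.h>

static void *encode_pid(long pid) { return (void *)(pid + 1); }
static long decode_pid(void *v)   { return (long)v - 1; }

int main(void)
{
	long pids[] = { 0, 1, 25 };	/* pid 0 must stay displayable */
	int i;

	for (i = 0; i < 3; i++) {
		void *cookie = encode_pid(pids[i]);

		if (!cookie)		/* NULL would stop a seq_file walk */
			break;
		printf("%ld\n", decode_pid(cookie));
	}
	return 0;
}
#endif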
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always create a new array. The write is an all-or-nothing
738          * operation: when the user adds new pids, a new array is built,
739          * and if the operation fails, the current list is
740          * not modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
806
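/*
 * For example (not part of the original file): this helper sits behind
 * pid-filter files such as "set_event_pid".  Writing "123 456" parses both
 * pids into a freshly allocated list, and the caller's list is replaced only
 * when the whole write parses successfully (the all-or-nothing behaviour
 * described in the comment above).
 */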
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to have
853  * to wait for all that output. In any case, this is configurable
854  * both at boot time and at run time.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * serialize the access of the ring buffer
870  *
871  * The ring buffer serializes readers, but that is only low-level protection.
872  * The validity of the events (returned by ring_buffer_peek() etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow another process to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the event producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different per-CPU
884  * ring buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
949
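/*
 * Illustrative only (not part of the original file): a user-space sketch of
 * the locking scheme above, assuming POSIX threads.  A whole-buffer reader
 * takes the rwlock for writing, while a per-cpu reader takes it for reading
 * plus that cpu's mutex, so per-cpu readers exclude each other only on the
 * same cpu and a whole-buffer reader excludes everybody.
 */
#if 0	/* example code, kept out of the build */
#include <pthread.h>

#define EXAMPLE_NR_CPUS	 4
#define EXAMPLE_ALL_CPUS (-1)

static pthread_rwlock_t all_access = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t cpu_access[EXAMPLE_NR_CPUS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void example_access_lock(int cpu)
{
	if (cpu == EXAMPLE_ALL_CPUS) {
		pthread_rwlock_wrlock(&all_access);
	} else {
		pthread_rwlock_rdlock(&all_access);
		pthread_mutex_lock(&cpu_access[cpu]);
	}
}

static void example_access_unlock(int cpu)
{
	if (cpu == EXAMPLE_ALL_CPUS) {
		pthread_rwlock_unlock(&all_access);
	} else {
		pthread_mutex_unlock(&cpu_access[cpu]);
		pthread_rwlock_unlock(&all_access);
	}
}

int main(void)
{
	example_access_lock(1);			/* per-cpu reader on cpu 1 */
	example_access_unlock(1);

	example_access_lock(EXAMPLE_ALL_CPUS);	/* whole-buffer reader */
	example_access_unlock(EXAMPLE_ALL_CPUS);
	return 0;
}
#endif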
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff), that just want to
1005          * know if the ring buffer has been disabled, but it can handle
1006          * races where it gets disabled while we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
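/*
 * Illustrative only (not part of the original file): kernel code usually
 * reaches __trace_puts() through the trace_puts() macro, which picks
 * __trace_bputs() for string literals; calling __trace_puts() directly is
 * how a non-constant string would be written.  The function name and the
 * string argument below are made up for the example.
 */
#if 0	/* example code, kept out of the build */
static void example_mark_point(const char *msg)
{
	__trace_puts(_THIS_IP_, msg, strlen(msg));
}
#endif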
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string whose pointer is written to the buffer
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, snapshot can not be used when the tracer uses it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing.
1199  * Basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already done.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer; instead, we resize it because
1285          * the max_tr ring buffer has some state (e.g. ring->clock) that
1286          * we want to preserve.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1338
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405
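/*
 * Illustrative only (not part of the original file): a sketch of how a
 * caller might use the conditional-snapshot API.  It assumes the
 * cond_update_fn_t callback type used by this API (the callback returns
 * true when the snapshot should be taken); the instance name, threshold
 * and function names below are made up for the example.
 */
#if 0	/* example code, kept out of the build */
/*
 * Called with tr->max_lock held; @cond_data is whatever the
 * tracing_snapshot_cond() caller passed at the trigger site.
 */
static bool example_update(struct trace_array *tr, void *cond_data)
{
	return cond_data && *(u64 *)cond_data > 1000;
}

static int example_enable(void)
{
	struct trace_array *tr = trace_array_get_by_name("example");

	if (!tr)
		return -ENODEV;

	/* Call trace_array_put(tr) once the instance is no longer needed. */
	return tracing_snapshot_cond_enable(tr, NULL, example_update);
}

/* At the trigger site, something like: */
static void example_trigger(struct trace_array *tr, u64 latency)
{
	tracing_snapshot_cond(tr, &latency);
}
#endif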
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff), that just want to
1484          * know if the ring buffer has been disabled, but it can handle
1485          * races where it gets disabled while we still do a record.
1486          * As the check is in the fast path of the tracers, it is more
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * The parser is not finished with the last write,
1670          * continue reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* We either got a complete token or we have to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
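
/*
 * A rough sketch of how a write() handler might drive the parser
 * (hypothetical handler and helper names, simplified; real callers add
 * locking and more careful error handling). trace_parser_loaded() is
 * the helper from kernel/trace/trace.h:
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		do_something_with(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */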
1727
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, tr,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
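
/*
 * The chain set up above, in order: latency_fsnotify() only queues
 * tr->fsnotify_irqwork; the irq_work handler queues tr->fsnotify_work
 * on fsnotify_wq; and the workqueue callback finally calls
 * fsnotify_inode() on the tracing_max_latency file. The double
 * deferral keeps fsnotify out of scheduler and idle context.
 */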
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, tr, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (this way the maximum trace is permanently saved,
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
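        /* This is PRIO_TO_NICE(tsk->static_prio): recover the task's nice value */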
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this task's comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: the trace array
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896
1897         /* Any waiters on the old snapshot buffer need to wake up */
1898         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1899 }
1900
1901 /**
1902  * update_max_tr_single - only copy one trace over, and reset the rest
1903  * @tr: the trace array
1904  * @tsk: task with the latency
1905  * @cpu: the cpu of the buffer to copy.
1906  *
1907  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1908  */
1909 void
1910 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1911 {
1912         int ret;
1913
1914         if (tr->stop_count)
1915                 return;
1916
1917         WARN_ON_ONCE(!irqs_disabled());
1918         if (!tr->allocated_snapshot) {
1919                 /* Only the nop tracer should hit this when disabling */
1920                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1921                 return;
1922         }
1923
1924         arch_spin_lock(&tr->max_lock);
1925
1926         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1927
1928         if (ret == -EBUSY) {
1929                 /*
1930                  * We failed to swap the buffer due to a commit taking
1931                  * place on this CPU. We fail to record, but we reset
1932                  * the max trace buffer (no one writes directly to it)
1933                  * and flag that it failed.
1934                  * Another reason is that a resize is in progress.
1935                  */
1936                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1937                         "Failed to swap buffers due to commit or resize in progress\n");
1938         }
1939
1940         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1941
1942         __update_max_tr(tr, tsk, cpu);
1943         arch_spin_unlock(&tr->max_lock);
1944 }
1945
1946 #endif /* CONFIG_TRACER_MAX_TRACE */
1947
1948 static int wait_on_pipe(struct trace_iterator *iter, int full)
1949 {
1950         int ret;
1951
1952         /* Iterators are static, they should be filled or empty */
1953         if (trace_buffer_iter(iter, iter->cpu_file))
1954                 return 0;
1955
1956         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959         /*
1960          * Make sure this is still the snapshot buffer, as if a snapshot were
1961          * to happen, this would now be the main buffer.
1962          */
1963         if (iter->snapshot)
1964                 iter->array_buffer = &iter->tr->max_buffer;
1965 #endif
1966         return ret;
1967 }
1968
1969 #ifdef CONFIG_FTRACE_STARTUP_TEST
1970 static bool selftests_can_run;
1971
1972 struct trace_selftests {
1973         struct list_head                list;
1974         struct tracer                   *type;
1975 };
1976
1977 static LIST_HEAD(postponed_selftests);
1978
1979 static int save_selftest(struct tracer *type)
1980 {
1981         struct trace_selftests *selftest;
1982
1983         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1984         if (!selftest)
1985                 return -ENOMEM;
1986
1987         selftest->type = type;
1988         list_add(&selftest->list, &postponed_selftests);
1989         return 0;
1990 }
1991
1992 static int run_tracer_selftest(struct tracer *type)
1993 {
1994         struct trace_array *tr = &global_trace;
1995         struct tracer *saved_tracer = tr->current_trace;
1996         int ret;
1997
1998         if (!type->selftest || tracing_selftest_disabled)
1999                 return 0;
2000
2001         /*
2002          * If a tracer registers early in boot up (before scheduling is
2003          * initialized and such), then do not run its selftests yet.
2004          * Instead, run it a little later in the boot process.
2005          */
2006         if (!selftests_can_run)
2007                 return save_selftest(type);
2008
2009         if (!tracing_is_on()) {
2010                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2011                         type->name);
2012                 return 0;
2013         }
2014
2015         /*
2016          * Run a selftest on this tracer.
2017          * Here we reset the trace buffer, and set the current
2018          * tracer to be this tracer. The tracer can then run some
2019          * internal tracing to verify that everything is in order.
2020          * If we fail, we do not register this tracer.
2021          */
2022         tracing_reset_online_cpus(&tr->array_buffer);
2023
2024         tr->current_trace = type;
2025
2026 #ifdef CONFIG_TRACER_MAX_TRACE
2027         if (type->use_max_tr) {
2028                 /* If we expanded the buffers, make sure the max is expanded too */
2029                 if (ring_buffer_expanded)
2030                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2031                                            RING_BUFFER_ALL_CPUS);
2032                 tr->allocated_snapshot = true;
2033         }
2034 #endif
2035
2036         /* the test is responsible for initializing and enabling */
2037         pr_info("Testing tracer %s: ", type->name);
2038         ret = type->selftest(type, tr);
2039         /* the test is responsible for resetting too */
2040         tr->current_trace = saved_tracer;
2041         if (ret) {
2042                 printk(KERN_CONT "FAILED!\n");
2043                 /* Add the warning after printing 'FAILED' */
2044                 WARN_ON(1);
2045                 return -1;
2046         }
2047         /* Only reset on passing, to avoid touching corrupted buffers */
2048         tracing_reset_online_cpus(&tr->array_buffer);
2049
2050 #ifdef CONFIG_TRACER_MAX_TRACE
2051         if (type->use_max_tr) {
2052                 tr->allocated_snapshot = false;
2053
2054                 /* Shrink the max buffer again */
2055                 if (ring_buffer_expanded)
2056                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2057                                            RING_BUFFER_ALL_CPUS);
2058         }
2059 #endif
2060
2061         printk(KERN_CONT "PASSED\n");
2062         return 0;
2063 }
2064
2065 static int do_run_tracer_selftest(struct tracer *type)
2066 {
2067         int ret;
2068
2069         /*
2070          * Tests can take a long time, especially if they are run one after the
2071          * other, as does happen during bootup when all the tracers are
2072          * registered. This could cause the soft lockup watchdog to trigger.
2073          */
2074         cond_resched();
2075
2076         tracing_selftest_running = true;
2077         ret = run_tracer_selftest(type);
2078         tracing_selftest_running = false;
2079
2080         return ret;
2081 }
2082
2083 static __init int init_trace_selftests(void)
2084 {
2085         struct trace_selftests *p, *n;
2086         struct tracer *t, **last;
2087         int ret;
2088
2089         selftests_can_run = true;
2090
2091         mutex_lock(&trace_types_lock);
2092
2093         if (list_empty(&postponed_selftests))
2094                 goto out;
2095
2096         pr_info("Running postponed tracer tests:\n");
2097
2098         tracing_selftest_running = true;
2099         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2100                 /* This loop can take minutes when sanitizers are enabled, so
2101                  * let's make sure we allow RCU processing.
2102                  */
2103                 cond_resched();
2104                 ret = run_tracer_selftest(p->type);
2105                 /* If the test fails, then warn and remove from available_tracers */
2106                 if (ret < 0) {
2107                         WARN(1, "tracer: %s failed selftest, disabling\n",
2108                              p->type->name);
2109                         last = &trace_types;
2110                         for (t = trace_types; t; t = t->next) {
2111                                 if (t == p->type) {
2112                                         *last = t->next;
2113                                         break;
2114                                 }
2115                                 last = &t->next;
2116                         }
2117                 }
2118                 list_del(&p->list);
2119                 kfree(p);
2120         }
2121         tracing_selftest_running = false;
2122
2123  out:
2124         mutex_unlock(&trace_types_lock);
2125
2126         return 0;
2127 }
2128 core_initcall(init_trace_selftests);
2129 #else
2130 static inline int run_tracer_selftest(struct tracer *type)
2131 {
2132         return 0;
2133 }
2134 static inline int do_run_tracer_selftest(struct tracer *type)
2135 {
2136         return 0;
2137 }
2138 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2139
2140 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2141
2142 static void __init apply_trace_boot_options(void);
2143
2144 /**
2145  * register_tracer - register a tracer with the ftrace system.
2146  * @type: the plugin for the tracer
2147  *
2148  * Register a new plugin tracer.
2149  */
2150 int __init register_tracer(struct tracer *type)
2151 {
2152         struct tracer *t;
2153         int ret = 0;
2154
2155         if (!type->name) {
2156                 pr_info("Tracer must have a name\n");
2157                 return -1;
2158         }
2159
2160         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2161                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2162                 return -1;
2163         }
2164
2165         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2166                 pr_warn("Can not register tracer %s due to lockdown\n",
2167                            type->name);
2168                 return -EPERM;
2169         }
2170
2171         mutex_lock(&trace_types_lock);
2172
2173         for (t = trace_types; t; t = t->next) {
2174                 if (strcmp(type->name, t->name) == 0) {
2175                         /* already found */
2176                         pr_info("Tracer %s already registered\n",
2177                                 type->name);
2178                         ret = -1;
2179                         goto out;
2180                 }
2181         }
2182
2183         if (!type->set_flag)
2184                 type->set_flag = &dummy_set_flag;
2185         if (!type->flags) {
2186                 /* allocate a dummy tracer_flags */
2187                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2188                 if (!type->flags) {
2189                         ret = -ENOMEM;
2190                         goto out;
2191                 }
2192                 type->flags->val = 0;
2193                 type->flags->opts = dummy_tracer_opt;
2194         } else
2195                 if (!type->flags->opts)
2196                         type->flags->opts = dummy_tracer_opt;
2197
2198         /* store the tracer for __set_tracer_option */
2199         type->flags->trace = type;
2200
2201         ret = do_run_tracer_selftest(type);
2202         if (ret < 0)
2203                 goto out;
2204
2205         type->next = trace_types;
2206         trace_types = type;
2207         add_tracer_options(&global_trace, type);
2208
2209  out:
2210         mutex_unlock(&trace_types_lock);
2211
2212         if (ret || !default_bootup_tracer)
2213                 goto out_unlock;
2214
2215         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2216                 goto out_unlock;
2217
2218         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2219         /* Do we want this tracer to start on bootup? */
2220         tracing_set_tracer(&global_trace, type->name);
2221         default_bootup_tracer = NULL;
2222
2223         apply_trace_boot_options();
2224
2225         /* disable other selftests, since running this tracer will break them. */
2226         disable_tracing_selftest("running a tracer");
2227
2228  out_unlock:
2229         return ret;
2230 }
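
/*
 * A minimal registration sketch (hypothetical tracer; real tracers
 * usually also provide .reset, .start/.stop and option callbacks):
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */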
2231
2232 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2233 {
2234         struct trace_buffer *buffer = buf->buffer;
2235
2236         if (!buffer)
2237                 return;
2238
2239         ring_buffer_record_disable(buffer);
2240
2241         /* Make sure all commits have finished */
2242         synchronize_rcu();
2243         ring_buffer_reset_cpu(buffer, cpu);
2244
2245         ring_buffer_record_enable(buffer);
2246 }
2247
2248 void tracing_reset_online_cpus(struct array_buffer *buf)
2249 {
2250         struct trace_buffer *buffer = buf->buffer;
2251
2252         if (!buffer)
2253                 return;
2254
2255         ring_buffer_record_disable(buffer);
2256
2257         /* Make sure all commits have finished */
2258         synchronize_rcu();
2259
2260         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2261
2262         ring_buffer_reset_online_cpus(buffer);
2263
2264         ring_buffer_record_enable(buffer);
2265 }
2266
2267 /* Must have trace_types_lock held */
2268 void tracing_reset_all_online_cpus_unlocked(void)
2269 {
2270         struct trace_array *tr;
2271
2272         lockdep_assert_held(&trace_types_lock);
2273
2274         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2275                 if (!tr->clear_trace)
2276                         continue;
2277                 tr->clear_trace = false;
2278                 tracing_reset_online_cpus(&tr->array_buffer);
2279 #ifdef CONFIG_TRACER_MAX_TRACE
2280                 tracing_reset_online_cpus(&tr->max_buffer);
2281 #endif
2282         }
2283 }
2284
2285 void tracing_reset_all_online_cpus(void)
2286 {
2287         mutex_lock(&trace_types_lock);
2288         tracing_reset_all_online_cpus_unlocked();
2289         mutex_unlock(&trace_types_lock);
2290 }
2291
2292 /*
2293  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2294  * is the tgid last observed corresponding to pid=i.
2295  */
2296 static int *tgid_map;
2297
2298 /* The maximum valid index into tgid_map. */
2299 static size_t tgid_map_max;
2300
2301 #define SAVED_CMDLINES_DEFAULT 128
2302 #define NO_CMDLINE_MAP UINT_MAX
2303 /*
2304  * Preemption must be disabled before acquiring trace_cmdline_lock.
2305  * The various trace_arrays' max_lock must be acquired in a context
2306  * where interrupts are disabled.
2307  */
2308 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
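/*
 * saved_cmdlines_buffer is a small fixed-size comm cache:
 * map_pid_to_cmdline[] maps a (masked) pid to a slot index,
 * map_cmdline_to_pid[] records which pid currently owns each slot so
 * stale entries can be detected, and saved_cmdlines holds cmdline_num
 * slots of TASK_COMM_LEN bytes each, reused round-robin via cmdline_idx.
 */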
2309 struct saved_cmdlines_buffer {
2310         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2311         unsigned *map_cmdline_to_pid;
2312         unsigned cmdline_num;
2313         int cmdline_idx;
2314         char *saved_cmdlines;
2315 };
2316 static struct saved_cmdlines_buffer *savedcmd;
2317
2318 static inline char *get_saved_cmdlines(int idx)
2319 {
2320         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2321 }
2322
2323 static inline void set_cmdline(int idx, const char *cmdline)
2324 {
2325         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2326 }
2327
2328 static int allocate_cmdlines_buffer(unsigned int val,
2329                                     struct saved_cmdlines_buffer *s)
2330 {
2331         s->map_cmdline_to_pid = kmalloc_array(val,
2332                                               sizeof(*s->map_cmdline_to_pid),
2333                                               GFP_KERNEL);
2334         if (!s->map_cmdline_to_pid)
2335                 return -ENOMEM;
2336
2337         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2338         if (!s->saved_cmdlines) {
2339                 kfree(s->map_cmdline_to_pid);
2340                 return -ENOMEM;
2341         }
2342
2343         s->cmdline_idx = 0;
2344         s->cmdline_num = val;
2345         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2346                sizeof(s->map_pid_to_cmdline));
2347         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2348                val * sizeof(*s->map_cmdline_to_pid));
2349
2350         return 0;
2351 }
2352
2353 static int trace_create_savedcmd(void)
2354 {
2355         int ret;
2356
2357         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2358         if (!savedcmd)
2359                 return -ENOMEM;
2360
2361         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2362         if (ret < 0) {
2363                 kfree(savedcmd);
2364                 savedcmd = NULL;
2365                 return -ENOMEM;
2366         }
2367
2368         return 0;
2369 }
2370
2371 int is_tracing_stopped(void)
2372 {
2373         return global_trace.stop_count;
2374 }
2375
2376 static void tracing_start_tr(struct trace_array *tr)
2377 {
2378         struct trace_buffer *buffer;
2379         unsigned long flags;
2380
2381         if (tracing_disabled)
2382                 return;
2383
2384         raw_spin_lock_irqsave(&tr->start_lock, flags);
2385         if (--tr->stop_count) {
2386                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2387                         /* Someone screwed up their debugging */
2388                         tr->stop_count = 0;
2389                 }
2390                 goto out;
2391         }
2392
2393         /* Prevent the buffers from switching */
2394         arch_spin_lock(&tr->max_lock);
2395
2396         buffer = tr->array_buffer.buffer;
2397         if (buffer)
2398                 ring_buffer_record_enable(buffer);
2399
2400 #ifdef CONFIG_TRACER_MAX_TRACE
2401         buffer = tr->max_buffer.buffer;
2402         if (buffer)
2403                 ring_buffer_record_enable(buffer);
2404 #endif
2405
2406         arch_spin_unlock(&tr->max_lock);
2407
2408  out:
2409         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2410 }
2411
2412 /**
2413  * tracing_start - quick start of the tracer
2414  *
2415  * If tracing is enabled but was stopped by tracing_stop,
2416  * this will start the tracer back up.
2417  */
2418 void tracing_start(void)
2419
2420 {
2421         return tracing_start_tr(&global_trace);
2422 }
2423
2424 static void tracing_stop_tr(struct trace_array *tr)
2425 {
2426         struct trace_buffer *buffer;
2427         unsigned long flags;
2428
2429         raw_spin_lock_irqsave(&tr->start_lock, flags);
2430         if (tr->stop_count++)
2431                 goto out;
2432
2433         /* Prevent the buffers from switching */
2434         arch_spin_lock(&tr->max_lock);
2435
2436         buffer = tr->array_buffer.buffer;
2437         if (buffer)
2438                 ring_buffer_record_disable(buffer);
2439
2440 #ifdef CONFIG_TRACER_MAX_TRACE
2441         buffer = tr->max_buffer.buffer;
2442         if (buffer)
2443                 ring_buffer_record_disable(buffer);
2444 #endif
2445
2446         arch_spin_unlock(&tr->max_lock);
2447
2448  out:
2449         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2450 }
2451
2452 /**
2453  * tracing_stop - quick stop of the tracer
2454  *
2455  * Lightweight way to stop tracing. Use in conjunction with
2456  * tracing_start.
2457  */
2458 void tracing_stop(void)
2459 {
2460         return tracing_stop_tr(&global_trace);
2461 }
2462
2463 static int trace_save_cmdline(struct task_struct *tsk)
2464 {
2465         unsigned tpid, idx;
2466
2467         /* treat recording of idle task as a success */
2468         if (!tsk->pid)
2469                 return 1;
2470
2471         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2472
2473         /*
2474          * It's not the end of the world if we don't get
2475          * the lock, but we also don't want to spin
2476          * nor do we want to disable interrupts,
2477          * so if we miss here, then better luck next time.
2478          *
2479          * This is called from within the scheduler and wakeup paths, so
2480          * interrupts had better be disabled and the run queue lock held.
2481          */
2482         lockdep_assert_preemption_disabled();
2483         if (!arch_spin_trylock(&trace_cmdline_lock))
2484                 return 0;
2485
2486         idx = savedcmd->map_pid_to_cmdline[tpid];
2487         if (idx == NO_CMDLINE_MAP) {
2488                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2489
2490                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2491                 savedcmd->cmdline_idx = idx;
2492         }
2493
2494         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2495         set_cmdline(idx, tsk->comm);
2496
2497         arch_spin_unlock(&trace_cmdline_lock);
2498
2499         return 1;
2500 }
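
/*
 * Note on the masking above: tpid = pid & (PID_MAX_DEFAULT - 1), so
 * different pids can collide on the same map_pid_to_cmdline slot.
 * That is tolerated: __trace_find_cmdline() below cross-checks
 * map_cmdline_to_pid[] and falls back to "<...>" when the slot has
 * been reused by another task.
 */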
2501
2502 static void __trace_find_cmdline(int pid, char comm[])
2503 {
2504         unsigned map;
2505         int tpid;
2506
2507         if (!pid) {
2508                 strcpy(comm, "<idle>");
2509                 return;
2510         }
2511
2512         if (WARN_ON_ONCE(pid < 0)) {
2513                 strcpy(comm, "<XXX>");
2514                 return;
2515         }
2516
2517         tpid = pid & (PID_MAX_DEFAULT - 1);
2518         map = savedcmd->map_pid_to_cmdline[tpid];
2519         if (map != NO_CMDLINE_MAP) {
2520                 tpid = savedcmd->map_cmdline_to_pid[map];
2521                 if (tpid == pid) {
2522                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2523                         return;
2524                 }
2525         }
2526         strcpy(comm, "<...>");
2527 }
2528
2529 void trace_find_cmdline(int pid, char comm[])
2530 {
2531         preempt_disable();
2532         arch_spin_lock(&trace_cmdline_lock);
2533
2534         __trace_find_cmdline(pid, comm);
2535
2536         arch_spin_unlock(&trace_cmdline_lock);
2537         preempt_enable();
2538 }
2539
2540 static int *trace_find_tgid_ptr(int pid)
2541 {
2542         /*
2543          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2544          * if we observe a non-NULL tgid_map then we also observe the correct
2545          * tgid_map_max.
2546          */
2547         int *map = smp_load_acquire(&tgid_map);
2548
2549         if (unlikely(!map || pid > tgid_map_max))
2550                 return NULL;
2551
2552         return &map[pid];
2553 }
2554
2555 int trace_find_tgid(int pid)
2556 {
2557         int *ptr = trace_find_tgid_ptr(pid);
2558
2559         return ptr ? *ptr : 0;
2560 }
2561
2562 static int trace_save_tgid(struct task_struct *tsk)
2563 {
2564         int *ptr;
2565
2566         /* treat recording of idle task as a success */
2567         if (!tsk->pid)
2568                 return 1;
2569
2570         ptr = trace_find_tgid_ptr(tsk->pid);
2571         if (!ptr)
2572                 return 0;
2573
2574         *ptr = tsk->tgid;
2575         return 1;
2576 }
2577
2578 static bool tracing_record_taskinfo_skip(int flags)
2579 {
2580         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2581                 return true;
2582         if (!__this_cpu_read(trace_taskinfo_save))
2583                 return true;
2584         return false;
2585 }
2586
2587 /**
2588  * tracing_record_taskinfo - record the task info of a task
2589  *
2590  * @task:  task to record
2591  * @flags: TRACE_RECORD_CMDLINE for recording comm
2592  *         TRACE_RECORD_TGID for recording tgid
2593  */
2594 void tracing_record_taskinfo(struct task_struct *task, int flags)
2595 {
2596         bool done;
2597
2598         if (tracing_record_taskinfo_skip(flags))
2599                 return;
2600
2601         /*
2602          * Record as much task information as possible. If some fail, continue
2603          * to try to record the others.
2604          */
2605         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2606         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2607
2608         /* If recording any information failed, retry again soon. */
2609         if (!done)
2610                 return;
2611
2612         __this_cpu_write(trace_taskinfo_save, false);
2613 }
2614
2615 /**
2616  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2617  *
2618  * @prev: previous task during sched_switch
2619  * @next: next task during sched_switch
2620  * @flags: TRACE_RECORD_CMDLINE for recording comm
2621  *         TRACE_RECORD_TGID for recording tgid
2622  */
2623 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2624                                           struct task_struct *next, int flags)
2625 {
2626         bool done;
2627
2628         if (tracing_record_taskinfo_skip(flags))
2629                 return;
2630
2631         /*
2632          * Record as much task information as possible. If some fail, continue
2633          * to try to record the others.
2634          */
2635         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2636         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2637         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2638         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2639
2640         /* If recording any information failed, retry again soon. */
2641         if (!done)
2642                 return;
2643
2644         __this_cpu_write(trace_taskinfo_save, false);
2645 }
2646
2647 /* Helpers to record a specific task information */
2648 void tracing_record_cmdline(struct task_struct *task)
2649 {
2650         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2651 }
2652
2653 void tracing_record_tgid(struct task_struct *task)
2654 {
2655         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2656 }
2657
2658 /*
2659  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2660  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2661  * simplifies those functions and keeps them in sync.
2662  */
2663 enum print_line_t trace_handle_return(struct trace_seq *s)
2664 {
2665         return trace_seq_has_overflowed(s) ?
2666                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2667 }
2668 EXPORT_SYMBOL_GPL(trace_handle_return);
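
/*
 * Typical use in an event output callback (a sketch, with a made-up
 * field name): write into the trace_seq and let this helper choose
 * the return value:
 *
 *	trace_seq_printf(s, "count=%d\n", field->count);
 *	return trace_handle_return(s);
 */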
2669
2670 static unsigned short migration_disable_value(void)
2671 {
2672 #if defined(CONFIG_SMP)
2673         return current->migration_disabled;
2674 #else
2675         return 0;
2676 #endif
2677 }
2678
2679 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2680 {
2681         unsigned int trace_flags = irqs_status;
2682         unsigned int pc;
2683
2684         pc = preempt_count();
2685
2686         if (pc & NMI_MASK)
2687                 trace_flags |= TRACE_FLAG_NMI;
2688         if (pc & HARDIRQ_MASK)
2689                 trace_flags |= TRACE_FLAG_HARDIRQ;
2690         if (in_serving_softirq())
2691                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2692         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2693                 trace_flags |= TRACE_FLAG_BH_OFF;
2694
2695         if (tif_need_resched())
2696                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2697         if (test_preempt_need_resched())
2698                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2699         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2700                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2701 }
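
/*
 * Layout of the value returned above, as packed by the expression
 * itself: bits 0-3 hold the preemption depth (capped at 0xf),
 * bits 4-7 hold the migration-disable depth (also capped), and
 * bits 16 and up hold the TRACE_FLAG_* bits computed above.
 */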
2702
2703 struct ring_buffer_event *
2704 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2705                           int type,
2706                           unsigned long len,
2707                           unsigned int trace_ctx)
2708 {
2709         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2710 }
2711
2712 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2713 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2714 static int trace_buffered_event_ref;
2715
2716 /**
2717  * trace_buffered_event_enable - enable buffering events
2718  *
2719  * When events are being filtered, it is quicker to use a temporary
2720  * buffer to write the event data into if there's a likely chance
2721  * that it will not be committed. Discarding an event from the ring
2722  * buffer is not as fast as committing one, and is much slower than
2723  * copying the data and committing the copy.
2724  *
2725  * When an event is to be filtered, allocate per-CPU buffers to
2726  * write the event data into; if the event is filtered and discarded,
2727  * it is simply dropped, otherwise the entire data is committed
2728  * in one shot.
2729  */
2730 void trace_buffered_event_enable(void)
2731 {
2732         struct ring_buffer_event *event;
2733         struct page *page;
2734         int cpu;
2735
2736         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2737
2738         if (trace_buffered_event_ref++)
2739                 return;
2740
2741         for_each_tracing_cpu(cpu) {
2742                 page = alloc_pages_node(cpu_to_node(cpu),
2743                                         GFP_KERNEL | __GFP_NORETRY, 0);
2744                 /* This is just an optimization and can handle failures */
2745                 if (!page) {
2746                         pr_err("Failed to allocate event buffer\n");
2747                         break;
2748                 }
2749
2750                 event = page_address(page);
2751                 memset(event, 0, sizeof(*event));
2752
2753                 per_cpu(trace_buffered_event, cpu) = event;
2754
2755                 preempt_disable();
2756                 if (cpu == smp_processor_id() &&
2757                     __this_cpu_read(trace_buffered_event) !=
2758                     per_cpu(trace_buffered_event, cpu))
2759                         WARN_ON_ONCE(1);
2760                 preempt_enable();
2761         }
2762 }
2763
2764 static void enable_trace_buffered_event(void *data)
2765 {
2766         /* Probably not needed, but do it anyway */
2767         smp_rmb();
2768         this_cpu_dec(trace_buffered_event_cnt);
2769 }
2770
2771 static void disable_trace_buffered_event(void *data)
2772 {
2773         this_cpu_inc(trace_buffered_event_cnt);
2774 }
2775
2776 /**
2777  * trace_buffered_event_disable - disable buffering events
2778  *
2779  * When a filter is removed, it is faster to not use the buffered
2780  * events, and to commit directly into the ring buffer. Free up
2781  * the temp buffers when there are no more users. This requires
2782  * special synchronization with current events.
2783  */
2784 void trace_buffered_event_disable(void)
2785 {
2786         int cpu;
2787
2788         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2789
2790         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2791                 return;
2792
2793         if (--trace_buffered_event_ref)
2794                 return;
2795
2796         /* For each CPU, set the buffer as used. */
2797         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2798                          NULL, true);
2799
2800         /* Wait for all current users to finish */
2801         synchronize_rcu();
2802
2803         for_each_tracing_cpu(cpu) {
2804                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2805                 per_cpu(trace_buffered_event, cpu) = NULL;
2806         }
2807
2808         /*
2809          * Wait for all CPUs that potentially started checking whether they can
2810          * use their event buffer only after the previous synchronize_rcu() call
2811          * and still read a valid pointer from trace_buffered_event. They must
2812          * not see a cleared trace_buffered_event_cnt, else they could wrongly
2813          * decide to use the pointed-to buffer, which is now freed.
2814          */
2815         synchronize_rcu();
2816
2817         /* For each CPU, relinquish the buffer */
2818         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2819                          true);
2820 }
2821
2822 static struct trace_buffer *temp_buffer;
2823
2824 struct ring_buffer_event *
2825 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2826                           struct trace_event_file *trace_file,
2827                           int type, unsigned long len,
2828                           unsigned int trace_ctx)
2829 {
2830         struct ring_buffer_event *entry;
2831         struct trace_array *tr = trace_file->tr;
2832         int val;
2833
2834         *current_rb = tr->array_buffer.buffer;
2835
2836         if (!tr->no_filter_buffering_ref &&
2837             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2838                 preempt_disable_notrace();
2839                 /*
2840                  * Filtering is on, so try to use the per cpu buffer first.
2841                  * This buffer will simulate a ring_buffer_event,
2842                  * where the type_len is zero and the array[0] will
2843                  * hold the full length.
2844                  * (see include/linux/ring_buffer.h for details on
2845                  *  how the ring_buffer_event is structured).
2846                  *
2847                  * Using a temp buffer during filtering and copying it
2848                  * on a matched filter is quicker than writing directly
2849                  * into the ring buffer and then discarding it when
2850                  * it doesn't match. That is because the discard
2851                  * requires several atomic operations to get right.
2852                  * Copying on match and doing nothing on a failed match
2853                  * is still quicker than skipping the copy on match but
2854                  * having to discard from the ring buffer on a failed match.
2855                  */
2856                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2857                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2858
2859                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2860
2861                         /*
2862                          * Preemption is disabled, but interrupts and NMIs
2863                          * can still come in now. If that happens after
2864                          * the above increment, then it will have to go
2865                          * back to the old method of allocating the event
2866                          * on the ring buffer, and if the filter fails, it
2867                          * will have to call ring_buffer_discard_commit()
2868                          * to remove it.
2869                          *
2870                          * Need to also check the unlikely case that the
2871                          * length is bigger than the temp buffer size.
2872                          * If that happens, then the reserve is pretty much
2873                          * guaranteed to fail, as the ring buffer currently
2874                          * only allows events less than a page. But that may
2875                          * change in the future, so let the ring buffer reserve
2876                          * handle the failure in that case.
2877                          */
2878                         if (val == 1 && likely(len <= max_len)) {
2879                                 trace_event_setup(entry, type, trace_ctx);
2880                                 entry->array[0] = len;
2881                                 /* Return with preemption disabled */
2882                                 return entry;
2883                         }
2884                         this_cpu_dec(trace_buffered_event_cnt);
2885                 }
2886                 /* __trace_buffer_lock_reserve() disables preemption */
2887                 preempt_enable_notrace();
2888         }
2889
2890         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2891                                             trace_ctx);
2892         /*
2893          * If tracing is off but we have triggers enabled,
2894          * we still need to look at the event data. Use the temp_buffer
2895          * to store the trace event for the trigger to use. It's recursion
2896          * safe and will not be recorded anywhere.
2897          */
2898         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2899                 *current_rb = temp_buffer;
2900                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2901                                                     trace_ctx);
2902         }
2903         return entry;
2904 }
2905 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2906
2907 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2908 static DEFINE_MUTEX(tracepoint_printk_mutex);
2909
2910 static void output_printk(struct trace_event_buffer *fbuffer)
2911 {
2912         struct trace_event_call *event_call;
2913         struct trace_event_file *file;
2914         struct trace_event *event;
2915         unsigned long flags;
2916         struct trace_iterator *iter = tracepoint_print_iter;
2917
2918         /* We should never get here if iter is NULL */
2919         if (WARN_ON_ONCE(!iter))
2920                 return;
2921
2922         event_call = fbuffer->trace_file->event_call;
2923         if (!event_call || !event_call->event.funcs ||
2924             !event_call->event.funcs->trace)
2925                 return;
2926
2927         file = fbuffer->trace_file;
2928         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2929             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2930              !filter_match_preds(file->filter, fbuffer->entry)))
2931                 return;
2932
2933         event = &fbuffer->trace_file->event_call->event;
2934
2935         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2936         trace_seq_init(&iter->seq);
2937         iter->ent = fbuffer->entry;
2938         event_call->event.funcs->trace(iter, 0, event);
2939         trace_seq_putc(&iter->seq, 0);
2940         printk("%s", iter->seq.buffer);
2941
2942         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2943 }
2944
2945 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2946                              void *buffer, size_t *lenp,
2947                              loff_t *ppos)
2948 {
2949         int save_tracepoint_printk;
2950         int ret;
2951
2952         mutex_lock(&tracepoint_printk_mutex);
2953         save_tracepoint_printk = tracepoint_printk;
2954
2955         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2956
2957         /*
2958          * This will force exiting early, as tracepoint_printk
2959          * is always zero when tracepoint_print_iter is not allocated.
2960          */
2961         if (!tracepoint_print_iter)
2962                 tracepoint_printk = 0;
2963
2964         if (save_tracepoint_printk == tracepoint_printk)
2965                 goto out;
2966
2967         if (tracepoint_printk)
2968                 static_key_enable(&tracepoint_printk_key.key);
2969         else
2970                 static_key_disable(&tracepoint_printk_key.key);
2971
2972  out:
2973         mutex_unlock(&tracepoint_printk_mutex);
2974
2975         return ret;
2976 }
2977
2978 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2979 {
2980         enum event_trigger_type tt = ETT_NONE;
2981         struct trace_event_file *file = fbuffer->trace_file;
2982
2983         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2984                         fbuffer->entry, &tt))
2985                 goto discard;
2986
2987         if (static_key_false(&tracepoint_printk_key.key))
2988                 output_printk(fbuffer);
2989
2990         if (static_branch_unlikely(&trace_event_exports_enabled))
2991                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2992
2993         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2994                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2995
2996 discard:
2997         if (tt)
2998                 event_triggers_post_call(file, tt);
2999
3000 }
3001 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3002
3003 /*
3004  * Skip 3:
3005  *
3006  *   trace_buffer_unlock_commit_regs()
3007  *   trace_event_buffer_commit()
3008  *   trace_event_raw_event_xxx()
3009  */
3010 # define STACK_SKIP 3
3011
3012 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3013                                      struct trace_buffer *buffer,
3014                                      struct ring_buffer_event *event,
3015                                      unsigned int trace_ctx,
3016                                      struct pt_regs *regs)
3017 {
3018         __buffer_unlock_commit(buffer, event);
3019
3020         /*
3021          * If regs is not set, then skip the necessary functions.
3022          * Note, we can still get here via blktrace, wakeup tracer
3023          * and mmiotrace, but that's ok if they lose a function or
3024          * two. They are not that meaningful.
3025          */
3026         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3027         ftrace_trace_userstack(tr, buffer, trace_ctx);
3028 }
3029
3030 /*
3031  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3032  */
3033 void
3034 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3035                                    struct ring_buffer_event *event)
3036 {
3037         __buffer_unlock_commit(buffer, event);
3038 }
3039
3040 void
3041 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3042                parent_ip, unsigned int trace_ctx)
3043 {
3044         struct trace_event_call *call = &event_function;
3045         struct trace_buffer *buffer = tr->array_buffer.buffer;
3046         struct ring_buffer_event *event;
3047         struct ftrace_entry *entry;
3048
3049         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3050                                             trace_ctx);
3051         if (!event)
3052                 return;
3053         entry   = ring_buffer_event_data(event);
3054         entry->ip                       = ip;
3055         entry->parent_ip                = parent_ip;
3056
3057         if (!call_filter_check_discard(call, entry, buffer, event)) {
3058                 if (static_branch_unlikely(&trace_function_exports_enabled))
3059                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3060                 __buffer_unlock_commit(buffer, event);
3061         }
3062 }
3063
3064 #ifdef CONFIG_STACKTRACE
3065
3066 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3067 #define FTRACE_KSTACK_NESTING   4
3068
3069 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3070
3071 struct ftrace_stack {
3072         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3073 };
3074
3075
3076 struct ftrace_stacks {
3077         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3078 };
3079
3080 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3081 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3082
3083 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3084                                  unsigned int trace_ctx,
3085                                  int skip, struct pt_regs *regs)
3086 {
3087         struct trace_event_call *call = &event_kernel_stack;
3088         struct ring_buffer_event *event;
3089         unsigned int size, nr_entries;
3090         struct ftrace_stack *fstack;
3091         struct stack_entry *entry;
3092         int stackidx;
3093
3094         /*
3095          * Add one, for this function and the call to stack_trace_save().
3096          * If regs is set, then these functions will not be in the way.
3097          */
3098 #ifndef CONFIG_UNWINDER_ORC
3099         if (!regs)
3100                 skip++;
3101 #endif
3102
3103         preempt_disable_notrace();
3104
3105         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3106
3107         /* This should never happen. If it does, yell once and skip */
3108         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3109                 goto out;
3110
3111         /*
3112          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3113          * interrupt will either see the value pre-increment or
3114          * post-increment. If the interrupt happens pre-increment, it will
3115          * have restored the counter when it returns. We just need a barrier to
3116          * keep gcc from moving things around.
3117          */
3118         barrier();
3119
3120         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3121         size = ARRAY_SIZE(fstack->calls);
3122
3123         if (regs) {
3124                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3125                                                    size, skip);
3126         } else {
3127                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3128         }
3129
3130         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3131                                     struct_size(entry, caller, nr_entries),
3132                                     trace_ctx);
3133         if (!event)
3134                 goto out;
3135         entry = ring_buffer_event_data(event);
3136
3137         entry->size = nr_entries;
3138         memcpy(&entry->caller, fstack->calls,
3139                flex_array_size(entry, caller, nr_entries));
3140
3141         if (!call_filter_check_discard(call, entry, buffer, event))
3142                 __buffer_unlock_commit(buffer, event);
3143
3144  out:
3145         /* Again, don't let gcc optimize things here */
3146         barrier();
3147         __this_cpu_dec(ftrace_stack_reserve);
3148         preempt_enable_notrace();
3149
3150 }
3151
3152 static inline void ftrace_trace_stack(struct trace_array *tr,
3153                                       struct trace_buffer *buffer,
3154                                       unsigned int trace_ctx,
3155                                       int skip, struct pt_regs *regs)
3156 {
3157         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3158                 return;
3159
3160         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3161 }
3162
3163 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3164                    int skip)
3165 {
3166         struct trace_buffer *buffer = tr->array_buffer.buffer;
3167
3168         if (rcu_is_watching()) {
3169                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3170                 return;
3171         }
3172
3173         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3174                 return;
3175
3176         /*
3177          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3178          * but if the above rcu_is_watching() failed, then the NMI
3179          * triggered someplace critical, and ct_irq_enter() should
3180          * not be called from NMI.
3181          */
3182         if (unlikely(in_nmi()))
3183                 return;
3184
3185         ct_irq_enter_irqson();
3186         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3187         ct_irq_exit_irqson();
3188 }
3189
3190 /**
3191  * trace_dump_stack - record a stack back trace in the trace buffer
3192  * @skip: Number of functions to skip (helper handlers)
3193  */
3194 void trace_dump_stack(int skip)
3195 {
3196         if (tracing_disabled || tracing_selftest_running)
3197                 return;
3198
3199 #ifndef CONFIG_UNWINDER_ORC
3200         /* Skip 1 to skip this function. */
3201         skip++;
3202 #endif
3203         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3204                              tracing_gen_ctx(), skip, NULL);
3205 }
3206 EXPORT_SYMBOL_GPL(trace_dump_stack);
3207
3208 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3209 static DEFINE_PER_CPU(int, user_stack_count);
3210
3211 static void
3212 ftrace_trace_userstack(struct trace_array *tr,
3213                        struct trace_buffer *buffer, unsigned int trace_ctx)
3214 {
3215         struct trace_event_call *call = &event_user_stack;
3216         struct ring_buffer_event *event;
3217         struct userstack_entry *entry;
3218
3219         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3220                 return;
3221
3222         /*
3223          * NMIs cannot handle page faults, even with fixups.
3224          * Saving the user stack can (and often does) fault.
3225          */
3226         if (unlikely(in_nmi()))
3227                 return;
3228
3229         /*
3230          * prevent recursion, since the user stack tracing may
3231          * trigger other kernel events.
3232          */
3233         preempt_disable();
3234         if (__this_cpu_read(user_stack_count))
3235                 goto out;
3236
3237         __this_cpu_inc(user_stack_count);
3238
3239         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3240                                             sizeof(*entry), trace_ctx);
3241         if (!event)
3242                 goto out_drop_count;
3243         entry   = ring_buffer_event_data(event);
3244
3245         entry->tgid             = current->tgid;
3246         memset(&entry->caller, 0, sizeof(entry->caller));
3247
3248         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3249         if (!call_filter_check_discard(call, entry, buffer, event))
3250                 __buffer_unlock_commit(buffer, event);
3251
3252  out_drop_count:
3253         __this_cpu_dec(user_stack_count);
3254  out:
3255         preempt_enable();
3256 }
3257 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3258 static void ftrace_trace_userstack(struct trace_array *tr,
3259                                    struct trace_buffer *buffer,
3260                                    unsigned int trace_ctx)
3261 {
3262 }
3263 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3264
3265 #endif /* CONFIG_STACKTRACE */
3266
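/*
 * The FUNC_REPEATS entry stores the timestamp delta split across two
 * 32-bit fields: bottom_delta_ts holds the low 32 bits of @delta and
 * top_delta_ts holds the high 32 bits.
 */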
3267 static inline void
3268 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3269                           unsigned long long delta)
3270 {
3271         entry->bottom_delta_ts = delta & U32_MAX;
3272         entry->top_delta_ts = (delta >> 32);
3273 }
3274
3275 void trace_last_func_repeats(struct trace_array *tr,
3276                              struct trace_func_repeats *last_info,
3277                              unsigned int trace_ctx)
3278 {
3279         struct trace_buffer *buffer = tr->array_buffer.buffer;
3280         struct func_repeats_entry *entry;
3281         struct ring_buffer_event *event;
3282         u64 delta;
3283
3284         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3285                                             sizeof(*entry), trace_ctx);
3286         if (!event)
3287                 return;
3288
3289         delta = ring_buffer_event_time_stamp(buffer, event) -
3290                 last_info->ts_last_call;
3291
3292         entry = ring_buffer_event_data(event);
3293         entry->ip = last_info->ip;
3294         entry->parent_ip = last_info->parent_ip;
3295         entry->count = last_info->count;
3296         func_repeats_set_delta_ts(entry, delta);
3297
3298         __buffer_unlock_commit(buffer, event);
3299 }
3300
3301 /* created for use with alloc_percpu */
3302 struct trace_buffer_struct {
3303         int nesting;
3304         char buffer[4][TRACE_BUF_SIZE];
3305 };
3306
3307 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3308
3309 /*
3310  * This allows for lockless recording.  If we're nested too deeply, then
3311  * this returns NULL.
3312  */
3313 static char *get_trace_buf(void)
3314 {
3315         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3316
3317         if (!trace_percpu_buffer || buffer->nesting >= 4)
3318                 return NULL;
3319
3320         buffer->nesting++;
3321
3322         /* Interrupts must see nesting incremented before we use the buffer */
3323         barrier();
3324         return &buffer->buffer[buffer->nesting - 1][0];
3325 }
3326
3327 static void put_trace_buf(void)
3328 {
3329         /* Don't let the decrement of nesting leak before this */
3330         barrier();
3331         this_cpu_dec(trace_percpu_buffer->nesting);
3332 }
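
/*
 * Typical usage of the per-CPU trace_printk buffers (an illustrative
 * sketch, mirroring what trace_vbprintk() below does):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format at most TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */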
3333
3334 static int alloc_percpu_trace_buffer(void)
3335 {
3336         struct trace_buffer_struct __percpu *buffers;
3337
3338         if (trace_percpu_buffer)
3339                 return 0;
3340
3341         buffers = alloc_percpu(struct trace_buffer_struct);
3342         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3343                 return -ENOMEM;
3344
3345         trace_percpu_buffer = buffers;
3346         return 0;
3347 }
3348
3349 static int buffers_allocated;
3350
3351 void trace_printk_init_buffers(void)
3352 {
3353         if (buffers_allocated)
3354                 return;
3355
3356         if (alloc_percpu_trace_buffer())
3357                 return;
3358
3359         /* trace_printk() is for debug use only. Don't use it in production. */
3360
3361         pr_warn("\n");
3362         pr_warn("**********************************************************\n");
3363         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3364         pr_warn("**                                                      **\n");
3365         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3366         pr_warn("**                                                      **\n");
3367         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3368         pr_warn("** unsafe for production use.                           **\n");
3369         pr_warn("**                                                      **\n");
3370         pr_warn("** If you see this message and you are not debugging    **\n");
3371         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3372         pr_warn("**                                                      **\n");
3373         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3374         pr_warn("**********************************************************\n");
3375
3376         /* Expand the buffers to set size */
3377         tracing_update_buffers();
3378
3379         buffers_allocated = 1;
3380
3381         /*
3382          * trace_printk_init_buffers() can be called by modules.
3383          * If that happens, then we need to start cmdline recording
3384          * directly here. If the global_trace.buffer is already
3385          * allocated here, then this was called by module code.
3386          */
3387         if (global_trace.array_buffer.buffer)
3388                 tracing_start_cmdline_record();
3389 }
3390 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3391
3392 void trace_printk_start_comm(void)
3393 {
3394         /* Start tracing comms if trace printk is set */
3395         if (!buffers_allocated)
3396                 return;
3397         tracing_start_cmdline_record();
3398 }
3399
3400 static void trace_printk_start_stop_comm(int enabled)
3401 {
3402         if (!buffers_allocated)
3403                 return;
3404
3405         if (enabled)
3406                 tracing_start_cmdline_record();
3407         else
3408                 tracing_stop_cmdline_record();
3409 }
3410
3411 /**
3412  * trace_vbprintk - write binary msg to tracing buffer
3413  * @ip:    The address of the caller
3414  * @fmt:   The string format to write to the buffer
3415  * @args:  Arguments for @fmt
3416  */
3417 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3418 {
3419         struct trace_event_call *call = &event_bprint;
3420         struct ring_buffer_event *event;
3421         struct trace_buffer *buffer;
3422         struct trace_array *tr = &global_trace;
3423         struct bprint_entry *entry;
3424         unsigned int trace_ctx;
3425         char *tbuffer;
3426         int len = 0, size;
3427
3428         if (unlikely(tracing_selftest_running || tracing_disabled))
3429                 return 0;
3430
3431         /* Don't pollute graph traces with trace_vprintk internals */
3432         pause_graph_tracing();
3433
3434         trace_ctx = tracing_gen_ctx();
3435         preempt_disable_notrace();
3436
3437         tbuffer = get_trace_buf();
3438         if (!tbuffer) {
3439                 len = 0;
3440                 goto out_nobuffer;
3441         }
3442
3443         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3444
3445         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3446                 goto out_put;
3447
3448         size = sizeof(*entry) + sizeof(u32) * len;
3449         buffer = tr->array_buffer.buffer;
3450         ring_buffer_nest_start(buffer);
3451         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3452                                             trace_ctx);
3453         if (!event)
3454                 goto out;
3455         entry = ring_buffer_event_data(event);
3456         entry->ip                       = ip;
3457         entry->fmt                      = fmt;
3458
3459         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3460         if (!call_filter_check_discard(call, entry, buffer, event)) {
3461                 __buffer_unlock_commit(buffer, event);
3462                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3463         }
3464
3465 out:
3466         ring_buffer_nest_end(buffer);
3467 out_put:
3468         put_trace_buf();
3469
3470 out_nobuffer:
3471         preempt_enable_notrace();
3472         unpause_graph_tracing();
3473
3474         return len;
3475 }
3476 EXPORT_SYMBOL_GPL(trace_vbprintk);
3477
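/*
 * Write a plain-text printk style message into @buffer as a TRACE_PRINT
 * event. The string is formatted into a per-CPU scratch buffer first and
 * then copied, including the terminating NUL, into the event.
 */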
3478 __printf(3, 0)
3479 static int
3480 __trace_array_vprintk(struct trace_buffer *buffer,
3481                       unsigned long ip, const char *fmt, va_list args)
3482 {
3483         struct trace_event_call *call = &event_print;
3484         struct ring_buffer_event *event;
3485         int len = 0, size;
3486         struct print_entry *entry;
3487         unsigned int trace_ctx;
3488         char *tbuffer;
3489
3490         if (tracing_disabled)
3491                 return 0;
3492
3493         /* Don't pollute graph traces with trace_vprintk internals */
3494         pause_graph_tracing();
3495
3496         trace_ctx = tracing_gen_ctx();
3497         preempt_disable_notrace();
3498
3499
3500         tbuffer = get_trace_buf();
3501         if (!tbuffer) {
3502                 len = 0;
3503                 goto out_nobuffer;
3504         }
3505
3506         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3507
3508         size = sizeof(*entry) + len + 1;
3509         ring_buffer_nest_start(buffer);
3510         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3511                                             trace_ctx);
3512         if (!event)
3513                 goto out;
3514         entry = ring_buffer_event_data(event);
3515         entry->ip = ip;
3516
3517         memcpy(&entry->buf, tbuffer, len + 1);
3518         if (!call_filter_check_discard(call, entry, buffer, event)) {
3519                 __buffer_unlock_commit(buffer, event);
3520                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3521         }
3522
3523 out:
3524         ring_buffer_nest_end(buffer);
3525         put_trace_buf();
3526
3527 out_nobuffer:
3528         preempt_enable_notrace();
3529         unpause_graph_tracing();
3530
3531         return len;
3532 }
3533
3534 __printf(3, 0)
3535 int trace_array_vprintk(struct trace_array *tr,
3536                         unsigned long ip, const char *fmt, va_list args)
3537 {
3538         if (tracing_selftest_running && tr == &global_trace)
3539                 return 0;
3540
3541         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3542 }
3543
3544 /**
3545  * trace_array_printk - Print a message to a specific instance
3546  * @tr: The instance trace_array descriptor
3547  * @ip: The instruction pointer that this is called from.
3548  * @fmt: The format to print (printf format)
3549  *
3550  * If a subsystem sets up its own instance, it has the right to
3551  * printk strings into its tracing instance buffer using this
3552  * function. Note, this function will not write into the top level
3553  * buffer (use trace_printk() for that), as the top level buffer
3554  * should only contain events that can be individually disabled.
3555  * trace_printk() is only used for debugging a kernel, and should
3556  * never be incorporated into normal use.
3557  *
3558  * trace_array_printk() can be used, as it will not add noise to the
3559  * top level tracing buffer.
3560  *
3561  * Note, trace_array_init_printk() must be called on @tr before this
3562  * can be used.
3563  */
3564 __printf(3, 0)
3565 int trace_array_printk(struct trace_array *tr,
3566                        unsigned long ip, const char *fmt, ...)
3567 {
3568         int ret;
3569         va_list ap;
3570
3571         if (!tr)
3572                 return -ENOENT;
3573
3574         /* This is only allowed for created instances */
3575         if (tr == &global_trace)
3576                 return 0;
3577
3578         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3579                 return 0;
3580
3581         va_start(ap, fmt);
3582         ret = trace_array_vprintk(tr, ip, fmt, ap);
3583         va_end(ap);
3584         return ret;
3585 }
3586 EXPORT_SYMBOL_GPL(trace_array_printk);
3587
3588 /**
3589  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3590  * @tr: The trace array to initialize the buffers for
3591  *
3592  * As trace_array_printk() only writes into instances, calls to it are
3593  * OK to have in the kernel (unlike trace_printk()). This needs to be
3594  * called before trace_array_printk() can be used on a trace_array.
3595  */
3596 int trace_array_init_printk(struct trace_array *tr)
3597 {
3598         if (!tr)
3599                 return -ENOENT;
3600
3601         /* This is only allowed for created instances */
3602         if (tr == &global_trace)
3603                 return -EINVAL;
3604
3605         return alloc_percpu_trace_buffer();
3606 }
3607 EXPORT_SYMBOL_GPL(trace_array_init_printk);
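
/*
 * Example usage (illustrative only; "my_tr" stands for an instance the
 * caller created, e.g. via trace_array_get_by_name()):
 *
 *	if (!trace_array_init_printk(my_tr))
 *		trace_array_printk(my_tr, _THIS_IP_, "val=%d\n", val);
 */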
3608
3609 __printf(3, 4)
3610 int trace_array_printk_buf(struct trace_buffer *buffer,
3611                            unsigned long ip, const char *fmt, ...)
3612 {
3613         int ret;
3614         va_list ap;
3615
3616         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3617                 return 0;
3618
3619         va_start(ap, fmt);
3620         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3621         va_end(ap);
3622         return ret;
3623 }
3624
3625 __printf(2, 0)
3626 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3627 {
3628         return trace_array_vprintk(&global_trace, ip, fmt, args);
3629 }
3630 EXPORT_SYMBOL_GPL(trace_vprintk);
3631
3632 static void trace_iterator_increment(struct trace_iterator *iter)
3633 {
3634         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3635
3636         iter->idx++;
3637         if (buf_iter)
3638                 ring_buffer_iter_advance(buf_iter);
3639 }
3640
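/*
 * Peek at the next entry on @cpu without consuming it. On success the
 * entry's data is returned, *ts is set to its timestamp, *lost_events
 * (if non-NULL) reports dropped events, and iter->ent_size is updated.
 */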
3641 static struct trace_entry *
3642 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3643                 unsigned long *lost_events)
3644 {
3645         struct ring_buffer_event *event;
3646         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3647
3648         if (buf_iter) {
3649                 event = ring_buffer_iter_peek(buf_iter, ts);
3650                 if (lost_events)
3651                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3652                                 (unsigned long)-1 : 0;
3653         } else {
3654                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3655                                          lost_events);
3656         }
3657
3658         if (event) {
3659                 iter->ent_size = ring_buffer_event_length(event);
3660                 return ring_buffer_event_data(event);
3661         }
3662         iter->ent_size = 0;
3663         return NULL;
3664 }
3665
3666 static struct trace_entry *
3667 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3668                   unsigned long *missing_events, u64 *ent_ts)
3669 {
3670         struct trace_buffer *buffer = iter->array_buffer->buffer;
3671         struct trace_entry *ent, *next = NULL;
3672         unsigned long lost_events = 0, next_lost = 0;
3673         int cpu_file = iter->cpu_file;
3674         u64 next_ts = 0, ts;
3675         int next_cpu = -1;
3676         int next_size = 0;
3677         int cpu;
3678
3679         /*
3680          * If we are in a per_cpu trace file, don't bother iterating over
3681          * all CPUs; peek at that CPU directly.
3682          */
3683         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3684                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3685                         return NULL;
3686                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3687                 if (ent_cpu)
3688                         *ent_cpu = cpu_file;
3689
3690                 return ent;
3691         }
3692
3693         for_each_tracing_cpu(cpu) {
3694
3695                 if (ring_buffer_empty_cpu(buffer, cpu))
3696                         continue;
3697
3698                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3699
3700                 /*
3701                  * Pick the entry with the smallest timestamp:
3702                  */
3703                 if (ent && (!next || ts < next_ts)) {
3704                         next = ent;
3705                         next_cpu = cpu;
3706                         next_ts = ts;
3707                         next_lost = lost_events;
3708                         next_size = iter->ent_size;
3709                 }
3710         }
3711
3712         iter->ent_size = next_size;
3713
3714         if (ent_cpu)
3715                 *ent_cpu = next_cpu;
3716
3717         if (ent_ts)
3718                 *ent_ts = next_ts;
3719
3720         if (missing_events)
3721                 *missing_events = next_lost;
3722
3723         return next;
3724 }
3725
3726 #define STATIC_FMT_BUF_SIZE     128
3727 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3728
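/*
 * Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes. Returns the new buffer,
 * or NULL if the buffer must not be reallocated (tp_printk, or when
 * iter->fmt is the static buffer used by ftrace_dump()).
 */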
3729 char *trace_iter_expand_format(struct trace_iterator *iter)
3730 {
3731         char *tmp;
3732
3733         /*
3734          * iter->tr is NULL when used with tp_printk, in which case this
3735          * can be called where it is not safe to call krealloc().
3736          */
3737         if (!iter->tr || iter->fmt == static_fmt_buf)
3738                 return NULL;
3739
3740         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3741                        GFP_KERNEL);
3742         if (tmp) {
3743                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3744                 iter->fmt = tmp;
3745         }
3746
3747         return tmp;
3748 }
3749
3750 /* Returns true if the string is safe to dereference from an event */
3751 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3752                            bool star, int len)
3753 {
3754         unsigned long addr = (unsigned long)str;
3755         struct trace_event *trace_event;
3756         struct trace_event_call *event;
3757
3758         /* Ignore strings with no length */
3759         if (star && !len)
3760                 return true;
3761
3762         /* OK if part of the event data */
3763         if ((addr >= (unsigned long)iter->ent) &&
3764             (addr < (unsigned long)iter->ent + iter->ent_size))
3765                 return true;
3766
3767         /* OK if part of the temp seq buffer */
3768         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3769             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3770                 return true;
3771
3772         /* Core rodata can not be freed */
3773         if (is_kernel_rodata(addr))
3774                 return true;
3775
3776         if (trace_is_tracepoint_string(str))
3777                 return true;
3778
3779         /*
3780          * Now this could be a module event, referencing core module
3781          * data, which is OK.
3782          */
3783         if (!iter->ent)
3784                 return false;
3785
3786         trace_event = ftrace_find_event(iter->ent->type);
3787         if (!trace_event)
3788                 return false;
3789
3790         event = container_of(trace_event, struct trace_event_call, event);
3791         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3792                 return false;
3793
3794         /* Would rather have rodata, but this will suffice */
3795         if (within_module_core(addr, event->module))
3796                 return true;
3797
3798         return false;
3799 }
3800
3801 static const char *show_buffer(struct trace_seq *s)
3802 {
3803         struct seq_buf *seq = &s->seq;
3804
3805         seq_buf_terminate(seq);
3806
3807         return seq->buffer;
3808 }
3809
3810 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3811
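/*
 * Probe how this architecture passes a va_list. test_can_verify() hands
 * in two integer arguments, but the "%d" format below makes vsnprintf()
 * consume only the first one. If the va_list is passed by reference,
 * the following va_arg() returns the second argument (non-zero); if it
 * is passed by value, va_arg() re-reads the first argument (zero) and
 * the verifier gets disabled.
 */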
3812 static int test_can_verify_check(const char *fmt, ...)
3813 {
3814         char buf[16];
3815         va_list ap;
3816         int ret;
3817
3818         /*
3819          * The verifier depends on vsnprintf() modifying the va_list
3820          * passed to it, i.e. on the va_list being passed by reference.
3821          * Some architectures (like x86_32) pass it by value, which means
3822          * that vsnprintf() does not modify the caller's va_list, and the
3823          * verifier would then need to be able to understand all the
3824          * values that vsnprintf can consume. If it is passed by value,
3825          * the verifier is disabled.
3826          */
3827         va_start(ap, fmt);
3828         vsnprintf(buf, 16, "%d", ap);
3829         ret = va_arg(ap, int);
3830         va_end(ap);
3831
3832         return ret;
3833 }
3834
3835 static void test_can_verify(void)
3836 {
3837         if (!test_can_verify_check("%d %d", 0, 1)) {
3838                 pr_info("trace event string verifier disabled\n");
3839                 static_branch_inc(&trace_no_verify);
3840         }
3841 }
3842
3843 /**
3844  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3845  * @iter: The iterator that holds the seq buffer and the event being printed
3846  * @fmt: The format used to print the event
3847  * @ap: The va_list holding the data to print from @fmt.
3848  *
3849  * This writes the data into the @iter->seq buffer using the data from
3850  * @fmt and @ap. If the format has a %s, then the source of the string
3851  * is examined to make sure it is safe to print, otherwise it will
3852  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3853  * pointer.
3854  */
3855 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3856                          va_list ap)
3857 {
3858         const char *p = fmt;
3859         const char *str;
3860         int i, j;
3861
3862         if (WARN_ON_ONCE(!fmt))
3863                 return;
3864
3865         if (static_branch_unlikely(&trace_no_verify))
3866                 goto print;
3867
3868         /* Don't bother checking when doing a ftrace_dump() */
3869         if (iter->fmt == static_fmt_buf)
3870                 goto print;
3871
3872         while (*p) {
3873                 bool star = false;
3874                 int len = 0;
3875
3876                 j = 0;
3877
3878                 /* We only care about %s and variants */
3879                 for (i = 0; p[i]; i++) {
3880                         if (i + 1 >= iter->fmt_size) {
3881                                 /*
3882                                  * If we can't expand the copy buffer,
3883                                  * just print it.
3884                                  */
3885                                 if (!trace_iter_expand_format(iter))
3886                                         goto print;
3887                         }
3888
3889                         if (p[i] == '\\' && p[i+1]) {
3890                                 i++;
3891                                 continue;
3892                         }
3893                         if (p[i] == '%') {
3894                                 /* Need to test cases like %08.*s */
3895                                 for (j = 1; p[i+j]; j++) {
3896                                         if (isdigit(p[i+j]) ||
3897                                             p[i+j] == '.')
3898                                                 continue;
3899                                         if (p[i+j] == '*') {
3900                                                 star = true;
3901                                                 continue;
3902                                         }
3903                                         break;
3904                                 }
3905                                 if (p[i+j] == 's')
3906                                         break;
3907                                 star = false;
3908                         }
3909                         j = 0;
3910                 }
3911                 /* If no %s found then just print normally */
3912                 if (!p[i])
3913                         break;
3914
3915                 /* Copy up to the %s, and print that */
3916                 strncpy(iter->fmt, p, i);
3917                 iter->fmt[i] = '\0';
3918                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3919
3920                 /*
3921                  * If iter->seq is full, the above call no longer guarantees
3922                  * that ap is in sync with fmt processing, and further calls
3923                  * to va_arg() can return wrong positional arguments.
3924                  *
3925                  * Ensure that ap is no longer used in this case.
3926                  */
3927                 if (iter->seq.full) {
3928                         p = "";
3929                         break;
3930                 }
3931
3932                 if (star)
3933                         len = va_arg(ap, int);
3934
3935                 /* The ap now points to the string data of the %s */
3936                 str = va_arg(ap, const char *);
3937
3938                 /*
3939                  * If you hit this warning, it is likely that the
3940                  * trace event in question used %s on a string that
3941                  * was saved at the time of the event, but may not be
3942                  * around when the trace is read. Use __string(),
3943                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3944                  * instead. See samples/trace_events/trace-events-sample.h
3945                  * for reference.
3946                  */
3947                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3948                               "fmt: '%s' current_buffer: '%s'",
3949                               fmt, show_buffer(&iter->seq))) {
3950                         int ret;
3951
3952                         /* Try to safely read the string */
3953                         if (star) {
3954                                 if (len + 1 > iter->fmt_size)
3955                                         len = iter->fmt_size - 1;
3956                                 if (len < 0)
3957                                         len = 0;
3958                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3959                                 iter->fmt[len] = 0;
3960                                 star = false;
3961                         } else {
3962                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3963                                                                   iter->fmt_size);
3964                         }
3965                         if (ret < 0)
3966                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3967                         else
3968                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3969                                                  str, iter->fmt);
3970                         str = "[UNSAFE-MEMORY]";
3971                         strcpy(iter->fmt, "%s");
3972                 } else {
3973                         strncpy(iter->fmt, p + i, j + 1);
3974                         iter->fmt[j+1] = '\0';
3975                 }
3976                 if (star)
3977                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3978                 else
3979                         trace_seq_printf(&iter->seq, iter->fmt, str);
3980
3981                 p += i + j + 1;
3982         }
3983  print:
3984         if (*p)
3985                 trace_seq_vprintf(&iter->seq, p, ap);
3986 }
3987
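/*
 * Return the format to use when printing an event. When the instance's
 * "hash-ptr" trace option is cleared, a copy is made in iter->fmt with
 * each plain "%p" rewritten to "%px" so that raw pointer values are
 * printed; otherwise (or if there is no trace_array) @fmt is returned
 * unchanged.
 */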
3988 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3989 {
3990         const char *p, *new_fmt;
3991         char *q;
3992
3993         if (WARN_ON_ONCE(!fmt))
3994                 return fmt;
3995
3996         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3997                 return fmt;
3998
3999         p = fmt;
4000         new_fmt = q = iter->fmt;
4001         while (*p) {
4002                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4003                         if (!trace_iter_expand_format(iter))
4004                                 return fmt;
4005
4006                         q += iter->fmt - new_fmt;
4007                         new_fmt = iter->fmt;
4008                 }
4009
4010                 *q++ = *p++;
4011
4012                 /* Replace %p with %px */
4013                 if (p[-1] == '%') {
4014                         if (p[0] == '%') {
4015                                 *q++ = *p++;
4016                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4017                                 *q++ = *p++;
4018                                 *q++ = 'x';
4019                         }
4020                 }
4021         }
4022         *q = '\0';
4023
4024         return new_fmt;
4025 }
4026
4027 #define STATIC_TEMP_BUF_SIZE    128
4028 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4029
4030 /* Find the next real entry, without updating the iterator itself */
4031 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4032                                           int *ent_cpu, u64 *ent_ts)
4033 {
4034         /* __find_next_entry will reset ent_size */
4035         int ent_size = iter->ent_size;
4036         struct trace_entry *entry;
4037
4038         /*
4039          * If called from ftrace_dump(), then the iter->temp buffer
4040          * will be the static_temp_buf and not created from kmalloc.
4041          * If the entry size is greater than the buffer, we cannot
4042          * save it. Just return NULL in that case. This is only
4043          * used to add markers when two consecutive events' time
4044          * stamps have a large delta. See trace_print_lat_context().
4045          */
4046         if (iter->temp == static_temp_buf &&
4047             STATIC_TEMP_BUF_SIZE < ent_size)
4048                 return NULL;
4049
4050         /*
4051          * __find_next_entry() may call peek_next_entry(), which may
4052          * call ring_buffer_peek() and make the contents of iter->ent
4053          * undefined. Need to copy iter->ent now.
4054          */
4055         if (iter->ent && iter->ent != iter->temp) {
4056                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4057                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4058                         void *temp;
4059                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4060                         if (!temp)
4061                                 return NULL;
4062                         kfree(iter->temp);
4063                         iter->temp = temp;
4064                         iter->temp_size = iter->ent_size;
4065                 }
4066                 memcpy(iter->temp, iter->ent, iter->ent_size);
4067                 iter->ent = iter->temp;
4068         }
4069         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4070         /* Put back the original ent_size */
4071         iter->ent_size = ent_size;
4072
4073         return entry;
4074 }
4075
4076 /* Find the next real entry, and increment the iterator to the next entry */
4077 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4078 {
4079         iter->ent = __find_next_entry(iter, &iter->cpu,
4080                                       &iter->lost_events, &iter->ts);
4081
4082         if (iter->ent)
4083                 trace_iterator_increment(iter);
4084
4085         return iter->ent ? iter : NULL;
4086 }
4087
4088 static void trace_consume(struct trace_iterator *iter)
4089 {
4090         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4091                             &iter->lost_events);
4092 }
4093
4094 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4095 {
4096         struct trace_iterator *iter = m->private;
4097         int i = (int)*pos;
4098         void *ent;
4099
4100         WARN_ON_ONCE(iter->leftover);
4101
4102         (*pos)++;
4103
4104         /* can't go backwards */
4105         if (iter->idx > i)
4106                 return NULL;
4107
4108         if (iter->idx < 0)
4109                 ent = trace_find_next_entry_inc(iter);
4110         else
4111                 ent = iter;
4112
4113         while (ent && iter->idx < i)
4114                 ent = trace_find_next_entry_inc(iter);
4115
4116         iter->pos = *pos;
4117
4118         return ent;
4119 }
4120
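/*
 * Reset the ring buffer iterator for @cpu and skip over any entries with
 * a timestamp before the buffer's time_start, recording how many were
 * skipped in the per-CPU skipped_entries count.
 */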
4121 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4122 {
4123         struct ring_buffer_iter *buf_iter;
4124         unsigned long entries = 0;
4125         u64 ts;
4126
4127         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4128
4129         buf_iter = trace_buffer_iter(iter, cpu);
4130         if (!buf_iter)
4131                 return;
4132
4133         ring_buffer_iter_reset(buf_iter);
4134
4135         /*
4136          * With the max latency tracers, it is possible that a reset
4137          * never took place on a CPU. This is evident from the timestamp
4138          * being before the start of the buffer.
4139          */
4140         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4141                 if (ts >= iter->array_buffer->time_start)
4142                         break;
4143                 entries++;
4144                 ring_buffer_iter_advance(buf_iter);
4145         }
4146
4147         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4148 }
4149
4150 /*
4151  * The current tracer is copied to avoid taking a global lock
4152  * all around.
4153  */
4154 static void *s_start(struct seq_file *m, loff_t *pos)
4155 {
4156         struct trace_iterator *iter = m->private;
4157         struct trace_array *tr = iter->tr;
4158         int cpu_file = iter->cpu_file;
4159         void *p = NULL;
4160         loff_t l = 0;
4161         int cpu;
4162
4163         mutex_lock(&trace_types_lock);
4164         if (unlikely(tr->current_trace != iter->trace)) {
4165                 /* Close iter->trace before switching to the new current tracer */
4166                 if (iter->trace->close)
4167                         iter->trace->close(iter);
4168                 iter->trace = tr->current_trace;
4169                 /* Reopen the new current tracer */
4170                 if (iter->trace->open)
4171                         iter->trace->open(iter);
4172         }
4173         mutex_unlock(&trace_types_lock);
4174
4175 #ifdef CONFIG_TRACER_MAX_TRACE
4176         if (iter->snapshot && iter->trace->use_max_tr)
4177                 return ERR_PTR(-EBUSY);
4178 #endif
4179
4180         if (*pos != iter->pos) {
4181                 iter->ent = NULL;
4182                 iter->cpu = 0;
4183                 iter->idx = -1;
4184
4185                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4186                         for_each_tracing_cpu(cpu)
4187                                 tracing_iter_reset(iter, cpu);
4188                 } else
4189                         tracing_iter_reset(iter, cpu_file);
4190
4191                 iter->leftover = 0;
4192                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4193                         ;
4194
4195         } else {
4196                 /*
4197                  * If we overflowed the seq_file before, then we want
4198                  * to just reuse the trace_seq buffer again.
4199                  */
4200                 if (iter->leftover)
4201                         p = iter;
4202                 else {
4203                         l = *pos - 1;
4204                         p = s_next(m, p, &l);
4205                 }
4206         }
4207
4208         trace_event_read_lock();
4209         trace_access_lock(cpu_file);
4210         return p;
4211 }
4212
4213 static void s_stop(struct seq_file *m, void *p)
4214 {
4215         struct trace_iterator *iter = m->private;
4216
4217 #ifdef CONFIG_TRACER_MAX_TRACE
4218         if (iter->snapshot && iter->trace->use_max_tr)
4219                 return;
4220 #endif
4221
4222         trace_access_unlock(iter->cpu_file);
4223         trace_event_read_unlock();
4224 }
4225
4226 static void
4227 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4228                       unsigned long *entries, int cpu)
4229 {
4230         unsigned long count;
4231
4232         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4233         /*
4234          * If this buffer has skipped entries, then we hold all
4235          * entries for the trace and we need to ignore the
4236          * ones before the time stamp.
4237          */
4238         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4239                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4240                 /* total is the same as the entries */
4241                 *total = count;
4242         } else
4243                 *total = count +
4244                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4245         *entries = count;
4246 }
4247
4248 static void
4249 get_total_entries(struct array_buffer *buf,
4250                   unsigned long *total, unsigned long *entries)
4251 {
4252         unsigned long t, e;
4253         int cpu;
4254
4255         *total = 0;
4256         *entries = 0;
4257
4258         for_each_tracing_cpu(cpu) {
4259                 get_total_entries_cpu(buf, &t, &e, cpu);
4260                 *total += t;
4261                 *entries += e;
4262         }
4263 }
4264
4265 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4266 {
4267         unsigned long total, entries;
4268
4269         if (!tr)
4270                 tr = &global_trace;
4271
4272         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4273
4274         return entries;
4275 }
4276
4277 unsigned long trace_total_entries(struct trace_array *tr)
4278 {
4279         unsigned long total, entries;
4280
4281         if (!tr)
4282                 tr = &global_trace;
4283
4284         get_total_entries(&tr->array_buffer, &total, &entries);
4285
4286         return entries;
4287 }
4288
4289 static void print_lat_help_header(struct seq_file *m)
4290 {
4291         seq_puts(m, "#                    _------=> CPU#            \n"
4292                     "#                   / _-----=> irqs-off/BH-disabled\n"
4293                     "#                  | / _----=> need-resched    \n"
4294                     "#                  || / _---=> hardirq/softirq \n"
4295                     "#                  ||| / _--=> preempt-depth   \n"
4296                     "#                  |||| / _-=> migrate-disable \n"
4297                     "#                  ||||| /     delay           \n"
4298                     "#  cmd     pid     |||||| time  |   caller     \n"
4299                     "#     \\   /        ||||||  \\    |    /       \n");
4300 }
4301
4302 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4303 {
4304         unsigned long total;
4305         unsigned long entries;
4306
4307         get_total_entries(buf, &total, &entries);
4308         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4309                    entries, total, num_online_cpus());
4310         seq_puts(m, "#\n");
4311 }
4312
4313 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4314                                    unsigned int flags)
4315 {
4316         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4317
4318         print_event_info(buf, m);
4319
4320         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4321         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4322 }
4323
4324 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4325                                        unsigned int flags)
4326 {
4327         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4328         static const char space[] = "            ";
4329         int prec = tgid ? 12 : 2;
4330
4331         print_event_info(buf, m);
4332
4333         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4334         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4335         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4336         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4337         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4338         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4339         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4340         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4341 }
4342
4343 void
4344 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4345 {
4346         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4347         struct array_buffer *buf = iter->array_buffer;
4348         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4349         struct tracer *type = iter->trace;
4350         unsigned long entries;
4351         unsigned long total;
4352         const char *name = type->name;
4353
4354         get_total_entries(buf, &total, &entries);
4355
4356         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4357                    name, UTS_RELEASE);
4358         seq_puts(m, "# -----------------------------------"
4359                  "---------------------------------\n");
4360         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4361                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4362                    nsecs_to_usecs(data->saved_latency),
4363                    entries,
4364                    total,
4365                    buf->cpu,
4366                    preempt_model_none()      ? "server" :
4367                    preempt_model_voluntary() ? "desktop" :
4368                    preempt_model_full()      ? "preempt" :
4369                    preempt_model_rt()        ? "preempt_rt" :
4370                    "unknown",
4371                    /* These are reserved for later use */
4372                    0, 0, 0, 0);
4373 #ifdef CONFIG_SMP
4374         seq_printf(m, " #P:%d)\n", num_online_cpus());
4375 #else
4376         seq_puts(m, ")\n");
4377 #endif
4378         seq_puts(m, "#    -----------------\n");
4379         seq_printf(m, "#    | task: %.16s-%d "
4380                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4381                    data->comm, data->pid,
4382                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4383                    data->policy, data->rt_priority);
4384         seq_puts(m, "#    -----------------\n");
4385
4386         if (data->critical_start) {
4387                 seq_puts(m, "#  => started at: ");
4388                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4389                 trace_print_seq(m, &iter->seq);
4390                 seq_puts(m, "\n#  => ended at:   ");
4391                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4392                 trace_print_seq(m, &iter->seq);
4393                 seq_puts(m, "\n#\n");
4394         }
4395
4396         seq_puts(m, "#\n");
4397 }
4398
4399 static void test_cpu_buff_start(struct trace_iterator *iter)
4400 {
4401         struct trace_seq *s = &iter->seq;
4402         struct trace_array *tr = iter->tr;
4403
4404         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4405                 return;
4406
4407         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4408                 return;
4409
4410         if (cpumask_available(iter->started) &&
4411             cpumask_test_cpu(iter->cpu, iter->started))
4412                 return;
4413
4414         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4415                 return;
4416
4417         if (cpumask_available(iter->started))
4418                 cpumask_set_cpu(iter->cpu, iter->started);
4419
4420         /* Don't print started cpu buffer for the first entry of the trace */
4421         if (iter->idx > 1)
4422                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4423                                 iter->cpu);
4424 }
4425
4426 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4427 {
4428         struct trace_array *tr = iter->tr;
4429         struct trace_seq *s = &iter->seq;
4430         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4431         struct trace_entry *entry;
4432         struct trace_event *event;
4433
4434         entry = iter->ent;
4435
4436         test_cpu_buff_start(iter);
4437
4438         event = ftrace_find_event(entry->type);
4439
4440         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4441                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4442                         trace_print_lat_context(iter);
4443                 else
4444                         trace_print_context(iter);
4445         }
4446
4447         if (trace_seq_has_overflowed(s))
4448                 return TRACE_TYPE_PARTIAL_LINE;
4449
4450         if (event) {
4451                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4452                         return print_event_fields(iter, event);
4453                 return event->funcs->trace(iter, sym_flags, event);
4454         }
4455
4456         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4457
4458         return trace_handle_return(s);
4459 }
4460
4461 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4462 {
4463         struct trace_array *tr = iter->tr;
4464         struct trace_seq *s = &iter->seq;
4465         struct trace_entry *entry;
4466         struct trace_event *event;
4467
4468         entry = iter->ent;
4469
4470         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4471                 trace_seq_printf(s, "%d %d %llu ",
4472                                  entry->pid, iter->cpu, iter->ts);
4473
4474         if (trace_seq_has_overflowed(s))
4475                 return TRACE_TYPE_PARTIAL_LINE;
4476
4477         event = ftrace_find_event(entry->type);
4478         if (event)
4479                 return event->funcs->raw(iter, 0, event);
4480
4481         trace_seq_printf(s, "%d ?\n", entry->type);
4482
4483         return trace_handle_return(s);
4484 }
4485
4486 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4487 {
4488         struct trace_array *tr = iter->tr;
4489         struct trace_seq *s = &iter->seq;
4490         unsigned char newline = '\n';
4491         struct trace_entry *entry;
4492         struct trace_event *event;
4493
4494         entry = iter->ent;
4495
4496         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4497                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4498                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4499                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4500                 if (trace_seq_has_overflowed(s))
4501                         return TRACE_TYPE_PARTIAL_LINE;
4502         }
4503
4504         event = ftrace_find_event(entry->type);
4505         if (event) {
4506                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4507                 if (ret != TRACE_TYPE_HANDLED)
4508                         return ret;
4509         }
4510
4511         SEQ_PUT_FIELD(s, newline);
4512
4513         return trace_handle_return(s);
4514 }
4515
4516 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4517 {
4518         struct trace_array *tr = iter->tr;
4519         struct trace_seq *s = &iter->seq;
4520         struct trace_entry *entry;
4521         struct trace_event *event;
4522
4523         entry = iter->ent;
4524
4525         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4526                 SEQ_PUT_FIELD(s, entry->pid);
4527                 SEQ_PUT_FIELD(s, iter->cpu);
4528                 SEQ_PUT_FIELD(s, iter->ts);
4529                 if (trace_seq_has_overflowed(s))
4530                         return TRACE_TYPE_PARTIAL_LINE;
4531         }
4532
4533         event = ftrace_find_event(entry->type);
4534         return event ? event->funcs->binary(iter, 0, event) :
4535                 TRACE_TYPE_HANDLED;
4536 }
4537
4538 int trace_empty(struct trace_iterator *iter)
4539 {
4540         struct ring_buffer_iter *buf_iter;
4541         int cpu;
4542
4543         /* If we are looking at one CPU buffer, only check that one */
4544         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4545                 cpu = iter->cpu_file;
4546                 buf_iter = trace_buffer_iter(iter, cpu);
4547                 if (buf_iter) {
4548                         if (!ring_buffer_iter_empty(buf_iter))
4549                                 return 0;
4550                 } else {
4551                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4552                                 return 0;
4553                 }
4554                 return 1;
4555         }
4556
4557         for_each_tracing_cpu(cpu) {
4558                 buf_iter = trace_buffer_iter(iter, cpu);
4559                 if (buf_iter) {
4560                         if (!ring_buffer_iter_empty(buf_iter))
4561                                 return 0;
4562                 } else {
4563                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4564                                 return 0;
4565                 }
4566         }
4567
4568         return 1;
4569 }
4570
4571 /*  Called with trace_event_read_lock() held. */
4572 enum print_line_t print_trace_line(struct trace_iterator *iter)
4573 {
4574         struct trace_array *tr = iter->tr;
4575         unsigned long trace_flags = tr->trace_flags;
4576         enum print_line_t ret;
4577
4578         if (iter->lost_events) {
4579                 if (iter->lost_events == (unsigned long)-1)
4580                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4581                                          iter->cpu);
4582                 else
4583                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4584                                          iter->cpu, iter->lost_events);
4585                 if (trace_seq_has_overflowed(&iter->seq))
4586                         return TRACE_TYPE_PARTIAL_LINE;
4587         }
4588
4589         if (iter->trace && iter->trace->print_line) {
4590                 ret = iter->trace->print_line(iter);
4591                 if (ret != TRACE_TYPE_UNHANDLED)
4592                         return ret;
4593         }
4594
4595         if (iter->ent->type == TRACE_BPUTS &&
4596                         trace_flags & TRACE_ITER_PRINTK &&
4597                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4598                 return trace_print_bputs_msg_only(iter);
4599
4600         if (iter->ent->type == TRACE_BPRINT &&
4601                         trace_flags & TRACE_ITER_PRINTK &&
4602                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4603                 return trace_print_bprintk_msg_only(iter);
4604
4605         if (iter->ent->type == TRACE_PRINT &&
4606                         trace_flags & TRACE_ITER_PRINTK &&
4607                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4608                 return trace_print_printk_msg_only(iter);
4609
4610         if (trace_flags & TRACE_ITER_BIN)
4611                 return print_bin_fmt(iter);
4612
4613         if (trace_flags & TRACE_ITER_HEX)
4614                 return print_hex_fmt(iter);
4615
4616         if (trace_flags & TRACE_ITER_RAW)
4617                 return print_raw_fmt(iter);
4618
4619         return print_trace_fmt(iter);
4620 }
4621
4622 void trace_latency_header(struct seq_file *m)
4623 {
4624         struct trace_iterator *iter = m->private;
4625         struct trace_array *tr = iter->tr;
4626
4627         /* print nothing if the buffers are empty */
4628         if (trace_empty(iter))
4629                 return;
4630
4631         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4632                 print_trace_header(m, iter);
4633
4634         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4635                 print_lat_help_header(m);
4636 }
4637
4638 void trace_default_header(struct seq_file *m)
4639 {
4640         struct trace_iterator *iter = m->private;
4641         struct trace_array *tr = iter->tr;
4642         unsigned long trace_flags = tr->trace_flags;
4643
4644         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4645                 return;
4646
4647         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4648                 /* print nothing if the buffers are empty */
4649                 if (trace_empty(iter))
4650                         return;
4651                 print_trace_header(m, iter);
4652                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4653                         print_lat_help_header(m);
4654         } else {
4655                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4656                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4657                                 print_func_help_header_irq(iter->array_buffer,
4658                                                            m, trace_flags);
4659                         else
4660                                 print_func_help_header(iter->array_buffer, m,
4661                                                        trace_flags);
4662                 }
4663         }
4664 }
4665
4666 static void test_ftrace_alive(struct seq_file *m)
4667 {
4668         if (!ftrace_is_dead())
4669                 return;
4670         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4671                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4672 }
4673
4674 #ifdef CONFIG_TRACER_MAX_TRACE
4675 static void show_snapshot_main_help(struct seq_file *m)
4676 {
4677         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4678                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4679                     "#                      Takes a snapshot of the main buffer.\n"
4680                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4681                     "#                      (Doesn't have to be '2'; works with any number that\n"
4682                     "#                       is not a '0' or '1')\n");
4683 }
4684
4685 static void show_snapshot_percpu_help(struct seq_file *m)
4686 {
4687         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4688 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4689         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4690                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4691 #else
4692         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4693                     "#                     Must use main snapshot file to allocate.\n");
4694 #endif
4695         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4696                     "#                      (Doesn't have to be '2' works with any number that\n"
4697                     "#                      (Doesn't have to be '2'; works with any number that\n"
4698 }
4699
4700 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4701 {
4702         if (iter->tr->allocated_snapshot)
4703                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4704         else
4705                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4706
4707         seq_puts(m, "# Snapshot commands:\n");
4708         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4709                 show_snapshot_main_help(m);
4710         else
4711                 show_snapshot_percpu_help(m);
4712 }
4713 #else
4714 /* Should never be called */
4715 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4716 #endif
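/*
 * Illustrative usage (not part of the original source): with tracefs
 * mounted at the usual /sys/kernel/tracing location, the snapshot help
 * text above corresponds to a session along the lines of:
 *
 *   echo 1 > snapshot   # allocate (if needed) and take a snapshot of the main buffer
 *   cat snapshot        # read the frozen copy while tracing continues
 *   echo 2 > snapshot   # clear the snapshot buffer but keep it allocated
 *   echo 0 > snapshot   # clear and free the snapshot buffer
 */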
4717
4718 static int s_show(struct seq_file *m, void *v)
4719 {
4720         struct trace_iterator *iter = v;
4721         int ret;
4722
4723         if (iter->ent == NULL) {
4724                 if (iter->tr) {
4725                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4726                         seq_puts(m, "#\n");
4727                         test_ftrace_alive(m);
4728                 }
4729                 if (iter->snapshot && trace_empty(iter))
4730                         print_snapshot_help(m, iter);
4731                 else if (iter->trace && iter->trace->print_header)
4732                         iter->trace->print_header(m);
4733                 else
4734                         trace_default_header(m);
4735
4736         } else if (iter->leftover) {
4737                 /*
4738                  * If we filled the seq_file buffer earlier, we
4739                  * want to just show it now.
4740                  */
4741                 ret = trace_print_seq(m, &iter->seq);
4742
4743                 /* ret should this time be zero, but you never know */
4744                 iter->leftover = ret;
4745
4746         } else {
4747                 print_trace_line(iter);
4748                 ret = trace_print_seq(m, &iter->seq);
4749                 /*
4750                  * If we overflow the seq_file buffer, then it will
4751                  * ask us for this data again at start up.
4752                  * Use that instead.
4753                  *  ret is 0 if seq_file write succeeded.
4754                  *        -1 otherwise.
4755                  */
4756                 iter->leftover = ret;
4757         }
4758
4759         return 0;
4760 }
4761
4762 /*
4763  * Should be used after trace_array_get(); trace_types_lock
4764  * ensures that i_cdev was already initialized.
4765  */
4766 static inline int tracing_get_cpu(struct inode *inode)
4767 {
4768         if (inode->i_cdev) /* See trace_create_cpu_file() */
4769                 return (long)inode->i_cdev - 1;
4770         return RING_BUFFER_ALL_CPUS;
4771 }
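/*
 * Example of the encoding implied by the "- 1" above (assuming the per-CPU
 * file creation stores cpu + 1 in i_cdev, as hinted by the reference to
 * trace_create_cpu_file()): for "per_cpu/cpu2/trace" the inode carries
 * (void *)3 and this helper returns 2, while the top-level "trace" file
 * leaves i_cdev NULL and is treated as RING_BUFFER_ALL_CPUS.
 */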
4772
4773 static const struct seq_operations tracer_seq_ops = {
4774         .start          = s_start,
4775         .next           = s_next,
4776         .stop           = s_stop,
4777         .show           = s_show,
4778 };
4779
4780 /*
4781  * Note, as iter itself can be allocated and freed in different
4782  * ways, this function is only used to free its content, and not
4783  * the iterator itself. The only requirement for all the allocations
4784  * is that it must zero all fields (kzalloc), as freeing works with
4785  * either allocated content or NULL.
4786  */
4787 static void free_trace_iter_content(struct trace_iterator *iter)
4788 {
4789         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4790         if (iter->fmt != static_fmt_buf)
4791                 kfree(iter->fmt);
4792
4793         kfree(iter->temp);
4794         kfree(iter->buffer_iter);
4795         mutex_destroy(&iter->mutex);
4796         free_cpumask_var(iter->started);
4797 }
4798
4799 static struct trace_iterator *
4800 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4801 {
4802         struct trace_array *tr = inode->i_private;
4803         struct trace_iterator *iter;
4804         int cpu;
4805
4806         if (tracing_disabled)
4807                 return ERR_PTR(-ENODEV);
4808
4809         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4810         if (!iter)
4811                 return ERR_PTR(-ENOMEM);
4812
4813         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4814                                     GFP_KERNEL);
4815         if (!iter->buffer_iter)
4816                 goto release;
4817
4818         /*
4819          * trace_find_next_entry() may need to save off iter->ent.
4820          * It will place it into the iter->temp buffer. As most
4821          * events are less than 128 bytes, allocate a buffer of that size.
4822          * If one is greater, then trace_find_next_entry() will
4823          * allocate a new buffer to adjust for the bigger iter->ent.
4824          * It's not critical if it fails to get allocated here.
4825          */
4826         iter->temp = kmalloc(128, GFP_KERNEL);
4827         if (iter->temp)
4828                 iter->temp_size = 128;
4829
4830         /*
4831          * trace_event_printf() may need to modify given format
4832          * string to replace %p with %px so that it shows real address
4833          * instead of hash value. However, that is only for the event
4834          * tracing; other tracers may not need it. Defer the allocation
4835          * until it is needed.
4836          */
4837         iter->fmt = NULL;
4838         iter->fmt_size = 0;
4839
4840         mutex_lock(&trace_types_lock);
4841         iter->trace = tr->current_trace;
4842
4843         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4844                 goto fail;
4845
4846         iter->tr = tr;
4847
4848 #ifdef CONFIG_TRACER_MAX_TRACE
4849         /* Currently only the top directory has a snapshot */
4850         if (tr->current_trace->print_max || snapshot)
4851                 iter->array_buffer = &tr->max_buffer;
4852         else
4853 #endif
4854                 iter->array_buffer = &tr->array_buffer;
4855         iter->snapshot = snapshot;
4856         iter->pos = -1;
4857         iter->cpu_file = tracing_get_cpu(inode);
4858         mutex_init(&iter->mutex);
4859
4860         /* Notify the tracer early, before we stop tracing. */
4861         if (iter->trace->open)
4862                 iter->trace->open(iter);
4863
4864         /* Annotate start of buffers if we had overruns */
4865         if (ring_buffer_overruns(iter->array_buffer->buffer))
4866                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4867
4868         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4869         if (trace_clocks[tr->clock_id].in_ns)
4870                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4871
4872         /*
4873          * If pause-on-trace is enabled, then stop the trace while
4874          * dumping, unless this is the "snapshot" file
4875          */
4876         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4877                 tracing_stop_tr(tr);
4878
4879         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4880                 for_each_tracing_cpu(cpu) {
4881                         iter->buffer_iter[cpu] =
4882                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4883                                                          cpu, GFP_KERNEL);
4884                 }
4885                 ring_buffer_read_prepare_sync();
4886                 for_each_tracing_cpu(cpu) {
4887                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4888                         tracing_iter_reset(iter, cpu);
4889                 }
4890         } else {
4891                 cpu = iter->cpu_file;
4892                 iter->buffer_iter[cpu] =
4893                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4894                                                  cpu, GFP_KERNEL);
4895                 ring_buffer_read_prepare_sync();
4896                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4897                 tracing_iter_reset(iter, cpu);
4898         }
4899
4900         mutex_unlock(&trace_types_lock);
4901
4902         return iter;
4903
4904  fail:
4905         mutex_unlock(&trace_types_lock);
4906         free_trace_iter_content(iter);
4907 release:
4908         seq_release_private(inode, file);
4909         return ERR_PTR(-ENOMEM);
4910 }
4911
4912 int tracing_open_generic(struct inode *inode, struct file *filp)
4913 {
4914         int ret;
4915
4916         ret = tracing_check_open_get_tr(NULL);
4917         if (ret)
4918                 return ret;
4919
4920         filp->private_data = inode->i_private;
4921         return 0;
4922 }
4923
4924 bool tracing_is_disabled(void)
4925 {
4926         return (tracing_disabled) ? true : false;
4927 }
4928
4929 /*
4930  * Open and update trace_array ref count.
4931  * Must have the current trace_array passed to it.
4932  */
4933 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4934 {
4935         struct trace_array *tr = inode->i_private;
4936         int ret;
4937
4938         ret = tracing_check_open_get_tr(tr);
4939         if (ret)
4940                 return ret;
4941
4942         filp->private_data = inode->i_private;
4943
4944         return 0;
4945 }
4946
4947 /*
4948  * The private pointer of the inode is the trace_event_file.
4949  * Update the tr ref count associated to it.
4950  */
4951 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4952 {
4953         struct trace_event_file *file = inode->i_private;
4954         int ret;
4955
4956         ret = tracing_check_open_get_tr(file->tr);
4957         if (ret)
4958                 return ret;
4959
4960         mutex_lock(&event_mutex);
4961
4962         /* Fail if the file is marked for removal */
4963         if (file->flags & EVENT_FILE_FL_FREED) {
4964                 trace_array_put(file->tr);
4965                 ret = -ENODEV;
4966         } else {
4967                 event_file_get(file);
4968         }
4969
4970         mutex_unlock(&event_mutex);
4971         if (ret)
4972                 return ret;
4973
4974         filp->private_data = inode->i_private;
4975
4976         return 0;
4977 }
4978
4979 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4980 {
4981         struct trace_event_file *file = inode->i_private;
4982
4983         trace_array_put(file->tr);
4984         event_file_put(file);
4985
4986         return 0;
4987 }
4988
4989 static int tracing_mark_open(struct inode *inode, struct file *filp)
4990 {
4991         stream_open(inode, filp);
4992         return tracing_open_generic_tr(inode, filp);
4993 }
4994
4995 static int tracing_release(struct inode *inode, struct file *file)
4996 {
4997         struct trace_array *tr = inode->i_private;
4998         struct seq_file *m = file->private_data;
4999         struct trace_iterator *iter;
5000         int cpu;
5001
5002         if (!(file->f_mode & FMODE_READ)) {
5003                 trace_array_put(tr);
5004                 return 0;
5005         }
5006
5007         /* Writes do not use seq_file */
5008         iter = m->private;
5009         mutex_lock(&trace_types_lock);
5010
5011         for_each_tracing_cpu(cpu) {
5012                 if (iter->buffer_iter[cpu])
5013                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5014         }
5015
5016         if (iter->trace && iter->trace->close)
5017                 iter->trace->close(iter);
5018
5019         if (!iter->snapshot && tr->stop_count)
5020                 /* reenable tracing if it was previously enabled */
5021                 tracing_start_tr(tr);
5022
5023         __trace_array_put(tr);
5024
5025         mutex_unlock(&trace_types_lock);
5026
5027         free_trace_iter_content(iter);
5028         seq_release_private(inode, file);
5029
5030         return 0;
5031 }
5032
5033 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5034 {
5035         struct trace_array *tr = inode->i_private;
5036
5037         trace_array_put(tr);
5038         return 0;
5039 }
5040
5041 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5042 {
5043         struct trace_array *tr = inode->i_private;
5044
5045         trace_array_put(tr);
5046
5047         return single_release(inode, file);
5048 }
5049
5050 static int tracing_open(struct inode *inode, struct file *file)
5051 {
5052         struct trace_array *tr = inode->i_private;
5053         struct trace_iterator *iter;
5054         int ret;
5055
5056         ret = tracing_check_open_get_tr(tr);
5057         if (ret)
5058                 return ret;
5059
5060         /* If this file was open for write, then erase contents */
5061         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5062                 int cpu = tracing_get_cpu(inode);
5063                 struct array_buffer *trace_buf = &tr->array_buffer;
5064
5065 #ifdef CONFIG_TRACER_MAX_TRACE
5066                 if (tr->current_trace->print_max)
5067                         trace_buf = &tr->max_buffer;
5068 #endif
5069
5070                 if (cpu == RING_BUFFER_ALL_CPUS)
5071                         tracing_reset_online_cpus(trace_buf);
5072                 else
5073                         tracing_reset_cpu(trace_buf, cpu);
5074         }
5075
5076         if (file->f_mode & FMODE_READ) {
5077                 iter = __tracing_open(inode, file, false);
5078                 if (IS_ERR(iter))
5079                         ret = PTR_ERR(iter);
5080                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5081                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5082         }
5083
5084         if (ret < 0)
5085                 trace_array_put(tr);
5086
5087         return ret;
5088 }
5089
5090 /*
5091  * Some tracers are not suitable for instance buffers.
5092  * A tracer is always available for the global array (toplevel)
5093  * or if it explicitly states that it is.
5094  */
5095 static bool
5096 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5097 {
5098         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5099 }
5100
5101 /* Find the next tracer that this trace array may use */
5102 static struct tracer *
5103 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5104 {
5105         while (t && !trace_ok_for_array(t, tr))
5106                 t = t->next;
5107
5108         return t;
5109 }
5110
5111 static void *
5112 t_next(struct seq_file *m, void *v, loff_t *pos)
5113 {
5114         struct trace_array *tr = m->private;
5115         struct tracer *t = v;
5116
5117         (*pos)++;
5118
5119         if (t)
5120                 t = get_tracer_for_array(tr, t->next);
5121
5122         return t;
5123 }
5124
5125 static void *t_start(struct seq_file *m, loff_t *pos)
5126 {
5127         struct trace_array *tr = m->private;
5128         struct tracer *t;
5129         loff_t l = 0;
5130
5131         mutex_lock(&trace_types_lock);
5132
5133         t = get_tracer_for_array(tr, trace_types);
5134         for (; t && l < *pos; t = t_next(m, t, &l))
5135                         ;
5136
5137         return t;
5138 }
5139
5140 static void t_stop(struct seq_file *m, void *p)
5141 {
5142         mutex_unlock(&trace_types_lock);
5143 }
5144
5145 static int t_show(struct seq_file *m, void *v)
5146 {
5147         struct tracer *t = v;
5148
5149         if (!t)
5150                 return 0;
5151
5152         seq_puts(m, t->name);
5153         if (t->next)
5154                 seq_putc(m, ' ');
5155         else
5156                 seq_putc(m, '\n');
5157
5158         return 0;
5159 }
5160
5161 static const struct seq_operations show_traces_seq_ops = {
5162         .start          = t_start,
5163         .next           = t_next,
5164         .stop           = t_stop,
5165         .show           = t_show,
5166 };
5167
5168 static int show_traces_open(struct inode *inode, struct file *file)
5169 {
5170         struct trace_array *tr = inode->i_private;
5171         struct seq_file *m;
5172         int ret;
5173
5174         ret = tracing_check_open_get_tr(tr);
5175         if (ret)
5176                 return ret;
5177
5178         ret = seq_open(file, &show_traces_seq_ops);
5179         if (ret) {
5180                 trace_array_put(tr);
5181                 return ret;
5182         }
5183
5184         m = file->private_data;
5185         m->private = tr;
5186
5187         return 0;
5188 }
5189
5190 static int show_traces_release(struct inode *inode, struct file *file)
5191 {
5192         struct trace_array *tr = inode->i_private;
5193
5194         trace_array_put(tr);
5195         return seq_release(inode, file);
5196 }
5197
5198 static ssize_t
5199 tracing_write_stub(struct file *filp, const char __user *ubuf,
5200                    size_t count, loff_t *ppos)
5201 {
5202         return count;
5203 }
5204
5205 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5206 {
5207         int ret;
5208
5209         if (file->f_mode & FMODE_READ)
5210                 ret = seq_lseek(file, offset, whence);
5211         else
5212                 file->f_pos = ret = 0;
5213
5214         return ret;
5215 }
5216
5217 static const struct file_operations tracing_fops = {
5218         .open           = tracing_open,
5219         .read           = seq_read,
5220         .read_iter      = seq_read_iter,
5221         .splice_read    = copy_splice_read,
5222         .write          = tracing_write_stub,
5223         .llseek         = tracing_lseek,
5224         .release        = tracing_release,
5225 };
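/*
 * Illustrative only (not part of the kernel build): a minimal user-space
 * sketch that consumes the seq_file output served by tracing_fops above.
 * It assumes tracefs is mounted at the conventional /sys/kernel/tracing
 * path; adjust the path for other setups.
 *
 *   #include <fcntl.h>
 *   #include <stdio.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           char buf[4096];
 *           ssize_t n;
 *           int fd = open("/sys/kernel/tracing/trace", O_RDONLY);
 *
 *           if (fd < 0) {
 *                   perror("open");
 *                   return 1;
 *           }
 *           while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                   fwrite(buf, 1, n, stdout);
 *           close(fd);
 *           return 0;
 *   }
 */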
5226
5227 static const struct file_operations show_traces_fops = {
5228         .open           = show_traces_open,
5229         .read           = seq_read,
5230         .llseek         = seq_lseek,
5231         .release        = show_traces_release,
5232 };
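/*
 * Illustrative usage (not part of the original source): the tracer list
 * produced by t_show() above is what the "available_tracers" file described
 * in the readme text further below prints, e.g.:
 *
 *   cat available_tracers          # space-separated tracer names
 *   echo function > current_tracer # pick one (if that tracer is built in)
 */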
5233
5234 static ssize_t
5235 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5236                      size_t count, loff_t *ppos)
5237 {
5238         struct trace_array *tr = file_inode(filp)->i_private;
5239         char *mask_str;
5240         int len;
5241
5242         len = snprintf(NULL, 0, "%*pb\n",
5243                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5244         mask_str = kmalloc(len, GFP_KERNEL);
5245         if (!mask_str)
5246                 return -ENOMEM;
5247
5248         len = snprintf(mask_str, len, "%*pb\n",
5249                        cpumask_pr_args(tr->tracing_cpumask));
5250         if (len >= count) {
5251                 count = -EINVAL;
5252                 goto out_err;
5253         }
5254         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5255
5256 out_err:
5257         kfree(mask_str);
5258
5259         return count;
5260 }
5261
5262 int tracing_set_cpumask(struct trace_array *tr,
5263                         cpumask_var_t tracing_cpumask_new)
5264 {
5265         int cpu;
5266
5267         if (!tr)
5268                 return -EINVAL;
5269
5270         local_irq_disable();
5271         arch_spin_lock(&tr->max_lock);
5272         for_each_tracing_cpu(cpu) {
5273                 /*
5274                  * Increase/decrease the disabled counter if we are
5275                  * about to flip a bit in the cpumask:
5276                  */
5277                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5278                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5279                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5280                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5281 #ifdef CONFIG_TRACER_MAX_TRACE
5282                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5283 #endif
5284                 }
5285                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5286                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5287                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5288                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5289 #ifdef CONFIG_TRACER_MAX_TRACE
5290                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5291 #endif
5292                 }
5293         }
5294         arch_spin_unlock(&tr->max_lock);
5295         local_irq_enable();
5296
5297         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5298
5299         return 0;
5300 }
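/*
 * Illustrative usage (not part of the original source): the mask is written
 * as a standard hex cpumask string, so on a 4-CPU system
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * keeps tracing on CPUs 0-1 only; recording on the cleared CPUs is stopped
 * via ring_buffer_record_disable_cpu() as shown above.
 */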
5301
5302 static ssize_t
5303 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5304                       size_t count, loff_t *ppos)
5305 {
5306         struct trace_array *tr = file_inode(filp)->i_private;
5307         cpumask_var_t tracing_cpumask_new;
5308         int err;
5309
5310         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5311                 return -ENOMEM;
5312
5313         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5314         if (err)
5315                 goto err_free;
5316
5317         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5318         if (err)
5319                 goto err_free;
5320
5321         free_cpumask_var(tracing_cpumask_new);
5322
5323         return count;
5324
5325 err_free:
5326         free_cpumask_var(tracing_cpumask_new);
5327
5328         return err;
5329 }
5330
5331 static const struct file_operations tracing_cpumask_fops = {
5332         .open           = tracing_open_generic_tr,
5333         .read           = tracing_cpumask_read,
5334         .write          = tracing_cpumask_write,
5335         .release        = tracing_release_generic_tr,
5336         .llseek         = generic_file_llseek,
5337 };
5338
5339 static int tracing_trace_options_show(struct seq_file *m, void *v)
5340 {
5341         struct tracer_opt *trace_opts;
5342         struct trace_array *tr = m->private;
5343         u32 tracer_flags;
5344         int i;
5345
5346         mutex_lock(&trace_types_lock);
5347         tracer_flags = tr->current_trace->flags->val;
5348         trace_opts = tr->current_trace->flags->opts;
5349
5350         for (i = 0; trace_options[i]; i++) {
5351                 if (tr->trace_flags & (1 << i))
5352                         seq_printf(m, "%s\n", trace_options[i]);
5353                 else
5354                         seq_printf(m, "no%s\n", trace_options[i]);
5355         }
5356
5357         for (i = 0; trace_opts[i].name; i++) {
5358                 if (tracer_flags & trace_opts[i].bit)
5359                         seq_printf(m, "%s\n", trace_opts[i].name);
5360                 else
5361                         seq_printf(m, "no%s\n", trace_opts[i].name);
5362         }
5363         mutex_unlock(&trace_types_lock);
5364
5365         return 0;
5366 }
5367
5368 static int __set_tracer_option(struct trace_array *tr,
5369                                struct tracer_flags *tracer_flags,
5370                                struct tracer_opt *opts, int neg)
5371 {
5372         struct tracer *trace = tracer_flags->trace;
5373         int ret;
5374
5375         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5376         if (ret)
5377                 return ret;
5378
5379         if (neg)
5380                 tracer_flags->val &= ~opts->bit;
5381         else
5382                 tracer_flags->val |= opts->bit;
5383         return 0;
5384 }
5385
5386 /* Try to assign a tracer specific option */
5387 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5388 {
5389         struct tracer *trace = tr->current_trace;
5390         struct tracer_flags *tracer_flags = trace->flags;
5391         struct tracer_opt *opts = NULL;
5392         int i;
5393
5394         for (i = 0; tracer_flags->opts[i].name; i++) {
5395                 opts = &tracer_flags->opts[i];
5396
5397                 if (strcmp(cmp, opts->name) == 0)
5398                         return __set_tracer_option(tr, trace->flags, opts, neg);
5399         }
5400
5401         return -EINVAL;
5402 }
5403
5404 /* Some tracers require overwrite to stay enabled */
5405 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5406 {
5407         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5408                 return -1;
5409
5410         return 0;
5411 }
5412
5413 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5414 {
5415         int *map;
5416
5417         if ((mask == TRACE_ITER_RECORD_TGID) ||
5418             (mask == TRACE_ITER_RECORD_CMD))
5419                 lockdep_assert_held(&event_mutex);
5420
5421         /* do nothing if flag is already set */
5422         if (!!(tr->trace_flags & mask) == !!enabled)
5423                 return 0;
5424
5425         /* Give the tracer a chance to approve the change */
5426         if (tr->current_trace->flag_changed)
5427                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5428                         return -EINVAL;
5429
5430         if (enabled)
5431                 tr->trace_flags |= mask;
5432         else
5433                 tr->trace_flags &= ~mask;
5434
5435         if (mask == TRACE_ITER_RECORD_CMD)
5436                 trace_event_enable_cmd_record(enabled);
5437
5438         if (mask == TRACE_ITER_RECORD_TGID) {
5439                 if (!tgid_map) {
5440                         tgid_map_max = pid_max;
5441                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5442                                        GFP_KERNEL);
5443
5444                         /*
5445                          * Pairs with smp_load_acquire() in
5446                          * trace_find_tgid_ptr() to ensure that if it observes
5447                          * the tgid_map we just allocated then it also observes
5448                          * the corresponding tgid_map_max value.
5449                          */
5450                         smp_store_release(&tgid_map, map);
5451                 }
5452                 if (!tgid_map) {
5453                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5454                         return -ENOMEM;
5455                 }
5456
5457                 trace_event_enable_tgid_record(enabled);
5458         }
5459
5460         if (mask == TRACE_ITER_EVENT_FORK)
5461                 trace_event_follow_fork(tr, enabled);
5462
5463         if (mask == TRACE_ITER_FUNC_FORK)
5464                 ftrace_pid_follow_fork(tr, enabled);
5465
5466         if (mask == TRACE_ITER_OVERWRITE) {
5467                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5468 #ifdef CONFIG_TRACER_MAX_TRACE
5469                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5470 #endif
5471         }
5472
5473         if (mask == TRACE_ITER_PRINTK) {
5474                 trace_printk_start_stop_comm(enabled);
5475                 trace_printk_control(enabled);
5476         }
5477
5478         return 0;
5479 }
5480
5481 int trace_set_options(struct trace_array *tr, char *option)
5482 {
5483         char *cmp;
5484         int neg = 0;
5485         int ret;
5486         size_t orig_len = strlen(option);
5487         int len;
5488
5489         cmp = strstrip(option);
5490
5491         len = str_has_prefix(cmp, "no");
5492         if (len)
5493                 neg = 1;
5494
5495         cmp += len;
5496
5497         mutex_lock(&event_mutex);
5498         mutex_lock(&trace_types_lock);
5499
5500         ret = match_string(trace_options, -1, cmp);
5501         /* If not a core option, test the tracer-specific options */
5502         if (ret < 0)
5503                 ret = set_tracer_option(tr, cmp, neg);
5504         else
5505                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5506
5507         mutex_unlock(&trace_types_lock);
5508         mutex_unlock(&event_mutex);
5509
5510         /*
5511          * If the first trailing whitespace is replaced with '\0' by strstrip,
5512          * turn it back into a space.
5513          */
5514         if (orig_len > strlen(option))
5515                 option[strlen(option)] = ' ';
5516
5517         return ret;
5518 }
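/*
 * Illustrative usage (not part of the original source): a write to the
 * "trace_options" file is parsed by trace_set_options() above (see
 * tracing_trace_options_write() further below), so for example
 *
 *   echo noprint-parent > /sys/kernel/tracing/trace_options
 *
 * clears a core flag (the "no" prefix negates it), while a name that is not
 * a core option falls through to the tracer-specific options.
 */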
5519
5520 static void __init apply_trace_boot_options(void)
5521 {
5522         char *buf = trace_boot_options_buf;
5523         char *option;
5524
5525         while (true) {
5526                 option = strsep(&buf, ",");
5527
5528                 if (!option)
5529                         break;
5530
5531                 if (*option)
5532                         trace_set_options(&global_trace, option);
5533
5534                 /* Put back the comma to allow this to be called again */
5535                 if (buf)
5536                         *(buf - 1) = ',';
5537         }
5538 }
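/*
 * Illustrative example (not part of the original source): the buffer walked
 * above is filled at early boot from the "trace_options=" kernel command
 * line parameter (handled elsewhere in this file), so booting with
 * something like
 *
 *   trace_options=sym-offset,noirq-info
 *
 * applies each comma-separated entry through trace_set_options().
 */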
5539
5540 static ssize_t
5541 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5542                         size_t cnt, loff_t *ppos)
5543 {
5544         struct seq_file *m = filp->private_data;
5545         struct trace_array *tr = m->private;
5546         char buf[64];
5547         int ret;
5548
5549         if (cnt >= sizeof(buf))
5550                 return -EINVAL;
5551
5552         if (copy_from_user(buf, ubuf, cnt))
5553                 return -EFAULT;
5554
5555         buf[cnt] = 0;
5556
5557         ret = trace_set_options(tr, buf);
5558         if (ret < 0)
5559                 return ret;
5560
5561         *ppos += cnt;
5562
5563         return cnt;
5564 }
5565
5566 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5567 {
5568         struct trace_array *tr = inode->i_private;
5569         int ret;
5570
5571         ret = tracing_check_open_get_tr(tr);
5572         if (ret)
5573                 return ret;
5574
5575         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5576         if (ret < 0)
5577                 trace_array_put(tr);
5578
5579         return ret;
5580 }
5581
5582 static const struct file_operations tracing_iter_fops = {
5583         .open           = tracing_trace_options_open,
5584         .read           = seq_read,
5585         .llseek         = seq_lseek,
5586         .release        = tracing_single_release_tr,
5587         .write          = tracing_trace_options_write,
5588 };
5589
5590 static const char readme_msg[] =
5591         "tracing mini-HOWTO:\n\n"
5592         "# echo 0 > tracing_on : quick way to disable tracing\n"
5593         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5594         " Important files:\n"
5595         "  trace\t\t\t- The static contents of the buffer\n"
5596         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5597         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5598         "  current_tracer\t- function and latency tracers\n"
5599         "  available_tracers\t- list of configured tracers for current_tracer\n"
5600         "  error_log\t- error log for failed commands (that support it)\n"
5601         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5602         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5603         "  trace_clock\t\t- change the clock used to order events\n"
5604         "       local:   Per cpu clock but may not be synced across CPUs\n"
5605         "      global:   Synced across CPUs but slows tracing down.\n"
5606         "     counter:   Not a clock, but just an increment\n"
5607         "      uptime:   Jiffy counter from time of boot\n"
5608         "        perf:   Same clock that perf events use\n"
5609 #ifdef CONFIG_X86_64
5610         "     x86-tsc:   TSC cycle counter\n"
5611 #endif
5612         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5613         "       delta:   Delta difference against a buffer-wide timestamp\n"
5614         "    absolute:   Absolute (standalone) timestamp\n"
5615         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5616         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5617         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5618         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5619         "\t\t\t  Remove sub-buffer with rmdir\n"
5620         "  trace_options\t\t- Set format or modify how tracing happens\n"
5621         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5622         "\t\t\t  option name\n"
5623         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5624 #ifdef CONFIG_DYNAMIC_FTRACE
5625         "\n  available_filter_functions - list of functions that can be filtered on\n"
5626         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5627         "\t\t\t  functions\n"
5628         "\t     accepts: func_full_name or glob-matching-pattern\n"
5629         "\t     modules: Can select a group via module\n"
5630         "\t      Format: :mod:<module-name>\n"
5631         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5632         "\t    triggers: a command to perform when function is hit\n"
5633         "\t      Format: <function>:<trigger>[:count]\n"
5634         "\t     trigger: traceon, traceoff\n"
5635         "\t\t      enable_event:<system>:<event>\n"
5636         "\t\t      disable_event:<system>:<event>\n"
5637 #ifdef CONFIG_STACKTRACE
5638         "\t\t      stacktrace\n"
5639 #endif
5640 #ifdef CONFIG_TRACER_SNAPSHOT
5641         "\t\t      snapshot\n"
5642 #endif
5643         "\t\t      dump\n"
5644         "\t\t      cpudump\n"
5645         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5646         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5647         "\t     The first one will disable tracing every time do_fault is hit\n"
5648         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5649         "\t       The first time do_trap is hit and it disables tracing, the\n"
5650         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5651         "\t       the counter will not decrement. It only decrements when the\n"
5652         "\t       trigger did work\n"
5653         "\t     To remove trigger without count:\n"
5654         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5655         "\t     To remove trigger with a count:\n"
5656         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5657         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5658         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5659         "\t    modules: Can select a group via module command :mod:\n"
5660         "\t    Does not accept triggers\n"
5661 #endif /* CONFIG_DYNAMIC_FTRACE */
5662 #ifdef CONFIG_FUNCTION_TRACER
5663         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5664         "\t\t    (function)\n"
5665         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5666         "\t\t    (function)\n"
5667 #endif
5668 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5669         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5670         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5671         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5672 #endif
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5674         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5675         "\t\t\t  snapshot buffer. Read the contents for more\n"
5676         "\t\t\t  information\n"
5677 #endif
5678 #ifdef CONFIG_STACK_TRACER
5679         "  stack_trace\t\t- Shows the max stack trace when active\n"
5680         "  stack_max_size\t- Shows current max stack size that was traced\n"
5681         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5682         "\t\t\t  new trace)\n"
5683 #ifdef CONFIG_DYNAMIC_FTRACE
5684         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5685         "\t\t\t  traces\n"
5686 #endif
5687 #endif /* CONFIG_STACK_TRACER */
5688 #ifdef CONFIG_DYNAMIC_EVENTS
5689         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5690         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5691 #endif
5692 #ifdef CONFIG_KPROBE_EVENTS
5693         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5694         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5695 #endif
5696 #ifdef CONFIG_UPROBE_EVENTS
5697         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5698         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5699 #endif
5700 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5701     defined(CONFIG_FPROBE_EVENTS)
5702         "\t  accepts: event-definitions (one definition per line)\n"
5703 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5704         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5705         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5706 #endif
5707 #ifdef CONFIG_FPROBE_EVENTS
5708         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5709         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5710 #endif
5711 #ifdef CONFIG_HIST_TRIGGERS
5712         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5713 #endif
5714         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5715         "\t           -:[<group>/][<event>]\n"
5716 #ifdef CONFIG_KPROBE_EVENTS
5717         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5718   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5719 #endif
5720 #ifdef CONFIG_UPROBE_EVENTS
5721   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5722 #endif
5723         "\t     args: <name>=fetcharg[:type]\n"
5724         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5725 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5726 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5727         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5728         "\t           <argname>[->field[->field|.field...]],\n"
5729 #else
5730         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5731 #endif
5732 #else
5733         "\t           $stack<index>, $stack, $retval, $comm,\n"
5734 #endif
5735         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5736         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5737         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5738         "\t           symstr, <type>\\[<array-size>\\]\n"
5739 #ifdef CONFIG_HIST_TRIGGERS
5740         "\t    field: <stype> <name>;\n"
5741         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5742         "\t           [unsigned] char/int/long\n"
5743 #endif
5744         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5745         "\t            of the <attached-group>/<attached-event>.\n"
5746 #endif
5747         "  events/\t\t- Directory containing all trace event subsystems:\n"
5748         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5749         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5750         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5751         "\t\t\t  events\n"
5752         "      filter\t\t- If set, only events passing filter are traced\n"
5753         "  events/<system>/<event>/\t- Directory containing control files for\n"
5754         "\t\t\t  <event>:\n"
5755         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5756         "      filter\t\t- If set, only events passing filter are traced\n"
5757         "      trigger\t\t- If set, a command to perform when event is hit\n"
5758         "\t    Format: <trigger>[:count][if <filter>]\n"
5759         "\t   trigger: traceon, traceoff\n"
5760         "\t            enable_event:<system>:<event>\n"
5761         "\t            disable_event:<system>:<event>\n"
5762 #ifdef CONFIG_HIST_TRIGGERS
5763         "\t            enable_hist:<system>:<event>\n"
5764         "\t            disable_hist:<system>:<event>\n"
5765 #endif
5766 #ifdef CONFIG_STACKTRACE
5767         "\t\t    stacktrace\n"
5768 #endif
5769 #ifdef CONFIG_TRACER_SNAPSHOT
5770         "\t\t    snapshot\n"
5771 #endif
5772 #ifdef CONFIG_HIST_TRIGGERS
5773         "\t\t    hist (see below)\n"
5774 #endif
5775         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5776         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5777         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5778         "\t                  events/block/block_unplug/trigger\n"
5779         "\t   The first disables tracing every time block_unplug is hit.\n"
5780         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5781         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5782         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5783         "\t   Like function triggers, the counter is only decremented if it\n"
5784         "\t    enabled or disabled tracing.\n"
5785         "\t   To remove a trigger without a count:\n"
5786         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5787         "\t   To remove a trigger with a count:\n"
5788         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5789         "\t   Filters can be ignored when removing a trigger.\n"
5790 #ifdef CONFIG_HIST_TRIGGERS
5791         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5792         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5793         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5794         "\t            [:values=<field1[,field2,...]>]\n"
5795         "\t            [:sort=<field1[,field2,...]>]\n"
5796         "\t            [:size=#entries]\n"
5797         "\t            [:pause][:continue][:clear]\n"
5798         "\t            [:name=histname1]\n"
5799         "\t            [:nohitcount]\n"
5800         "\t            [:<handler>.<action>]\n"
5801         "\t            [if <filter>]\n\n"
5802         "\t    Note, special fields can be used as well:\n"
5803         "\t            common_timestamp - to record current timestamp\n"
5804         "\t            common_cpu - to record the CPU the event happened on\n"
5805         "\n"
5806         "\t    A hist trigger variable can be:\n"
5807         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5808         "\t        - a reference to another variable e.g. y=$x,\n"
5809         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5810         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5811         "\n"
5812         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5813         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5814         "\t    variable reference, field or numeric literal.\n"
5815         "\n"
5816         "\t    When a matching event is hit, an entry is added to a hash\n"
5817         "\t    table using the key(s) and value(s) named, and the value of a\n"
5818         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5819         "\t    correspond to fields in the event's format description.  Keys\n"
5820         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5821         "\t    Compound keys consisting of up to two fields can be specified\n"
5822         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5823         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5824         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5825         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5826         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5827         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5828         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5829         "\t    its histogram data will be shared with other triggers of the\n"
5830         "\t    same name, and trigger hits will update this common data.\n\n"
5831         "\t    Reading the 'hist' file for the event will dump the hash\n"
5832         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5833         "\t    triggers attached to an event, there will be a table for each\n"
5834         "\t    trigger in the output.  The table displayed for a named\n"
5835         "\t    trigger will be the same as any other instance having the\n"
5836         "\t    same name.  The default format used to display a given field\n"
5837         "\t    can be modified by appending any of the following modifiers\n"
5838         "\t    to the field name, as applicable:\n\n"
5839         "\t            .hex        display a number as a hex value\n"
5840         "\t            .sym        display an address as a symbol\n"
5841         "\t            .sym-offset display an address as a symbol and offset\n"
5842         "\t            .execname   display a common_pid as a program name\n"
5843         "\t            .syscall    display a syscall id as a syscall name\n"
5844         "\t            .log2       display log2 value rather than raw number\n"
5845         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5846         "\t            .usecs      display a common_timestamp in microseconds\n"
5847         "\t            .percent    display a number as a percentage value\n"
5848         "\t            .graph      display a bar-graph of a value\n\n"
5849         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5850         "\t    trigger or to start a hist trigger but not log any events\n"
5851         "\t    until told to do so.  'continue' can be used to start or\n"
5852         "\t    restart a paused hist trigger.\n\n"
5853         "\t    The 'clear' parameter will clear the contents of a running\n"
5854         "\t    hist trigger and leave its current paused/active state\n"
5855         "\t    unchanged.\n\n"
5856         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5857         "\t    raw hitcount in the histogram.\n\n"
5858         "\t    The enable_hist and disable_hist triggers can be used to\n"
5859         "\t    have one event conditionally start and stop another event's\n"
5860         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5861         "\t    the enable_event and disable_event triggers.\n\n"
5862         "\t    Hist trigger handlers and actions are executed whenever a\n"
5863         "\t    histogram entry is added or updated.  They take the form:\n\n"
5864         "\t        <handler>.<action>\n\n"
5865         "\t    The available handlers are:\n\n"
5866         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5867         "\t        onmax(var)               - invoke if var exceeds current max\n"
5868         "\t        onchange(var)            - invoke action if var changes\n\n"
5869         "\t    The available actions are:\n\n"
5870         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5871         "\t        save(field,...)                      - save current event fields\n"
5872 #ifdef CONFIG_TRACER_SNAPSHOT
5873         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5874 #endif
5875 #ifdef CONFIG_SYNTH_EVENTS
5876         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5877         "\t  Write into this file to define/undefine new synthetic events.\n"
5878         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5879 #endif
5880 #endif
5881 ;
5882
5883 static ssize_t
5884 tracing_readme_read(struct file *filp, char __user *ubuf,
5885                        size_t cnt, loff_t *ppos)
5886 {
5887         return simple_read_from_buffer(ubuf, cnt, ppos,
5888                                         readme_msg, strlen(readme_msg));
5889 }
5890
5891 static const struct file_operations tracing_readme_fops = {
5892         .open           = tracing_open_generic,
5893         .read           = tracing_readme_read,
5894         .llseek         = generic_file_llseek,
5895 };
5896
5897 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5898 {
5899         int pid = ++(*pos);
5900
5901         return trace_find_tgid_ptr(pid);
5902 }
5903
5904 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5905 {
5906         int pid = *pos;
5907
5908         return trace_find_tgid_ptr(pid);
5909 }
5910
5911 static void saved_tgids_stop(struct seq_file *m, void *v)
5912 {
5913 }
5914
5915 static int saved_tgids_show(struct seq_file *m, void *v)
5916 {
5917         int *entry = (int *)v;
5918         int pid = entry - tgid_map;
5919         int tgid = *entry;
5920
5921         if (tgid == 0)
5922                 return SEQ_SKIP;
5923
5924         seq_printf(m, "%d %d\n", pid, tgid);
5925         return 0;
5926 }
5927
5928 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5929         .start          = saved_tgids_start,
5930         .stop           = saved_tgids_stop,
5931         .next           = saved_tgids_next,
5932         .show           = saved_tgids_show,
5933 };
5934
5935 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5936 {
5937         int ret;
5938
5939         ret = tracing_check_open_get_tr(NULL);
5940         if (ret)
5941                 return ret;
5942
5943         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5944 }
5945
5946
5947 static const struct file_operations tracing_saved_tgids_fops = {
5948         .open           = tracing_saved_tgids_open,
5949         .read           = seq_read,
5950         .llseek         = seq_lseek,
5951         .release        = seq_release,
5952 };
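/*
 * Illustrative usage (not part of the original source): saved_tgids_show()
 * above prints "<pid> <tgid>" pairs, which are only collected while the
 * record-tgid option is set (see set_tracer_flag() above). Assuming the
 * usual tracefs layout:
 *
 *   echo 1 > /sys/kernel/tracing/options/record-tgid
 *   cat /sys/kernel/tracing/saved_tgids
 */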
5953
5954 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5955 {
5956         unsigned int *ptr = v;
5957
5958         if (*pos || m->count)
5959                 ptr++;
5960
5961         (*pos)++;
5962
5963         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5964              ptr++) {
5965                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5966                         continue;
5967
5968                 return ptr;
5969         }
5970
5971         return NULL;
5972 }
5973
5974 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5975 {
5976         void *v;
5977         loff_t l = 0;
5978
5979         preempt_disable();
5980         arch_spin_lock(&trace_cmdline_lock);
5981
5982         v = &savedcmd->map_cmdline_to_pid[0];
5983         while (l <= *pos) {
5984                 v = saved_cmdlines_next(m, v, &l);
5985                 if (!v)
5986                         return NULL;
5987         }
5988
5989         return v;
5990 }
5991
5992 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5993 {
5994         arch_spin_unlock(&trace_cmdline_lock);
5995         preempt_enable();
5996 }
5997
5998 static int saved_cmdlines_show(struct seq_file *m, void *v)
5999 {
6000         char buf[TASK_COMM_LEN];
6001         unsigned int *pid = v;
6002
6003         __trace_find_cmdline(*pid, buf);
6004         seq_printf(m, "%d %s\n", *pid, buf);
6005         return 0;
6006 }
6007
6008 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6009         .start          = saved_cmdlines_start,
6010         .next           = saved_cmdlines_next,
6011         .stop           = saved_cmdlines_stop,
6012         .show           = saved_cmdlines_show,
6013 };
6014
6015 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6016 {
6017         int ret;
6018
6019         ret = tracing_check_open_get_tr(NULL);
6020         if (ret)
6021                 return ret;
6022
6023         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6024 }
6025
6026 static const struct file_operations tracing_saved_cmdlines_fops = {
6027         .open           = tracing_saved_cmdlines_open,
6028         .read           = seq_read,
6029         .llseek         = seq_lseek,
6030         .release        = seq_release,
6031 };
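/*
 * Illustrative usage (not part of the original source): the seq_file
 * iterator above walks the pid->comm cache, so
 *
 *   cat /sys/kernel/tracing/saved_cmdlines
 *
 * prints a "<pid> <comm>" pair for every pid that still has a cached comm.
 */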
6032
6033 static ssize_t
6034 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6035                                  size_t cnt, loff_t *ppos)
6036 {
6037         char buf[64];
6038         int r;
6039
6040         preempt_disable();
6041         arch_spin_lock(&trace_cmdline_lock);
6042         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6043         arch_spin_unlock(&trace_cmdline_lock);
6044         preempt_enable();
6045
6046         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6047 }
6048
6049 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6050 {
6051         kfree(s->saved_cmdlines);
6052         kfree(s->map_cmdline_to_pid);
6053         kfree(s);
6054 }
6055
6056 static int tracing_resize_saved_cmdlines(unsigned int val)
6057 {
6058         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6059
6060         s = kmalloc(sizeof(*s), GFP_KERNEL);
6061         if (!s)
6062                 return -ENOMEM;
6063
6064         if (allocate_cmdlines_buffer(val, s) < 0) {
6065                 kfree(s);
6066                 return -ENOMEM;
6067         }
6068
6069         preempt_disable();
6070         arch_spin_lock(&trace_cmdline_lock);
6071         savedcmd_temp = savedcmd;
6072         savedcmd = s;
6073         arch_spin_unlock(&trace_cmdline_lock);
6074         preempt_enable();
6075         free_saved_cmdlines_buffer(savedcmd_temp);
6076
6077         return 0;
6078 }
6079
6080 static ssize_t
6081 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6082                                   size_t cnt, loff_t *ppos)
6083 {
6084         unsigned long val;
6085         int ret;
6086
6087         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6088         if (ret)
6089                 return ret;
6090
6091         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
6092         if (!val || val > PID_MAX_DEFAULT)
6093                 return -EINVAL;
6094
6095         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6096         if (ret < 0)
6097                 return ret;
6098
6099         *ppos += cnt;
6100
6101         return cnt;
6102 }
6103
6104 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6105         .open           = tracing_open_generic,
6106         .read           = tracing_saved_cmdlines_size_read,
6107         .write          = tracing_saved_cmdlines_size_write,
6108 };
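/*
 * Illustrative usage (not part of the original source): resizing the comm
 * cache goes through tracing_resize_saved_cmdlines() above, e.g.
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size     # current number of entries
 *   echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The value must be at least 1 and no larger than PID_MAX_DEFAULT.
 */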
6109
6110 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6111 static union trace_eval_map_item *
6112 update_eval_map(union trace_eval_map_item *ptr)
6113 {
6114         if (!ptr->map.eval_string) {
6115                 if (ptr->tail.next) {
6116                         ptr = ptr->tail.next;
6117                         /* Set ptr to the next real item (skip head) */
6118                         ptr++;
6119                 } else
6120                         return NULL;
6121         }
6122         return ptr;
6123 }
6124
6125 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6126 {
6127         union trace_eval_map_item *ptr = v;
6128
6129         /*
6130          * Paranoid! If ptr points to end, we don't want to increment past it.
6131          * This really should never happen.
6132          */
6133         (*pos)++;
6134         ptr = update_eval_map(ptr);
6135         if (WARN_ON_ONCE(!ptr))
6136                 return NULL;
6137
6138         ptr++;
6139         ptr = update_eval_map(ptr);
6140
6141         return ptr;
6142 }
6143
6144 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6145 {
6146         union trace_eval_map_item *v;
6147         loff_t l = 0;
6148
6149         mutex_lock(&trace_eval_mutex);
6150
6151         v = trace_eval_maps;
6152         if (v)
6153                 v++;
6154
6155         while (v && l < *pos) {
6156                 v = eval_map_next(m, v, &l);
6157         }
6158
6159         return v;
6160 }
6161
6162 static void eval_map_stop(struct seq_file *m, void *v)
6163 {
6164         mutex_unlock(&trace_eval_mutex);
6165 }
6166
6167 static int eval_map_show(struct seq_file *m, void *v)
6168 {
6169         union trace_eval_map_item *ptr = v;
6170
6171         seq_printf(m, "%s %ld (%s)\n",
6172                    ptr->map.eval_string, ptr->map.eval_value,
6173                    ptr->map.system);
6174
6175         return 0;
6176 }
6177
6178 static const struct seq_operations tracing_eval_map_seq_ops = {
6179         .start          = eval_map_start,
6180         .next           = eval_map_next,
6181         .stop           = eval_map_stop,
6182         .show           = eval_map_show,
6183 };
6184
6185 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6186 {
6187         int ret;
6188
6189         ret = tracing_check_open_get_tr(NULL);
6190         if (ret)
6191                 return ret;
6192
6193         return seq_open(filp, &tracing_eval_map_seq_ops);
6194 }
6195
6196 static const struct file_operations tracing_eval_map_fops = {
6197         .open           = tracing_eval_map_open,
6198         .read           = seq_read,
6199         .llseek         = seq_lseek,
6200         .release        = seq_release,
6201 };
6202
6203 static inline union trace_eval_map_item *
6204 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6205 {
6206         /* Return tail of array given the head */
6207         return ptr + ptr->head.length + 1;
6208 }
6209
6210 static void
6211 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6212                            int len)
6213 {
6214         struct trace_eval_map **stop;
6215         struct trace_eval_map **map;
6216         union trace_eval_map_item *map_array;
6217         union trace_eval_map_item *ptr;
6218
6219         stop = start + len;
6220
6221         /*
6222          * The trace_eval_maps contains the map plus a head and tail item,
6223          * where the head holds the module and length of array, and the
6224          * tail holds a pointer to the next list.
6225          */
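             /*
              * For example, with len == 3 the allocated array is laid out as:
              *
              *   [ head: mod, length ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
              *
              * which is why len + 2 items are allocated.
              */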
6226         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6227         if (!map_array) {
6228                 pr_warn("Unable to allocate trace eval mapping\n");
6229                 return;
6230         }
6231
6232         mutex_lock(&trace_eval_mutex);
6233
6234         if (!trace_eval_maps)
6235                 trace_eval_maps = map_array;
6236         else {
6237                 ptr = trace_eval_maps;
6238                 for (;;) {
6239                         ptr = trace_eval_jmp_to_tail(ptr);
6240                         if (!ptr->tail.next)
6241                                 break;
6242                         ptr = ptr->tail.next;
6243
6244                 }
6245                 ptr->tail.next = map_array;
6246         }
6247         map_array->head.mod = mod;
6248         map_array->head.length = len;
6249         map_array++;
6250
6251         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6252                 map_array->map = **map;
6253                 map_array++;
6254         }
6255         memset(map_array, 0, sizeof(*map_array));
6256
6257         mutex_unlock(&trace_eval_mutex);
6258 }
6259
6260 static void trace_create_eval_file(struct dentry *d_tracer)
6261 {
6262         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6263                           NULL, &tracing_eval_map_fops);
6264 }
6265
6266 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6267 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6268 static inline void trace_insert_eval_map_file(struct module *mod,
6269                               struct trace_eval_map **start, int len) { }
6270 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6271
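     /*
      * Update the registered trace events with the eval maps and, when
      * CONFIG_TRACE_EVAL_MAP_FILE is enabled, also record them for the
      * eval_map file.
      */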
6272 static void trace_insert_eval_map(struct module *mod,
6273                                   struct trace_eval_map **start, int len)
6274 {
6275         struct trace_eval_map **map;
6276
6277         if (len <= 0)
6278                 return;
6279
6280         map = start;
6281
6282         trace_event_eval_update(map, len);
6283
6284         trace_insert_eval_map_file(mod, start, len);
6285 }
6286
6287 static ssize_t
6288 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6289                        size_t cnt, loff_t *ppos)
6290 {
6291         struct trace_array *tr = filp->private_data;
6292         char buf[MAX_TRACER_SIZE+2];
6293         int r;
6294
6295         mutex_lock(&trace_types_lock);
6296         r = sprintf(buf, "%s\n", tr->current_trace->name);
6297         mutex_unlock(&trace_types_lock);
6298
6299         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6300 }
6301
6302 int tracer_init(struct tracer *t, struct trace_array *tr)
6303 {
6304         tracing_reset_online_cpus(&tr->array_buffer);
6305         return t->init(tr);
6306 }
6307
6308 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6309 {
6310         int cpu;
6311
6312         for_each_tracing_cpu(cpu)
6313                 per_cpu_ptr(buf->data, cpu)->entries = val;
6314 }
6315
6316 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6317 {
6318         if (cpu == RING_BUFFER_ALL_CPUS) {
6319                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6320         } else {
6321                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6322         }
6323 }
6324
6325 #ifdef CONFIG_TRACER_MAX_TRACE
6326 /* resize @tr's buffer to the size of @size_tr's entries */
6327 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6328                                         struct array_buffer *size_buf, int cpu_id)
6329 {
6330         int cpu, ret = 0;
6331
6332         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6333                 for_each_tracing_cpu(cpu) {
6334                         ret = ring_buffer_resize(trace_buf->buffer,
6335                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6336                         if (ret < 0)
6337                                 break;
6338                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6339                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6340                 }
6341         } else {
6342                 ret = ring_buffer_resize(trace_buf->buffer,
6343                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6344                 if (ret == 0)
6345                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6346                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6347         }
6348
6349         return ret;
6350 }
6351 #endif /* CONFIG_TRACER_MAX_TRACE */
6352
6353 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6354                                         unsigned long size, int cpu)
6355 {
6356         int ret;
6357
6358         /*
6359          * If kernel or user changes the size of the ring buffer
6360          * we use the size that was given, and we can forget about
6361          * expanding it later.
6362          */
6363         ring_buffer_expanded = true;
6364
6365         /* May be called before buffers are initialized */
6366         if (!tr->array_buffer.buffer)
6367                 return 0;
6368
6369         /* Do not allow tracing while resizing ring buffer */
6370         tracing_stop_tr(tr);
6371
6372         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6373         if (ret < 0)
6374                 goto out_start;
6375
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377         if (!tr->allocated_snapshot)
6378                 goto out;
6379
6380         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6381         if (ret < 0) {
6382                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6383                                                      &tr->array_buffer, cpu);
6384                 if (r < 0) {
6385                         /*
6386                          * AARGH! We are left with different
6387                          * size max buffer!!!!
6388                          * The max buffer is our "snapshot" buffer.
6389                          * When a tracer needs a snapshot (one of the
6390                          * latency tracers), it swaps the max buffer
6391                          * with the saved snapshot. We succeeded in
6392                          * updating the size of the main buffer, but failed to
6393                          * update the size of the max buffer. But when we tried
6394                          * to reset the main buffer to the original size, we
6395                          * failed there too. This is very unlikely to
6396                          * happen, but if it does, warn and kill all
6397                          * tracing.
6398                          */
6399                         WARN_ON(1);
6400                         tracing_disabled = 1;
6401                 }
6402                 goto out_start;
6403         }
6404
6405         update_buffer_entries(&tr->max_buffer, cpu);
6406
6407  out:
6408 #endif /* CONFIG_TRACER_MAX_TRACE */
6409
6410         update_buffer_entries(&tr->array_buffer, cpu);
6411  out_start:
6412         tracing_start_tr(tr);
6413         return ret;
6414 }
6415
6416 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6417                                   unsigned long size, int cpu_id)
6418 {
6419         int ret;
6420
6421         mutex_lock(&trace_types_lock);
6422
6423         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6424                 /* make sure this cpu is enabled in the mask */
6425                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6426                         ret = -EINVAL;
6427                         goto out;
6428                 }
6429         }
6430
6431         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6432         if (ret < 0)
6433                 ret = -ENOMEM;
6434
6435 out:
6436         mutex_unlock(&trace_types_lock);
6437
6438         return ret;
6439 }
6440
6441
6442 /**
6443  * tracing_update_buffers - used by tracing facility to expand ring buffers
6444  *
6445  * To save memory when tracing is never used on a system that has it
6446  * configured in, the ring buffers are set to a minimum size. Once a
6447  * user starts to use the tracing facility, they need to grow to
6448  * their default size.
6449  *
6450  * This function is to be called when a tracer is about to be used.
6451  */
6452 int tracing_update_buffers(void)
6453 {
6454         int ret = 0;
6455
6456         mutex_lock(&trace_types_lock);
6457         if (!ring_buffer_expanded)
6458                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6459                                                 RING_BUFFER_ALL_CPUS);
6460         mutex_unlock(&trace_types_lock);
6461
6462         return ret;
6463 }
6464
6465 struct trace_option_dentry;
6466
6467 static void
6468 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6469
6470 /*
6471  * Used to clear out the tracer before deletion of an instance.
6472  * Must have trace_types_lock held.
6473  */
6474 static void tracing_set_nop(struct trace_array *tr)
6475 {
6476         if (tr->current_trace == &nop_trace)
6477                 return;
6478
6479         tr->current_trace->enabled--;
6480
6481         if (tr->current_trace->reset)
6482                 tr->current_trace->reset(tr);
6483
6484         tr->current_trace = &nop_trace;
6485 }
6486
6487 static bool tracer_options_updated;
6488
6489 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6490 {
6491         /* Only enable if the directory has been created already. */
6492         if (!tr->dir)
6493                 return;
6494
6495         /* Only create trace option files after update_tracer_options finishes */
6496         if (!tracer_options_updated)
6497                 return;
6498
6499         create_trace_option_files(tr, t);
6500 }
6501
6502 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6503 {
6504         struct tracer *t;
6505 #ifdef CONFIG_TRACER_MAX_TRACE
6506         bool had_max_tr;
6507 #endif
6508         int ret = 0;
6509
6510         mutex_lock(&trace_types_lock);
6511
6512         if (!ring_buffer_expanded) {
6513                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6514                                                 RING_BUFFER_ALL_CPUS);
6515                 if (ret < 0)
6516                         goto out;
6517                 ret = 0;
6518         }
6519
6520         for (t = trace_types; t; t = t->next) {
6521                 if (strcmp(t->name, buf) == 0)
6522                         break;
6523         }
6524         if (!t) {
6525                 ret = -EINVAL;
6526                 goto out;
6527         }
6528         if (t == tr->current_trace)
6529                 goto out;
6530
6531 #ifdef CONFIG_TRACER_SNAPSHOT
6532         if (t->use_max_tr) {
6533                 local_irq_disable();
6534                 arch_spin_lock(&tr->max_lock);
6535                 if (tr->cond_snapshot)
6536                         ret = -EBUSY;
6537                 arch_spin_unlock(&tr->max_lock);
6538                 local_irq_enable();
6539                 if (ret)
6540                         goto out;
6541         }
6542 #endif
6543         /* Some tracers won't work on kernel command line */
6544         if (system_state < SYSTEM_RUNNING && t->noboot) {
6545                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6546                         t->name);
6547                 goto out;
6548         }
6549
6550         /* Some tracers are only allowed for the top level buffer */
6551         if (!trace_ok_for_array(t, tr)) {
6552                 ret = -EINVAL;
6553                 goto out;
6554         }
6555
6556         /* If trace pipe files are being read, we can't change the tracer */
6557         if (tr->trace_ref) {
6558                 ret = -EBUSY;
6559                 goto out;
6560         }
6561
6562         trace_branch_disable();
6563
6564         tr->current_trace->enabled--;
6565
6566         if (tr->current_trace->reset)
6567                 tr->current_trace->reset(tr);
6568
6569 #ifdef CONFIG_TRACER_MAX_TRACE
6570         had_max_tr = tr->current_trace->use_max_tr;
6571
6572         /* Current trace needs to be nop_trace before synchronize_rcu */
6573         tr->current_trace = &nop_trace;
6574
6575         if (had_max_tr && !t->use_max_tr) {
6576                 /*
6577                  * We need to make sure that the update_max_tr sees that
6578                  * current_trace changed to nop_trace to keep it from
6579                  * swapping the buffers after we resize it.
6580                  * The update_max_tr is called with interrupts disabled,
6581                  * so a synchronize_rcu() is sufficient.
6582                  */
6583                 synchronize_rcu();
6584                 free_snapshot(tr);
6585         }
6586
6587         if (t->use_max_tr && !tr->allocated_snapshot) {
6588                 ret = tracing_alloc_snapshot_instance(tr);
6589                 if (ret < 0)
6590                         goto out;
6591         }
6592 #else
6593         tr->current_trace = &nop_trace;
6594 #endif
6595
6596         if (t->init) {
6597                 ret = tracer_init(t, tr);
6598                 if (ret)
6599                         goto out;
6600         }
6601
6602         tr->current_trace = t;
6603         tr->current_trace->enabled++;
6604         trace_branch_enable(tr);
6605  out:
6606         mutex_unlock(&trace_types_lock);
6607
6608         return ret;
6609 }
6610
6611 static ssize_t
6612 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6613                         size_t cnt, loff_t *ppos)
6614 {
6615         struct trace_array *tr = filp->private_data;
6616         char buf[MAX_TRACER_SIZE+1];
6617         char *name;
6618         size_t ret;
6619         int err;
6620
6621         ret = cnt;
6622
6623         if (cnt > MAX_TRACER_SIZE)
6624                 cnt = MAX_TRACER_SIZE;
6625
6626         if (copy_from_user(buf, ubuf, cnt))
6627                 return -EFAULT;
6628
6629         buf[cnt] = 0;
6630
6631         name = strim(buf);
6632
6633         err = tracing_set_tracer(tr, name);
6634         if (err)
6635                 return err;
6636
6637         *ppos += ret;
6638
6639         return ret;
6640 }
6641
6642 static ssize_t
6643 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6644                    size_t cnt, loff_t *ppos)
6645 {
6646         char buf[64];
6647         int r;
6648
6649         r = snprintf(buf, sizeof(buf), "%ld\n",
6650                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6651         if (r > sizeof(buf))
6652                 r = sizeof(buf);
6653         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6654 }
6655
6656 static ssize_t
6657 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6658                     size_t cnt, loff_t *ppos)
6659 {
6660         unsigned long val;
6661         int ret;
6662
6663         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6664         if (ret)
6665                 return ret;
6666
6667         *ptr = val * 1000;
6668
6669         return cnt;
6670 }
6671
6672 static ssize_t
6673 tracing_thresh_read(struct file *filp, char __user *ubuf,
6674                     size_t cnt, loff_t *ppos)
6675 {
6676         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6677 }
6678
6679 static ssize_t
6680 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6681                      size_t cnt, loff_t *ppos)
6682 {
6683         struct trace_array *tr = filp->private_data;
6684         int ret;
6685
6686         mutex_lock(&trace_types_lock);
6687         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6688         if (ret < 0)
6689                 goto out;
6690
6691         if (tr->current_trace->update_thresh) {
6692                 ret = tr->current_trace->update_thresh(tr);
6693                 if (ret < 0)
6694                         goto out;
6695         }
6696
6697         ret = cnt;
6698 out:
6699         mutex_unlock(&trace_types_lock);
6700
6701         return ret;
6702 }
6703
6704 #ifdef CONFIG_TRACER_MAX_TRACE
6705
6706 static ssize_t
6707 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6708                      size_t cnt, loff_t *ppos)
6709 {
6710         struct trace_array *tr = filp->private_data;
6711
6712         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6713 }
6714
6715 static ssize_t
6716 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6717                       size_t cnt, loff_t *ppos)
6718 {
6719         struct trace_array *tr = filp->private_data;
6720
6721         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6722 }
6723
6724 #endif
6725
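     /*
      * A trace_pipe reader claims either every CPU (the all-CPU pipe, which
      * requires that no per-CPU pipe is already open) or a single bit in
      * tr->pipe_cpumask. A conflicting open fails with -EBUSY.
      */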
6726 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6727 {
6728         if (cpu == RING_BUFFER_ALL_CPUS) {
6729                 if (cpumask_empty(tr->pipe_cpumask)) {
6730                         cpumask_setall(tr->pipe_cpumask);
6731                         return 0;
6732                 }
6733         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6734                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6735                 return 0;
6736         }
6737         return -EBUSY;
6738 }
6739
6740 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6741 {
6742         if (cpu == RING_BUFFER_ALL_CPUS) {
6743                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6744                 cpumask_clear(tr->pipe_cpumask);
6745         } else {
6746                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6747                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6748         }
6749 }
6750
6751 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6752 {
6753         struct trace_array *tr = inode->i_private;
6754         struct trace_iterator *iter;
6755         int cpu;
6756         int ret;
6757
6758         ret = tracing_check_open_get_tr(tr);
6759         if (ret)
6760                 return ret;
6761
6762         mutex_lock(&trace_types_lock);
6763         cpu = tracing_get_cpu(inode);
6764         ret = open_pipe_on_cpu(tr, cpu);
6765         if (ret)
6766                 goto fail_pipe_on_cpu;
6767
6768         /* create a buffer to store the information to pass to userspace */
6769         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6770         if (!iter) {
6771                 ret = -ENOMEM;
6772                 goto fail_alloc_iter;
6773         }
6774
6775         trace_seq_init(&iter->seq);
6776         iter->trace = tr->current_trace;
6777
6778         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6779                 ret = -ENOMEM;
6780                 goto fail;
6781         }
6782
6783         /* trace pipe does not show start of buffer */
6784         cpumask_setall(iter->started);
6785
6786         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6787                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6788
6789         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6790         if (trace_clocks[tr->clock_id].in_ns)
6791                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6792
6793         iter->tr = tr;
6794         iter->array_buffer = &tr->array_buffer;
6795         iter->cpu_file = cpu;
6796         mutex_init(&iter->mutex);
6797         filp->private_data = iter;
6798
6799         if (iter->trace->pipe_open)
6800                 iter->trace->pipe_open(iter);
6801
6802         nonseekable_open(inode, filp);
6803
6804         tr->trace_ref++;
6805
6806         mutex_unlock(&trace_types_lock);
6807         return ret;
6808
6809 fail:
6810         kfree(iter);
6811 fail_alloc_iter:
6812         close_pipe_on_cpu(tr, cpu);
6813 fail_pipe_on_cpu:
6814         __trace_array_put(tr);
6815         mutex_unlock(&trace_types_lock);
6816         return ret;
6817 }
6818
6819 static int tracing_release_pipe(struct inode *inode, struct file *file)
6820 {
6821         struct trace_iterator *iter = file->private_data;
6822         struct trace_array *tr = inode->i_private;
6823
6824         mutex_lock(&trace_types_lock);
6825
6826         tr->trace_ref--;
6827
6828         if (iter->trace->pipe_close)
6829                 iter->trace->pipe_close(iter);
6830         close_pipe_on_cpu(tr, iter->cpu_file);
6831         mutex_unlock(&trace_types_lock);
6832
6833         free_trace_iter_content(iter);
6834         kfree(iter);
6835
6836         trace_array_put(tr);
6837
6838         return 0;
6839 }
6840
6841 static __poll_t
6842 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6843 {
6844         struct trace_array *tr = iter->tr;
6845
6846         /* Iterators are static; they should be filled or empty */
6847         if (trace_buffer_iter(iter, iter->cpu_file))
6848                 return EPOLLIN | EPOLLRDNORM;
6849
6850         if (tr->trace_flags & TRACE_ITER_BLOCK)
6851                 /*
6852                  * Always select as readable when in blocking mode
6853                  */
6854                 return EPOLLIN | EPOLLRDNORM;
6855         else
6856                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6857                                              filp, poll_table, iter->tr->buffer_percent);
6858 }
6859
6860 static __poll_t
6861 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6862 {
6863         struct trace_iterator *iter = filp->private_data;
6864
6865         return trace_poll(iter, filp, poll_table);
6866 }
6867
6868 /* Must be called with iter->mutex held. */
6869 static int tracing_wait_pipe(struct file *filp)
6870 {
6871         struct trace_iterator *iter = filp->private_data;
6872         int ret;
6873
6874         while (trace_empty(iter)) {
6875
6876                 if ((filp->f_flags & O_NONBLOCK)) {
6877                         return -EAGAIN;
6878                 }
6879
6880                 /*
6881                  * We block until we read something and tracing is disabled.
6882                  * We still block if tracing is disabled, but we have never
6883                  * read anything. This allows a user to cat this file, and
6884                  * then enable tracing. But after we have read something,
6885                  * we give an EOF when tracing is again disabled.
6886                  *
6887                  * iter->pos will be 0 if we haven't read anything.
6888                  */
6889                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6890                         break;
6891
6892                 mutex_unlock(&iter->mutex);
6893
6894                 ret = wait_on_pipe(iter, 0);
6895
6896                 mutex_lock(&iter->mutex);
6897
6898                 if (ret)
6899                         return ret;
6900         }
6901
6902         return 1;
6903 }
6904
6905 /*
6906  * Consumer reader.
6907  */
6908 static ssize_t
6909 tracing_read_pipe(struct file *filp, char __user *ubuf,
6910                   size_t cnt, loff_t *ppos)
6911 {
6912         struct trace_iterator *iter = filp->private_data;
6913         ssize_t sret;
6914
6915         /*
6916          * Avoid more than one consumer on a single file descriptor.
6917          * This is just a matter of trace coherency; the ring buffer itself
6918          * is protected.
6919          */
6920         mutex_lock(&iter->mutex);
6921
6922         /* return any leftover data */
6923         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6924         if (sret != -EBUSY)
6925                 goto out;
6926
6927         trace_seq_init(&iter->seq);
6928
6929         if (iter->trace->read) {
6930                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6931                 if (sret)
6932                         goto out;
6933         }
6934
6935 waitagain:
6936         sret = tracing_wait_pipe(filp);
6937         if (sret <= 0)
6938                 goto out;
6939
6940         /* stop when tracing is finished */
6941         if (trace_empty(iter)) {
6942                 sret = 0;
6943                 goto out;
6944         }
6945
6946         if (cnt >= PAGE_SIZE)
6947                 cnt = PAGE_SIZE - 1;
6948
6949         /* reset all but tr, trace, and overruns */
6950         trace_iterator_reset(iter);
6951         cpumask_clear(iter->started);
6952         trace_seq_init(&iter->seq);
6953
6954         trace_event_read_lock();
6955         trace_access_lock(iter->cpu_file);
6956         while (trace_find_next_entry_inc(iter) != NULL) {
6957                 enum print_line_t ret;
6958                 int save_len = iter->seq.seq.len;
6959
6960                 ret = print_trace_line(iter);
6961                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6962                         /*
6963                          * If one print_trace_line() fills the entire trace_seq in one shot,
6964                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6965                          * In that case we need to consume it; otherwise the loop will peek at
6966                          * this event again next time, resulting in an infinite loop.
6967                          */
6968                         if (save_len == 0) {
6969                                 iter->seq.full = 0;
6970                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6971                                 trace_consume(iter);
6972                                 break;
6973                         }
6974
6975                         /* In other cases, don't print partial lines */
6976                         iter->seq.seq.len = save_len;
6977                         break;
6978                 }
6979                 if (ret != TRACE_TYPE_NO_CONSUME)
6980                         trace_consume(iter);
6981
6982                 if (trace_seq_used(&iter->seq) >= cnt)
6983                         break;
6984
6985                 /*
6986                  * Setting the full flag means we reached the trace_seq buffer
6987                  * size and should have left via the partial-output condition above.
6988                  * One of the trace_seq_* functions is not being used properly.
6989                  */
6990                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6991                           iter->ent->type);
6992         }
6993         trace_access_unlock(iter->cpu_file);
6994         trace_event_read_unlock();
6995
6996         /* Now copy what we have to the user */
6997         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6998         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6999                 trace_seq_init(&iter->seq);
7000
7001         /*
7002          * If there was nothing to send to user, in spite of consuming trace
7003          * entries, go back to wait for more entries.
7004          */
7005         if (sret == -EBUSY)
7006                 goto waitagain;
7007
7008 out:
7009         mutex_unlock(&iter->mutex);
7010
7011         return sret;
7012 }
7013
7014 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7015                                      unsigned int idx)
7016 {
7017         __free_page(spd->pages[idx]);
7018 }
7019
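     /*
      * Fill iter->seq with as many complete trace lines as fit within @rem
      * bytes, consuming entries as it goes. Returns the number of bytes
      * left in the budget (0 once the page is full or there are no more
      * entries).
      */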
7020 static size_t
7021 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7022 {
7023         size_t count;
7024         int save_len;
7025         int ret;
7026
7027         /* Seq buffer is page-sized, exactly what we need. */
7028         for (;;) {
7029                 save_len = iter->seq.seq.len;
7030                 ret = print_trace_line(iter);
7031
7032                 if (trace_seq_has_overflowed(&iter->seq)) {
7033                         iter->seq.seq.len = save_len;
7034                         break;
7035                 }
7036
7037                 /*
7038                  * This should not be hit, because it should only
7039                  * be set if the iter->seq overflowed. But check it
7040                  * anyway to be safe.
7041                  */
7042                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7043                         iter->seq.seq.len = save_len;
7044                         break;
7045                 }
7046
7047                 count = trace_seq_used(&iter->seq) - save_len;
7048                 if (rem < count) {
7049                         rem = 0;
7050                         iter->seq.seq.len = save_len;
7051                         break;
7052                 }
7053
7054                 if (ret != TRACE_TYPE_NO_CONSUME)
7055                         trace_consume(iter);
7056                 rem -= count;
7057                 if (!trace_find_next_entry_inc(iter))   {
7058                         rem = 0;
7059                         iter->ent = NULL;
7060                         break;
7061                 }
7062         }
7063
7064         return rem;
7065 }
7066
7067 static ssize_t tracing_splice_read_pipe(struct file *filp,
7068                                         loff_t *ppos,
7069                                         struct pipe_inode_info *pipe,
7070                                         size_t len,
7071                                         unsigned int flags)
7072 {
7073         struct page *pages_def[PIPE_DEF_BUFFERS];
7074         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7075         struct trace_iterator *iter = filp->private_data;
7076         struct splice_pipe_desc spd = {
7077                 .pages          = pages_def,
7078                 .partial        = partial_def,
7079                 .nr_pages       = 0, /* This gets updated below. */
7080                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7081                 .ops            = &default_pipe_buf_ops,
7082                 .spd_release    = tracing_spd_release_pipe,
7083         };
7084         ssize_t ret;
7085         size_t rem;
7086         unsigned int i;
7087
7088         if (splice_grow_spd(pipe, &spd))
7089                 return -ENOMEM;
7090
7091         mutex_lock(&iter->mutex);
7092
7093         if (iter->trace->splice_read) {
7094                 ret = iter->trace->splice_read(iter, filp,
7095                                                ppos, pipe, len, flags);
7096                 if (ret)
7097                         goto out_err;
7098         }
7099
7100         ret = tracing_wait_pipe(filp);
7101         if (ret <= 0)
7102                 goto out_err;
7103
7104         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7105                 ret = -EFAULT;
7106                 goto out_err;
7107         }
7108
7109         trace_event_read_lock();
7110         trace_access_lock(iter->cpu_file);
7111
7112         /* Fill as many pages as possible. */
7113         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7114                 spd.pages[i] = alloc_page(GFP_KERNEL);
7115                 if (!spd.pages[i])
7116                         break;
7117
7118                 rem = tracing_fill_pipe_page(rem, iter);
7119
7120                 /* Copy the data into the page, so we can start over. */
7121                 ret = trace_seq_to_buffer(&iter->seq,
7122                                           page_address(spd.pages[i]),
7123                                           trace_seq_used(&iter->seq));
7124                 if (ret < 0) {
7125                         __free_page(spd.pages[i]);
7126                         break;
7127                 }
7128                 spd.partial[i].offset = 0;
7129                 spd.partial[i].len = trace_seq_used(&iter->seq);
7130
7131                 trace_seq_init(&iter->seq);
7132         }
7133
7134         trace_access_unlock(iter->cpu_file);
7135         trace_event_read_unlock();
7136         mutex_unlock(&iter->mutex);
7137
7138         spd.nr_pages = i;
7139
7140         if (i)
7141                 ret = splice_to_pipe(pipe, &spd);
7142         else
7143                 ret = 0;
7144 out:
7145         splice_shrink_spd(&spd);
7146         return ret;
7147
7148 out_err:
7149         mutex_unlock(&iter->mutex);
7150         goto out;
7151 }
7152
7153 static ssize_t
7154 tracing_entries_read(struct file *filp, char __user *ubuf,
7155                      size_t cnt, loff_t *ppos)
7156 {
7157         struct inode *inode = file_inode(filp);
7158         struct trace_array *tr = inode->i_private;
7159         int cpu = tracing_get_cpu(inode);
7160         char buf[64];
7161         int r = 0;
7162         ssize_t ret;
7163
7164         mutex_lock(&trace_types_lock);
7165
7166         if (cpu == RING_BUFFER_ALL_CPUS) {
7167                 int cpu, buf_size_same;
7168                 unsigned long size;
7169
7170                 size = 0;
7171                 buf_size_same = 1;
7172                 /* check if all cpu sizes are same */
7173                 for_each_tracing_cpu(cpu) {
7174                         /* fill in the size from first enabled cpu */
7175                         if (size == 0)
7176                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7177                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7178                                 buf_size_same = 0;
7179                                 break;
7180                         }
7181                 }
7182
7183                 if (buf_size_same) {
7184                         if (!ring_buffer_expanded)
7185                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7186                                             size >> 10,
7187                                             trace_buf_size >> 10);
7188                         else
7189                                 r = sprintf(buf, "%lu\n", size >> 10);
7190                 } else
7191                         r = sprintf(buf, "X\n");
7192         } else
7193                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7194
7195         mutex_unlock(&trace_types_lock);
7196
7197         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7198         return ret;
7199 }
7200
7201 static ssize_t
7202 tracing_entries_write(struct file *filp, const char __user *ubuf,
7203                       size_t cnt, loff_t *ppos)
7204 {
7205         struct inode *inode = file_inode(filp);
7206         struct trace_array *tr = inode->i_private;
7207         unsigned long val;
7208         int ret;
7209
7210         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7211         if (ret)
7212                 return ret;
7213
7214         /* must have at least 1 entry */
7215         if (!val)
7216                 return -EINVAL;
7217
7218         /* value is in KB */
7219         val <<= 10;
7220         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7221         if (ret < 0)
7222                 return ret;
7223
7224         *ppos += cnt;
7225
7226         return cnt;
7227 }
7228
7229 static ssize_t
7230 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7231                                 size_t cnt, loff_t *ppos)
7232 {
7233         struct trace_array *tr = filp->private_data;
7234         char buf[64];
7235         int r, cpu;
7236         unsigned long size = 0, expanded_size = 0;
7237
7238         mutex_lock(&trace_types_lock);
7239         for_each_tracing_cpu(cpu) {
7240                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7241                 if (!ring_buffer_expanded)
7242                         expanded_size += trace_buf_size >> 10;
7243         }
7244         if (ring_buffer_expanded)
7245                 r = sprintf(buf, "%lu\n", size);
7246         else
7247                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7248         mutex_unlock(&trace_types_lock);
7249
7250         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7251 }
7252
7253 static ssize_t
7254 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7255                           size_t cnt, loff_t *ppos)
7256 {
7257         /*
7258          * There is no need to read what the user has written; this function
7259          * just makes sure that there is no error when "echo" is used.
7260          */
7261
7262         *ppos += cnt;
7263
7264         return cnt;
7265 }
7266
7267 static int
7268 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7269 {
7270         struct trace_array *tr = inode->i_private;
7271
7272         /* disable tracing? */
7273         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7274                 tracer_tracing_off(tr);
7275         /* resize the ring buffer to 0 */
7276         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7277
7278         trace_array_put(tr);
7279
7280         return 0;
7281 }
7282
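     /*
      * Handles writes to the trace_marker file. The user string is copied
      * straight into a TRACE_PRINT event (capped at TRACE_BUF_SIZE); if the
      * copy faults, the literal "<faulted>" is recorded instead, and a
      * trailing newline is appended when the string does not end with one.
      */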
7283 static ssize_t
7284 tracing_mark_write(struct file *filp, const char __user *ubuf,
7285                                         size_t cnt, loff_t *fpos)
7286 {
7287         struct trace_array *tr = filp->private_data;
7288         struct ring_buffer_event *event;
7289         enum event_trigger_type tt = ETT_NONE;
7290         struct trace_buffer *buffer;
7291         struct print_entry *entry;
7292         ssize_t written;
7293         int size;
7294         int len;
7295
7296 /* Used in tracing_mark_raw_write() as well */
7297 #define FAULTED_STR "<faulted>"
7298 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7299
7300         if (tracing_disabled)
7301                 return -EINVAL;
7302
7303         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7304                 return -EINVAL;
7305
7306         if (cnt > TRACE_BUF_SIZE)
7307                 cnt = TRACE_BUF_SIZE;
7308
7309         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7310
7311         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7312
7313         /* If less than "<faulted>", then make sure we can still add that */
7314         if (cnt < FAULTED_SIZE)
7315                 size += FAULTED_SIZE - cnt;
7316
7317         buffer = tr->array_buffer.buffer;
7318         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7319                                             tracing_gen_ctx());
7320         if (unlikely(!event))
7321                 /* Ring buffer disabled, return as if not open for write */
7322                 return -EBADF;
7323
7324         entry = ring_buffer_event_data(event);
7325         entry->ip = _THIS_IP_;
7326
7327         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7328         if (len) {
7329                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7330                 cnt = FAULTED_SIZE;
7331                 written = -EFAULT;
7332         } else
7333                 written = cnt;
7334
7335         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7336                 /* do not add \n before testing triggers, but add \0 */
7337                 entry->buf[cnt] = '\0';
7338                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7339         }
7340
7341         if (entry->buf[cnt - 1] != '\n') {
7342                 entry->buf[cnt] = '\n';
7343                 entry->buf[cnt + 1] = '\0';
7344         } else
7345                 entry->buf[cnt] = '\0';
7346
7347         if (static_branch_unlikely(&trace_marker_exports_enabled))
7348                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7349         __buffer_unlock_commit(buffer, event);
7350
7351         if (tt)
7352                 event_triggers_post_call(tr->trace_marker_file, tt);
7353
7354         return written;
7355 }
7356
7357 /* Limit it for now to 3K (including tag) */
7358 #define RAW_DATA_MAX_SIZE (1024*3)
7359
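     /*
      * Raw marker writes consist of a binary tag followed by an opaque
      * payload, delivered in a single write(). An illustrative user-space
      * sketch ("tag", "payload" and "payload_len" are hypothetical names):
      *
      *     char rec[sizeof(unsigned int) + payload_len];
      *
      *     memcpy(rec, &tag, sizeof(unsigned int));
      *     memcpy(rec + sizeof(unsigned int), payload, payload_len);
      *     write(fd, rec, sizeof(rec));
      *
      * The first sizeof(unsigned int) bytes land in entry->id and any
      * remaining bytes in the buffer that follows it.
      */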
7360 static ssize_t
7361 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7362                                         size_t cnt, loff_t *fpos)
7363 {
7364         struct trace_array *tr = filp->private_data;
7365         struct ring_buffer_event *event;
7366         struct trace_buffer *buffer;
7367         struct raw_data_entry *entry;
7368         ssize_t written;
7369         int size;
7370         int len;
7371
7372 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7373
7374         if (tracing_disabled)
7375                 return -EINVAL;
7376
7377         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7378                 return -EINVAL;
7379
7380         /* The marker must at least have a tag id */
7381         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7382                 return -EINVAL;
7383
7384         if (cnt > TRACE_BUF_SIZE)
7385                 cnt = TRACE_BUF_SIZE;
7386
7387         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7388
7389         size = sizeof(*entry) + cnt;
7390         if (cnt < FAULT_SIZE_ID)
7391                 size += FAULT_SIZE_ID - cnt;
7392
7393         buffer = tr->array_buffer.buffer;
7394         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7395                                             tracing_gen_ctx());
7396         if (!event)
7397                 /* Ring buffer disabled, return as if not open for write */
7398                 return -EBADF;
7399
7400         entry = ring_buffer_event_data(event);
7401
7402         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7403         if (len) {
7404                 entry->id = -1;
7405                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7406                 written = -EFAULT;
7407         } else
7408                 written = cnt;
7409
7410         __buffer_unlock_commit(buffer, event);
7411
7412         return written;
7413 }
7414
7415 static int tracing_clock_show(struct seq_file *m, void *v)
7416 {
7417         struct trace_array *tr = m->private;
7418         int i;
7419
7420         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7421                 seq_printf(m,
7422                         "%s%s%s%s", i ? " " : "",
7423                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7424                         i == tr->clock_id ? "]" : "");
7425         seq_putc(m, '\n');
7426
7427         return 0;
7428 }
7429
7430 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7431 {
7432         int i;
7433
7434         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7435                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7436                         break;
7437         }
7438         if (i == ARRAY_SIZE(trace_clocks))
7439                 return -EINVAL;
7440
7441         mutex_lock(&trace_types_lock);
7442
7443         tr->clock_id = i;
7444
7445         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7446
7447         /*
7448          * New clock may not be consistent with the previous clock.
7449          * Reset the buffer so that it doesn't have incomparable timestamps.
7450          */
7451         tracing_reset_online_cpus(&tr->array_buffer);
7452
7453 #ifdef CONFIG_TRACER_MAX_TRACE
7454         if (tr->max_buffer.buffer)
7455                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7456         tracing_reset_online_cpus(&tr->max_buffer);
7457 #endif
7458
7459         mutex_unlock(&trace_types_lock);
7460
7461         return 0;
7462 }
7463
7464 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7465                                    size_t cnt, loff_t *fpos)
7466 {
7467         struct seq_file *m = filp->private_data;
7468         struct trace_array *tr = m->private;
7469         char buf[64];
7470         const char *clockstr;
7471         int ret;
7472
7473         if (cnt >= sizeof(buf))
7474                 return -EINVAL;
7475
7476         if (copy_from_user(buf, ubuf, cnt))
7477                 return -EFAULT;
7478
7479         buf[cnt] = 0;
7480
7481         clockstr = strstrip(buf);
7482
7483         ret = tracing_set_clock(tr, clockstr);
7484         if (ret)
7485                 return ret;
7486
7487         *fpos += cnt;
7488
7489         return cnt;
7490 }
7491
7492 static int tracing_clock_open(struct inode *inode, struct file *file)
7493 {
7494         struct trace_array *tr = inode->i_private;
7495         int ret;
7496
7497         ret = tracing_check_open_get_tr(tr);
7498         if (ret)
7499                 return ret;
7500
7501         ret = single_open(file, tracing_clock_show, inode->i_private);
7502         if (ret < 0)
7503                 trace_array_put(tr);
7504
7505         return ret;
7506 }
7507
7508 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7509 {
7510         struct trace_array *tr = m->private;
7511
7512         mutex_lock(&trace_types_lock);
7513
7514         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7515                 seq_puts(m, "delta [absolute]\n");
7516         else
7517                 seq_puts(m, "[delta] absolute\n");
7518
7519         mutex_unlock(&trace_types_lock);
7520
7521         return 0;
7522 }
7523
7524 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7525 {
7526         struct trace_array *tr = inode->i_private;
7527         int ret;
7528
7529         ret = tracing_check_open_get_tr(tr);
7530         if (ret)
7531                 return ret;
7532
7533         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7534         if (ret < 0)
7535                 trace_array_put(tr);
7536
7537         return ret;
7538 }
7539
7540 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7541 {
7542         if (rbe == this_cpu_read(trace_buffered_event))
7543                 return ring_buffer_time_stamp(buffer);
7544
7545         return ring_buffer_event_time_stamp(buffer, rbe);
7546 }
7547
7548 /*
7549  * Set or disable using the per CPU trace_buffered_event when possible.
7550  */
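     /*
      * Calls may nest: no_filter_buffering_ref counts outstanding requests
      * to bypass the buffered event, so only the first set and the matching
      * final clear change behavior.
      */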
7551 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7552 {
7553         int ret = 0;
7554
7555         mutex_lock(&trace_types_lock);
7556
7557         if (set && tr->no_filter_buffering_ref++)
7558                 goto out;
7559
7560         if (!set) {
7561                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7562                         ret = -EINVAL;
7563                         goto out;
7564                 }
7565
7566                 --tr->no_filter_buffering_ref;
7567         }
7568  out:
7569         mutex_unlock(&trace_types_lock);
7570
7571         return ret;
7572 }
7573
7574 struct ftrace_buffer_info {
7575         struct trace_iterator   iter;
7576         void                    *spare;
7577         unsigned int            spare_cpu;
7578         unsigned int            read;
7579 };
7580
7581 #ifdef CONFIG_TRACER_SNAPSHOT
7582 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7583 {
7584         struct trace_array *tr = inode->i_private;
7585         struct trace_iterator *iter;
7586         struct seq_file *m;
7587         int ret;
7588
7589         ret = tracing_check_open_get_tr(tr);
7590         if (ret)
7591                 return ret;
7592
7593         if (file->f_mode & FMODE_READ) {
7594                 iter = __tracing_open(inode, file, true);
7595                 if (IS_ERR(iter))
7596                         ret = PTR_ERR(iter);
7597         } else {
7598                 /* Writes still need the seq_file to hold the private data */
7599                 ret = -ENOMEM;
7600                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7601                 if (!m)
7602                         goto out;
7603                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7604                 if (!iter) {
7605                         kfree(m);
7606                         goto out;
7607                 }
7608                 ret = 0;
7609
7610                 iter->tr = tr;
7611                 iter->array_buffer = &tr->max_buffer;
7612                 iter->cpu_file = tracing_get_cpu(inode);
7613                 m->private = iter;
7614                 file->private_data = m;
7615         }
7616 out:
7617         if (ret < 0)
7618                 trace_array_put(tr);
7619
7620         return ret;
7621 }
7622
7623 static void tracing_swap_cpu_buffer(void *tr)
7624 {
7625         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7626 }
7627
7628 static ssize_t
7629 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7630                        loff_t *ppos)
7631 {
7632         struct seq_file *m = filp->private_data;
7633         struct trace_iterator *iter = m->private;
7634         struct trace_array *tr = iter->tr;
7635         unsigned long val;
7636         int ret;
7637
7638         ret = tracing_update_buffers();
7639         if (ret < 0)
7640                 return ret;
7641
7642         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7643         if (ret)
7644                 return ret;
7645
7646         mutex_lock(&trace_types_lock);
7647
7648         if (tr->current_trace->use_max_tr) {
7649                 ret = -EBUSY;
7650                 goto out;
7651         }
7652
7653         local_irq_disable();
7654         arch_spin_lock(&tr->max_lock);
7655         if (tr->cond_snapshot)
7656                 ret = -EBUSY;
7657         arch_spin_unlock(&tr->max_lock);
7658         local_irq_enable();
7659         if (ret)
7660                 goto out;
7661
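             /*
              * Values written to the snapshot file:
              *   0 - free the snapshot buffer (all-CPU file only)
              *   1 - allocate the snapshot buffer if needed and swap it with
              *       the live buffer (per-CPU swap only when the ring buffer
              *       supports it)
              *   other - clear the snapshot buffer contents
              */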
7662         switch (val) {
7663         case 0:
7664                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7665                         ret = -EINVAL;
7666                         break;
7667                 }
7668                 if (tr->allocated_snapshot)
7669                         free_snapshot(tr);
7670                 break;
7671         case 1:
7672 /* Only allow per-cpu swap if the ring buffer supports it */
7673 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7674                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7675                         ret = -EINVAL;
7676                         break;
7677                 }
7678 #endif
7679                 if (tr->allocated_snapshot)
7680                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7681                                         &tr->array_buffer, iter->cpu_file);
7682                 else
7683                         ret = tracing_alloc_snapshot_instance(tr);
7684                 if (ret < 0)
7685                         break;
7686                 /* Now, we're going to swap */
7687                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7688                         local_irq_disable();
7689                         update_max_tr(tr, current, smp_processor_id(), NULL);
7690                         local_irq_enable();
7691                 } else {
7692                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7693                                                  (void *)tr, 1);
7694                 }
7695                 break;
7696         default:
7697                 if (tr->allocated_snapshot) {
7698                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7699                                 tracing_reset_online_cpus(&tr->max_buffer);
7700                         else
7701                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7702                 }
7703                 break;
7704         }
7705
7706         if (ret >= 0) {
7707                 *ppos += cnt;
7708                 ret = cnt;
7709         }
7710 out:
7711         mutex_unlock(&trace_types_lock);
7712         return ret;
7713 }
7714
7715 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7716 {
7717         struct seq_file *m = file->private_data;
7718         int ret;
7719
7720         ret = tracing_release(inode, file);
7721
7722         if (file->f_mode & FMODE_READ)
7723                 return ret;
7724
7725         /* If write only, the seq_file is just a stub */
7726         if (m)
7727                 kfree(m->private);
7728         kfree(m);
7729
7730         return 0;
7731 }
7732
7733 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7734 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7735                                     size_t count, loff_t *ppos);
7736 static int tracing_buffers_release(struct inode *inode, struct file *file);
7737 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7738                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7739
7740 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7741 {
7742         struct ftrace_buffer_info *info;
7743         int ret;
7744
7745         /* The following checks for tracefs lockdown */
7746         ret = tracing_buffers_open(inode, filp);
7747         if (ret < 0)
7748                 return ret;
7749
7750         info = filp->private_data;
7751
7752         if (info->iter.trace->use_max_tr) {
7753                 tracing_buffers_release(inode, filp);
7754                 return -EBUSY;
7755         }
7756
7757         info->iter.snapshot = true;
7758         info->iter.array_buffer = &info->iter.tr->max_buffer;
7759
7760         return ret;
7761 }
7762
7763 #endif /* CONFIG_TRACER_SNAPSHOT */
7764
7765
7766 static const struct file_operations tracing_thresh_fops = {
7767         .open           = tracing_open_generic,
7768         .read           = tracing_thresh_read,
7769         .write          = tracing_thresh_write,
7770         .llseek         = generic_file_llseek,
7771 };
7772
7773 #ifdef CONFIG_TRACER_MAX_TRACE
7774 static const struct file_operations tracing_max_lat_fops = {
7775         .open           = tracing_open_generic_tr,
7776         .read           = tracing_max_lat_read,
7777         .write          = tracing_max_lat_write,
7778         .llseek         = generic_file_llseek,
7779         .release        = tracing_release_generic_tr,
7780 };
7781 #endif
7782
7783 static const struct file_operations set_tracer_fops = {
7784         .open           = tracing_open_generic_tr,
7785         .read           = tracing_set_trace_read,
7786         .write          = tracing_set_trace_write,
7787         .llseek         = generic_file_llseek,
7788         .release        = tracing_release_generic_tr,
7789 };
7790
7791 static const struct file_operations tracing_pipe_fops = {
7792         .open           = tracing_open_pipe,
7793         .poll           = tracing_poll_pipe,
7794         .read           = tracing_read_pipe,
7795         .splice_read    = tracing_splice_read_pipe,
7796         .release        = tracing_release_pipe,
7797         .llseek         = no_llseek,
7798 };
7799
7800 static const struct file_operations tracing_entries_fops = {
7801         .open           = tracing_open_generic_tr,
7802         .read           = tracing_entries_read,
7803         .write          = tracing_entries_write,
7804         .llseek         = generic_file_llseek,
7805         .release        = tracing_release_generic_tr,
7806 };
7807
7808 static const struct file_operations tracing_total_entries_fops = {
7809         .open           = tracing_open_generic_tr,
7810         .read           = tracing_total_entries_read,
7811         .llseek         = generic_file_llseek,
7812         .release        = tracing_release_generic_tr,
7813 };
7814
7815 static const struct file_operations tracing_free_buffer_fops = {
7816         .open           = tracing_open_generic_tr,
7817         .write          = tracing_free_buffer_write,
7818         .release        = tracing_free_buffer_release,
7819 };
7820
7821 static const struct file_operations tracing_mark_fops = {
7822         .open           = tracing_mark_open,
7823         .write          = tracing_mark_write,
7824         .release        = tracing_release_generic_tr,
7825 };
7826
7827 static const struct file_operations tracing_mark_raw_fops = {
7828         .open           = tracing_mark_open,
7829         .write          = tracing_mark_raw_write,
7830         .release        = tracing_release_generic_tr,
7831 };
7832
7833 static const struct file_operations trace_clock_fops = {
7834         .open           = tracing_clock_open,
7835         .read           = seq_read,
7836         .llseek         = seq_lseek,
7837         .release        = tracing_single_release_tr,
7838         .write          = tracing_clock_write,
7839 };
7840
7841 static const struct file_operations trace_time_stamp_mode_fops = {
7842         .open           = tracing_time_stamp_mode_open,
7843         .read           = seq_read,
7844         .llseek         = seq_lseek,
7845         .release        = tracing_single_release_tr,
7846 };
7847
7848 #ifdef CONFIG_TRACER_SNAPSHOT
7849 static const struct file_operations snapshot_fops = {
7850         .open           = tracing_snapshot_open,
7851         .read           = seq_read,
7852         .write          = tracing_snapshot_write,
7853         .llseek         = tracing_lseek,
7854         .release        = tracing_snapshot_release,
7855 };
7856
7857 static const struct file_operations snapshot_raw_fops = {
7858         .open           = snapshot_raw_open,
7859         .read           = tracing_buffers_read,
7860         .release        = tracing_buffers_release,
7861         .splice_read    = tracing_buffers_splice_read,
7862         .llseek         = no_llseek,
7863 };
7864
7865 #endif /* CONFIG_TRACER_SNAPSHOT */
7866
7867 /*
7868  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7869  * @filp: The active open file structure
7870  * @ubuf: The userspace provided buffer holding the value to write
7871  * @cnt: The number of bytes available in @ubuf
7872  * @ppos: The current "file" position
7873  *
7874  * This function implements the write interface for a struct trace_min_max_param.
7875  * The filp->private_data must point to a trace_min_max_param structure that
7876  * defines where to write the value, the min and the max acceptable values,
7877  * and a lock to protect the write.
7878  */
7879 static ssize_t
7880 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7881 {
7882         struct trace_min_max_param *param = filp->private_data;
7883         u64 val;
7884         int err;
7885
7886         if (!param)
7887                 return -EFAULT;
7888
7889         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7890         if (err)
7891                 return err;
7892
7893         if (param->lock)
7894                 mutex_lock(param->lock);
7895
7896         if (param->min && val < *param->min)
7897                 err = -EINVAL;
7898
7899         if (param->max && val > *param->max)
7900                 err = -EINVAL;
7901
7902         if (!err)
7903                 *param->val = val;
7904
7905         if (param->lock)
7906                 mutex_unlock(param->lock);
7907
7908         if (err)
7909                 return err;
7910
7911         return cnt;
7912 }
7913
7914 /*
7915  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7916  * @filp: The active open file structure
7917  * @ubuf: The userspace provided buffer to read value into
7918  * @cnt: The maximum number of bytes to read
7919  * @ppos: The current "file" position
7920  *
7921  * This function implements the read interface for a struct trace_min_max_param.
7922  * The filp->private_data must point to a trace_min_max_param struct with valid
7923  * data.
7924  */
7925 static ssize_t
7926 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7927 {
7928         struct trace_min_max_param *param = filp->private_data;
7929         char buf[U64_STR_SIZE];
7930         int len;
7931         u64 val;
7932
7933         if (!param)
7934                 return -EFAULT;
7935
7936         val = *param->val;
7937
7938         if (cnt > sizeof(buf))
7939                 cnt = sizeof(buf);
7940
7941         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7942
7943         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7944 }
7945
7946 const struct file_operations trace_min_max_fops = {
7947         .open           = tracing_open_generic,
7948         .read           = trace_min_max_read,
7949         .write          = trace_min_max_write,
7950 };
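
/*
 * Illustrative sketch (not an existing caller): a u64 tunable could be
 * wired to trace_min_max_fops roughly like this, assuming the usual
 * trace_min_max_param layout (lock/val/min/max) from trace.h:
 *
 *     static u64 foo_val = 50, foo_min = 1, foo_max = 100;
 *     static DEFINE_MUTEX(foo_lock);
 *     static struct trace_min_max_param foo_param = {
 *             .lock = &foo_lock,
 *             .val  = &foo_val,
 *             .min  = &foo_min,
 *             .max  = &foo_max,
 *     };
 *
 *     trace_create_file("foo", TRACE_MODE_WRITE, parent, &foo_param,
 *                       &trace_min_max_fops);
 *
 * With this setup, trace_min_max_write() rejects values outside [1, 100]
 * with -EINVAL, and trace_min_max_read() reports the current value
 * followed by a newline.
 */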
7951
7952 #define TRACING_LOG_ERRS_MAX    8
7953 #define TRACING_LOG_LOC_MAX     128
7954
7955 #define CMD_PREFIX "  Command: "
7956
7957 struct err_info {
7958         const char      **errs; /* ptr to loc-specific array of err strings */
7959         u8              type;   /* index into errs -> specific err string */
7960         u16             pos;    /* caret position */
7961         u64             ts;
7962 };
7963
7964 struct tracing_log_err {
7965         struct list_head        list;
7966         struct err_info         info;
7967         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7968         char                    *cmd;                     /* what caused err */
7969 };
7970
7971 static DEFINE_MUTEX(tracing_err_log_lock);
7972
7973 static struct tracing_log_err *alloc_tracing_log_err(int len)
7974 {
7975         struct tracing_log_err *err;
7976
7977         err = kzalloc(sizeof(*err), GFP_KERNEL);
7978         if (!err)
7979                 return ERR_PTR(-ENOMEM);
7980
7981         err->cmd = kzalloc(len, GFP_KERNEL);
7982         if (!err->cmd) {
7983                 kfree(err);
7984                 return ERR_PTR(-ENOMEM);
7985         }
7986
7987         return err;
7988 }
7989
7990 static void free_tracing_log_err(struct tracing_log_err *err)
7991 {
7992         kfree(err->cmd);
7993         kfree(err);
7994 }
7995
7996 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7997                                                    int len)
7998 {
7999         struct tracing_log_err *err;
8000         char *cmd;
8001
8002         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8003                 err = alloc_tracing_log_err(len);
8004                 if (PTR_ERR(err) != -ENOMEM)
8005                         tr->n_err_log_entries++;
8006
8007                 return err;
8008         }
8009         cmd = kzalloc(len, GFP_KERNEL);
8010         if (!cmd)
8011                 return ERR_PTR(-ENOMEM);
8012         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8013         kfree(err->cmd);
8014         err->cmd = cmd;
8015         list_del(&err->list);
8016
8017         return err;
8018 }
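
/*
 * Note: once TRACING_LOG_ERRS_MAX entries exist, get_tracing_log_err()
 * does not allocate a new entry; it recycles the oldest one on tr->err_log
 * (replacing its cmd buffer with one sized for the new command), so the
 * log always holds at most the last TRACING_LOG_ERRS_MAX errors.
 */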
8019
8020 /**
8021  * err_pos - find the position of a string within a command for error careting
8022  * @cmd: The tracing command that caused the error
8023  * @str: The string to position the caret at within @cmd
8024  *
8025  * Finds the position of the first occurrence of @str within @cmd.  The
8026  * return value can be passed to tracing_log_err() for caret placement
8027  * within @cmd.
8028  *
8029  * Returns the index within @cmd of the first occurrence of @str or 0
8030  * if @str was not found.
8031  */
8032 unsigned int err_pos(char *cmd, const char *str)
8033 {
8034         char *found;
8035
8036         if (WARN_ON(!strlen(cmd)))
8037                 return 0;
8038
8039         found = strstr(cmd, str);
8040         if (found)
8041                 return found - cmd;
8042
8043         return 0;
8044 }
8045
8046 /**
8047  * tracing_log_err - write an error to the tracing error log
8048  * @tr: The associated trace array for the error (NULL for top level array)
8049  * @loc: A string describing where the error occurred
8050  * @cmd: The tracing command that caused the error
8051  * @errs: The array of loc-specific static error strings
8052  * @type: The index into errs[], which produces the specific static err string
8053  * @pos: The position the caret should be placed in the cmd
8054  *
8055  * Writes an error into tracing/error_log of the form:
8056  *
8057  * <loc>: error: <text>
8058  *   Command: <cmd>
8059  *              ^
8060  *
8061  * tracing/error_log is a small log file containing the last
8062  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8063  * unless there has been a tracing error, and the error log can be
8064  * cleared, and its memory freed, by writing the empty string to it in
8065  * truncation mode, i.e. echo > tracing/error_log.
8066  *
8067  * NOTE: the @errs array along with the @type param are used to
8068  * produce a static error string - this string is not copied and saved
8069  * when the error is logged - only a pointer to it is saved.  See
8070  * existing callers for examples of how static strings are typically
8071  * defined for use with tracing_log_err().
8072  */
8073 void tracing_log_err(struct trace_array *tr,
8074                      const char *loc, const char *cmd,
8075                      const char **errs, u8 type, u16 pos)
8076 {
8077         struct tracing_log_err *err;
8078         int len = 0;
8079
8080         if (!tr)
8081                 tr = &global_trace;
8082
8083         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8084
8085         mutex_lock(&tracing_err_log_lock);
8086         err = get_tracing_log_err(tr, len);
8087         if (PTR_ERR(err) == -ENOMEM) {
8088                 mutex_unlock(&tracing_err_log_lock);
8089                 return;
8090         }
8091
8092         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8093         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8094
8095         err->info.errs = errs;
8096         err->info.type = type;
8097         err->info.pos = pos;
8098         err->info.ts = local_clock();
8099
8100         list_add_tail(&err->list, &tr->err_log);
8101         mutex_unlock(&tracing_err_log_lock);
8102 }
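
/*
 * Illustrative caller sketch (the names below are hypothetical, not an
 * existing user): a command parser with its own static error strings
 * could report a bad keyword like this:
 *
 *     static const char *foo_cmd_errs[] = {
 *             "Unknown keyword",
 *             "Missing argument",
 *     };
 *
 *     tracing_log_err(tr, "foo_cmd", cmd, foo_cmd_errs,
 *                     0, err_pos(cmd, bad_word));
 *
 * which appears in tracing/error_log as the "Unknown keyword" message with
 * the caret placed under the first occurrence of bad_word within cmd.
 */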
8103
8104 static void clear_tracing_err_log(struct trace_array *tr)
8105 {
8106         struct tracing_log_err *err, *next;
8107
8108         mutex_lock(&tracing_err_log_lock);
8109         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8110                 list_del(&err->list);
8111                 free_tracing_log_err(err);
8112         }
8113
8114         tr->n_err_log_entries = 0;
8115         mutex_unlock(&tracing_err_log_lock);
8116 }
8117
8118 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8119 {
8120         struct trace_array *tr = m->private;
8121
8122         mutex_lock(&tracing_err_log_lock);
8123
8124         return seq_list_start(&tr->err_log, *pos);
8125 }
8126
8127 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8128 {
8129         struct trace_array *tr = m->private;
8130
8131         return seq_list_next(v, &tr->err_log, pos);
8132 }
8133
8134 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8135 {
8136         mutex_unlock(&tracing_err_log_lock);
8137 }
8138
8139 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8140 {
8141         u16 i;
8142
8143         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8144                 seq_putc(m, ' ');
8145         for (i = 0; i < pos; i++)
8146                 seq_putc(m, ' ');
8147         seq_puts(m, "^\n");
8148 }
8149
8150 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8151 {
8152         struct tracing_log_err *err = v;
8153
8154         if (err) {
8155                 const char *err_text = err->info.errs[err->info.type];
8156                 u64 sec = err->info.ts;
8157                 u32 nsec;
8158
8159                 nsec = do_div(sec, NSEC_PER_SEC);
8160                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8161                            err->loc, err_text);
8162                 seq_printf(m, "%s", err->cmd);
8163                 tracing_err_log_show_pos(m, err->info.pos);
8164         }
8165
8166         return 0;
8167 }
8168
8169 static const struct seq_operations tracing_err_log_seq_ops = {
8170         .start  = tracing_err_log_seq_start,
8171         .next   = tracing_err_log_seq_next,
8172         .stop   = tracing_err_log_seq_stop,
8173         .show   = tracing_err_log_seq_show
8174 };
8175
8176 static int tracing_err_log_open(struct inode *inode, struct file *file)
8177 {
8178         struct trace_array *tr = inode->i_private;
8179         int ret = 0;
8180
8181         ret = tracing_check_open_get_tr(tr);
8182         if (ret)
8183                 return ret;
8184
8185         /* If this file was opened for write, then erase contents */
8186         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8187                 clear_tracing_err_log(tr);
8188
8189         if (file->f_mode & FMODE_READ) {
8190                 ret = seq_open(file, &tracing_err_log_seq_ops);
8191                 if (!ret) {
8192                         struct seq_file *m = file->private_data;
8193                         m->private = tr;
8194                 } else {
8195                         trace_array_put(tr);
8196                 }
8197         }
8198         return ret;
8199 }
8200
8201 static ssize_t tracing_err_log_write(struct file *file,
8202                                      const char __user *buffer,
8203                                      size_t count, loff_t *ppos)
8204 {
8205         return count;
8206 }
8207
8208 static int tracing_err_log_release(struct inode *inode, struct file *file)
8209 {
8210         struct trace_array *tr = inode->i_private;
8211
8212         trace_array_put(tr);
8213
8214         if (file->f_mode & FMODE_READ)
8215                 seq_release(inode, file);
8216
8217         return 0;
8218 }
8219
8220 static const struct file_operations tracing_err_log_fops = {
8221         .open           = tracing_err_log_open,
8222         .write          = tracing_err_log_write,
8223         .read           = seq_read,
8224         .llseek         = tracing_lseek,
8225         .release        = tracing_err_log_release,
8226 };
8227
8228 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8229 {
8230         struct trace_array *tr = inode->i_private;
8231         struct ftrace_buffer_info *info;
8232         int ret;
8233
8234         ret = tracing_check_open_get_tr(tr);
8235         if (ret)
8236                 return ret;
8237
8238         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8239         if (!info) {
8240                 trace_array_put(tr);
8241                 return -ENOMEM;
8242         }
8243
8244         mutex_lock(&trace_types_lock);
8245
8246         info->iter.tr           = tr;
8247         info->iter.cpu_file     = tracing_get_cpu(inode);
8248         info->iter.trace        = tr->current_trace;
8249         info->iter.array_buffer = &tr->array_buffer;
8250         info->spare             = NULL;
8251         /* Force reading ring buffer for first read */
8252         info->read              = (unsigned int)-1;
8253
8254         filp->private_data = info;
8255
8256         tr->trace_ref++;
8257
8258         mutex_unlock(&trace_types_lock);
8259
8260         ret = nonseekable_open(inode, filp);
8261         if (ret < 0)
8262                 trace_array_put(tr);
8263
8264         return ret;
8265 }
8266
8267 static __poll_t
8268 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8269 {
8270         struct ftrace_buffer_info *info = filp->private_data;
8271         struct trace_iterator *iter = &info->iter;
8272
8273         return trace_poll(iter, filp, poll_table);
8274 }
8275
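/*
 * Read handler for the per-cpu trace_pipe_raw (and snapshot_raw) files,
 * see tracing_init_tracefs_percpu() below: whole ring-buffer pages are
 * copied out through a single recycled "spare" page, with info->read
 * tracking how much of the current page has already been handed to
 * user space.
 */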
8276 static ssize_t
8277 tracing_buffers_read(struct file *filp, char __user *ubuf,
8278                      size_t count, loff_t *ppos)
8279 {
8280         struct ftrace_buffer_info *info = filp->private_data;
8281         struct trace_iterator *iter = &info->iter;
8282         ssize_t ret = 0;
8283         ssize_t size;
8284
8285         if (!count)
8286                 return 0;
8287
8288 #ifdef CONFIG_TRACER_MAX_TRACE
8289         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8290                 return -EBUSY;
8291 #endif
8292
8293         if (!info->spare) {
8294                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8295                                                           iter->cpu_file);
8296                 if (IS_ERR(info->spare)) {
8297                         ret = PTR_ERR(info->spare);
8298                         info->spare = NULL;
8299                 } else {
8300                         info->spare_cpu = iter->cpu_file;
8301                 }
8302         }
8303         if (!info->spare)
8304                 return ret;
8305
8306         /* Do we have previous read data to read? */
8307         if (info->read < PAGE_SIZE)
8308                 goto read;
8309
8310  again:
8311         trace_access_lock(iter->cpu_file);
8312         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8313                                     &info->spare,
8314                                     count,
8315                                     iter->cpu_file, 0);
8316         trace_access_unlock(iter->cpu_file);
8317
8318         if (ret < 0) {
8319                 if (trace_empty(iter)) {
8320                         if ((filp->f_flags & O_NONBLOCK))
8321                                 return -EAGAIN;
8322
8323                         ret = wait_on_pipe(iter, 0);
8324                         if (ret)
8325                                 return ret;
8326
8327                         goto again;
8328                 }
8329                 return 0;
8330         }
8331
8332         info->read = 0;
8333  read:
8334         size = PAGE_SIZE - info->read;
8335         if (size > count)
8336                 size = count;
8337
8338         ret = copy_to_user(ubuf, info->spare + info->read, size);
8339         if (ret == size)
8340                 return -EFAULT;
8341
8342         size -= ret;
8343
8344         *ppos += size;
8345         info->read += size;
8346
8347         return size;
8348 }
8349
8350 static int tracing_buffers_release(struct inode *inode, struct file *file)
8351 {
8352         struct ftrace_buffer_info *info = file->private_data;
8353         struct trace_iterator *iter = &info->iter;
8354
8355         mutex_lock(&trace_types_lock);
8356
8357         iter->tr->trace_ref--;
8358
8359         __trace_array_put(iter->tr);
8360
8361         iter->wait_index++;
8362         /* Make sure the waiters see the new wait_index */
8363         smp_wmb();
8364
8365         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8366
8367         if (info->spare)
8368                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8369                                            info->spare_cpu, info->spare);
8370         kvfree(info);
8371
8372         mutex_unlock(&trace_types_lock);
8373
8374         return 0;
8375 }
8376
8377 struct buffer_ref {
8378         struct trace_buffer     *buffer;
8379         void                    *page;
8380         int                     cpu;
8381         refcount_t              refcount;
8382 };
8383
8384 static void buffer_ref_release(struct buffer_ref *ref)
8385 {
8386         if (!refcount_dec_and_test(&ref->refcount))
8387                 return;
8388         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8389         kfree(ref);
8390 }
8391
8392 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8393                                     struct pipe_buffer *buf)
8394 {
8395         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8396
8397         buffer_ref_release(ref);
8398         buf->private = 0;
8399 }
8400
8401 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8402                                 struct pipe_buffer *buf)
8403 {
8404         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8405
8406         if (refcount_read(&ref->refcount) > INT_MAX/2)
8407                 return false;
8408
8409         refcount_inc(&ref->refcount);
8410         return true;
8411 }
8412
8413 /* Pipe buffer operations for a buffer. */
8414 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8415         .release                = buffer_pipe_buf_release,
8416         .get                    = buffer_pipe_buf_get,
8417 };
8418
8419 /*
8420  * Callback from splice_to_pipe(), if we need to release some pages
8421  * at the end of the spd in case we errored out while filling the pipe.
8422  */
8423 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8424 {
8425         struct buffer_ref *ref =
8426                 (struct buffer_ref *)spd->partial[i].private;
8427
8428         buffer_ref_release(ref);
8429         spd->partial[i].private = 0;
8430 }
8431
8432 static ssize_t
8433 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8434                             struct pipe_inode_info *pipe, size_t len,
8435                             unsigned int flags)
8436 {
8437         struct ftrace_buffer_info *info = file->private_data;
8438         struct trace_iterator *iter = &info->iter;
8439         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8440         struct page *pages_def[PIPE_DEF_BUFFERS];
8441         struct splice_pipe_desc spd = {
8442                 .pages          = pages_def,
8443                 .partial        = partial_def,
8444                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8445                 .ops            = &buffer_pipe_buf_ops,
8446                 .spd_release    = buffer_spd_release,
8447         };
8448         struct buffer_ref *ref;
8449         int entries, i;
8450         ssize_t ret = 0;
8451
8452 #ifdef CONFIG_TRACER_MAX_TRACE
8453         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8454                 return -EBUSY;
8455 #endif
8456
8457         if (*ppos & (PAGE_SIZE - 1))
8458                 return -EINVAL;
8459
8460         if (len & (PAGE_SIZE - 1)) {
8461                 if (len < PAGE_SIZE)
8462                         return -EINVAL;
8463                 len &= PAGE_MASK;
8464         }
8465
8466         if (splice_grow_spd(pipe, &spd))
8467                 return -ENOMEM;
8468
8469  again:
8470         trace_access_lock(iter->cpu_file);
8471         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8472
8473         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8474                 struct page *page;
8475                 int r;
8476
8477                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8478                 if (!ref) {
8479                         ret = -ENOMEM;
8480                         break;
8481                 }
8482
8483                 refcount_set(&ref->refcount, 1);
8484                 ref->buffer = iter->array_buffer->buffer;
8485                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8486                 if (IS_ERR(ref->page)) {
8487                         ret = PTR_ERR(ref->page);
8488                         ref->page = NULL;
8489                         kfree(ref);
8490                         break;
8491                 }
8492                 ref->cpu = iter->cpu_file;
8493
8494                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8495                                           len, iter->cpu_file, 1);
8496                 if (r < 0) {
8497                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8498                                                    ref->page);
8499                         kfree(ref);
8500                         break;
8501                 }
8502
8503                 page = virt_to_page(ref->page);
8504
8505                 spd.pages[i] = page;
8506                 spd.partial[i].len = PAGE_SIZE;
8507                 spd.partial[i].offset = 0;
8508                 spd.partial[i].private = (unsigned long)ref;
8509                 spd.nr_pages++;
8510                 *ppos += PAGE_SIZE;
8511
8512                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8513         }
8514
8515         trace_access_unlock(iter->cpu_file);
8516         spd.nr_pages = i;
8517
8518         /* did we read anything? */
8519         if (!spd.nr_pages) {
8520                 long wait_index;
8521
8522                 if (ret)
8523                         goto out;
8524
8525                 ret = -EAGAIN;
8526                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8527                         goto out;
8528
8529                 wait_index = READ_ONCE(iter->wait_index);
8530
8531                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8532                 if (ret)
8533                         goto out;
8534
8535                 /* No need to wait after waking up when tracing is off */
8536                 if (!tracer_tracing_is_on(iter->tr))
8537                         goto out;
8538
8539                 /* Make sure we see the new wait_index */
8540                 smp_rmb();
8541                 if (wait_index != iter->wait_index)
8542                         goto out;
8543
8544                 goto again;
8545         }
8546
8547         ret = splice_to_pipe(pipe, &spd);
8548 out:
8549         splice_shrink_spd(&spd);
8550
8551         return ret;
8552 }
8553
8554 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8555 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8556 {
8557         struct ftrace_buffer_info *info = file->private_data;
8558         struct trace_iterator *iter = &info->iter;
8559
8560         if (cmd)
8561                 return -ENOIOCTLCMD;
8562
8563         mutex_lock(&trace_types_lock);
8564
8565         iter->wait_index++;
8566         /* Make sure the waiters see the new wait_index */
8567         smp_wmb();
8568
8569         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8570
8571         mutex_unlock(&trace_types_lock);
8572         return 0;
8573 }
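
/*
 * Note on wait_index: both tracing_buffers_ioctl() above and
 * tracing_buffers_release() bump iter->wait_index and then wake the ring
 * buffer waiters (with an smp_wmb() in between).  The splice path samples
 * wait_index before sleeping and re-checks it (under smp_rmb()) after
 * waking; a changed value means a forced wakeup, so it bails out instead
 * of looping back to wait again.
 */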
8574
8575 static const struct file_operations tracing_buffers_fops = {
8576         .open           = tracing_buffers_open,
8577         .read           = tracing_buffers_read,
8578         .poll           = tracing_buffers_poll,
8579         .release        = tracing_buffers_release,
8580         .splice_read    = tracing_buffers_splice_read,
8581         .unlocked_ioctl = tracing_buffers_ioctl,
8582         .llseek         = no_llseek,
8583 };
8584
8585 static ssize_t
8586 tracing_stats_read(struct file *filp, char __user *ubuf,
8587                    size_t count, loff_t *ppos)
8588 {
8589         struct inode *inode = file_inode(filp);
8590         struct trace_array *tr = inode->i_private;
8591         struct array_buffer *trace_buf = &tr->array_buffer;
8592         int cpu = tracing_get_cpu(inode);
8593         struct trace_seq *s;
8594         unsigned long cnt;
8595         unsigned long long t;
8596         unsigned long usec_rem;
8597
8598         s = kmalloc(sizeof(*s), GFP_KERNEL);
8599         if (!s)
8600                 return -ENOMEM;
8601
8602         trace_seq_init(s);
8603
8604         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8605         trace_seq_printf(s, "entries: %ld\n", cnt);
8606
8607         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8608         trace_seq_printf(s, "overrun: %ld\n", cnt);
8609
8610         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8611         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8612
8613         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8614         trace_seq_printf(s, "bytes: %ld\n", cnt);
8615
8616         if (trace_clocks[tr->clock_id].in_ns) {
8617                 /* local or global for trace_clock */
8618                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8619                 usec_rem = do_div(t, USEC_PER_SEC);
8620                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8621                                                                 t, usec_rem);
8622
8623                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8624                 usec_rem = do_div(t, USEC_PER_SEC);
8625                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8626         } else {
8627                 /* counter or tsc mode for trace_clock */
8628                 trace_seq_printf(s, "oldest event ts: %llu\n",
8629                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8630
8631                 trace_seq_printf(s, "now ts: %llu\n",
8632                                 ring_buffer_time_stamp(trace_buf->buffer));
8633         }
8634
8635         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8636         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8637
8638         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8639         trace_seq_printf(s, "read events: %ld\n", cnt);
8640
8641         count = simple_read_from_buffer(ubuf, count, ppos,
8642                                         s->buffer, trace_seq_used(s));
8643
8644         kfree(s);
8645
8646         return count;
8647 }
8648
8649 static const struct file_operations tracing_stats_fops = {
8650         .open           = tracing_open_generic_tr,
8651         .read           = tracing_stats_read,
8652         .llseek         = generic_file_llseek,
8653         .release        = tracing_release_generic_tr,
8654 };
8655
8656 #ifdef CONFIG_DYNAMIC_FTRACE
8657
8658 static ssize_t
8659 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8660                   size_t cnt, loff_t *ppos)
8661 {
8662         ssize_t ret;
8663         char *buf;
8664         int r;
8665
8666         /* 256 should be plenty to hold the amount needed */
8667         buf = kmalloc(256, GFP_KERNEL);
8668         if (!buf)
8669                 return -ENOMEM;
8670
8671         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8672                       ftrace_update_tot_cnt,
8673                       ftrace_number_of_pages,
8674                       ftrace_number_of_groups);
8675
8676         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8677         kfree(buf);
8678         return ret;
8679 }
8680
8681 static const struct file_operations tracing_dyn_info_fops = {
8682         .open           = tracing_open_generic,
8683         .read           = tracing_read_dyn_info,
8684         .llseek         = generic_file_llseek,
8685 };
8686 #endif /* CONFIG_DYNAMIC_FTRACE */
8687
8688 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8689 static void
8690 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8691                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8692                 void *data)
8693 {
8694         tracing_snapshot_instance(tr);
8695 }
8696
8697 static void
8698 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8699                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8700                       void *data)
8701 {
8702         struct ftrace_func_mapper *mapper = data;
8703         long *count = NULL;
8704
8705         if (mapper)
8706                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8707
8708         if (count) {
8709
8710                 if (*count <= 0)
8711                         return;
8712
8713                 (*count)--;
8714         }
8715
8716         tracing_snapshot_instance(tr);
8717 }
8718
8719 static int
8720 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8721                       struct ftrace_probe_ops *ops, void *data)
8722 {
8723         struct ftrace_func_mapper *mapper = data;
8724         long *count = NULL;
8725
8726         seq_printf(m, "%ps:", (void *)ip);
8727
8728         seq_puts(m, "snapshot");
8729
8730         if (mapper)
8731                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8732
8733         if (count)
8734                 seq_printf(m, ":count=%ld\n", *count);
8735         else
8736                 seq_puts(m, ":unlimited\n");
8737
8738         return 0;
8739 }
8740
8741 static int
8742 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8743                      unsigned long ip, void *init_data, void **data)
8744 {
8745         struct ftrace_func_mapper *mapper = *data;
8746
8747         if (!mapper) {
8748                 mapper = allocate_ftrace_func_mapper();
8749                 if (!mapper)
8750                         return -ENOMEM;
8751                 *data = mapper;
8752         }
8753
8754         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8755 }
8756
8757 static void
8758 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8759                      unsigned long ip, void *data)
8760 {
8761         struct ftrace_func_mapper *mapper = data;
8762
8763         if (!ip) {
8764                 if (!mapper)
8765                         return;
8766                 free_ftrace_func_mapper(mapper, NULL);
8767                 return;
8768         }
8769
8770         ftrace_func_mapper_remove_ip(mapper, ip);
8771 }
8772
8773 static struct ftrace_probe_ops snapshot_probe_ops = {
8774         .func                   = ftrace_snapshot,
8775         .print                  = ftrace_snapshot_print,
8776 };
8777
8778 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8779         .func                   = ftrace_count_snapshot,
8780         .print                  = ftrace_snapshot_print,
8781         .init                   = ftrace_snapshot_init,
8782         .free                   = ftrace_snapshot_free,
8783 };
8784
8785 static int
8786 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8787                                char *glob, char *cmd, char *param, int enable)
8788 {
8789         struct ftrace_probe_ops *ops;
8790         void *count = (void *)-1;
8791         char *number;
8792         int ret;
8793
8794         if (!tr)
8795                 return -ENODEV;
8796
8797         /* hash funcs only work with set_ftrace_filter */
8798         if (!enable)
8799                 return -EINVAL;
8800
8801         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8802
8803         if (glob[0] == '!')
8804                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8805
8806         if (!param)
8807                 goto out_reg;
8808
8809         number = strsep(&param, ":");
8810
8811         if (!strlen(number))
8812                 goto out_reg;
8813
8814         /*
8815          * We use the callback data field (which is a pointer)
8816          * as our counter.
8817          */
8818         ret = kstrtoul(number, 0, (unsigned long *)&count);
8819         if (ret)
8820                 return ret;
8821
8822  out_reg:
8823         ret = tracing_alloc_snapshot_instance(tr);
8824         if (ret < 0)
8825                 goto out;
8826
8827         ret = register_ftrace_function_probe(glob, tr, ops, count);
8828
8829  out:
8830         return ret < 0 ? ret : 0;
8831 }
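
/*
 * Usage sketch for the "snapshot" command registered below, as written to
 * set_ftrace_filter (the count and the '!' prefix are parsed above):
 *
 *     echo 'schedule:snapshot' > set_ftrace_filter    # snapshot on every hit
 *     echo 'schedule:snapshot:3' > set_ftrace_filter  # only the first 3 hits
 *
 * A leading '!' ("!schedule:snapshot") unregisters the probe again.
 */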
8832
8833 static struct ftrace_func_command ftrace_snapshot_cmd = {
8834         .name                   = "snapshot",
8835         .func                   = ftrace_trace_snapshot_callback,
8836 };
8837
8838 static __init int register_snapshot_cmd(void)
8839 {
8840         return register_ftrace_command(&ftrace_snapshot_cmd);
8841 }
8842 #else
8843 static inline __init int register_snapshot_cmd(void) { return 0; }
8844 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8845
8846 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8847 {
8848         if (WARN_ON(!tr->dir))
8849                 return ERR_PTR(-ENODEV);
8850
8851         /* Top directory uses NULL as the parent */
8852         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8853                 return NULL;
8854
8855         /* All sub buffers have a descriptor */
8856         return tr->dir;
8857 }
8858
8859 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8860 {
8861         struct dentry *d_tracer;
8862
8863         if (tr->percpu_dir)
8864                 return tr->percpu_dir;
8865
8866         d_tracer = tracing_get_dentry(tr);
8867         if (IS_ERR(d_tracer))
8868                 return NULL;
8869
8870         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8871
8872         MEM_FAIL(!tr->percpu_dir,
8873                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8874
8875         return tr->percpu_dir;
8876 }
8877
8878 static struct dentry *
8879 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8880                       void *data, long cpu, const struct file_operations *fops)
8881 {
8882         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8883
8884         if (ret) /* See tracing_get_cpu() */
8885                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8886         return ret;
8887 }
8888
8889 static void
8890 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8891 {
8892         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8893         struct dentry *d_cpu;
8894         char cpu_dir[30]; /* 30 characters should be more than enough */
8895
8896         if (!d_percpu)
8897                 return;
8898
8899         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8900         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8901         if (!d_cpu) {
8902                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8903                 return;
8904         }
8905
8906         /* per cpu trace_pipe */
8907         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8908                                 tr, cpu, &tracing_pipe_fops);
8909
8910         /* per cpu trace */
8911         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8912                                 tr, cpu, &tracing_fops);
8913
8914         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8915                                 tr, cpu, &tracing_buffers_fops);
8916
8917         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8918                                 tr, cpu, &tracing_stats_fops);
8919
8920         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8921                                 tr, cpu, &tracing_entries_fops);
8922
8923 #ifdef CONFIG_TRACER_SNAPSHOT
8924         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8925                                 tr, cpu, &snapshot_fops);
8926
8927         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8928                                 tr, cpu, &snapshot_raw_fops);
8929 #endif
8930 }
8931
8932 #ifdef CONFIG_FTRACE_SELFTEST
8933 /* Let selftest have access to static functions in this file */
8934 #include "trace_selftest.c"
8935 #endif
8936
8937 static ssize_t
8938 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8939                         loff_t *ppos)
8940 {
8941         struct trace_option_dentry *topt = filp->private_data;
8942         char *buf;
8943
8944         if (topt->flags->val & topt->opt->bit)
8945                 buf = "1\n";
8946         else
8947                 buf = "0\n";
8948
8949         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8950 }
8951
8952 static ssize_t
8953 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8954                          loff_t *ppos)
8955 {
8956         struct trace_option_dentry *topt = filp->private_data;
8957         unsigned long val;
8958         int ret;
8959
8960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8961         if (ret)
8962                 return ret;
8963
8964         if (val != 0 && val != 1)
8965                 return -EINVAL;
8966
8967         if (!!(topt->flags->val & topt->opt->bit) != val) {
8968                 mutex_lock(&trace_types_lock);
8969                 ret = __set_tracer_option(topt->tr, topt->flags,
8970                                           topt->opt, !val);
8971                 mutex_unlock(&trace_types_lock);
8972                 if (ret)
8973                         return ret;
8974         }
8975
8976         *ppos += cnt;
8977
8978         return cnt;
8979 }
8980
8981 static int tracing_open_options(struct inode *inode, struct file *filp)
8982 {
8983         struct trace_option_dentry *topt = inode->i_private;
8984         int ret;
8985
8986         ret = tracing_check_open_get_tr(topt->tr);
8987         if (ret)
8988                 return ret;
8989
8990         filp->private_data = inode->i_private;
8991         return 0;
8992 }
8993
8994 static int tracing_release_options(struct inode *inode, struct file *file)
8995 {
8996         struct trace_option_dentry *topt = file->private_data;
8997
8998         trace_array_put(topt->tr);
8999         return 0;
9000 }
9001
9002 static const struct file_operations trace_options_fops = {
9003         .open = tracing_open_options,
9004         .read = trace_options_read,
9005         .write = trace_options_write,
9006         .llseek = generic_file_llseek,
9007         .release = tracing_release_options,
9008 };
9009
9010 /*
9011  * In order to pass in both the trace_array descriptor as well as the index
9012  * to the flag that the trace option file represents, the trace_array
9013  * has a character array of trace_flags_index[], which holds the index
9014  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9015  * The address of this character array is passed to the flag option file
9016  * read/write callbacks.
9017  *
9018  * In order to extract both the index and the trace_array descriptor,
9019  * get_tr_index() uses the following algorithm.
9020  *
9021  *   idx = *ptr;
9022  *
9023  * Since each entry's value equals its own position in the array (remember
9024  * index[1] == 1), dereferencing the pointer yields the index directly.
9025  *
9026  * Then, to get the trace_array descriptor, subtracting that index
9027  * from the ptr gets us back to the start of the index array:
9028  *
9029  *   ptr - idx == &index[0]
9030  *
9031  * Then a simple container_of() from that pointer gets us to the
9032  * trace_array descriptor.
9033  */
9034 static void get_tr_index(void *data, struct trace_array **ptr,
9035                          unsigned int *pindex)
9036 {
9037         *pindex = *(unsigned char *)data;
9038
9039         *ptr = container_of(data - *pindex, struct trace_array,
9040                             trace_flags_index);
9041 }
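
/*
 * Concrete walk-through of the above (values purely illustrative): if the
 * option file was created with data == &tr->trace_flags_index[3], then
 * *data == 3, data - 3 == &tr->trace_flags_index[0], and container_of()
 * on that address recovers the enclosing struct trace_array.
 */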
9042
9043 static ssize_t
9044 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9045                         loff_t *ppos)
9046 {
9047         void *tr_index = filp->private_data;
9048         struct trace_array *tr;
9049         unsigned int index;
9050         char *buf;
9051
9052         get_tr_index(tr_index, &tr, &index);
9053
9054         if (tr->trace_flags & (1 << index))
9055                 buf = "1\n";
9056         else
9057                 buf = "0\n";
9058
9059         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9060 }
9061
9062 static ssize_t
9063 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9064                          loff_t *ppos)
9065 {
9066         void *tr_index = filp->private_data;
9067         struct trace_array *tr;
9068         unsigned int index;
9069         unsigned long val;
9070         int ret;
9071
9072         get_tr_index(tr_index, &tr, &index);
9073
9074         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9075         if (ret)
9076                 return ret;
9077
9078         if (val != 0 && val != 1)
9079                 return -EINVAL;
9080
9081         mutex_lock(&event_mutex);
9082         mutex_lock(&trace_types_lock);
9083         ret = set_tracer_flag(tr, 1 << index, val);
9084         mutex_unlock(&trace_types_lock);
9085         mutex_unlock(&event_mutex);
9086
9087         if (ret < 0)
9088                 return ret;
9089
9090         *ppos += cnt;
9091
9092         return cnt;
9093 }
9094
9095 static const struct file_operations trace_options_core_fops = {
9096         .open = tracing_open_generic,
9097         .read = trace_options_core_read,
9098         .write = trace_options_core_write,
9099         .llseek = generic_file_llseek,
9100 };
9101
9102 struct dentry *trace_create_file(const char *name,
9103                                  umode_t mode,
9104                                  struct dentry *parent,
9105                                  void *data,
9106                                  const struct file_operations *fops)
9107 {
9108         struct dentry *ret;
9109
9110         ret = tracefs_create_file(name, mode, parent, data, fops);
9111         if (!ret)
9112                 pr_warn("Could not create tracefs '%s' entry\n", name);
9113
9114         return ret;
9115 }
9116
9117
9118 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9119 {
9120         struct dentry *d_tracer;
9121
9122         if (tr->options)
9123                 return tr->options;
9124
9125         d_tracer = tracing_get_dentry(tr);
9126         if (IS_ERR(d_tracer))
9127                 return NULL;
9128
9129         tr->options = tracefs_create_dir("options", d_tracer);
9130         if (!tr->options) {
9131                 pr_warn("Could not create tracefs directory 'options'\n");
9132                 return NULL;
9133         }
9134
9135         return tr->options;
9136 }
9137
9138 static void
9139 create_trace_option_file(struct trace_array *tr,
9140                          struct trace_option_dentry *topt,
9141                          struct tracer_flags *flags,
9142                          struct tracer_opt *opt)
9143 {
9144         struct dentry *t_options;
9145
9146         t_options = trace_options_init_dentry(tr);
9147         if (!t_options)
9148                 return;
9149
9150         topt->flags = flags;
9151         topt->opt = opt;
9152         topt->tr = tr;
9153
9154         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9155                                         t_options, topt, &trace_options_fops);
9156
9157 }
9158
9159 static void
9160 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9161 {
9162         struct trace_option_dentry *topts;
9163         struct trace_options *tr_topts;
9164         struct tracer_flags *flags;
9165         struct tracer_opt *opts;
9166         int cnt;
9167         int i;
9168
9169         if (!tracer)
9170                 return;
9171
9172         flags = tracer->flags;
9173
9174         if (!flags || !flags->opts)
9175                 return;
9176
9177         /*
9178          * If this is an instance, only create flags for tracers
9179          * the instance may have.
9180          */
9181         if (!trace_ok_for_array(tracer, tr))
9182                 return;
9183
9184         for (i = 0; i < tr->nr_topts; i++) {
9185                 /* Make sure there's no duplicate flags. */
9186                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9187                         return;
9188         }
9189
9190         opts = flags->opts;
9191
9192         for (cnt = 0; opts[cnt].name; cnt++)
9193                 ;
9194
9195         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9196         if (!topts)
9197                 return;
9198
9199         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9200                             GFP_KERNEL);
9201         if (!tr_topts) {
9202                 kfree(topts);
9203                 return;
9204         }
9205
9206         tr->topts = tr_topts;
9207         tr->topts[tr->nr_topts].tracer = tracer;
9208         tr->topts[tr->nr_topts].topts = topts;
9209         tr->nr_topts++;
9210
9211         for (cnt = 0; opts[cnt].name; cnt++) {
9212                 create_trace_option_file(tr, &topts[cnt], flags,
9213                                          &opts[cnt]);
9214                 MEM_FAIL(topts[cnt].entry == NULL,
9215                           "Failed to create trace option: %s",
9216                           opts[cnt].name);
9217         }
9218 }
9219
9220 static struct dentry *
9221 create_trace_option_core_file(struct trace_array *tr,
9222                               const char *option, long index)
9223 {
9224         struct dentry *t_options;
9225
9226         t_options = trace_options_init_dentry(tr);
9227         if (!t_options)
9228                 return NULL;
9229
9230         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9231                                  (void *)&tr->trace_flags_index[index],
9232                                  &trace_options_core_fops);
9233 }
9234
9235 static void create_trace_options_dir(struct trace_array *tr)
9236 {
9237         struct dentry *t_options;
9238         bool top_level = tr == &global_trace;
9239         int i;
9240
9241         t_options = trace_options_init_dentry(tr);
9242         if (!t_options)
9243                 return;
9244
9245         for (i = 0; trace_options[i]; i++) {
9246                 if (top_level ||
9247                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9248                         create_trace_option_core_file(tr, trace_options[i], i);
9249         }
9250 }
9251
9252 static ssize_t
9253 rb_simple_read(struct file *filp, char __user *ubuf,
9254                size_t cnt, loff_t *ppos)
9255 {
9256         struct trace_array *tr = filp->private_data;
9257         char buf[64];
9258         int r;
9259
9260         r = tracer_tracing_is_on(tr);
9261         r = sprintf(buf, "%d\n", r);
9262
9263         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9264 }
9265
9266 static ssize_t
9267 rb_simple_write(struct file *filp, const char __user *ubuf,
9268                 size_t cnt, loff_t *ppos)
9269 {
9270         struct trace_array *tr = filp->private_data;
9271         struct trace_buffer *buffer = tr->array_buffer.buffer;
9272         unsigned long val;
9273         int ret;
9274
9275         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9276         if (ret)
9277                 return ret;
9278
9279         if (buffer) {
9280                 mutex_lock(&trace_types_lock);
9281                 if (!!val == tracer_tracing_is_on(tr)) {
9282                         val = 0; /* do nothing */
9283                 } else if (val) {
9284                         tracer_tracing_on(tr);
9285                         if (tr->current_trace->start)
9286                                 tr->current_trace->start(tr);
9287                 } else {
9288                         tracer_tracing_off(tr);
9289                         if (tr->current_trace->stop)
9290                                 tr->current_trace->stop(tr);
9291                         /* Wake up any waiters */
9292                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9293                 }
9294                 mutex_unlock(&trace_types_lock);
9295         }
9296
9297         (*ppos)++;
9298
9299         return cnt;
9300 }
9301
9302 static const struct file_operations rb_simple_fops = {
9303         .open           = tracing_open_generic_tr,
9304         .read           = rb_simple_read,
9305         .write          = rb_simple_write,
9306         .release        = tracing_release_generic_tr,
9307         .llseek         = default_llseek,
9308 };
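
/*
 * rb_simple_fops backs the per-instance "tracing_on" switch in tracefs.
 * Writing 0 stops recording (calling the tracer's ->stop() hook and waking
 * any waiters), writing a non-zero value resumes it, and reading reports
 * the current state as "0\n" or "1\n".
 */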
9309
9310 static ssize_t
9311 buffer_percent_read(struct file *filp, char __user *ubuf,
9312                     size_t cnt, loff_t *ppos)
9313 {
9314         struct trace_array *tr = filp->private_data;
9315         char buf[64];
9316         int r;
9317
9318         r = tr->buffer_percent;
9319         r = sprintf(buf, "%d\n", r);
9320
9321         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9322 }
9323
9324 static ssize_t
9325 buffer_percent_write(struct file *filp, const char __user *ubuf,
9326                      size_t cnt, loff_t *ppos)
9327 {
9328         struct trace_array *tr = filp->private_data;
9329         unsigned long val;
9330         int ret;
9331
9332         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9333         if (ret)
9334                 return ret;
9335
9336         if (val > 100)
9337                 return -EINVAL;
9338
9339         tr->buffer_percent = val;
9340
9341         (*ppos)++;
9342
9343         return cnt;
9344 }
9345
9346 static const struct file_operations buffer_percent_fops = {
9347         .open           = tracing_open_generic_tr,
9348         .read           = buffer_percent_read,
9349         .write          = buffer_percent_write,
9350         .release        = tracing_release_generic_tr,
9351         .llseek         = default_llseek,
9352 };
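
/*
 * buffer_percent_fops backs the "buffer_percent" file.  The value (0-100)
 * is the fill watermark passed to wait_on_pipe() by the splice reader
 * above: 0 wakes readers as soon as any data is available, 100 only once
 * the ring buffer is full.
 */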
9353
9354 static struct dentry *trace_instance_dir;
9355
9356 static void
9357 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9358
9359 static int
9360 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9361 {
9362         enum ring_buffer_flags rb_flags;
9363
9364         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9365
9366         buf->tr = tr;
9367
9368         buf->buffer = ring_buffer_alloc(size, rb_flags);
9369         if (!buf->buffer)
9370                 return -ENOMEM;
9371
9372         buf->data = alloc_percpu(struct trace_array_cpu);
9373         if (!buf->data) {
9374                 ring_buffer_free(buf->buffer);
9375                 buf->buffer = NULL;
9376                 return -ENOMEM;
9377         }
9378
9379         /* Allocate the first page for all buffers */
9380         set_buffer_entries(&tr->array_buffer,
9381                            ring_buffer_size(tr->array_buffer.buffer, 0));
9382
9383         return 0;
9384 }
9385
9386 static void free_trace_buffer(struct array_buffer *buf)
9387 {
9388         if (buf->buffer) {
9389                 ring_buffer_free(buf->buffer);
9390                 buf->buffer = NULL;
9391                 free_percpu(buf->data);
9392                 buf->data = NULL;
9393         }
9394 }
9395
9396 static int allocate_trace_buffers(struct trace_array *tr, int size)
9397 {
9398         int ret;
9399
9400         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9401         if (ret)
9402                 return ret;
9403
9404 #ifdef CONFIG_TRACER_MAX_TRACE
9405         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9406                                     allocate_snapshot ? size : 1);
9407         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9408                 free_trace_buffer(&tr->array_buffer);
9409                 return -ENOMEM;
9410         }
9411         tr->allocated_snapshot = allocate_snapshot;
9412
9413         allocate_snapshot = false;
9414 #endif
9415
9416         return 0;
9417 }
9418
9419 static void free_trace_buffers(struct trace_array *tr)
9420 {
9421         if (!tr)
9422                 return;
9423
9424         free_trace_buffer(&tr->array_buffer);
9425
9426 #ifdef CONFIG_TRACER_MAX_TRACE
9427         free_trace_buffer(&tr->max_buffer);
9428 #endif
9429 }
9430
9431 static void init_trace_flags_index(struct trace_array *tr)
9432 {
9433         int i;
9434
9435         /* Used by the trace options files */
9436         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9437                 tr->trace_flags_index[i] = i;
9438 }
9439
9440 static void __update_tracer_options(struct trace_array *tr)
9441 {
9442         struct tracer *t;
9443
9444         for (t = trace_types; t; t = t->next)
9445                 add_tracer_options(tr, t);
9446 }
9447
9448 static void update_tracer_options(struct trace_array *tr)
9449 {
9450         mutex_lock(&trace_types_lock);
9451         tracer_options_updated = true;
9452         __update_tracer_options(tr);
9453         mutex_unlock(&trace_types_lock);
9454 }
9455
9456 /* Must have trace_types_lock held */
9457 struct trace_array *trace_array_find(const char *instance)
9458 {
9459         struct trace_array *tr, *found = NULL;
9460
9461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9462                 if (tr->name && strcmp(tr->name, instance) == 0) {
9463                         found = tr;
9464                         break;
9465                 }
9466         }
9467
9468         return found;
9469 }
9470
9471 struct trace_array *trace_array_find_get(const char *instance)
9472 {
9473         struct trace_array *tr;
9474
9475         mutex_lock(&trace_types_lock);
9476         tr = trace_array_find(instance);
9477         if (tr)
9478                 tr->ref++;
9479         mutex_unlock(&trace_types_lock);
9480
9481         return tr;
9482 }
9483
9484 static int trace_array_create_dir(struct trace_array *tr)
9485 {
9486         int ret;
9487
9488         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9489         if (!tr->dir)
9490                 return -EINVAL;
9491
9492         ret = event_trace_add_tracer(tr->dir, tr);
9493         if (ret) {
9494                 tracefs_remove(tr->dir);
9495                 return ret;
9496         }
9497
9498         init_tracer_tracefs(tr, tr->dir);
9499         __update_tracer_options(tr);
9500
9501         return ret;
9502 }
9503
9504 static struct trace_array *trace_array_create(const char *name)
9505 {
9506         struct trace_array *tr;
9507         int ret;
9508
9509         ret = -ENOMEM;
9510         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9511         if (!tr)
9512                 return ERR_PTR(ret);
9513
9514         tr->name = kstrdup(name, GFP_KERNEL);
9515         if (!tr->name)
9516                 goto out_free_tr;
9517
9518         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9519                 goto out_free_tr;
9520
9521         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9522                 goto out_free_tr;
9523
9524         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9525
9526         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9527
9528         raw_spin_lock_init(&tr->start_lock);
9529
9530         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9531
9532         tr->current_trace = &nop_trace;
9533
9534         INIT_LIST_HEAD(&tr->systems);
9535         INIT_LIST_HEAD(&tr->events);
9536         INIT_LIST_HEAD(&tr->hist_vars);
9537         INIT_LIST_HEAD(&tr->err_log);
9538
9539         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9540                 goto out_free_tr;
9541
9542         if (ftrace_allocate_ftrace_ops(tr) < 0)
9543                 goto out_free_tr;
9544
9545         ftrace_init_trace_array(tr);
9546
9547         init_trace_flags_index(tr);
9548
9549         if (trace_instance_dir) {
9550                 ret = trace_array_create_dir(tr);
9551                 if (ret)
9552                         goto out_free_tr;
9553         } else
9554                 __trace_early_add_events(tr);
9555
9556         list_add(&tr->list, &ftrace_trace_arrays);
9557
9558         tr->ref++;
9559
9560         return tr;
9561
9562  out_free_tr:
9563         ftrace_free_ftrace_ops(tr);
9564         free_trace_buffers(tr);
9565         free_cpumask_var(tr->pipe_cpumask);
9566         free_cpumask_var(tr->tracing_cpumask);
9567         kfree(tr->name);
9568         kfree(tr);
9569
9570         return ERR_PTR(ret);
9571 }
9572
9573 static int instance_mkdir(const char *name)
9574 {
9575         struct trace_array *tr;
9576         int ret;
9577
9578         mutex_lock(&event_mutex);
9579         mutex_lock(&trace_types_lock);
9580
9581         ret = -EEXIST;
9582         if (trace_array_find(name))
9583                 goto out_unlock;
9584
9585         tr = trace_array_create(name);
9586
9587         ret = PTR_ERR_OR_ZERO(tr);
9588
9589 out_unlock:
9590         mutex_unlock(&trace_types_lock);
9591         mutex_unlock(&event_mutex);
9592         return ret;
9593 }
9594
9595 /**
9596  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9597  * @name: The name of the trace array to be looked up/created.
9598  *
9599  * Returns a pointer to the trace array with the given name, or
9600  * NULL if it cannot be created.
9601  *
9602  * NOTE: This function increments the reference counter associated with the
9603  * trace array returned. This makes sure it cannot be freed while in use.
9604  * Use trace_array_put() once the trace array is no longer needed.
9605  * If the trace_array is to be freed, trace_array_destroy() needs to
9606  * be called after the trace_array_put(), or simply let user space delete
9607  * it from the tracefs instances directory. But until the
9608  * trace_array_put() is called, user space can not delete it.
9609  *
9610  */
9611 struct trace_array *trace_array_get_by_name(const char *name)
9612 {
9613         struct trace_array *tr;
9614
9615         mutex_lock(&event_mutex);
9616         mutex_lock(&trace_types_lock);
9617
9618         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9619                 if (tr->name && strcmp(tr->name, name) == 0)
9620                         goto out_unlock;
9621         }
9622
9623         tr = trace_array_create(name);
9624
9625         if (IS_ERR(tr))
9626                 tr = NULL;
9627 out_unlock:
9628         if (tr)
9629                 tr->ref++;
9630
9631         mutex_unlock(&trace_types_lock);
9632         mutex_unlock(&event_mutex);
9633         return tr;
9634 }
9635 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
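
/*
 * A minimal sketch of how a module might use this interface; the instance
 * name is made up, the event is just an example, and error handling is
 * trimmed (see samples/ftrace/sample-trace-array.c for a complete user):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *	trace_array_init_printk(tr);
 *	trace_array_printk(tr, _THIS_IP_, "hello from example_instance\n");
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * The final trace_array_destroy() is only needed when the module, rather
 * than user space, is responsible for removing the instance.
 */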
9636
9637 static int __remove_instance(struct trace_array *tr)
9638 {
9639         int i;
9640
9641         /* Reference counter for a newly created trace array = 1. */
9642         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9643                 return -EBUSY;
9644
9645         list_del(&tr->list);
9646
9647         /* Disable all the flags that were enabled coming in */
9648         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9649                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9650                         set_tracer_flag(tr, 1 << i, 0);
9651         }
9652
9653         tracing_set_nop(tr);
9654         clear_ftrace_function_probes(tr);
9655         event_trace_del_tracer(tr);
9656         ftrace_clear_pids(tr);
9657         ftrace_destroy_function_files(tr);
9658         tracefs_remove(tr->dir);
9659         free_percpu(tr->last_func_repeats);
9660         free_trace_buffers(tr);
9661         clear_tracing_err_log(tr);
9662
9663         for (i = 0; i < tr->nr_topts; i++) {
9664                 kfree(tr->topts[i].topts);
9665         }
9666         kfree(tr->topts);
9667
9668         free_cpumask_var(tr->pipe_cpumask);
9669         free_cpumask_var(tr->tracing_cpumask);
9670         kfree(tr->name);
9671         kfree(tr);
9672
9673         return 0;
9674 }
9675
9676 int trace_array_destroy(struct trace_array *this_tr)
9677 {
9678         struct trace_array *tr;
9679         int ret;
9680
9681         if (!this_tr)
9682                 return -EINVAL;
9683
9684         mutex_lock(&event_mutex);
9685         mutex_lock(&trace_types_lock);
9686
9687         ret = -ENODEV;
9688
9689         /* Make sure the trace array exists before destroying it. */
9690         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9691                 if (tr == this_tr) {
9692                         ret = __remove_instance(tr);
9693                         break;
9694                 }
9695         }
9696
9697         mutex_unlock(&trace_types_lock);
9698         mutex_unlock(&event_mutex);
9699
9700         return ret;
9701 }
9702 EXPORT_SYMBOL_GPL(trace_array_destroy);
9703
9704 static int instance_rmdir(const char *name)
9705 {
9706         struct trace_array *tr;
9707         int ret;
9708
9709         mutex_lock(&event_mutex);
9710         mutex_lock(&trace_types_lock);
9711
9712         ret = -ENODEV;
9713         tr = trace_array_find(name);
9714         if (tr)
9715                 ret = __remove_instance(tr);
9716
9717         mutex_unlock(&trace_types_lock);
9718         mutex_unlock(&event_mutex);
9719
9720         return ret;
9721 }
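
/*
 * instance_mkdir() and instance_rmdir() back the "instances" directory in
 * tracefs, so instances can also be managed entirely from user space,
 * roughly like this (assuming the usual mount point):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	echo 1 > /sys/kernel/tracing/instances/foo/events/sched/enable
 *	cat /sys/kernel/tracing/instances/foo/trace
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while something in the kernel still holds a
 * reference to the instance (see __remove_instance() above).
 */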
9722
9723 static __init void create_trace_instances(struct dentry *d_tracer)
9724 {
9725         struct trace_array *tr;
9726
9727         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9728                                                          instance_mkdir,
9729                                                          instance_rmdir);
9730         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9731                 return;
9732
9733         mutex_lock(&event_mutex);
9734         mutex_lock(&trace_types_lock);
9735
9736         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9737                 if (!tr->name)
9738                         continue;
9739                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9740                              "Failed to create instance directory\n"))
9741                         break;
9742         }
9743
9744         mutex_unlock(&trace_types_lock);
9745         mutex_unlock(&event_mutex);
9746 }
9747
9748 static void
9749 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9750 {
9751         struct trace_event_file *file;
9752         int cpu;
9753
9754         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9755                         tr, &show_traces_fops);
9756
9757         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9758                         tr, &set_tracer_fops);
9759
9760         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9761                           tr, &tracing_cpumask_fops);
9762
9763         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9764                           tr, &tracing_iter_fops);
9765
9766         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9767                           tr, &tracing_fops);
9768
9769         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9770                           tr, &tracing_pipe_fops);
9771
9772         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9773                           tr, &tracing_entries_fops);
9774
9775         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9776                           tr, &tracing_total_entries_fops);
9777
9778         trace_create_file("free_buffer", 0200, d_tracer,
9779                           tr, &tracing_free_buffer_fops);
9780
9781         trace_create_file("trace_marker", 0220, d_tracer,
9782                           tr, &tracing_mark_fops);
9783
9784         file = __find_event_file(tr, "ftrace", "print");
9785         if (file && file->ef)
9786                 eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
9787                                   file, &event_trigger_fops);
9788         tr->trace_marker_file = file;
9789
9790         trace_create_file("trace_marker_raw", 0220, d_tracer,
9791                           tr, &tracing_mark_raw_fops);
9792
9793         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9794                           &trace_clock_fops);
9795
9796         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9797                           tr, &rb_simple_fops);
9798
9799         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9800                           &trace_time_stamp_mode_fops);
9801
9802         tr->buffer_percent = 50;
9803
9804         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9805                         tr, &buffer_percent_fops);
9806
9807         create_trace_options_dir(tr);
9808
9809 #ifdef CONFIG_TRACER_MAX_TRACE
9810         trace_create_maxlat_file(tr, d_tracer);
9811 #endif
9812
9813         if (ftrace_create_function_files(tr, d_tracer))
9814                 MEM_FAIL(1, "Could not allocate function filter files");
9815
9816 #ifdef CONFIG_TRACER_SNAPSHOT
9817         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9818                           tr, &snapshot_fops);
9819 #endif
9820
9821         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9822                           tr, &tracing_err_log_fops);
9823
9824         for_each_tracing_cpu(cpu)
9825                 tracing_init_tracefs_percpu(tr, cpu);
9826
9827         ftrace_init_tracefs(tr, d_tracer);
9828 }
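
/*
 * The files created above make up the per-instance control interface. A
 * short illustrative session against the top level instance (assuming
 * tracefs at /sys/kernel/tracing and the function tracer built in):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo 1 > /sys/kernel/tracing/tracing_on
 *	cat /sys/kernel/tracing/trace_pipe
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo nop > /sys/kernel/tracing/current_tracer
 */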
9829
9830 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9831 {
9832         struct vfsmount *mnt;
9833         struct file_system_type *type;
9834
9835         /*
9836          * To maintain backward compatibility for tools that mount
9837          * debugfs to get to the tracing facility, tracefs is automatically
9838          * mounted to the debugfs/tracing directory.
9839          */
9840         type = get_fs_type("tracefs");
9841         if (!type)
9842                 return NULL;
9843         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9844         put_filesystem(type);
9845         if (IS_ERR(mnt))
9846                 return NULL;
9847         mntget(mnt);
9848
9849         return mnt;
9850 }
9851
9852 /**
9853  * tracing_init_dentry - initialize top level trace array
9854  *
9855  * This is called when creating files or directories in the tracing
9856  * directory. It is called from boot up code via fs_initcall() and
9857  * returns 0 on success or a negative error code on failure.
9858  */
9859 int tracing_init_dentry(void)
9860 {
9861         struct trace_array *tr = &global_trace;
9862
9863         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9864                 pr_warn("Tracing disabled due to lockdown\n");
9865                 return -EPERM;
9866         }
9867
9868         /* The top level trace array uses NULL as parent */
9869         if (tr->dir)
9870                 return 0;
9871
9872         if (WARN_ON(!tracefs_initialized()))
9873                 return -ENODEV;
9874
9875         /*
9876          * As there may still be users that expect the tracing
9877          * files to exist in debugfs/tracing, we must automount
9878          * the tracefs file system there, so older tools still
9879          * work with the newer kernel.
9880          */
9881         tr->dir = debugfs_create_automount("tracing", NULL,
9882                                            trace_automount, NULL);
9883
9884         return 0;
9885 }
9886
9887 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9888 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9889
9890 static struct workqueue_struct *eval_map_wq __initdata;
9891 static struct work_struct eval_map_work __initdata;
9892 static struct work_struct tracerfs_init_work __initdata;
9893
9894 static void __init eval_map_work_func(struct work_struct *work)
9895 {
9896         int len;
9897
9898         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9899         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9900 }
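
/*
 * The maps between __start_ftrace_eval_maps and __stop_ftrace_eval_maps are
 * generated by TRACE_DEFINE_ENUM() and TRACE_DEFINE_SIZEOF() in trace event
 * headers, so that enum and sizeof() symbols used in an event's print fmt
 * can be translated into their numeric values for user space. A typical
 * (illustrative) use next to a TRACE_EVENT() definition is:
 *
 *	TRACE_DEFINE_ENUM(MY_STATE_RUNNING);
 *
 * which lets __print_symbolic(__entry->state, { MY_STATE_RUNNING, "RUNNING" })
 * resolve correctly in the event's format file.
 */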
9901
9902 static int __init trace_eval_init(void)
9903 {
9904         INIT_WORK(&eval_map_work, eval_map_work_func);
9905
9906         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9907         if (!eval_map_wq) {
9908                 pr_err("Unable to allocate eval_map_wq\n");
9909                 /* Do work here */
9910                 eval_map_work_func(&eval_map_work);
9911                 return -ENOMEM;
9912         }
9913
9914         queue_work(eval_map_wq, &eval_map_work);
9915         return 0;
9916 }
9917
9918 subsys_initcall(trace_eval_init);
9919
9920 static int __init trace_eval_sync(void)
9921 {
9922         /* Make sure the eval map updates are finished */
9923         if (eval_map_wq)
9924                 destroy_workqueue(eval_map_wq);
9925         return 0;
9926 }
9927
9928 late_initcall_sync(trace_eval_sync);
9929
9930
9931 #ifdef CONFIG_MODULES
9932 static void trace_module_add_evals(struct module *mod)
9933 {
9934         if (!mod->num_trace_evals)
9935                 return;
9936
9937         /*
9938          * Modules with bad taint do not have events created, do
9939          * not bother with enums either.
9940          */
9941         if (trace_module_has_bad_taint(mod))
9942                 return;
9943
9944         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9945 }
9946
9947 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9948 static void trace_module_remove_evals(struct module *mod)
9949 {
9950         union trace_eval_map_item *map;
9951         union trace_eval_map_item **last = &trace_eval_maps;
9952
9953         if (!mod->num_trace_evals)
9954                 return;
9955
9956         mutex_lock(&trace_eval_mutex);
9957
9958         map = trace_eval_maps;
9959
9960         while (map) {
9961                 if (map->head.mod == mod)
9962                         break;
9963                 map = trace_eval_jmp_to_tail(map);
9964                 last = &map->tail.next;
9965                 map = map->tail.next;
9966         }
9967         if (!map)
9968                 goto out;
9969
9970         *last = trace_eval_jmp_to_tail(map)->tail.next;
9971         kfree(map);
9972  out:
9973         mutex_unlock(&trace_eval_mutex);
9974 }
9975 #else
9976 static inline void trace_module_remove_evals(struct module *mod) { }
9977 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9978
9979 static int trace_module_notify(struct notifier_block *self,
9980                                unsigned long val, void *data)
9981 {
9982         struct module *mod = data;
9983
9984         switch (val) {
9985         case MODULE_STATE_COMING:
9986                 trace_module_add_evals(mod);
9987                 break;
9988         case MODULE_STATE_GOING:
9989                 trace_module_remove_evals(mod);
9990                 break;
9991         }
9992
9993         return NOTIFY_OK;
9994 }
9995
9996 static struct notifier_block trace_module_nb = {
9997         .notifier_call = trace_module_notify,
9998         .priority = 0,
9999 };
10000 #endif /* CONFIG_MODULES */
10001
10002 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10003 {
10004
10005         event_trace_init();
10006
10007         init_tracer_tracefs(&global_trace, NULL);
10008         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10009
10010         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10011                         &global_trace, &tracing_thresh_fops);
10012
10013         trace_create_file("README", TRACE_MODE_READ, NULL,
10014                         NULL, &tracing_readme_fops);
10015
10016         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10017                         NULL, &tracing_saved_cmdlines_fops);
10018
10019         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10020                           NULL, &tracing_saved_cmdlines_size_fops);
10021
10022         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10023                         NULL, &tracing_saved_tgids_fops);
10024
10025         trace_create_eval_file(NULL);
10026
10027 #ifdef CONFIG_MODULES
10028         register_module_notifier(&trace_module_nb);
10029 #endif
10030
10031 #ifdef CONFIG_DYNAMIC_FTRACE
10032         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10033                         NULL, &tracing_dyn_info_fops);
10034 #endif
10035
10036         create_trace_instances(NULL);
10037
10038         update_tracer_options(&global_trace);
10039 }
10040
10041 static __init int tracer_init_tracefs(void)
10042 {
10043         int ret;
10044
10045         trace_access_lock_init();
10046
10047         ret = tracing_init_dentry();
10048         if (ret)
10049                 return 0;
10050
10051         if (eval_map_wq) {
10052                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10053                 queue_work(eval_map_wq, &tracerfs_init_work);
10054         } else {
10055                 tracer_init_tracefs_work_func(NULL);
10056         }
10057
10058         rv_init_interface();
10059
10060         return 0;
10061 }
10062
10063 fs_initcall(tracer_init_tracefs);
10064
10065 static int trace_die_panic_handler(struct notifier_block *self,
10066                                 unsigned long ev, void *unused);
10067
10068 static struct notifier_block trace_panic_notifier = {
10069         .notifier_call = trace_die_panic_handler,
10070         .priority = INT_MAX - 1,
10071 };
10072
10073 static struct notifier_block trace_die_notifier = {
10074         .notifier_call = trace_die_panic_handler,
10075         .priority = INT_MAX - 1,
10076 };
10077
10078 /*
10079  * The idea is to execute the following die/panic callback early, in order
10080  * to avoid showing irrelevant information in the trace (like other panic
10081  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10082  * warnings get disabled (to prevent potential log flooding).
10083  */
10084 static int trace_die_panic_handler(struct notifier_block *self,
10085                                 unsigned long ev, void *unused)
10086 {
10087         if (!ftrace_dump_on_oops)
10088                 return NOTIFY_DONE;
10089
10090         /* The die notifier requires DIE_OOPS to trigger */
10091         if (self == &trace_die_notifier && ev != DIE_OOPS)
10092                 return NOTIFY_DONE;
10093
10094         ftrace_dump(ftrace_dump_on_oops);
10095
10096         return NOTIFY_DONE;
10097 }
10098
10099 /*
10100  * printk is set to a max of 1024; we really don't need it that big.
10101  * Nothing should be printing 1000 characters anyway.
10102  */
10103 #define TRACE_MAX_PRINT         1000
10104
10105 /*
10106  * Define here KERN_TRACE so that we have one place to modify
10107  * it if we decide to change what log level the ftrace dump
10108  * should be at.
10109  */
10110 #define KERN_TRACE              KERN_EMERG
10111
10112 void
10113 trace_printk_seq(struct trace_seq *s)
10114 {
10115         /* Probably should print a warning here. */
10116         if (s->seq.len >= TRACE_MAX_PRINT)
10117                 s->seq.len = TRACE_MAX_PRINT;
10118
10119         /*
10120          * More paranoid code. Although the buffer size is set to
10121          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10122          * an extra layer of protection.
10123          */
10124         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10125                 s->seq.len = s->seq.size - 1;
10126
10127         /* should already be zero terminated, but we are paranoid. */
10128         s->buffer[s->seq.len] = 0;
10129
10130         printk(KERN_TRACE "%s", s->buffer);
10131
10132         trace_seq_init(s);
10133 }
10134
10135 void trace_init_global_iter(struct trace_iterator *iter)
10136 {
10137         iter->tr = &global_trace;
10138         iter->trace = iter->tr->current_trace;
10139         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10140         iter->array_buffer = &global_trace.array_buffer;
10141
10142         if (iter->trace && iter->trace->open)
10143                 iter->trace->open(iter);
10144
10145         /* Annotate start of buffers if we had overruns */
10146         if (ring_buffer_overruns(iter->array_buffer->buffer))
10147                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10148
10149         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10150         if (trace_clocks[iter->tr->clock_id].in_ns)
10151                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10152
10153         /* Can not use kmalloc for iter.temp and iter.fmt */
10154         iter->temp = static_temp_buf;
10155         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10156         iter->fmt = static_fmt_buf;
10157         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10158 }
10159
10160 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10161 {
10162         /* use static because iter can be a bit big for the stack */
10163         static struct trace_iterator iter;
10164         static atomic_t dump_running;
10165         struct trace_array *tr = &global_trace;
10166         unsigned int old_userobj;
10167         unsigned long flags;
10168         int cnt = 0, cpu;
10169
10170         /* Only allow one dump user at a time. */
10171         if (atomic_inc_return(&dump_running) != 1) {
10172                 atomic_dec(&dump_running);
10173                 return;
10174         }
10175
10176         /*
10177          * Always turn off tracing when we dump.
10178          * We don't need to show trace output of what happens
10179          * between multiple crashes.
10180          *
10181          * If the user does a sysrq-z, then they can re-enable
10182          * tracing with echo 1 > tracing_on.
10183          */
10184         tracing_off();
10185
10186         local_irq_save(flags);
10187
10188         /* Simulate the iterator */
10189         trace_init_global_iter(&iter);
10190
10191         for_each_tracing_cpu(cpu) {
10192                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10193         }
10194
10195         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10196
10197         /* don't look at user memory in panic mode */
10198         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10199
10200         switch (oops_dump_mode) {
10201         case DUMP_ALL:
10202                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10203                 break;
10204         case DUMP_ORIG:
10205                 iter.cpu_file = raw_smp_processor_id();
10206                 break;
10207         case DUMP_NONE:
10208                 goto out_enable;
10209         default:
10210                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10211                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10212         }
10213
10214         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10215
10216         /* Did function tracer already get disabled? */
10217         if (ftrace_is_dead()) {
10218                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10219                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10220         }
10221
10222         /*
10223          * We need to stop all tracing on all CPUS to read
10224          * the next buffer. This is a bit expensive, but is
10225          * not done often. We fill all what we can read,
10226          * not done often. We print all that we can read,
10227          */
10228
10229         while (!trace_empty(&iter)) {
10230
10231                 if (!cnt)
10232                         printk(KERN_TRACE "---------------------------------\n");
10233
10234                 cnt++;
10235
10236                 trace_iterator_reset(&iter);
10237                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10238
10239                 if (trace_find_next_entry_inc(&iter) != NULL) {
10240                         int ret;
10241
10242                         ret = print_trace_line(&iter);
10243                         if (ret != TRACE_TYPE_NO_CONSUME)
10244                                 trace_consume(&iter);
10245                 }
10246                 touch_nmi_watchdog();
10247
10248                 trace_printk_seq(&iter.seq);
10249         }
10250
10251         if (!cnt)
10252                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10253         else
10254                 printk(KERN_TRACE "---------------------------------\n");
10255
10256  out_enable:
10257         tr->trace_flags |= old_userobj;
10258
10259         for_each_tracing_cpu(cpu) {
10260                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10261         }
10262         atomic_dec(&dump_running);
10263         local_irq_restore(flags);
10264 }
10265 EXPORT_SYMBOL_GPL(ftrace_dump);
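
/*
 * Besides the die/panic notifiers above (gated by ftrace_dump_on_oops),
 * ftrace_dump() may be called directly from kernel code that has hit an
 * unrecoverable state, for example (the condition here is made up):
 *
 *	if (WARN_ON_ONCE(broken_invariant))
 *		ftrace_dump(DUMP_ORIG);
 *
 * DUMP_ORIG dumps only the buffer of the calling CPU, DUMP_ALL dumps the
 * buffers of every CPU.
 */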
10266
10267 #define WRITE_BUFSIZE  4096
10268
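/**
 * trace_parse_run_command - run a callback on each command written by user space
 * @file: the file being written to
 * @buffer: the user space buffer holding the commands
 * @count: the number of bytes in @buffer
 * @ppos: the position in the file
 * @createfn: callback invoked for each NUL terminated command line
 *
 * Copies the user buffer in WRITE_BUFSIZE chunks, splits it on newlines,
 * strips anything after a '#', and hands each resulting command string to
 * @createfn (this is what interfaces such as the dynamic event files use).
 * Returns the number of bytes consumed on success, or a negative error,
 * including any error returned by @createfn.
 */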
10269 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10270                                 size_t count, loff_t *ppos,
10271                                 int (*createfn)(const char *))
10272 {
10273         char *kbuf, *buf, *tmp;
10274         int ret = 0;
10275         size_t done = 0;
10276         size_t size;
10277
10278         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10279         if (!kbuf)
10280                 return -ENOMEM;
10281
10282         while (done < count) {
10283                 size = count - done;
10284
10285                 if (size >= WRITE_BUFSIZE)
10286                         size = WRITE_BUFSIZE - 1;
10287
10288                 if (copy_from_user(kbuf, buffer + done, size)) {
10289                         ret = -EFAULT;
10290                         goto out;
10291                 }
10292                 kbuf[size] = '\0';
10293                 buf = kbuf;
10294                 do {
10295                         tmp = strchr(buf, '\n');
10296                         if (tmp) {
10297                                 *tmp = '\0';
10298                                 size = tmp - buf + 1;
10299                         } else {
10300                                 size = strlen(buf);
10301                                 if (done + size < count) {
10302                                         if (buf != kbuf)
10303                                                 break;
10304                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10305                                         pr_warn("Line length is too long: Should be less than %d\n",
10306                                                 WRITE_BUFSIZE - 2);
10307                                         ret = -EINVAL;
10308                                         goto out;
10309                                 }
10310                         }
10311                         done += size;
10312
10313                         /* Remove comments */
10314                         tmp = strchr(buf, '#');
10315
10316                         if (tmp)
10317                                 *tmp = '\0';
10318
10319                         ret = createfn(buf);
10320                         if (ret)
10321                                 goto out;
10322                         buf += size;
10323
10324                 } while (done < count);
10325         }
10326         ret = done;
10327
10328 out:
10329         kfree(kbuf);
10330
10331         return ret;
10332 }
10333
10334 #ifdef CONFIG_TRACER_MAX_TRACE
10335 __init static bool tr_needs_alloc_snapshot(const char *name)
10336 {
10337         char *test;
10338         int len = strlen(name);
10339         bool ret;
10340
10341         if (!boot_snapshot_index)
10342                 return false;
10343
10344         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10345             boot_snapshot_info[len] == '\t')
10346                 return true;
10347
10348         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10349         if (!test)
10350                 return false;
10351
10352         sprintf(test, "\t%s\t", name);
10353         ret = strstr(boot_snapshot_info, test) != NULL;
10354         kfree(test);
10355         return ret;
10356 }
10357
10358 __init static void do_allocate_snapshot(const char *name)
10359 {
10360         if (!tr_needs_alloc_snapshot(name))
10361                 return;
10362
10363         /*
10364          * When allocate_snapshot is set, the next call to
10365          * allocate_trace_buffers() (called by trace_array_get_by_name())
10366          * will allocate the snapshot buffer. That will also clear
10367          * this flag.
10368          */
10369         allocate_snapshot = true;
10370 }
10371 #else
10372 static inline void do_allocate_snapshot(const char *name) { }
10373 #endif
10374
10375 __init static void enable_instances(void)
10376 {
10377         struct trace_array *tr;
10378         char *curr_str;
10379         char *str;
10380         char *tok;
10381
10382         /* A tab is always appended */
10383         boot_instance_info[boot_instance_index - 1] = '\0';
10384         str = boot_instance_info;
10385
10386         while ((curr_str = strsep(&str, "\t"))) {
10387
10388                 tok = strsep(&curr_str, ",");
10389
10390                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10391                         do_allocate_snapshot(tok);
10392
10393                 tr = trace_array_get_by_name(tok);
10394                 if (!tr) {
10395                         pr_warn("Failed to create instance buffer %s\n", tok);
10396                         continue;
10397                 }
10398                 /* Allow user space to delete it */
10399                 trace_array_put(tr);
10400
10401                 while ((tok = strsep(&curr_str, ","))) {
10402                         early_enable_events(tr, tok, true);
10403                 }
10404         }
10405 }
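
/*
 * Boot time instances come from the "trace_instance=" kernel command line
 * option, which appends each argument to boot_instance_info with a trailing
 * tab. For example (an illustrative command line):
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * creates an instance named "foo" and enables the listed events in it. If
 * CONFIG_TRACER_MAX_TRACE is enabled and the same name was also passed to
 * "ftrace_boot_snapshot=", the instance gets a snapshot buffer as well.
 */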
10406
10407 __init static int tracer_alloc_buffers(void)
10408 {
10409         int ring_buf_size;
10410         int ret = -ENOMEM;
10411
10412
10413         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10414                 pr_warn("Tracing disabled due to lockdown\n");
10415                 return -EPERM;
10416         }
10417
10418         /*
10419          * Make sure we don't accidentally add more trace options
10420          * than we have bits for.
10421          */
10422         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10423
10424         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10425                 goto out;
10426
10427         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10428                 goto out_free_buffer_mask;
10429
10430         /* Only allocate trace_printk buffers if a trace_printk exists */
10431         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10432                 /* Must be called before global_trace.buffer is allocated */
10433                 trace_printk_init_buffers();
10434
10435         /* To save memory, keep the ring buffer size to its minimum */
10436         if (ring_buffer_expanded)
10437                 ring_buf_size = trace_buf_size;
10438         else
10439                 ring_buf_size = 1;
10440
10441         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10442         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10443
10444         raw_spin_lock_init(&global_trace.start_lock);
10445
10446         /*
10447          * The prepare callback allocates some memory for the ring buffer. We
10448          * don't free the buffer if the CPU goes down. If we were to free
10449          * the buffer, then the user would lose any trace that was in the
10450          * buffer. The memory will be removed once the "instance" is removed.
10451          */
10452         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10453                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10454                                       NULL);
10455         if (ret < 0)
10456                 goto out_free_cpumask;
10457         /* Used for event triggers */
10458         ret = -ENOMEM;
10459         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10460         if (!temp_buffer)
10461                 goto out_rm_hp_state;
10462
10463         if (trace_create_savedcmd() < 0)
10464                 goto out_free_temp_buffer;
10465
10466         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10467                 goto out_free_savedcmd;
10468
10469         /* TODO: make the number of buffers hot pluggable with CPUS */
10470         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10471                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10472                 goto out_free_pipe_cpumask;
10473         }
10474         if (global_trace.buffer_disabled)
10475                 tracing_off();
10476
10477         if (trace_boot_clock) {
10478                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10479                 if (ret < 0)
10480                         pr_warn("Trace clock %s not defined, going back to default\n",
10481                                 trace_boot_clock);
10482         }
10483
10484         /*
10485          * register_tracer() might reference current_trace, so it
10486          * needs to be set before we register anything. This is
10487          * just a bootstrap of current_trace anyway.
10488          */
10489         global_trace.current_trace = &nop_trace;
10490
10491         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10492
10493         ftrace_init_global_array_ops(&global_trace);
10494
10495         init_trace_flags_index(&global_trace);
10496
10497         register_tracer(&nop_trace);
10498
10499         /* Function tracing may start here (via kernel command line) */
10500         init_function_trace();
10501
10502         /* All seems OK, enable tracing */
10503         tracing_disabled = 0;
10504
10505         atomic_notifier_chain_register(&panic_notifier_list,
10506                                        &trace_panic_notifier);
10507
10508         register_die_notifier(&trace_die_notifier);
10509
10510         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10511
10512         INIT_LIST_HEAD(&global_trace.systems);
10513         INIT_LIST_HEAD(&global_trace.events);
10514         INIT_LIST_HEAD(&global_trace.hist_vars);
10515         INIT_LIST_HEAD(&global_trace.err_log);
10516         list_add(&global_trace.list, &ftrace_trace_arrays);
10517
10518         apply_trace_boot_options();
10519
10520         register_snapshot_cmd();
10521
10522         test_can_verify();
10523
10524         return 0;
10525
10526 out_free_pipe_cpumask:
10527         free_cpumask_var(global_trace.pipe_cpumask);
10528 out_free_savedcmd:
10529         free_saved_cmdlines_buffer(savedcmd);
10530 out_free_temp_buffer:
10531         ring_buffer_free(temp_buffer);
10532 out_rm_hp_state:
10533         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10534 out_free_cpumask:
10535         free_cpumask_var(global_trace.tracing_cpumask);
10536 out_free_buffer_mask:
10537         free_cpumask_var(tracing_buffer_mask);
10538 out:
10539         return ret;
10540 }
10541
10542 void __init ftrace_boot_snapshot(void)
10543 {
10544 #ifdef CONFIG_TRACER_MAX_TRACE
10545         struct trace_array *tr;
10546
10547         if (!snapshot_at_boot)
10548                 return;
10549
10550         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10551                 if (!tr->allocated_snapshot)
10552                         continue;
10553
10554                 tracing_snapshot_instance(tr);
10555                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10556         }
10557 #endif
10558 }
10559
10560 void __init early_trace_init(void)
10561 {
10562         if (tracepoint_printk) {
10563                 tracepoint_print_iter =
10564                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10565                 if (MEM_FAIL(!tracepoint_print_iter,
10566                              "Failed to allocate trace iterator\n"))
10567                         tracepoint_printk = 0;
10568                 else
10569                         static_key_enable(&tracepoint_printk_key.key);
10570         }
10571         tracer_alloc_buffers();
10572
10573         init_events();
10574 }
10575
10576 void __init trace_init(void)
10577 {
10578         trace_event_init();
10579
10580         if (boot_instance_index)
10581                 enable_instances();
10582 }
10583
10584 __init static void clear_boot_tracer(void)
10585 {
10586         /*
10587          * The name of the default boot-up tracer lives in an init
10588          * section that is freed after boot. This function is called at
10589          * late init: if the boot tracer was never registered, clear the
10590          * pointer out, to prevent a later registration from accessing
10591          * memory that is about to be freed.
10592          */
10593         if (!default_bootup_tracer)
10594                 return;
10595
10596         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10597                default_bootup_tracer);
10598         default_bootup_tracer = NULL;
10599 }
10600
10601 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10602 __init static void tracing_set_default_clock(void)
10603 {
10604         /* sched_clock_stable() is determined in late_initcall */
10605         if (!trace_boot_clock && !sched_clock_stable()) {
10606                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10607                         pr_warn("Can not set tracing clock due to lockdown\n");
10608                         return;
10609                 }
10610
10611                 printk(KERN_WARNING
10612                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10613                        "If you want to keep using the local clock, then add:\n"
10614                        "  \"trace_clock=local\"\n"
10615                        "on the kernel command line\n");
10616                 tracing_set_clock(&global_trace, "global");
10617         }
10618 }
10619 #else
10620 static inline void tracing_set_default_clock(void) { }
10621 #endif
10622
10623 __init static int late_trace_init(void)
10624 {
10625         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10626                 static_key_disable(&tracepoint_printk_key.key);
10627                 tracepoint_printk = 0;
10628         }
10629
10630         tracing_set_default_clock();
10631         clear_boot_tracer();
10632         return 0;
10633 }
10634
10635 late_initcall_sync(late_trace_init);