kernel/trace/trace.c (from platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 #ifdef CONFIG_FTRACE_STARTUP_TEST
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will peek into the ring-buffer to count the
67  * entries inserted during the selftest, although some concurrent
68  * insertions into the ring-buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #else
87 #define tracing_selftest_running        0
88 #define tracing_selftest_disabled       0
89 #endif
90
91 /* Pipe tracepoints to printk */
92 static struct trace_iterator *tracepoint_print_iter;
93 int tracepoint_printk;
94 static bool tracepoint_printk_stop_on_boot __initdata;
95 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
96
97 /* For tracers that don't implement custom flags */
98 static struct tracer_opt dummy_tracer_opt[] = {
99         { }
100 };
101
102 static int
103 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
104 {
105         return 0;
106 }
107
108 /*
109  * To prevent the comm cache from being overwritten when no
110  * tracing is active, only save the comm when a trace event
111  * occurred.
112  */
113 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
114
115 /*
116  * Kill all tracing for good (never come back).
117  * It is initialized to 1 and is set back to zero only if the
118  * initialization of the tracer is successful; that is the only
119  * place that clears it.
120  */
121 static int tracing_disabled = 1;
122
123 cpumask_var_t __read_mostly     tracing_buffer_mask;
124
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
129  * is set, then ftrace_dump() is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputting them to a
132  * serial console.
133  *
134  * It is off by default, but you can enable it either by specifying
135  * "ftrace_dump_on_oops" on the kernel command line, or by setting
136  * /proc/sys/kernel/ftrace_dump_on_oops:
137  * set it to 1 to dump the buffers of all CPUs, or
138  * set it to 2 to dump only the buffer of the CPU that triggered the oops.
139  */
140
141 enum ftrace_dump_mode ftrace_dump_on_oops;
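/*
 * Illustrative examples (not part of the original source) of enabling the
 * option described above; the "=orig_cpu" spelling matches the parsing in
 * set_ftrace_dump_on_oops() below.
 *
 *   On the kernel command line:
 *     ftrace_dump_on_oops              dump the buffers of all CPUs
 *     ftrace_dump_on_oops=orig_cpu     dump only the buffer of the oops'ing CPU
 *
 *   At run time:
 *     echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *     echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */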
142
143 /* When set, tracing will stop when a WARN*() is hit */
144 int __disable_trace_on_warning;
145
146 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
147 /* Map of enums to their values, for "eval_map" file */
148 struct trace_eval_map_head {
149         struct module                   *mod;
150         unsigned long                   length;
151 };
152
153 union trace_eval_map_item;
154
155 struct trace_eval_map_tail {
156         /*
157          * "end" is first and points to NULL as it must be different
158          * from "mod" or "eval_string"
159          */
160         union trace_eval_map_item       *next;
161         const char                      *end;   /* points to NULL */
162 };
163
164 static DEFINE_MUTEX(trace_eval_mutex);
165
166 /*
167  * The trace_eval_maps are saved in an array with two extra elements,
168  * one at the beginning, and one at the end. The beginning item contains
169  * the count of the saved maps (head.length), and the module they
170  * belong to if not built in (head.mod). The ending item contains a
171  * pointer to the next array of saved eval_map items.
172  */
173 union trace_eval_map_item {
174         struct trace_eval_map           map;
175         struct trace_eval_map_head      head;
176         struct trace_eval_map_tail      tail;
177 };
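/*
 * Illustrative layout (added for clarity, not in the original source) of
 * one saved array of N eval maps, as described in the comment above; each
 * slot is a union trace_eval_map_item:
 *
 *   [0]      head   (head.mod, head.length = N)
 *   [1]      map    (first trace_eval_map)
 *   ...
 *   [N]      map    (last trace_eval_map)
 *   [N + 1]  tail   (tail.next -> next saved array, or NULL)
 */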
178
179 static union trace_eval_map_item *trace_eval_maps;
180 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
181
182 int tracing_set_tracer(struct trace_array *tr, const char *buf);
183 static void ftrace_trace_userstack(struct trace_array *tr,
184                                    struct trace_buffer *buffer,
185                                    unsigned int trace_ctx);
186
187 #define MAX_TRACER_SIZE         100
188 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
189 static char *default_bootup_tracer;
190
191 static bool allocate_snapshot;
192 static bool snapshot_at_boot;
193
194 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
195 static int boot_instance_index;
196
197 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
198 static int boot_snapshot_index;
199
200 static int __init set_cmdline_ftrace(char *str)
201 {
202         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
203         default_bootup_tracer = bootup_tracer_buf;
204         /* We are using ftrace early, expand it */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("ftrace=", set_cmdline_ftrace);
209
210 static int __init set_ftrace_dump_on_oops(char *str)
211 {
212         if (*str++ != '=' || !*str || !strcmp("1", str)) {
213                 ftrace_dump_on_oops = DUMP_ALL;
214                 return 1;
215         }
216
217         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
218                 ftrace_dump_on_oops = DUMP_ORIG;
219                 return 1;
220         }
221
222         return 0;
223 }
224 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
225
226 static int __init stop_trace_on_warning(char *str)
227 {
228         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
229                 __disable_trace_on_warning = 1;
230         return 1;
231 }
232 __setup("traceoff_on_warning", stop_trace_on_warning);
233
234 static int __init boot_alloc_snapshot(char *str)
235 {
236         char *slot = boot_snapshot_info + boot_snapshot_index;
237         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
238         int ret;
239
240         if (str[0] == '=') {
241                 str++;
242                 if (strlen(str) >= left)
243                         return -1;
244
245                 ret = snprintf(slot, left, "%s\t", str);
246                 boot_snapshot_index += ret;
247         } else {
248                 allocate_snapshot = true;
249                 /* We also need the main ring buffer expanded */
250                 ring_buffer_expanded = true;
251         }
252         return 1;
253 }
254 __setup("alloc_snapshot", boot_alloc_snapshot);
255
256
257 static int __init boot_snapshot(char *str)
258 {
259         snapshot_at_boot = true;
260         boot_alloc_snapshot(str);
261         return 1;
262 }
263 __setup("ftrace_boot_snapshot", boot_snapshot);
264
265
266 static int __init boot_instance(char *str)
267 {
268         char *slot = boot_instance_info + boot_instance_index;
269         int left = sizeof(boot_instance_info) - boot_instance_index;
270         int ret;
271
272         if (strlen(str) >= left)
273                 return -1;
274
275         ret = snprintf(slot, left, "%s\t", str);
276         boot_instance_index += ret;
277
278         return 1;
279 }
280 __setup("trace_instance=", boot_instance);
281
282
283 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
284
285 static int __init set_trace_boot_options(char *str)
286 {
287         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
288         return 1;
289 }
290 __setup("trace_options=", set_trace_boot_options);
291
292 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
293 static char *trace_boot_clock __initdata;
294
295 static int __init set_trace_boot_clock(char *str)
296 {
297         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
298         trace_boot_clock = trace_boot_clock_buf;
299         return 1;
300 }
301 __setup("trace_clock=", set_trace_boot_clock);
302
303 static int __init set_tracepoint_printk(char *str)
304 {
305         /* Ignore the "tp_printk_stop_on_boot" param */
306         if (*str == '_')
307                 return 0;
308
309         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
310                 tracepoint_printk = 1;
311         return 1;
312 }
313 __setup("tp_printk", set_tracepoint_printk);
314
315 static int __init set_tracepoint_printk_stop(char *str)
316 {
317         tracepoint_printk_stop_on_boot = true;
318         return 1;
319 }
320 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
321
322 unsigned long long ns2usecs(u64 nsec)
323 {
324         nsec += 500;
325         do_div(nsec, 1000);
326         return nsec;
327 }
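/*
 * Worked example (illustrative): ns2usecs() rounds to the nearest
 * microsecond by adding half a microsecond before dividing, so
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */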
328
329 static void
330 trace_process_export(struct trace_export *export,
331                struct ring_buffer_event *event, int flag)
332 {
333         struct trace_entry *entry;
334         unsigned int size = 0;
335
336         if (export->flags & flag) {
337                 entry = ring_buffer_event_data(event);
338                 size = ring_buffer_event_length(event);
339                 export->write(export, entry, size);
340         }
341 }
342
343 static DEFINE_MUTEX(ftrace_export_lock);
344
345 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
346
347 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
348 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
349 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
350
351 static inline void ftrace_exports_enable(struct trace_export *export)
352 {
353         if (export->flags & TRACE_EXPORT_FUNCTION)
354                 static_branch_inc(&trace_function_exports_enabled);
355
356         if (export->flags & TRACE_EXPORT_EVENT)
357                 static_branch_inc(&trace_event_exports_enabled);
358
359         if (export->flags & TRACE_EXPORT_MARKER)
360                 static_branch_inc(&trace_marker_exports_enabled);
361 }
362
363 static inline void ftrace_exports_disable(struct trace_export *export)
364 {
365         if (export->flags & TRACE_EXPORT_FUNCTION)
366                 static_branch_dec(&trace_function_exports_enabled);
367
368         if (export->flags & TRACE_EXPORT_EVENT)
369                 static_branch_dec(&trace_event_exports_enabled);
370
371         if (export->flags & TRACE_EXPORT_MARKER)
372                 static_branch_dec(&trace_marker_exports_enabled);
373 }
374
375 static void ftrace_exports(struct ring_buffer_event *event, int flag)
376 {
377         struct trace_export *export;
378
379         preempt_disable_notrace();
380
381         export = rcu_dereference_raw_check(ftrace_exports_list);
382         while (export) {
383                 trace_process_export(export, event, flag);
384                 export = rcu_dereference_raw_check(export->next);
385         }
386
387         preempt_enable_notrace();
388 }
389
390 static inline void
391 add_trace_export(struct trace_export **list, struct trace_export *export)
392 {
393         rcu_assign_pointer(export->next, *list);
394         /*
395          * We are adding export into the list but another
396          * CPU might be walking that list. We need to make sure
397          * the export->next pointer is valid before another CPU sees
398          * the export pointer added to the list.
399          */
400         rcu_assign_pointer(*list, export);
401 }
402
403 static inline int
404 rm_trace_export(struct trace_export **list, struct trace_export *export)
405 {
406         struct trace_export **p;
407
408         for (p = list; *p != NULL; p = &(*p)->next)
409                 if (*p == export)
410                         break;
411
412         if (*p != export)
413                 return -1;
414
415         rcu_assign_pointer(*p, (*p)->next);
416
417         return 0;
418 }
419
420 static inline void
421 add_ftrace_export(struct trace_export **list, struct trace_export *export)
422 {
423         ftrace_exports_enable(export);
424
425         add_trace_export(list, export);
426 }
427
428 static inline int
429 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
430 {
431         int ret;
432
433         ret = rm_trace_export(list, export);
434         ftrace_exports_disable(export);
435
436         return ret;
437 }
438
439 int register_ftrace_export(struct trace_export *export)
440 {
441         if (WARN_ON_ONCE(!export->write))
442                 return -1;
443
444         mutex_lock(&ftrace_export_lock);
445
446         add_ftrace_export(&ftrace_exports_list, export);
447
448         mutex_unlock(&ftrace_export_lock);
449
450         return 0;
451 }
452 EXPORT_SYMBOL_GPL(register_ftrace_export);
453
454 int unregister_ftrace_export(struct trace_export *export)
455 {
456         int ret;
457
458         mutex_lock(&ftrace_export_lock);
459
460         ret = rm_ftrace_export(&ftrace_exports_list, export);
461
462         mutex_unlock(&ftrace_export_lock);
463
464         return ret;
465 }
466 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
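/*
 * Illustrative sketch (not part of the original source): a minimal
 * trace_export that just reports the size of exported function trace
 * events. The ->write() and ->flags members are used as shown in
 * trace_process_export() and ftrace_exports_enable() above; see
 * include/linux/trace.h for the authoritative definition of
 * struct trace_export. The names my_export_write/my_export are made up.
 *
 *   static void my_export_write(struct trace_export *export,
 *                               const void *entry, unsigned int size)
 *   {
 *           pr_info_ratelimited("exported %u bytes of trace data\n", size);
 *   }
 *
 *   static struct trace_export my_export = {
 *           .write = my_export_write,
 *           .flags = TRACE_EXPORT_FUNCTION,
 *   };
 *
 *   register_ftrace_export(&my_export);        // e.g. from module init
 *   ...
 *   unregister_ftrace_export(&my_export);      // e.g. from module exit
 */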
467
468 /* trace_flags holds trace_options default values */
469 #define TRACE_DEFAULT_FLAGS                                             \
470         (FUNCTION_DEFAULT_FLAGS |                                       \
471          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
472          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
473          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
474          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
475          TRACE_ITER_HASH_PTR)
476
477 /* trace_options that are only supported by global_trace */
478 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
479                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
480
481 /* trace_flags that are default zero for instances */
482 #define ZEROED_TRACE_FLAGS \
483         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
484
485 /*
486  * The global_trace is the descriptor that holds the top-level tracing
487  * buffers for the live tracing.
488  */
489 static struct trace_array global_trace = {
490         .trace_flags = TRACE_DEFAULT_FLAGS,
491 };
492
493 LIST_HEAD(ftrace_trace_arrays);
494
495 int trace_array_get(struct trace_array *this_tr)
496 {
497         struct trace_array *tr;
498         int ret = -ENODEV;
499
500         mutex_lock(&trace_types_lock);
501         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
502                 if (tr == this_tr) {
503                         tr->ref++;
504                         ret = 0;
505                         break;
506                 }
507         }
508         mutex_unlock(&trace_types_lock);
509
510         return ret;
511 }
512
513 static void __trace_array_put(struct trace_array *this_tr)
514 {
515         WARN_ON(!this_tr->ref);
516         this_tr->ref--;
517 }
518
519 /**
520  * trace_array_put - Decrement the reference counter for this trace array.
521  * @this_tr : pointer to the trace array
522  *
523  * NOTE: Use this when we no longer need the trace array returned by
524  * trace_array_get_by_name(). This ensures the trace array can be later
525  * destroyed.
526  *
527  */
528 void trace_array_put(struct trace_array *this_tr)
529 {
530         if (!this_tr)
531                 return;
532
533         mutex_lock(&trace_types_lock);
534         __trace_array_put(this_tr);
535         mutex_unlock(&trace_types_lock);
536 }
537 EXPORT_SYMBOL_GPL(trace_array_put);
538
539 int tracing_check_open_get_tr(struct trace_array *tr)
540 {
541         int ret;
542
543         ret = security_locked_down(LOCKDOWN_TRACEFS);
544         if (ret)
545                 return ret;
546
547         if (tracing_disabled)
548                 return -ENODEV;
549
550         if (tr && trace_array_get(tr) < 0)
551                 return -ENODEV;
552
553         return 0;
554 }
555
556 int call_filter_check_discard(struct trace_event_call *call, void *rec,
557                               struct trace_buffer *buffer,
558                               struct ring_buffer_event *event)
559 {
560         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
561             !filter_match_preds(call->filter, rec)) {
562                 __trace_event_discard_commit(buffer, event);
563                 return 1;
564         }
565
566         return 0;
567 }
568
569 /**
570  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
571  * @filtered_pids: The list of pids to check
572  * @search_pid: The PID to find in @filtered_pids
573  *
574  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
575  */
576 bool
577 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
578 {
579         return trace_pid_list_is_set(filtered_pids, search_pid);
580 }
581
582 /**
583  * trace_ignore_this_task - should a task be ignored for tracing
584  * @filtered_pids: The list of pids to check
585  * @filtered_no_pids: The list of pids not to be traced
586  * @task: The task that should be ignored if not filtered
587  *
588  * Checks if @task should be traced or not from @filtered_pids.
589  * Returns true if @task should *NOT* be traced.
590  * Returns false if @task should be traced.
591  */
592 bool
593 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
594                        struct trace_pid_list *filtered_no_pids,
595                        struct task_struct *task)
596 {
597         /*
598          * If filtered_no_pids is not empty, and the task's pid is listed
599          * in filtered_no_pids, then return true.
600          * Otherwise, if filtered_pids is empty, that means we can
601          * trace all tasks. If it has content, then only trace pids
602          * within filtered_pids.
603          */
604
605         return (filtered_pids &&
606                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
607                 (filtered_no_pids &&
608                  trace_find_filtered_pid(filtered_no_pids, task->pid));
609 }
610
611 /**
612  * trace_filter_add_remove_task - Add or remove a task from a pid_list
613  * @pid_list: The list to modify
614  * @self: The current task for fork or NULL for exit
615  * @task: The task to add or remove
616  *
617  * If adding a task, if @self is defined, the task is only added if @self
618  * is also included in @pid_list. This happens on fork and tasks should
619  * only be added when the parent is listed. If @self is NULL, then the
620  * @task pid will be removed from the list, which would happen on exit
621  * of a task.
622  */
623 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
624                                   struct task_struct *self,
625                                   struct task_struct *task)
626 {
627         if (!pid_list)
628                 return;
629
630         /* For forks, we only add if the forking task is listed */
631         if (self) {
632                 if (!trace_find_filtered_pid(pid_list, self->pid))
633                         return;
634         }
635
636         /* "self" is set for forks, and NULL for exits */
637         if (self)
638                 trace_pid_list_set(pid_list, task->pid);
639         else
640                 trace_pid_list_clear(pid_list, task->pid);
641 }
642
643 /**
644  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
645  * @pid_list: The pid list to show
646  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
647  * @pos: The position of the file
648  *
649  * This is used by the seq_file "next" operation to iterate the pids
650  * listed in a trace_pid_list structure.
651  *
652  * Returns the pid+1 as we want to display pid of zero, but NULL would
653  * stop the iteration.
654  */
655 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
656 {
657         long pid = (unsigned long)v;
658         unsigned int next;
659
660         (*pos)++;
661
662         /* pid already is +1 of the actual previous bit */
663         if (trace_pid_list_next(pid_list, pid, &next) < 0)
664                 return NULL;
665
666         pid = next;
667
668         /* Return pid + 1 to allow zero to be represented */
669         return (void *)(pid + 1);
670 }
671
672 /**
673  * trace_pid_start - Used for seq_file to start reading pid lists
674  * @pid_list: The pid list to show
675  * @pos: The position of the file
676  *
677  * This is used by seq_file "start" operation to start the iteration
678  * of listing pids.
679  *
680  * Returns the pid+1 as we want to display pid of zero, but NULL would
681  * stop the iteration.
682  */
683 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
684 {
685         unsigned long pid;
686         unsigned int first;
687         loff_t l = 0;
688
689         if (trace_pid_list_first(pid_list, &first) < 0)
690                 return NULL;
691
692         pid = first;
693
694         /* Return pid + 1 so that zero can be the exit value */
695         for (pid++; pid && l < *pos;
696              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
697                 ;
698         return (void *)pid;
699 }
700
701 /**
702  * trace_pid_show - show the current pid in seq_file processing
703  * @m: The seq_file structure to write into
704  * @v: A void pointer of the pid (+1) value to display
705  *
706  * Can be directly used by seq_file operations to display the current
707  * pid value.
708  */
709 int trace_pid_show(struct seq_file *m, void *v)
710 {
711         unsigned long pid = (unsigned long)v - 1;
712
713         seq_printf(m, "%lu\n", pid);
714         return 0;
715 }
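/*
 * Worked example (illustrative, not in the original source) of the "+1"
 * encoding used by trace_pid_start()/trace_pid_next()/trace_pid_show():
 * a stored pid of 0 is handed to the seq_file core as (void *)1 so that
 * it is not mistaken for the NULL end-of-iteration marker, and pid 42 is
 * handed over as (void *)43; trace_pid_show() subtracts the 1 again, so
 * the user still sees "0" and "42".
 */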
716
717 /* 128 should be much more than enough */
718 #define PID_BUF_SIZE            127
719
720 int trace_pid_write(struct trace_pid_list *filtered_pids,
721                     struct trace_pid_list **new_pid_list,
722                     const char __user *ubuf, size_t cnt)
723 {
724         struct trace_pid_list *pid_list;
725         struct trace_parser parser;
726         unsigned long val;
727         int nr_pids = 0;
728         ssize_t read = 0;
729         ssize_t ret;
730         loff_t pos;
731         pid_t pid;
732
733         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
734                 return -ENOMEM;
735
736         /*
737          * Always create a new list. The write is an all or nothing
738          * operation: a new list is built from the pids written by
739          * the user, and only on success does it replace the current
740          * list; if the operation fails, the current list is not modified.
741          */
742         pid_list = trace_pid_list_alloc();
743         if (!pid_list) {
744                 trace_parser_put(&parser);
745                 return -ENOMEM;
746         }
747
748         if (filtered_pids) {
749                 /* copy the current bits to the new max */
750                 ret = trace_pid_list_first(filtered_pids, &pid);
751                 while (!ret) {
752                         trace_pid_list_set(pid_list, pid);
753                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
754                         nr_pids++;
755                 }
756         }
757
758         ret = 0;
759         while (cnt > 0) {
760
761                 pos = 0;
762
763                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
764                 if (ret < 0)
765                         break;
766
767                 read += ret;
768                 ubuf += ret;
769                 cnt -= ret;
770
771                 if (!trace_parser_loaded(&parser))
772                         break;
773
774                 ret = -EINVAL;
775                 if (kstrtoul(parser.buffer, 0, &val))
776                         break;
777
778                 pid = (pid_t)val;
779
780                 if (trace_pid_list_set(pid_list, pid) < 0) {
781                         ret = -1;
782                         break;
783                 }
784                 nr_pids++;
785
786                 trace_parser_clear(&parser);
787                 ret = 0;
788         }
789         trace_parser_put(&parser);
790
791         if (ret < 0) {
792                 trace_pid_list_free(pid_list);
793                 return ret;
794         }
795
796         if (!nr_pids) {
797                 /* Cleared the list of pids */
798                 trace_pid_list_free(pid_list);
799                 pid_list = NULL;
800         }
801
802         *new_pid_list = pid_list;
803
804         return read;
805 }
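/*
 * Usage sketch (illustrative, not in the original source): this helper
 * backs tracefs pid-filter files such as set_event_pid and set_ftrace_pid.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 123 456 > /sys/kernel/tracing/set_event_pid     start a new filter
 *   echo 789 >> /sys/kernel/tracing/set_event_pid        add another pid
 *
 * As the comment above notes, the write is all or nothing: if it fails
 * part way through, the previously installed list is left untouched.
 */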
806
807 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
808 {
809         u64 ts;
810
811         /* Early boot up does not have a buffer yet */
812         if (!buf->buffer)
813                 return trace_clock_local();
814
815         ts = ring_buffer_time_stamp(buf->buffer);
816         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
817
818         return ts;
819 }
820
821 u64 ftrace_now(int cpu)
822 {
823         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
824 }
825
826 /**
827  * tracing_is_enabled - Show if global_trace has been enabled
828  *
829  * Shows if the global trace has been enabled or not. It uses the
830  * mirror flag "buffer_disabled" to be used in fast paths such as for
831  * the irqsoff tracer. But it may be inaccurate due to races. If you
832  * need to know the accurate state, use tracing_is_on() which is a little
833  * slower, but accurate.
834  */
835 int tracing_is_enabled(void)
836 {
837         /*
838          * For quick access (irqsoff uses this in fast path), just
839          * return the mirror variable of the state of the ring buffer.
840          * It's a little racy, but we don't really care.
841          */
842         smp_rmb();
843         return !global_trace.buffer_disabled;
844 }
845
846 /*
847  * trace_buf_size is the size in bytes that is allocated
848  * for a buffer. Note, the number of bytes is always rounded
849  * to page size.
850  *
851  * This number is purposely set to a low number of 16384.
852  * If a dump on oops happens, it is much appreciated not to have
853  * to wait for all that output. In any case, this is both boot
854  * time and run time configurable.
855  */
856 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
857
858 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
859
860 /* trace_types holds a link list of available tracers. */
861 static struct tracer            *trace_types __read_mostly;
862
863 /*
864  * trace_types_lock is used to protect the trace_types list.
865  */
866 DEFINE_MUTEX(trace_types_lock);
867
868 /*
869  * serialize the access of the ring buffer
870  *
871  * The ring buffer serializes readers, but that is only low level protection.
872  * The validity of the events (returned by ring_buffer_peek(), etc.)
873  * is not protected by the ring buffer.
874  *
875  * The content of events may become garbage if we allow another process to
876  * consume these events concurrently:
877  *   A) the page of the consumed events may become a normal page
878  *      (not a reader page) in the ring buffer, and this page will be
879  *      rewritten by the events producer.
880  *   B) the page of the consumed events may become a page for splice_read,
881  *      and this page will be returned to the system.
882  *
883  * These primitives allow multiple processes to access different per-cpu
884  * ring buffers concurrently.
885  *
886  * These primitives don't distinguish read-only and read-consume access.
887  * Multiple read-only accesses are also serialized.
888  */
889
890 #ifdef CONFIG_SMP
891 static DECLARE_RWSEM(all_cpu_access_lock);
892 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
893
894 static inline void trace_access_lock(int cpu)
895 {
896         if (cpu == RING_BUFFER_ALL_CPUS) {
897                 /* gain it for accessing the whole ring buffer. */
898                 down_write(&all_cpu_access_lock);
899         } else {
900                 /* gain it for accessing a cpu ring buffer. */
901
902                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
903                 down_read(&all_cpu_access_lock);
904
905                 /* Secondly block other access to this @cpu ring buffer. */
906                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
907         }
908 }
909
910 static inline void trace_access_unlock(int cpu)
911 {
912         if (cpu == RING_BUFFER_ALL_CPUS) {
913                 up_write(&all_cpu_access_lock);
914         } else {
915                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
916                 up_read(&all_cpu_access_lock);
917         }
918 }
919
920 static inline void trace_access_lock_init(void)
921 {
922         int cpu;
923
924         for_each_possible_cpu(cpu)
925                 mutex_init(&per_cpu(cpu_access_lock, cpu));
926 }
927
928 #else
929
930 static DEFINE_MUTEX(access_lock);
931
932 static inline void trace_access_lock(int cpu)
933 {
934         (void)cpu;
935         mutex_lock(&access_lock);
936 }
937
938 static inline void trace_access_unlock(int cpu)
939 {
940         (void)cpu;
941         mutex_unlock(&access_lock);
942 }
943
944 static inline void trace_access_lock_init(void)
945 {
946 }
947
948 #endif
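/*
 * Illustrative usage pattern (added for clarity): both a reader of a
 * single per-cpu buffer and a reader of all buffers bracket their access
 * with these helpers, which is how the rules in the comment above are
 * enforced:
 *
 *   trace_access_lock(cpu);          // or RING_BUFFER_ALL_CPUS
 *   ... peek at or consume events of @cpu ...
 *   trace_access_unlock(cpu);
 */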
949
950 #ifdef CONFIG_STACKTRACE
951 static void __ftrace_trace_stack(struct trace_buffer *buffer,
952                                  unsigned int trace_ctx,
953                                  int skip, struct pt_regs *regs);
954 static inline void ftrace_trace_stack(struct trace_array *tr,
955                                       struct trace_buffer *buffer,
956                                       unsigned int trace_ctx,
957                                       int skip, struct pt_regs *regs);
958
959 #else
960 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
961                                         unsigned int trace_ctx,
962                                         int skip, struct pt_regs *regs)
963 {
964 }
965 static inline void ftrace_trace_stack(struct trace_array *tr,
966                                       struct trace_buffer *buffer,
967                                       unsigned long trace_ctx,
968                                       int skip, struct pt_regs *regs)
969 {
970 }
971
972 #endif
973
974 static __always_inline void
975 trace_event_setup(struct ring_buffer_event *event,
976                   int type, unsigned int trace_ctx)
977 {
978         struct trace_entry *ent = ring_buffer_event_data(event);
979
980         tracing_generic_entry_update(ent, type, trace_ctx);
981 }
982
983 static __always_inline struct ring_buffer_event *
984 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
985                           int type,
986                           unsigned long len,
987                           unsigned int trace_ctx)
988 {
989         struct ring_buffer_event *event;
990
991         event = ring_buffer_lock_reserve(buffer, len);
992         if (event != NULL)
993                 trace_event_setup(event, type, trace_ctx);
994
995         return event;
996 }
997
998 void tracer_tracing_on(struct trace_array *tr)
999 {
1000         if (tr->array_buffer.buffer)
1001                 ring_buffer_record_on(tr->array_buffer.buffer);
1002         /*
1003          * This flag is looked at when buffers haven't been allocated
1004          * yet, or by some tracers (like irqsoff), that just want to
1005          * know if the ring buffer has been disabled, but it can handle
1006          * races where it gets disabled while we still do a record.
1007          * As the check is in the fast path of the tracers, it is more
1008          * important to be fast than accurate.
1009          */
1010         tr->buffer_disabled = 0;
1011         /* Make the flag seen by readers */
1012         smp_wmb();
1013 }
1014
1015 /**
1016  * tracing_on - enable tracing buffers
1017  *
1018  * This function enables tracing buffers that may have been
1019  * disabled with tracing_off.
1020  */
1021 void tracing_on(void)
1022 {
1023         tracer_tracing_on(&global_trace);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_on);
1026
1027
1028 static __always_inline void
1029 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1030 {
1031         __this_cpu_write(trace_taskinfo_save, true);
1032
1033         /* If this is the temp buffer, we need to commit fully */
1034         if (this_cpu_read(trace_buffered_event) == event) {
1035                 /* Length is in event->array[0] */
1036                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1037                 /* Release the temp buffer */
1038                 this_cpu_dec(trace_buffered_event_cnt);
1039                 /* ring_buffer_unlock_commit() enables preemption */
1040                 preempt_enable_notrace();
1041         } else
1042                 ring_buffer_unlock_commit(buffer);
1043 }
1044
1045 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1046                        const char *str, int size)
1047 {
1048         struct ring_buffer_event *event;
1049         struct trace_buffer *buffer;
1050         struct print_entry *entry;
1051         unsigned int trace_ctx;
1052         int alloc;
1053
1054         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1055                 return 0;
1056
1057         if (unlikely(tracing_selftest_running && tr == &global_trace))
1058                 return 0;
1059
1060         if (unlikely(tracing_disabled))
1061                 return 0;
1062
1063         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = tr->array_buffer.buffer;
1067         ring_buffer_nest_start(buffer);
1068         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1069                                             trace_ctx);
1070         if (!event) {
1071                 size = 0;
1072                 goto out;
1073         }
1074
1075         entry = ring_buffer_event_data(event);
1076         entry->ip = ip;
1077
1078         memcpy(&entry->buf, str, size);
1079
1080         /* Add a newline if necessary */
1081         if (entry->buf[size - 1] != '\n') {
1082                 entry->buf[size] = '\n';
1083                 entry->buf[size + 1] = '\0';
1084         } else
1085                 entry->buf[size] = '\0';
1086
1087         __buffer_unlock_commit(buffer, event);
1088         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1089  out:
1090         ring_buffer_nest_end(buffer);
1091         return size;
1092 }
1093 EXPORT_SYMBOL_GPL(__trace_array_puts);
1094
1095 /**
1096  * __trace_puts - write a constant string into the trace buffer.
1097  * @ip:    The address of the caller
1098  * @str:   The constant string to write
1099  * @size:  The size of the string.
1100  */
1101 int __trace_puts(unsigned long ip, const char *str, int size)
1102 {
1103         return __trace_array_puts(&global_trace, ip, str, size);
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_puts);
1106
1107 /**
1108  * __trace_bputs - write the pointer to a constant string into trace buffer
1109  * @ip:    The address of the caller
1110  * @str:   The constant string to write to the buffer to
1111  */
1112 int __trace_bputs(unsigned long ip, const char *str)
1113 {
1114         struct ring_buffer_event *event;
1115         struct trace_buffer *buffer;
1116         struct bputs_entry *entry;
1117         unsigned int trace_ctx;
1118         int size = sizeof(struct bputs_entry);
1119         int ret = 0;
1120
1121         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1122                 return 0;
1123
1124         if (unlikely(tracing_selftest_running || tracing_disabled))
1125                 return 0;
1126
1127         trace_ctx = tracing_gen_ctx();
1128         buffer = global_trace.array_buffer.buffer;
1129
1130         ring_buffer_nest_start(buffer);
1131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1132                                             trace_ctx);
1133         if (!event)
1134                 goto out;
1135
1136         entry = ring_buffer_event_data(event);
1137         entry->ip                       = ip;
1138         entry->str                      = str;
1139
1140         __buffer_unlock_commit(buffer, event);
1141         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1142
1143         ret = 1;
1144  out:
1145         ring_buffer_nest_end(buffer);
1146         return ret;
1147 }
1148 EXPORT_SYMBOL_GPL(__trace_bputs);
1149
1150 #ifdef CONFIG_TRACER_SNAPSHOT
1151 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1152                                            void *cond_data)
1153 {
1154         struct tracer *tracer = tr->current_trace;
1155         unsigned long flags;
1156
1157         if (in_nmi()) {
1158                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1159                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1160                 return;
1161         }
1162
1163         if (!tr->allocated_snapshot) {
1164                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1165                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1166                 tracer_tracing_off(tr);
1167                 return;
1168         }
1169
1170         /* Note, snapshot can not be used when the tracer uses it */
1171         if (tracer->use_max_tr) {
1172                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1173                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1174                 return;
1175         }
1176
1177         local_irq_save(flags);
1178         update_max_tr(tr, current, smp_processor_id(), cond_data);
1179         local_irq_restore(flags);
1180 }
1181
1182 void tracing_snapshot_instance(struct trace_array *tr)
1183 {
1184         tracing_snapshot_instance_cond(tr, NULL);
1185 }
1186
1187 /**
1188  * tracing_snapshot - take a snapshot of the current buffer.
1189  *
1190  * This causes a swap between the snapshot buffer and the current live
1191  * tracing buffer. You can use this to take snapshots of the live
1192  * trace when some condition is triggered, but continue to trace.
1193  *
1194  * Note, make sure to allocate the snapshot with either
1195  * a tracing_snapshot_alloc(), or by doing it manually
1196  * with: echo 1 > /sys/kernel/tracing/snapshot
1197  *
1198  * If the snapshot buffer is not allocated, it will stop tracing,
1199  * basically making a permanent snapshot.
1200  */
1201 void tracing_snapshot(void)
1202 {
1203         struct trace_array *tr = &global_trace;
1204
1205         tracing_snapshot_instance(tr);
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot);
1208
1209 /**
1210  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1211  * @tr:         The tracing instance to snapshot
1212  * @cond_data:  The data to be tested conditionally, and possibly saved
1213  *
1214  * This is the same as tracing_snapshot() except that the snapshot is
1215  * conditional - the snapshot will only happen if the
1216  * cond_snapshot.update() implementation receiving the cond_data
1217  * returns true, which means that the trace array's cond_snapshot
1218  * update() operation used the cond_data to determine whether the
1219  * snapshot should be taken, and if it was, presumably saved it along
1220  * with the snapshot.
1221  */
1222 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1223 {
1224         tracing_snapshot_instance_cond(tr, cond_data);
1225 }
1226 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1227
1228 /**
1229  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1230  * @tr:         The tracing instance
1231  *
1232  * When the user enables a conditional snapshot using
1233  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1234  * with the snapshot.  This accessor is used to retrieve it.
1235  *
1236  * Should not be called from cond_snapshot.update(), since it takes
1237  * the tr->max_lock lock, which the code calling
1238  * cond_snapshot.update() has already taken.
1239  *
1240  * Returns the cond_data associated with the trace array's snapshot.
1241  */
1242 void *tracing_cond_snapshot_data(struct trace_array *tr)
1243 {
1244         void *cond_data = NULL;
1245
1246         local_irq_disable();
1247         arch_spin_lock(&tr->max_lock);
1248
1249         if (tr->cond_snapshot)
1250                 cond_data = tr->cond_snapshot->cond_data;
1251
1252         arch_spin_unlock(&tr->max_lock);
1253         local_irq_enable();
1254
1255         return cond_data;
1256 }
1257 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1258
1259 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1260                                         struct array_buffer *size_buf, int cpu_id);
1261 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1262
1263 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1264 {
1265         int ret;
1266
1267         if (!tr->allocated_snapshot) {
1268
1269                 /* allocate spare buffer */
1270                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1271                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1272                 if (ret < 0)
1273                         return ret;
1274
1275                 tr->allocated_snapshot = true;
1276         }
1277
1278         return 0;
1279 }
1280
1281 static void free_snapshot(struct trace_array *tr)
1282 {
1283         /*
1284          * We don't free the ring buffer; instead, we resize it because
1285          * the max_tr ring buffer has some state (e.g. ring->clock) and
1286          * we want to preserve it.
1287          */
1288         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1289         set_buffer_entries(&tr->max_buffer, 1);
1290         tracing_reset_online_cpus(&tr->max_buffer);
1291         tr->allocated_snapshot = false;
1292 }
1293
1294 /**
1295  * tracing_alloc_snapshot - allocate snapshot buffer.
1296  *
1297  * This only allocates the snapshot buffer if it isn't already
1298  * allocated - it doesn't also take a snapshot.
1299  *
1300  * This is meant to be used in cases where the snapshot buffer needs
1301  * to be set up for events that can't sleep but need to be able to
1302  * trigger a snapshot.
1303  */
1304 int tracing_alloc_snapshot(void)
1305 {
1306         struct trace_array *tr = &global_trace;
1307         int ret;
1308
1309         ret = tracing_alloc_snapshot_instance(tr);
1310         WARN_ON(ret < 0);
1311
1312         return ret;
1313 }
1314 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1315
1316 /**
1317  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1318  *
1319  * This is similar to tracing_snapshot(), but it will allocate the
1320  * snapshot buffer if it isn't already allocated. Use this only
1321  * where it is safe to sleep, as the allocation may sleep.
1322  *
1323  * This causes a swap between the snapshot buffer and the current live
1324  * tracing buffer. You can use this to take snapshots of the live
1325  * trace when some condition is triggered, but continue to trace.
1326  */
1327 void tracing_snapshot_alloc(void)
1328 {
1329         int ret;
1330
1331         ret = tracing_alloc_snapshot();
1332         if (ret < 0)
1333                 return;
1334
1335         tracing_snapshot();
1336 }
1337 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
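/*
 * Usage sketch (illustrative, not in the original source): kernel code
 * that wants to capture the trace when some condition hits can use the
 * calls above directly.
 *
 *   tracing_snapshot_alloc();     // allocate if needed and snapshot; may sleep
 *
 * or, if the spare buffer was set up earlier (tracing_alloc_snapshot() or
 * "echo 1 > /sys/kernel/tracing/snapshot"), simply:
 *
 *   tracing_snapshot();           // does not allocate; ignored in NMI context
 */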
1338
1339 /**
1340  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1341  * @tr:         The tracing instance
1342  * @cond_data:  User data to associate with the snapshot
1343  * @update:     Implementation of the cond_snapshot update function
1344  *
1345  * Check whether the conditional snapshot for the given instance has
1346  * already been enabled, or if the current tracer is already using a
1347  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1348  * save the cond_data and update function inside.
1349  *
1350  * Returns 0 if successful, error otherwise.
1351  */
1352 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1353                                  cond_update_fn_t update)
1354 {
1355         struct cond_snapshot *cond_snapshot;
1356         int ret = 0;
1357
1358         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1359         if (!cond_snapshot)
1360                 return -ENOMEM;
1361
1362         cond_snapshot->cond_data = cond_data;
1363         cond_snapshot->update = update;
1364
1365         mutex_lock(&trace_types_lock);
1366
1367         ret = tracing_alloc_snapshot_instance(tr);
1368         if (ret)
1369                 goto fail_unlock;
1370
1371         if (tr->current_trace->use_max_tr) {
1372                 ret = -EBUSY;
1373                 goto fail_unlock;
1374         }
1375
1376         /*
1377          * The cond_snapshot can only change to NULL without the
1378          * trace_types_lock. We don't care if we race with it going
1379          * to NULL, but we want to make sure that it's not set to
1380          * something other than NULL when we get here, which we can
1381          * do safely with only holding the trace_types_lock and not
1382          * having to take the max_lock.
1383          */
1384         if (tr->cond_snapshot) {
1385                 ret = -EBUSY;
1386                 goto fail_unlock;
1387         }
1388
1389         local_irq_disable();
1390         arch_spin_lock(&tr->max_lock);
1391         tr->cond_snapshot = cond_snapshot;
1392         arch_spin_unlock(&tr->max_lock);
1393         local_irq_enable();
1394
1395         mutex_unlock(&trace_types_lock);
1396
1397         return ret;
1398
1399  fail_unlock:
1400         mutex_unlock(&trace_types_lock);
1401         kfree(cond_snapshot);
1402         return ret;
1403 }
1404 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1405
1406 /**
1407  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1408  * @tr:         The tracing instance
1409  *
1410  * Check whether the conditional snapshot for the given instance is
1411  * enabled; if so, free the cond_snapshot associated with it,
1412  * otherwise return -EINVAL.
1413  *
1414  * Returns 0 if successful, error otherwise.
1415  */
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418         int ret = 0;
1419
1420         local_irq_disable();
1421         arch_spin_lock(&tr->max_lock);
1422
1423         if (!tr->cond_snapshot)
1424                 ret = -EINVAL;
1425         else {
1426                 kfree(tr->cond_snapshot);
1427                 tr->cond_snapshot = NULL;
1428         }
1429
1430         arch_spin_unlock(&tr->max_lock);
1431         local_irq_enable();
1432
1433         return ret;
1434 }
1435 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
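/*
 * Illustrative sketch (not part of the original source): wiring up a
 * conditional snapshot with the three calls above. The update callback
 * type is cond_update_fn_t (declared in the tracing headers); it receives
 * the trace array and the cond_data registered here and returns true when
 * a snapshot should actually be taken. The names my_hits/my_cond_update
 * are made up for the example.
 *
 *   static atomic_t my_hits = ATOMIC_INIT(0);
 *
 *   static bool my_cond_update(struct trace_array *tr, void *cond_data)
 *   {
 *           // take the snapshot only on the first hit
 *           return atomic_inc_return(&my_hits) == 1;
 *   }
 *
 *   tracing_snapshot_cond_enable(tr, NULL, my_cond_update);
 *   ...
 *   tracing_snapshot_cond(tr, NULL);    // snapshots only if update() said yes
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */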
1436 #else
1437 void tracing_snapshot(void)
1438 {
1439         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1440 }
1441 EXPORT_SYMBOL_GPL(tracing_snapshot);
1442 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1443 {
1444         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1445 }
1446 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1447 int tracing_alloc_snapshot(void)
1448 {
1449         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1450         return -ENODEV;
1451 }
1452 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1453 void tracing_snapshot_alloc(void)
1454 {
1455         /* Give warning */
1456         tracing_snapshot();
1457 }
1458 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1459 void *tracing_cond_snapshot_data(struct trace_array *tr)
1460 {
1461         return NULL;
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1464 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1465 {
1466         return -ENODEV;
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1469 int tracing_snapshot_cond_disable(struct trace_array *tr)
1470 {
1471         return false;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1474 #define free_snapshot(tr)       do { } while (0)
1475 #endif /* CONFIG_TRACER_SNAPSHOT */
1476
1477 void tracer_tracing_off(struct trace_array *tr)
1478 {
1479         if (tr->array_buffer.buffer)
1480                 ring_buffer_record_off(tr->array_buffer.buffer);
1481         /*
1482          * This flag is looked at when buffers haven't been allocated
1483          * yet, or by some tracers (like irqsoff), that just want to
1484          * know if the ring buffer has been disabled, but it can handle
1485          * races where it gets disabled while we still do a record.
1486          * As the check is in the fast path of the tracers, it is more
1487          * important to be fast than accurate.
1488          */
1489         tr->buffer_disabled = 1;
1490         /* Make the flag seen by readers */
1491         smp_wmb();
1492 }
1493
1494 /**
1495  * tracing_off - turn off tracing buffers
1496  *
1497  * This function stops the tracing buffers from recording data.
1498  * It does not disable any overhead the tracers themselves may
1499  * be causing. This function simply causes all recording to
1500  * the ring buffers to fail.
1501  */
1502 void tracing_off(void)
1503 {
1504         tracer_tracing_off(&global_trace);
1505 }
1506 EXPORT_SYMBOL_GPL(tracing_off);
1507
1508 void disable_trace_on_warning(void)
1509 {
1510         if (__disable_trace_on_warning) {
1511                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1512                         "Disabling tracing due to warning\n");
1513                 tracing_off();
1514         }
1515 }
1516
1517 /**
1518  * tracer_tracing_is_on - show real state of ring buffer enabled
1519  * @tr : the trace array to know if ring buffer is enabled
1520  *
1521  * Shows real state of the ring buffer if it is enabled or not.
1522  */
1523 bool tracer_tracing_is_on(struct trace_array *tr)
1524 {
1525         if (tr->array_buffer.buffer)
1526                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1527         return !tr->buffer_disabled;
1528 }
1529
1530 /**
1531  * tracing_is_on - show state of ring buffers enabled
1532  */
1533 int tracing_is_on(void)
1534 {
1535         return tracer_tracing_is_on(&global_trace);
1536 }
1537 EXPORT_SYMBOL_GPL(tracing_is_on);
1538
1539 static int __init set_buf_size(char *str)
1540 {
1541         unsigned long buf_size;
1542
1543         if (!str)
1544                 return 0;
1545         buf_size = memparse(str, &str);
1546         /*
1547          * nr_entries can not be zero and the startup
1548          * tests require some buffer space. Therefore
1549          * ensure we have at least 4096 bytes of buffer.
1550          */
1551         trace_buf_size = max(4096UL, buf_size);
1552         return 1;
1553 }
1554 __setup("trace_buf_size=", set_buf_size);
1555
1556 static int __init set_tracing_thresh(char *str)
1557 {
1558         unsigned long threshold;
1559         int ret;
1560
1561         if (!str)
1562                 return 0;
1563         ret = kstrtoul(str, 0, &threshold);
1564         if (ret < 0)
1565                 return 0;
1566         tracing_thresh = threshold * 1000;
1567         return 1;
1568 }
1569 __setup("tracing_thresh=", set_tracing_thresh);
1570
1571 unsigned long nsecs_to_usecs(unsigned long nsecs)
1572 {
1573         return nsecs / 1000;
1574 }
1575
1576 /*
1577  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1578  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1579  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1580  * of strings in the order that the evals (enum) were defined.
1581  */
1582 #undef C
1583 #define C(a, b) b
1584
1585 /* These must match the bit positions in trace_iterator_flags */
1586 static const char *trace_options[] = {
1587         TRACE_FLAGS
1588         NULL
1589 };
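/*
 * Illustrative aside (not in the original source): the C(a, b) trick above
 * is the classic "X-macro" pattern. A self-contained analogue, with a
 * hypothetical flag list, shows how one definition yields both the enum
 * and the matching string table:
 *
 *   #define MY_FLAGS  C(FOO, "foo") C(BAR, "bar")
 *
 *   #define C(a, b) MY_##a,
 *   enum { MY_FLAGS };                                    // MY_FOO, MY_BAR
 *   #undef C
 *
 *   #define C(a, b) b,
 *   static const char *my_flag_names[] = { MY_FLAGS };    // "foo", "bar"
 *   #undef C
 */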
1590
1591 static struct {
1592         u64 (*func)(void);
1593         const char *name;
1594         int in_ns;              /* is this clock in nanoseconds? */
1595 } trace_clocks[] = {
1596         { trace_clock_local,            "local",        1 },
1597         { trace_clock_global,           "global",       1 },
1598         { trace_clock_counter,          "counter",      0 },
1599         { trace_clock_jiffies,          "uptime",       0 },
1600         { trace_clock,                  "perf",         1 },
1601         { ktime_get_mono_fast_ns,       "mono",         1 },
1602         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1603         { ktime_get_boot_fast_ns,       "boot",         1 },
1604         { ktime_get_tai_fast_ns,        "tai",          1 },
1605         ARCH_TRACE_CLOCKS
1606 };
1607
1608 bool trace_clock_in_ns(struct trace_array *tr)
1609 {
1610         if (trace_clocks[tr->clock_id].in_ns)
1611                 return true;
1612
1613         return false;
1614 }
1615
1616 /*
1617  * trace_parser_get_init - gets the buffer for trace parser
1618  */
1619 int trace_parser_get_init(struct trace_parser *parser, int size)
1620 {
1621         memset(parser, 0, sizeof(*parser));
1622
1623         parser->buffer = kmalloc(size, GFP_KERNEL);
1624         if (!parser->buffer)
1625                 return 1;
1626
1627         parser->size = size;
1628         return 0;
1629 }
1630
1631 /*
1632  * trace_parser_put - frees the buffer for trace parser
1633  */
1634 void trace_parser_put(struct trace_parser *parser)
1635 {
1636         kfree(parser->buffer);
1637         parser->buffer = NULL;
1638 }
1639
1640 /*
1641  * trace_get_user - reads the user input string separated by space
1642  * (matched by isspace(ch))
1643  *
1644  * For each string found the 'struct trace_parser' is updated,
1645  * and the function returns.
1646  *
1647  * Returns number of bytes read.
1648  *
1649  * See kernel/trace/trace.h for 'struct trace_parser' details.
1650  */
1651 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1652         size_t cnt, loff_t *ppos)
1653 {
1654         char ch;
1655         size_t read = 0;
1656         ssize_t ret;
1657
1658         if (!*ppos)
1659                 trace_parser_clear(parser);
1660
1661         ret = get_user(ch, ubuf++);
1662         if (ret)
1663                 goto out;
1664
1665         read++;
1666         cnt--;
1667
1668         /*
1669          * If the parser did not finish with the last write, continue
1670          * reading the user input without skipping spaces.
1671          */
1672         if (!parser->cont) {
1673                 /* skip white space */
1674                 while (cnt && isspace(ch)) {
1675                         ret = get_user(ch, ubuf++);
1676                         if (ret)
1677                                 goto out;
1678                         read++;
1679                         cnt--;
1680                 }
1681
1682                 parser->idx = 0;
1683
1684                 /* only spaces were written */
1685                 if (isspace(ch) || !ch) {
1686                         *ppos += read;
1687                         ret = read;
1688                         goto out;
1689                 }
1690         }
1691
1692         /* read the non-space input */
1693         while (cnt && !isspace(ch) && ch) {
1694                 if (parser->idx < parser->size - 1)
1695                         parser->buffer[parser->idx++] = ch;
1696                 else {
1697                         ret = -EINVAL;
1698                         goto out;
1699                 }
1700                 ret = get_user(ch, ubuf++);
1701                 if (ret)
1702                         goto out;
1703                 read++;
1704                 cnt--;
1705         }
1706
1707         /* We either got finished input or we have to wait for another call. */
1708         if (isspace(ch) || !ch) {
1709                 parser->buffer[parser->idx] = 0;
1710                 parser->cont = false;
1711         } else if (parser->idx < parser->size - 1) {
1712                 parser->cont = true;
1713                 parser->buffer[parser->idx++] = ch;
1714                 /* Make sure the parsed string always terminates with '\0'. */
1715                 parser->buffer[parser->idx] = 0;
1716         } else {
1717                 ret = -EINVAL;
1718                 goto out;
1719         }
1720
1721         *ppos += read;
1722         ret = read;
1723
1724 out:
1725         return ret;
1726 }
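
/*
 * Minimal usage sketch (hypothetical caller, not part of this file): a
 * typical ->write() handler drives the parser with trace_get_user() and
 * consumes one whitespace-separated token per completed parse.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read >= 0 && trace_parser_loaded(&parser))
 *			pr_info("token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */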
1727
1728 /* TODO add a seq_buf_to_buffer() */
1729 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1730 {
1731         int len;
1732
1733         if (trace_seq_used(s) <= s->seq.readpos)
1734                 return -EBUSY;
1735
1736         len = trace_seq_used(s) - s->seq.readpos;
1737         if (cnt > len)
1738                 cnt = len;
1739         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1740
1741         s->seq.readpos += cnt;
1742         return cnt;
1743 }
1744
1745 unsigned long __read_mostly     tracing_thresh;
1746
1747 #ifdef CONFIG_TRACER_MAX_TRACE
1748 static const struct file_operations tracing_max_lat_fops;
1749
1750 #ifdef LATENCY_FS_NOTIFY
1751
1752 static struct workqueue_struct *fsnotify_wq;
1753
1754 static void latency_fsnotify_workfn(struct work_struct *work)
1755 {
1756         struct trace_array *tr = container_of(work, struct trace_array,
1757                                               fsnotify_work);
1758         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1759 }
1760
1761 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1762 {
1763         struct trace_array *tr = container_of(iwork, struct trace_array,
1764                                               fsnotify_irqwork);
1765         queue_work(fsnotify_wq, &tr->fsnotify_work);
1766 }
1767
1768 static void trace_create_maxlat_file(struct trace_array *tr,
1769                                      struct dentry *d_tracer)
1770 {
1771         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1772         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1773         tr->d_max_latency = trace_create_file("tracing_max_latency",
1774                                               TRACE_MODE_WRITE,
1775                                               d_tracer, tr,
1776                                               &tracing_max_lat_fops);
1777 }
1778
1779 __init static int latency_fsnotify_init(void)
1780 {
1781         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1782                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1783         if (!fsnotify_wq) {
1784                 pr_err("Unable to allocate tr_max_lat_wq\n");
1785                 return -ENOMEM;
1786         }
1787         return 0;
1788 }
1789
1790 late_initcall_sync(latency_fsnotify_init);
1791
1792 void latency_fsnotify(struct trace_array *tr)
1793 {
1794         if (!fsnotify_wq)
1795                 return;
1796         /*
1797          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1798          * possible that we are called from __schedule() or do_idle(), which
1799          * could cause a deadlock.
1800          */
1801         irq_work_queue(&tr->fsnotify_irqwork);
1802 }
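
/*
 * Deferral chain, sketched (assuming the LATENCY_FS_NOTIFY configuration
 * above): a latency update in scheduler context only raises the irq_work;
 * the fsnotify call itself happens later in process context.
 *
 *	update_max_tr() / __update_max_tr()      <- may run from __schedule()
 *	  -> latency_fsnotify()
 *	       irq_work_queue()                  <- safe from any context
 *	         -> latency_fsnotify_workfn_irq()     (hard irq context)
 *	              queue_work(fsnotify_wq, ...)
 *	                -> latency_fsnotify_workfn()  (process context)
 *	                     fsnotify_inode(..., FS_MODIFY)
 */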
1803
1804 #else /* !LATENCY_FS_NOTIFY */
1805
1806 #define trace_create_maxlat_file(tr, d_tracer)                          \
1807         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1808                           d_tracer, tr, &tracing_max_lat_fops)
1809
1810 #endif
1811
1812 /*
1813  * Copy the new maximum trace into the separate maximum-trace
1814  * structure. (This way the maximum trace is permanently saved
1815  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1816  */
1817 static void
1818 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1819 {
1820         struct array_buffer *trace_buf = &tr->array_buffer;
1821         struct array_buffer *max_buf = &tr->max_buffer;
1822         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1823         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1824
1825         max_buf->cpu = cpu;
1826         max_buf->time_start = data->preempt_timestamp;
1827
1828         max_data->saved_latency = tr->max_latency;
1829         max_data->critical_start = data->critical_start;
1830         max_data->critical_end = data->critical_end;
1831
1832         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1833         max_data->pid = tsk->pid;
1834         /*
1835          * If tsk == current, then use current_uid(), as that does not use
1836          * RCU. The irq tracer can be called out of RCU scope.
1837          */
1838         if (tsk == current)
1839                 max_data->uid = current_uid();
1840         else
1841                 max_data->uid = task_uid(tsk);
1842
1843         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1844         max_data->policy = tsk->policy;
1845         max_data->rt_priority = tsk->rt_priority;
1846
1847         /* record this task's comm */
1848         tracing_record_cmdline(tsk);
1849         latency_fsnotify(tr);
1850 }
1851
1852 /**
1853  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1854  * @tr: tracer
1855  * @tsk: the task with the latency
1856  * @cpu: The cpu that initiated the trace.
1857  * @cond_data: User data associated with a conditional snapshot
1858  *
1859  * Flip the buffers between the @tr and the max_tr and record information
1860  * about which task was the cause of this latency.
1861  */
1862 void
1863 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1864               void *cond_data)
1865 {
1866         if (tr->stop_count)
1867                 return;
1868
1869         WARN_ON_ONCE(!irqs_disabled());
1870
1871         if (!tr->allocated_snapshot) {
1872                 /* Only the nop tracer should hit this when disabling */
1873                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1874                 return;
1875         }
1876
1877         arch_spin_lock(&tr->max_lock);
1878
1879         /* Inherit the recordable setting from array_buffer */
1880         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1881                 ring_buffer_record_on(tr->max_buffer.buffer);
1882         else
1883                 ring_buffer_record_off(tr->max_buffer.buffer);
1884
1885 #ifdef CONFIG_TRACER_SNAPSHOT
1886         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1887                 arch_spin_unlock(&tr->max_lock);
1888                 return;
1889         }
1890 #endif
1891         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1892
1893         __update_max_tr(tr, tsk, cpu);
1894
1895         arch_spin_unlock(&tr->max_lock);
1896 }
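
/*
 * Usage sketch (hypothetical latency tracer, for illustration only): the
 * caller is expected to run with interrupts disabled and to pass the CPU
 * on which the new maximum was observed.
 *
 *	static void example_check_latency(struct trace_array *tr,
 *					  unsigned long delta, int cpu)
 *	{
 *		if (delta > tr->max_latency) {
 *			tr->max_latency = delta;
 *			update_max_tr(tr, current, cpu, NULL);
 *		}
 *	}
 */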
1897
1898 /**
1899  * update_max_tr_single - only copy one trace over, and reset the rest
1900  * @tr: tracer
1901  * @tsk: task with the latency
1902  * @cpu: the cpu of the buffer to copy.
1903  *
1904  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1905  */
1906 void
1907 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909         int ret;
1910
1911         if (tr->stop_count)
1912                 return;
1913
1914         WARN_ON_ONCE(!irqs_disabled());
1915         if (!tr->allocated_snapshot) {
1916                 /* Only the nop tracer should hit this when disabling */
1917                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1918                 return;
1919         }
1920
1921         arch_spin_lock(&tr->max_lock);
1922
1923         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1924
1925         if (ret == -EBUSY) {
1926                 /*
1927                  * We failed to swap the buffer due to a commit taking
1928                  * place on this CPU. We fail to record, but we reset
1929                  * the max trace buffer (no one writes directly to it)
1930                  * and flag that it failed.
1931                  * The swap can also fail while a resize is in progress.
1932                  */
1933                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1934                         "Failed to swap buffers due to commit or resize in progress\n");
1935         }
1936
1937         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1938
1939         __update_max_tr(tr, tsk, cpu);
1940         arch_spin_unlock(&tr->max_lock);
1941 }
1942
1943 #endif /* CONFIG_TRACER_MAX_TRACE */
1944
1945 static int wait_on_pipe(struct trace_iterator *iter, int full)
1946 {
1947         /* Iterators are static; they should be either filled or empty */
1948         if (trace_buffer_iter(iter, iter->cpu_file))
1949                 return 0;
1950
1951         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1952                                 full);
1953 }
1954
1955 #ifdef CONFIG_FTRACE_STARTUP_TEST
1956 static bool selftests_can_run;
1957
1958 struct trace_selftests {
1959         struct list_head                list;
1960         struct tracer                   *type;
1961 };
1962
1963 static LIST_HEAD(postponed_selftests);
1964
1965 static int save_selftest(struct tracer *type)
1966 {
1967         struct trace_selftests *selftest;
1968
1969         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1970         if (!selftest)
1971                 return -ENOMEM;
1972
1973         selftest->type = type;
1974         list_add(&selftest->list, &postponed_selftests);
1975         return 0;
1976 }
1977
1978 static int run_tracer_selftest(struct tracer *type)
1979 {
1980         struct trace_array *tr = &global_trace;
1981         struct tracer *saved_tracer = tr->current_trace;
1982         int ret;
1983
1984         if (!type->selftest || tracing_selftest_disabled)
1985                 return 0;
1986
1987         /*
1988          * If a tracer registers early in boot up (before scheduling is
1989          * initialized and such), then do not run its selftests yet.
1990          * Instead, run them a little later in the boot process.
1991          */
1992         if (!selftests_can_run)
1993                 return save_selftest(type);
1994
1995         if (!tracing_is_on()) {
1996                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1997                         type->name);
1998                 return 0;
1999         }
2000
2001         /*
2002          * Run a selftest on this tracer.
2003          * Here we reset the trace buffer, and set the current
2004          * tracer to be this tracer. The tracer can then run some
2005          * internal tracing to verify that everything is in order.
2006          * If we fail, we do not register this tracer.
2007          */
2008         tracing_reset_online_cpus(&tr->array_buffer);
2009
2010         tr->current_trace = type;
2011
2012 #ifdef CONFIG_TRACER_MAX_TRACE
2013         if (type->use_max_tr) {
2014                 /* If we expanded the buffers, make sure the max is expanded too */
2015                 if (ring_buffer_expanded)
2016                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2017                                            RING_BUFFER_ALL_CPUS);
2018                 tr->allocated_snapshot = true;
2019         }
2020 #endif
2021
2022         /* the test is responsible for initializing and enabling */
2023         pr_info("Testing tracer %s: ", type->name);
2024         ret = type->selftest(type, tr);
2025         /* the test is responsible for resetting too */
2026         tr->current_trace = saved_tracer;
2027         if (ret) {
2028                 printk(KERN_CONT "FAILED!\n");
2029                 /* Add the warning after printing 'FAILED' */
2030                 WARN_ON(1);
2031                 return -1;
2032         }
2033         /* Only reset on passing, to avoid touching corrupted buffers */
2034         tracing_reset_online_cpus(&tr->array_buffer);
2035
2036 #ifdef CONFIG_TRACER_MAX_TRACE
2037         if (type->use_max_tr) {
2038                 tr->allocated_snapshot = false;
2039
2040                 /* Shrink the max buffer again */
2041                 if (ring_buffer_expanded)
2042                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2043                                            RING_BUFFER_ALL_CPUS);
2044         }
2045 #endif
2046
2047         printk(KERN_CONT "PASSED\n");
2048         return 0;
2049 }
2050
2051 static int do_run_tracer_selftest(struct tracer *type)
2052 {
2053         int ret;
2054
2055         /*
2056          * Tests can take a long time, especially if they are run one after the
2057          * other, as happens during bootup when all the tracers are
2058          * registered. This could cause the soft lockup watchdog to trigger.
2059          */
2060         cond_resched();
2061
2062         tracing_selftest_running = true;
2063         ret = run_tracer_selftest(type);
2064         tracing_selftest_running = false;
2065
2066         return ret;
2067 }
2068
2069 static __init int init_trace_selftests(void)
2070 {
2071         struct trace_selftests *p, *n;
2072         struct tracer *t, **last;
2073         int ret;
2074
2075         selftests_can_run = true;
2076
2077         mutex_lock(&trace_types_lock);
2078
2079         if (list_empty(&postponed_selftests))
2080                 goto out;
2081
2082         pr_info("Running postponed tracer tests:\n");
2083
2084         tracing_selftest_running = true;
2085         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2086                 /* This loop can take minutes when sanitizers are enabled, so
2087          * let's make sure we allow RCU processing.
2088                  */
2089                 cond_resched();
2090                 ret = run_tracer_selftest(p->type);
2091                 /* If the test fails, then warn and remove from available_tracers */
2092                 if (ret < 0) {
2093                         WARN(1, "tracer: %s failed selftest, disabling\n",
2094                              p->type->name);
2095                         last = &trace_types;
2096                         for (t = trace_types; t; t = t->next) {
2097                                 if (t == p->type) {
2098                                         *last = t->next;
2099                                         break;
2100                                 }
2101                                 last = &t->next;
2102                         }
2103                 }
2104                 list_del(&p->list);
2105                 kfree(p);
2106         }
2107         tracing_selftest_running = false;
2108
2109  out:
2110         mutex_unlock(&trace_types_lock);
2111
2112         return 0;
2113 }
2114 core_initcall(init_trace_selftests);
2115 #else
2116 static inline int run_tracer_selftest(struct tracer *type)
2117 {
2118         return 0;
2119 }
2120 static inline int do_run_tracer_selftest(struct tracer *type)
2121 {
2122         return 0;
2123 }
2124 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2125
2126 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2127
2128 static void __init apply_trace_boot_options(void);
2129
2130 /**
2131  * register_tracer - register a tracer with the ftrace system.
2132  * @type: the plugin for the tracer
2133  *
2134  * Register a new plugin tracer.
2135  */
2136 int __init register_tracer(struct tracer *type)
2137 {
2138         struct tracer *t;
2139         int ret = 0;
2140
2141         if (!type->name) {
2142                 pr_info("Tracer must have a name\n");
2143                 return -1;
2144         }
2145
2146         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2147                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2148                 return -1;
2149         }
2150
2151         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2152                 pr_warn("Can not register tracer %s due to lockdown\n",
2153                            type->name);
2154                 return -EPERM;
2155         }
2156
2157         mutex_lock(&trace_types_lock);
2158
2159         for (t = trace_types; t; t = t->next) {
2160                 if (strcmp(type->name, t->name) == 0) {
2161                         /* already found */
2162                         pr_info("Tracer %s already registered\n",
2163                                 type->name);
2164                         ret = -1;
2165                         goto out;
2166                 }
2167         }
2168
2169         if (!type->set_flag)
2170                 type->set_flag = &dummy_set_flag;
2171         if (!type->flags) {
2172                 /* allocate a dummy tracer_flags */
2173                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2174                 if (!type->flags) {
2175                         ret = -ENOMEM;
2176                         goto out;
2177                 }
2178                 type->flags->val = 0;
2179                 type->flags->opts = dummy_tracer_opt;
2180         } else
2181                 if (!type->flags->opts)
2182                         type->flags->opts = dummy_tracer_opt;
2183
2184         /* store the tracer for __set_tracer_option */
2185         type->flags->trace = type;
2186
2187         ret = do_run_tracer_selftest(type);
2188         if (ret < 0)
2189                 goto out;
2190
2191         type->next = trace_types;
2192         trace_types = type;
2193         add_tracer_options(&global_trace, type);
2194
2195  out:
2196         mutex_unlock(&trace_types_lock);
2197
2198         if (ret || !default_bootup_tracer)
2199                 goto out_unlock;
2200
2201         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2202                 goto out_unlock;
2203
2204         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2205         /* Do we want this tracer to start on bootup? */
2206         tracing_set_tracer(&global_trace, type->name);
2207         default_bootup_tracer = NULL;
2208
2209         apply_trace_boot_options();
2210
2211         /* disable other selftests, since running this tracer will break them. */
2212         disable_tracing_selftest("running a tracer");
2213
2214  out_unlock:
2215         return ret;
2216 }
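
/*
 * Registration sketch (hypothetical tracer, for illustration only): a
 * minimal plugin needs little more than a name and init/reset callbacks;
 * flags and selftest hooks are optional and defaulted above.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */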
2217
2218 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2219 {
2220         struct trace_buffer *buffer = buf->buffer;
2221
2222         if (!buffer)
2223                 return;
2224
2225         ring_buffer_record_disable(buffer);
2226
2227         /* Make sure all commits have finished */
2228         synchronize_rcu();
2229         ring_buffer_reset_cpu(buffer, cpu);
2230
2231         ring_buffer_record_enable(buffer);
2232 }
2233
2234 void tracing_reset_online_cpus(struct array_buffer *buf)
2235 {
2236         struct trace_buffer *buffer = buf->buffer;
2237
2238         if (!buffer)
2239                 return;
2240
2241         ring_buffer_record_disable(buffer);
2242
2243         /* Make sure all commits have finished */
2244         synchronize_rcu();
2245
2246         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2247
2248         ring_buffer_reset_online_cpus(buffer);
2249
2250         ring_buffer_record_enable(buffer);
2251 }
2252
2253 /* Must have trace_types_lock held */
2254 void tracing_reset_all_online_cpus_unlocked(void)
2255 {
2256         struct trace_array *tr;
2257
2258         lockdep_assert_held(&trace_types_lock);
2259
2260         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2261                 if (!tr->clear_trace)
2262                         continue;
2263                 tr->clear_trace = false;
2264                 tracing_reset_online_cpus(&tr->array_buffer);
2265 #ifdef CONFIG_TRACER_MAX_TRACE
2266                 tracing_reset_online_cpus(&tr->max_buffer);
2267 #endif
2268         }
2269 }
2270
2271 void tracing_reset_all_online_cpus(void)
2272 {
2273         mutex_lock(&trace_types_lock);
2274         tracing_reset_all_online_cpus_unlocked();
2275         mutex_unlock(&trace_types_lock);
2276 }
2277
2278 /*
2279  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2280  * is the tgid last observed corresponding to pid=i.
2281  */
2282 static int *tgid_map;
2283
2284 /* The maximum valid index into tgid_map. */
2285 static size_t tgid_map_max;
2286
2287 #define SAVED_CMDLINES_DEFAULT 128
2288 #define NO_CMDLINE_MAP UINT_MAX
2289 /*
2290  * Preemption must be disabled before acquiring trace_cmdline_lock.
2291  * The various trace_arrays' max_lock must be acquired in a context
2292  * where interrupts are disabled.
2293  */
2294 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2295 struct saved_cmdlines_buffer {
2296         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2297         unsigned *map_cmdline_to_pid;
2298         unsigned cmdline_num;
2299         int cmdline_idx;
2300         char *saved_cmdlines;
2301 };
2302 static struct saved_cmdlines_buffer *savedcmd;
2303
2304 static inline char *get_saved_cmdlines(int idx)
2305 {
2306         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2307 }
2308
2309 static inline void set_cmdline(int idx, const char *cmdline)
2310 {
2311         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2312 }
2313
2314 static int allocate_cmdlines_buffer(unsigned int val,
2315                                     struct saved_cmdlines_buffer *s)
2316 {
2317         s->map_cmdline_to_pid = kmalloc_array(val,
2318                                               sizeof(*s->map_cmdline_to_pid),
2319                                               GFP_KERNEL);
2320         if (!s->map_cmdline_to_pid)
2321                 return -ENOMEM;
2322
2323         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2324         if (!s->saved_cmdlines) {
2325                 kfree(s->map_cmdline_to_pid);
2326                 return -ENOMEM;
2327         }
2328
2329         s->cmdline_idx = 0;
2330         s->cmdline_num = val;
2331         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2332                sizeof(s->map_pid_to_cmdline));
2333         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2334                val * sizeof(*s->map_cmdline_to_pid));
2335
2336         return 0;
2337 }
2338
2339 static int trace_create_savedcmd(void)
2340 {
2341         int ret;
2342
2343         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2344         if (!savedcmd)
2345                 return -ENOMEM;
2346
2347         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2348         if (ret < 0) {
2349                 kfree(savedcmd);
2350                 savedcmd = NULL;
2351                 return -ENOMEM;
2352         }
2353
2354         return 0;
2355 }
2356
2357 int is_tracing_stopped(void)
2358 {
2359         return global_trace.stop_count;
2360 }
2361
2362 /**
2363  * tracing_start - quick start of the tracer
2364  *
2365  * If tracing is enabled but was stopped by tracing_stop,
2366  * this will start the tracer back up.
2367  */
2368 void tracing_start(void)
2369 {
2370         struct trace_buffer *buffer;
2371         unsigned long flags;
2372
2373         if (tracing_disabled)
2374                 return;
2375
2376         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377         if (--global_trace.stop_count) {
2378                 if (global_trace.stop_count < 0) {
2379                         /* Someone screwed up their debugging */
2380                         WARN_ON_ONCE(1);
2381                         global_trace.stop_count = 0;
2382                 }
2383                 goto out;
2384         }
2385
2386         /* Prevent the buffers from switching */
2387         arch_spin_lock(&global_trace.max_lock);
2388
2389         buffer = global_trace.array_buffer.buffer;
2390         if (buffer)
2391                 ring_buffer_record_enable(buffer);
2392
2393 #ifdef CONFIG_TRACER_MAX_TRACE
2394         buffer = global_trace.max_buffer.buffer;
2395         if (buffer)
2396                 ring_buffer_record_enable(buffer);
2397 #endif
2398
2399         arch_spin_unlock(&global_trace.max_lock);
2400
2401  out:
2402         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2403 }
2404
2405 static void tracing_start_tr(struct trace_array *tr)
2406 {
2407         struct trace_buffer *buffer;
2408         unsigned long flags;
2409
2410         if (tracing_disabled)
2411                 return;
2412
2413         /* If global, we need to also start the max tracer */
2414         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2415                 return tracing_start();
2416
2417         raw_spin_lock_irqsave(&tr->start_lock, flags);
2418
2419         if (--tr->stop_count) {
2420                 if (tr->stop_count < 0) {
2421                         /* Someone screwed up their debugging */
2422                         WARN_ON_ONCE(1);
2423                         tr->stop_count = 0;
2424                 }
2425                 goto out;
2426         }
2427
2428         buffer = tr->array_buffer.buffer;
2429         if (buffer)
2430                 ring_buffer_record_enable(buffer);
2431
2432  out:
2433         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2434 }
2435
2436 /**
2437  * tracing_stop - quick stop of the tracer
2438  *
2439  * Light weight way to stop tracing. Use in conjunction with
2440  * tracing_start.
2441  */
2442 void tracing_stop(void)
2443 {
2444         struct trace_buffer *buffer;
2445         unsigned long flags;
2446
2447         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2448         if (global_trace.stop_count++)
2449                 goto out;
2450
2451         /* Prevent the buffers from switching */
2452         arch_spin_lock(&global_trace.max_lock);
2453
2454         buffer = global_trace.array_buffer.buffer;
2455         if (buffer)
2456                 ring_buffer_record_disable(buffer);
2457
2458 #ifdef CONFIG_TRACER_MAX_TRACE
2459         buffer = global_trace.max_buffer.buffer;
2460         if (buffer)
2461                 ring_buffer_record_disable(buffer);
2462 #endif
2463
2464         arch_spin_unlock(&global_trace.max_lock);
2465
2466  out:
2467         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2468 }
2469
2470 static void tracing_stop_tr(struct trace_array *tr)
2471 {
2472         struct trace_buffer *buffer;
2473         unsigned long flags;
2474
2475         /* If global, we need to also stop the max tracer */
2476         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2477                 return tracing_stop();
2478
2479         raw_spin_lock_irqsave(&tr->start_lock, flags);
2480         if (tr->stop_count++)
2481                 goto out;
2482
2483         buffer = tr->array_buffer.buffer;
2484         if (buffer)
2485                 ring_buffer_record_disable(buffer);
2486
2487  out:
2488         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2489 }
2490
2491 static int trace_save_cmdline(struct task_struct *tsk)
2492 {
2493         unsigned tpid, idx;
2494
2495         /* treat recording of idle task as a success */
2496         if (!tsk->pid)
2497                 return 1;
2498
2499         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2500
2501         /*
2502          * It's not the end of the world if we don't get
2503          * the lock, but we also don't want to spin
2504          * nor do we want to disable interrupts,
2505          * so if we miss here, then better luck next time.
2506          *
2507          * This is called from within the scheduler and wakeup paths, so
2508          * interrupts had better be disabled and the run queue lock held.
2509          */
2510         lockdep_assert_preemption_disabled();
2511         if (!arch_spin_trylock(&trace_cmdline_lock))
2512                 return 0;
2513
2514         idx = savedcmd->map_pid_to_cmdline[tpid];
2515         if (idx == NO_CMDLINE_MAP) {
2516                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2517
2518                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2519                 savedcmd->cmdline_idx = idx;
2520         }
2521
2522         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2523         set_cmdline(idx, tsk->comm);
2524
2525         arch_spin_unlock(&trace_cmdline_lock);
2526
2527         return 1;
2528 }
2529
2530 static void __trace_find_cmdline(int pid, char comm[])
2531 {
2532         unsigned map;
2533         int tpid;
2534
2535         if (!pid) {
2536                 strcpy(comm, "<idle>");
2537                 return;
2538         }
2539
2540         if (WARN_ON_ONCE(pid < 0)) {
2541                 strcpy(comm, "<XXX>");
2542                 return;
2543         }
2544
2545         tpid = pid & (PID_MAX_DEFAULT - 1);
2546         map = savedcmd->map_pid_to_cmdline[tpid];
2547         if (map != NO_CMDLINE_MAP) {
2548                 tpid = savedcmd->map_cmdline_to_pid[map];
2549                 if (tpid == pid) {
2550                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2551                         return;
2552                 }
2553         }
2554         strcpy(comm, "<...>");
2555 }
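
/*
 * Worked example of the two-way mapping (values made up): with
 * PID_MAX_DEFAULT == 0x8000, pid 0x18005 and pid 0x8005 both mask down
 * to tpid 0x5. If trace_save_cmdline() last ran for pid 0x18005:
 *
 *	map_pid_to_cmdline[0x5] == 7         (slot in saved_cmdlines)
 *	map_cmdline_to_pid[7]   == 0x18005   (owner of that slot)
 *
 * __trace_find_cmdline(0x18005) follows 0x5 -> 7 -> 0x18005, sees that
 * the owner matches, and copies saved_cmdlines slot 7 into comm. A
 * lookup for 0x8005 takes the same path, fails the owner check, and
 * falls back to "<...>".
 */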
2556
2557 void trace_find_cmdline(int pid, char comm[])
2558 {
2559         preempt_disable();
2560         arch_spin_lock(&trace_cmdline_lock);
2561
2562         __trace_find_cmdline(pid, comm);
2563
2564         arch_spin_unlock(&trace_cmdline_lock);
2565         preempt_enable();
2566 }
2567
2568 static int *trace_find_tgid_ptr(int pid)
2569 {
2570         /*
2571          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2572          * if we observe a non-NULL tgid_map then we also observe the correct
2573          * tgid_map_max.
2574          */
2575         int *map = smp_load_acquire(&tgid_map);
2576
2577         if (unlikely(!map || pid > tgid_map_max))
2578                 return NULL;
2579
2580         return &map[pid];
2581 }
2582
2583 int trace_find_tgid(int pid)
2584 {
2585         int *ptr = trace_find_tgid_ptr(pid);
2586
2587         return ptr ? *ptr : 0;
2588 }
2589
2590 static int trace_save_tgid(struct task_struct *tsk)
2591 {
2592         int *ptr;
2593
2594         /* treat recording of idle task as a success */
2595         if (!tsk->pid)
2596                 return 1;
2597
2598         ptr = trace_find_tgid_ptr(tsk->pid);
2599         if (!ptr)
2600                 return 0;
2601
2602         *ptr = tsk->tgid;
2603         return 1;
2604 }
2605
2606 static bool tracing_record_taskinfo_skip(int flags)
2607 {
2608         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2609                 return true;
2610         if (!__this_cpu_read(trace_taskinfo_save))
2611                 return true;
2612         return false;
2613 }
2614
2615 /**
2616  * tracing_record_taskinfo - record the task info of a task
2617  *
2618  * @task:  task to record
2619  * @flags: TRACE_RECORD_CMDLINE for recording comm
2620  *         TRACE_RECORD_TGID for recording tgid
2621  */
2622 void tracing_record_taskinfo(struct task_struct *task, int flags)
2623 {
2624         bool done;
2625
2626         if (tracing_record_taskinfo_skip(flags))
2627                 return;
2628
2629         /*
2630          * Record as much task information as possible. If some fail, continue
2631          * to try to record the others.
2632          */
2633         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2634         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2635
2636         /* If recording any information failed, retry again soon. */
2637         if (!done)
2638                 return;
2639
2640         __this_cpu_write(trace_taskinfo_save, false);
2641 }
2642
2643 /**
2644  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2645  *
2646  * @prev: previous task during sched_switch
2647  * @next: next task during sched_switch
2648  * @flags: TRACE_RECORD_CMDLINE for recording comm
2649  *         TRACE_RECORD_TGID for recording tgid
2650  */
2651 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2652                                           struct task_struct *next, int flags)
2653 {
2654         bool done;
2655
2656         if (tracing_record_taskinfo_skip(flags))
2657                 return;
2658
2659         /*
2660          * Record as much task information as possible. If some fail, continue
2661          * to try to record the others.
2662          */
2663         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2664         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2665         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2666         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2667
2668         /* If recording any information failed, retry again soon. */
2669         if (!done)
2670                 return;
2671
2672         __this_cpu_write(trace_taskinfo_save, false);
2673 }
2674
2675 /* Helpers to record specific task information */
2676 void tracing_record_cmdline(struct task_struct *task)
2677 {
2678         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2679 }
2680
2681 void tracing_record_tgid(struct task_struct *task)
2682 {
2683         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2684 }
2685
2686 /*
2687  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2688  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2689  * simplifies those functions and keeps them in sync.
2690  */
2691 enum print_line_t trace_handle_return(struct trace_seq *s)
2692 {
2693         return trace_seq_has_overflowed(s) ?
2694                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2695 }
2696 EXPORT_SYMBOL_GPL(trace_handle_return);
2697
2698 static unsigned short migration_disable_value(void)
2699 {
2700 #if defined(CONFIG_SMP)
2701         return current->migration_disabled;
2702 #else
2703         return 0;
2704 #endif
2705 }
2706
2707 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2708 {
2709         unsigned int trace_flags = irqs_status;
2710         unsigned int pc;
2711
2712         pc = preempt_count();
2713
2714         if (pc & NMI_MASK)
2715                 trace_flags |= TRACE_FLAG_NMI;
2716         if (pc & HARDIRQ_MASK)
2717                 trace_flags |= TRACE_FLAG_HARDIRQ;
2718         if (in_serving_softirq())
2719                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2720         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2721                 trace_flags |= TRACE_FLAG_BH_OFF;
2722
2723         if (tif_need_resched())
2724                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2725         if (test_preempt_need_resched())
2726                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2727         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2728                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2729 }
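
/*
 * Layout sketch of the returned trace_ctx word (matching the packing
 * above; bits 8..15 are currently unused):
 *
 *	bits  0..3   preempt_count(), clamped to 0xf
 *	bits  4..7   migration-disable depth, clamped to 0xf
 *	bits 16..31  TRACE_FLAG_* state (NMI/hardirq/softirq/resched)
 *
 * so a consumer could unpack it roughly as:
 *
 *	unsigned int preempt   = trace_ctx & 0xf;
 *	unsigned int migrate   = (trace_ctx >> 4) & 0xf;
 *	unsigned int irq_flags = trace_ctx >> 16;
 */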
2730
2731 struct ring_buffer_event *
2732 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2733                           int type,
2734                           unsigned long len,
2735                           unsigned int trace_ctx)
2736 {
2737         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2738 }
2739
2740 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2741 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2742 static int trace_buffered_event_ref;
2743
2744 /**
2745  * trace_buffered_event_enable - enable buffering events
2746  *
2747  * When events are being filtered, it is quicker to use a temporary
2748  * buffer to write the event data into if there's a likely chance
2749  * that it will not be committed. Discarding an event from the ring
2750  * buffer is slower than committing it, and much slower than copying
2751  * the data and committing the copy.
2752  *
2753  * When an event is to be filtered, allocate per cpu buffers to
2754  * write the event data into; if the event is filtered and discarded,
2755  * it is simply dropped, otherwise the entire data is committed
2756  * in one shot.
2757  */
2758 void trace_buffered_event_enable(void)
2759 {
2760         struct ring_buffer_event *event;
2761         struct page *page;
2762         int cpu;
2763
2764         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2765
2766         if (trace_buffered_event_ref++)
2767                 return;
2768
2769         for_each_tracing_cpu(cpu) {
2770                 page = alloc_pages_node(cpu_to_node(cpu),
2771                                         GFP_KERNEL | __GFP_NORETRY, 0);
2772                 /* This is just an optimization and can handle failures */
2773                 if (!page) {
2774                         pr_err("Failed to allocate event buffer\n");
2775                         break;
2776                 }
2777
2778                 event = page_address(page);
2779                 memset(event, 0, sizeof(*event));
2780
2781                 per_cpu(trace_buffered_event, cpu) = event;
2782
2783                 preempt_disable();
2784                 if (cpu == smp_processor_id() &&
2785                     __this_cpu_read(trace_buffered_event) !=
2786                     per_cpu(trace_buffered_event, cpu))
2787                         WARN_ON_ONCE(1);
2788                 preempt_enable();
2789         }
2790 }
2791
2792 static void enable_trace_buffered_event(void *data)
2793 {
2794         /* Probably not needed, but do it anyway */
2795         smp_rmb();
2796         this_cpu_dec(trace_buffered_event_cnt);
2797 }
2798
2799 static void disable_trace_buffered_event(void *data)
2800 {
2801         this_cpu_inc(trace_buffered_event_cnt);
2802 }
2803
2804 /**
2805  * trace_buffered_event_disable - disable buffering events
2806  *
2807  * When a filter is removed, it is faster to not use the buffered
2808  * events, and to commit directly into the ring buffer. Free up
2809  * the temp buffers when there are no more users. This requires
2810  * special synchronization with current events.
2811  */
2812 void trace_buffered_event_disable(void)
2813 {
2814         int cpu;
2815
2816         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2817
2818         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2819                 return;
2820
2821         if (--trace_buffered_event_ref)
2822                 return;
2823
2824         preempt_disable();
2825         /* For each CPU, set the buffer as used. */
2826         smp_call_function_many(tracing_buffer_mask,
2827                                disable_trace_buffered_event, NULL, 1);
2828         preempt_enable();
2829
2830         /* Wait for all current users to finish */
2831         synchronize_rcu();
2832
2833         for_each_tracing_cpu(cpu) {
2834                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2835                 per_cpu(trace_buffered_event, cpu) = NULL;
2836         }
2837         /*
2838          * Make sure trace_buffered_event is NULL before clearing
2839          * trace_buffered_event_cnt.
2840          */
2841         smp_wmb();
2842
2843         preempt_disable();
2844         /* Do the work on each cpu */
2845         smp_call_function_many(tracing_buffer_mask,
2846                                enable_trace_buffered_event, NULL, 1);
2847         preempt_enable();
2848 }
2849
2850 static struct trace_buffer *temp_buffer;
2851
2852 struct ring_buffer_event *
2853 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2854                           struct trace_event_file *trace_file,
2855                           int type, unsigned long len,
2856                           unsigned int trace_ctx)
2857 {
2858         struct ring_buffer_event *entry;
2859         struct trace_array *tr = trace_file->tr;
2860         int val;
2861
2862         *current_rb = tr->array_buffer.buffer;
2863
2864         if (!tr->no_filter_buffering_ref &&
2865             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2866                 preempt_disable_notrace();
2867                 /*
2868                  * Filtering is on, so try to use the per cpu buffer first.
2869                  * This buffer will simulate a ring_buffer_event,
2870                  * where the type_len is zero and the array[0] will
2871                  * hold the full length.
2872                  * (see include/linux/ring-buffer.h for details on
2873                  *  how the ring_buffer_event is structured).
2874                  *
2875                  * Using a temp buffer during filtering and copying it
2876                  * on a matched filter is quicker than writing directly
2877                  * into the ring buffer and then discarding it when
2878                  * it doesn't match. That is because the discard
2879                  * requires several atomic operations to get right.
2880                  * Copying on match and doing nothing on a failed match
2881                  * is still quicker than no copy on match, but having
2882                  * to discard out of the ring buffer on a failed match.
2883                  */
2884                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2885                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2886
2887                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2888
2889                         /*
2890                          * Preemption is disabled, but interrupts and NMIs
2891                          * can still come in now. If that happens after
2892                          * the above increment, then it will have to go
2893                          * back to the old method of allocating the event
2894                          * on the ring buffer, and if the filter fails, it
2895                          * will have to call ring_buffer_discard_commit()
2896                          * to remove it.
2897                          *
2898                          * Need to also check the unlikely case that the
2899                          * length is bigger than the temp buffer size.
2900                          * If that happens, then the reserve is pretty much
2901                          * guaranteed to fail, as the ring buffer currently
2902                          * only allows events less than a page. But that may
2903                          * change in the future, so let the ring buffer reserve
2904                          * handle the failure in that case.
2905                          */
2906                         if (val == 1 && likely(len <= max_len)) {
2907                                 trace_event_setup(entry, type, trace_ctx);
2908                                 entry->array[0] = len;
2909                                 /* Return with preemption disabled */
2910                                 return entry;
2911                         }
2912                         this_cpu_dec(trace_buffered_event_cnt);
2913                 }
2914                 /* __trace_buffer_lock_reserve() disables preemption */
2915                 preempt_enable_notrace();
2916         }
2917
2918         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2919                                             trace_ctx);
2920         /*
2921          * If tracing is off, but we have triggers enabled,
2922          * we still need to look at the event data. Use the temp_buffer
2923          * to store the trace event for the trigger to use. It's recursion
2924          * safe and will not be recorded anywhere.
2925          */
2926         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2927                 *current_rb = temp_buffer;
2928                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2929                                                     trace_ctx);
2930         }
2931         return entry;
2932 }
2933 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
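
/*
 * Layout sketch of the simulated event in the per-CPU page, assuming the
 * usual ring_buffer_event header:
 *
 *	struct ring_buffer_event {
 *		u32 type_len:5, time_delta:27;	<- type_len == 0 here
 *		u32 array[];			<- array[0] holds the length
 *	};
 *
 * With type_len == 0, ring_buffer_event_data() finds the payload at
 * &event->array[1], exactly as it would for a length-in-array event in
 * the real ring buffer, so callers need not care which path handed them
 * the event.
 */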
2934
2935 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2936 static DEFINE_MUTEX(tracepoint_printk_mutex);
2937
2938 static void output_printk(struct trace_event_buffer *fbuffer)
2939 {
2940         struct trace_event_call *event_call;
2941         struct trace_event_file *file;
2942         struct trace_event *event;
2943         unsigned long flags;
2944         struct trace_iterator *iter = tracepoint_print_iter;
2945
2946         /* We should never get here if iter is NULL */
2947         if (WARN_ON_ONCE(!iter))
2948                 return;
2949
2950         event_call = fbuffer->trace_file->event_call;
2951         if (!event_call || !event_call->event.funcs ||
2952             !event_call->event.funcs->trace)
2953                 return;
2954
2955         file = fbuffer->trace_file;
2956         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2957             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2958              !filter_match_preds(file->filter, fbuffer->entry)))
2959                 return;
2960
2961         event = &fbuffer->trace_file->event_call->event;
2962
2963         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2964         trace_seq_init(&iter->seq);
2965         iter->ent = fbuffer->entry;
2966         event_call->event.funcs->trace(iter, 0, event);
2967         trace_seq_putc(&iter->seq, 0);
2968         printk("%s", iter->seq.buffer);
2969
2970         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2971 }
2972
2973 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2974                              void *buffer, size_t *lenp,
2975                              loff_t *ppos)
2976 {
2977         int save_tracepoint_printk;
2978         int ret;
2979
2980         mutex_lock(&tracepoint_printk_mutex);
2981         save_tracepoint_printk = tracepoint_printk;
2982
2983         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2984
2985         /*
2986          * This will force exiting early, as tracepoint_printk
2987          * is always zero when tracepoint_print_iter is not allocated.
2988          */
2989         if (!tracepoint_print_iter)
2990                 tracepoint_printk = 0;
2991
2992         if (save_tracepoint_printk == tracepoint_printk)
2993                 goto out;
2994
2995         if (tracepoint_printk)
2996                 static_key_enable(&tracepoint_printk_key.key);
2997         else
2998                 static_key_disable(&tracepoint_printk_key.key);
2999
3000  out:
3001         mutex_unlock(&tracepoint_printk_mutex);
3002
3003         return ret;
3004 }
3005
3006 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3007 {
3008         enum event_trigger_type tt = ETT_NONE;
3009         struct trace_event_file *file = fbuffer->trace_file;
3010
3011         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3012                         fbuffer->entry, &tt))
3013                 goto discard;
3014
3015         if (static_key_false(&tracepoint_printk_key.key))
3016                 output_printk(fbuffer);
3017
3018         if (static_branch_unlikely(&trace_event_exports_enabled))
3019                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3020
3021         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3022                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3023
3024 discard:
3025         if (tt)
3026                 event_triggers_post_call(file, tt);
3027
3028 }
3029 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3030
3031 /*
3032  * Skip 3:
3033  *
3034  *   trace_buffer_unlock_commit_regs()
3035  *   trace_event_buffer_commit()
3036  *   trace_event_raw_event_xxx()
3037  */
3038 # define STACK_SKIP 3
3039
3040 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3041                                      struct trace_buffer *buffer,
3042                                      struct ring_buffer_event *event,
3043                                      unsigned int trace_ctx,
3044                                      struct pt_regs *regs)
3045 {
3046         __buffer_unlock_commit(buffer, event);
3047
3048         /*
3049          * If regs is not set, then skip the necessary functions.
3050          * Note, we can still get here via blktrace, wakeup tracer
3051          * and mmiotrace, but that's ok if they lose a function or
3052          * two. They are not that meaningful.
3053          */
3054         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3055         ftrace_trace_userstack(tr, buffer, trace_ctx);
3056 }
3057
3058 /*
3059  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3060  */
3061 void
3062 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3063                                    struct ring_buffer_event *event)
3064 {
3065         __buffer_unlock_commit(buffer, event);
3066 }
3067
3068 void
3069 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3070                parent_ip, unsigned int trace_ctx)
3071 {
3072         struct trace_event_call *call = &event_function;
3073         struct trace_buffer *buffer = tr->array_buffer.buffer;
3074         struct ring_buffer_event *event;
3075         struct ftrace_entry *entry;
3076
3077         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3078                                             trace_ctx);
3079         if (!event)
3080                 return;
3081         entry   = ring_buffer_event_data(event);
3082         entry->ip                       = ip;
3083         entry->parent_ip                = parent_ip;
3084
3085         if (!call_filter_check_discard(call, entry, buffer, event)) {
3086                 if (static_branch_unlikely(&trace_function_exports_enabled))
3087                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3088                 __buffer_unlock_commit(buffer, event);
3089         }
3090 }
3091
3092 #ifdef CONFIG_STACKTRACE
3093
3094 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3095 #define FTRACE_KSTACK_NESTING   4
3096
3097 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3098
3099 struct ftrace_stack {
3100         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3101 };
3102
3103
3104 struct ftrace_stacks {
3105         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3106 };
3107
3108 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3109 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3110
3111 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3112                                  unsigned int trace_ctx,
3113                                  int skip, struct pt_regs *regs)
3114 {
3115         struct trace_event_call *call = &event_kernel_stack;
3116         struct ring_buffer_event *event;
3117         unsigned int size, nr_entries;
3118         struct ftrace_stack *fstack;
3119         struct stack_entry *entry;
3120         int stackidx;
3121
3122         /*
3123          * Add one, for this function and the call to stack_trace_save().
3124          * If regs is set, then these functions will not be in the way.
3125          */
3126 #ifndef CONFIG_UNWINDER_ORC
3127         if (!regs)
3128                 skip++;
3129 #endif
3130
3131         preempt_disable_notrace();
3132
3133         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3134
3135         /* This should never happen. If it does, yell once and skip */
3136         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3137                 goto out;
3138
3139         /*
3140          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3141          * interrupt will either see the value pre-increment or
3142          * post-increment. If the interrupt happens pre-increment, it will have
3143          * restored the counter when it returns. We just need a barrier to
3144          * keep gcc from moving things around.
3145          */
3146         barrier();
3147
3148         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3149         size = ARRAY_SIZE(fstack->calls);
3150
3151         if (regs) {
3152                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3153                                                    size, skip);
3154         } else {
3155                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3156         }
3157
3158         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3159                                     struct_size(entry, caller, nr_entries),
3160                                     trace_ctx);
3161         if (!event)
3162                 goto out;
3163         entry = ring_buffer_event_data(event);
3164
3165         entry->size = nr_entries;
3166         memcpy(&entry->caller, fstack->calls,
3167                flex_array_size(entry, caller, nr_entries));
3168
3169         if (!call_filter_check_discard(call, entry, buffer, event))
3170                 __buffer_unlock_commit(buffer, event);
3171
3172  out:
3173         /* Again, don't let gcc optimize things here */
3174         barrier();
3175         __this_cpu_dec(ftrace_stack_reserve);
3176         preempt_enable_notrace();
3177
3178 }
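
/*
 * Nesting sketch: ftrace_stack_reserve is a per-CPU depth counter, so
 * each context that can interrupt the previous one gets its own slot in
 * ftrace_stacks:
 *
 *	stackidx 0   task context
 *	stackidx 1   softirq interrupting the task
 *	stackidx 2   hard irq interrupting the softirq
 *	stackidx 3   NMI interrupting the hard irq
 *
 * A fifth level should not exist; if it does, the WARN_ON_ONCE() above
 * fires and that stack dump is dropped instead of corrupting a slot.
 */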
3179
3180 static inline void ftrace_trace_stack(struct trace_array *tr,
3181                                       struct trace_buffer *buffer,
3182                                       unsigned int trace_ctx,
3183                                       int skip, struct pt_regs *regs)
3184 {
3185         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3186                 return;
3187
3188         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3189 }
3190
3191 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3192                    int skip)
3193 {
3194         struct trace_buffer *buffer = tr->array_buffer.buffer;
3195
3196         if (rcu_is_watching()) {
3197                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3198                 return;
3199         }
3200
3201         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3202                 return;
3203
3204         /*
3205          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3206          * but if the above rcu_is_watching() failed, then the NMI
3207          * triggered someplace critical, and ct_irq_enter() should
3208          * not be called from NMI.
3209          */
3210         if (unlikely(in_nmi()))
3211                 return;
3212
3213         ct_irq_enter_irqson();
3214         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3215         ct_irq_exit_irqson();
3216 }
3217
3218 /**
3219  * trace_dump_stack - record a stack back trace in the trace buffer
3220  * @skip: Number of functions to skip (helper handlers)
3221  */
3222 void trace_dump_stack(int skip)
3223 {
3224         if (tracing_disabled || tracing_selftest_running)
3225                 return;
3226
3227 #ifndef CONFIG_UNWINDER_ORC
3228         /* Skip 1 to skip this function. */
3229         skip++;
3230 #endif
3231         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3232                              tracing_gen_ctx(), skip, NULL);
3233 }
3234 EXPORT_SYMBOL_GPL(trace_dump_stack);
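
/*
 * Illustrative use of trace_dump_stack() (hypothetical caller, not part
 * of this file): record the current kernel stack into the trace buffer
 * while chasing a bug, e.g. from some driver code:
 *
 *      if (unexpected_condition)
 *              trace_dump_stack(0);
 *
 * A non-zero @skip omits that many helper frames from the top of the
 * recorded stack.
 */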
3235
3236 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3237 static DEFINE_PER_CPU(int, user_stack_count);
3238
3239 static void
3240 ftrace_trace_userstack(struct trace_array *tr,
3241                        struct trace_buffer *buffer, unsigned int trace_ctx)
3242 {
3243         struct trace_event_call *call = &event_user_stack;
3244         struct ring_buffer_event *event;
3245         struct userstack_entry *entry;
3246
3247         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3248                 return;
3249
3250         /*
3251          * NMIs can not handle page faults, even with fixups.
3252          * Saving the user stack can (and often does) fault.
3253          */
3254         if (unlikely(in_nmi()))
3255                 return;
3256
3257         /*
3258          * prevent recursion, since the user stack tracing may
3259          * trigger other kernel events.
3260          */
3261         preempt_disable();
3262         if (__this_cpu_read(user_stack_count))
3263                 goto out;
3264
3265         __this_cpu_inc(user_stack_count);
3266
3267         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3268                                             sizeof(*entry), trace_ctx);
3269         if (!event)
3270                 goto out_drop_count;
3271         entry   = ring_buffer_event_data(event);
3272
3273         entry->tgid             = current->tgid;
3274         memset(&entry->caller, 0, sizeof(entry->caller));
3275
3276         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3277         if (!call_filter_check_discard(call, entry, buffer, event))
3278                 __buffer_unlock_commit(buffer, event);
3279
3280  out_drop_count:
3281         __this_cpu_dec(user_stack_count);
3282  out:
3283         preempt_enable();
3284 }
3285 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3286 static void ftrace_trace_userstack(struct trace_array *tr,
3287                                    struct trace_buffer *buffer,
3288                                    unsigned int trace_ctx)
3289 {
3290 }
3291 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3292
3293 #endif /* CONFIG_STACKTRACE */
3294
3295 static inline void
3296 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3297                           unsigned long long delta)
3298 {
3299         entry->bottom_delta_ts = delta & U32_MAX;
3300         entry->top_delta_ts = (delta >> 32);
3301 }
3302
3303 void trace_last_func_repeats(struct trace_array *tr,
3304                              struct trace_func_repeats *last_info,
3305                              unsigned int trace_ctx)
3306 {
3307         struct trace_buffer *buffer = tr->array_buffer.buffer;
3308         struct func_repeats_entry *entry;
3309         struct ring_buffer_event *event;
3310         u64 delta;
3311
3312         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3313                                             sizeof(*entry), trace_ctx);
3314         if (!event)
3315                 return;
3316
3317         delta = ring_buffer_event_time_stamp(buffer, event) -
3318                 last_info->ts_last_call;
3319
3320         entry = ring_buffer_event_data(event);
3321         entry->ip = last_info->ip;
3322         entry->parent_ip = last_info->parent_ip;
3323         entry->count = last_info->count;
3324         func_repeats_set_delta_ts(entry, delta);
3325
3326         __buffer_unlock_commit(buffer, event);
3327 }
3328
3329 /* created for use with alloc_percpu */
3330 struct trace_buffer_struct {
3331         int nesting;
3332         char buffer[4][TRACE_BUF_SIZE];
3333 };
3334
3335 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3336
3337 /*
3338  * This allows for lockless recording.  If we're nested too deeply, then
3339  * this returns NULL.
3340  */
3341 static char *get_trace_buf(void)
3342 {
3343         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3344
3345         if (!trace_percpu_buffer || buffer->nesting >= 4)
3346                 return NULL;
3347
3348         buffer->nesting++;
3349
3350         /* Interrupts must see nesting incremented before we use the buffer */
3351         barrier();
3352         return &buffer->buffer[buffer->nesting - 1][0];
3353 }
3354
3355 static void put_trace_buf(void)
3356 {
3357         /* Don't let the decrement of nesting leak before this */
3358         barrier();
3359         this_cpu_dec(trace_percpu_buffer->nesting);
3360 }
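
/*
 * Sketch of the intended pairing of the two helpers above (this is what
 * the vprintk helpers below do; shown here only for illustration):
 *
 *      char *tbuf = get_trace_buf();
 *
 *      if (tbuf) {
 *              ... format at most TRACE_BUF_SIZE bytes into tbuf ...
 *              put_trace_buf();
 *      }
 *
 * The four nesting levels cover normal, softirq, irq and NMI context on
 * the same CPU, which is why nesting deeper than 4 returns NULL.
 */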
3361
3362 static int alloc_percpu_trace_buffer(void)
3363 {
3364         struct trace_buffer_struct __percpu *buffers;
3365
3366         if (trace_percpu_buffer)
3367                 return 0;
3368
3369         buffers = alloc_percpu(struct trace_buffer_struct);
3370         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3371                 return -ENOMEM;
3372
3373         trace_percpu_buffer = buffers;
3374         return 0;
3375 }
3376
3377 static int buffers_allocated;
3378
3379 void trace_printk_init_buffers(void)
3380 {
3381         if (buffers_allocated)
3382                 return;
3383
3384         if (alloc_percpu_trace_buffer())
3385                 return;
3386
3387         /* trace_printk() is for debug use only. Don't use it in production. */
3388
3389         pr_warn("\n");
3390         pr_warn("**********************************************************\n");
3391         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3392         pr_warn("**                                                      **\n");
3393         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3394         pr_warn("**                                                      **\n");
3395         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3396         pr_warn("** unsafe for production use.                           **\n");
3397         pr_warn("**                                                      **\n");
3398         pr_warn("** If you see this message and you are not debugging    **\n");
3399         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3400         pr_warn("**                                                      **\n");
3401         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3402         pr_warn("**********************************************************\n");
3403
3404         /* Expand the buffers to set size */
3405         tracing_update_buffers();
3406
3407         buffers_allocated = 1;
3408
3409         /*
3410          * trace_printk_init_buffers() can be called by modules.
3411          * If that happens, then we need to start cmdline recording
3412          * directly here. If global_trace.array_buffer.buffer is already
3413          * allocated at this point, then this was called by module code.
3414          */
3415         if (global_trace.array_buffer.buffer)
3416                 tracing_start_cmdline_record();
3417 }
3418 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3419
3420 void trace_printk_start_comm(void)
3421 {
3422         /* Start tracing comms if trace printk is set */
3423         if (!buffers_allocated)
3424                 return;
3425         tracing_start_cmdline_record();
3426 }
3427
3428 static void trace_printk_start_stop_comm(int enabled)
3429 {
3430         if (!buffers_allocated)
3431                 return;
3432
3433         if (enabled)
3434                 tracing_start_cmdline_record();
3435         else
3436                 tracing_stop_cmdline_record();
3437 }
3438
3439 /**
3440  * trace_vbprintk - write binary msg to tracing buffer
3441  * @ip:    The address of the caller
3442  * @fmt:   The string format to write to the buffer
3443  * @args:  Arguments for @fmt
3444  */
3445 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3446 {
3447         struct trace_event_call *call = &event_bprint;
3448         struct ring_buffer_event *event;
3449         struct trace_buffer *buffer;
3450         struct trace_array *tr = &global_trace;
3451         struct bprint_entry *entry;
3452         unsigned int trace_ctx;
3453         char *tbuffer;
3454         int len = 0, size;
3455
3456         if (unlikely(tracing_selftest_running || tracing_disabled))
3457                 return 0;
3458
3459         /* Don't pollute graph traces with trace_vprintk internals */
3460         pause_graph_tracing();
3461
3462         trace_ctx = tracing_gen_ctx();
3463         preempt_disable_notrace();
3464
3465         tbuffer = get_trace_buf();
3466         if (!tbuffer) {
3467                 len = 0;
3468                 goto out_nobuffer;
3469         }
3470
3471         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3472
3473         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3474                 goto out_put;
3475
3476         size = sizeof(*entry) + sizeof(u32) * len;
3477         buffer = tr->array_buffer.buffer;
3478         ring_buffer_nest_start(buffer);
3479         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3480                                             trace_ctx);
3481         if (!event)
3482                 goto out;
3483         entry = ring_buffer_event_data(event);
3484         entry->ip                       = ip;
3485         entry->fmt                      = fmt;
3486
3487         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3488         if (!call_filter_check_discard(call, entry, buffer, event)) {
3489                 __buffer_unlock_commit(buffer, event);
3490                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3491         }
3492
3493 out:
3494         ring_buffer_nest_end(buffer);
3495 out_put:
3496         put_trace_buf();
3497
3498 out_nobuffer:
3499         preempt_enable_notrace();
3500         unpause_graph_tracing();
3501
3502         return len;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_vbprintk);
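
/*
 * Illustrative note (hypothetical caller): trace_vbprintk() is normally
 * reached through the trace_printk() macro when the format is a
 * build-time constant, e.g.:
 *
 *      trace_printk("irq=%d count=%lu\n", irq, count);
 *
 * Only the format pointer and the binary-encoded arguments are stored
 * in the ring buffer here; the string is not formatted until the trace
 * is read.
 */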
3505
3506 __printf(3, 0)
3507 static int
3508 __trace_array_vprintk(struct trace_buffer *buffer,
3509                       unsigned long ip, const char *fmt, va_list args)
3510 {
3511         struct trace_event_call *call = &event_print;
3512         struct ring_buffer_event *event;
3513         int len = 0, size;
3514         struct print_entry *entry;
3515         unsigned int trace_ctx;
3516         char *tbuffer;
3517
3518         if (tracing_disabled)
3519                 return 0;
3520
3521         /* Don't pollute graph traces with trace_vprintk internals */
3522         pause_graph_tracing();
3523
3524         trace_ctx = tracing_gen_ctx();
3525         preempt_disable_notrace();
3526
3527
3528         tbuffer = get_trace_buf();
3529         if (!tbuffer) {
3530                 len = 0;
3531                 goto out_nobuffer;
3532         }
3533
3534         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3535
3536         size = sizeof(*entry) + len + 1;
3537         ring_buffer_nest_start(buffer);
3538         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3539                                             trace_ctx);
3540         if (!event)
3541                 goto out;
3542         entry = ring_buffer_event_data(event);
3543         entry->ip = ip;
3544
3545         memcpy(&entry->buf, tbuffer, len + 1);
3546         if (!call_filter_check_discard(call, entry, buffer, event)) {
3547                 __buffer_unlock_commit(buffer, event);
3548                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3549         }
3550
3551 out:
3552         ring_buffer_nest_end(buffer);
3553         put_trace_buf();
3554
3555 out_nobuffer:
3556         preempt_enable_notrace();
3557         unpause_graph_tracing();
3558
3559         return len;
3560 }
3561
3562 __printf(3, 0)
3563 int trace_array_vprintk(struct trace_array *tr,
3564                         unsigned long ip, const char *fmt, va_list args)
3565 {
3566         if (tracing_selftest_running && tr == &global_trace)
3567                 return 0;
3568
3569         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3570 }
3571
3572 /**
3573  * trace_array_printk - Print a message to a specific instance
3574  * @tr: The instance trace_array descriptor
3575  * @ip: The instruction pointer that this is called from.
3576  * @fmt: The format to print (printf format)
3577  *
3578  * If a subsystem sets up its own instance, it has the right to
3579  * printk strings into its tracing instance buffer using this
3580  * function. Note, this function will not write into the top level
3581  * buffer (use trace_printk() for that), as the top level buffer
3582  * should only contain events that can be individually disabled.
3583  * trace_printk() is only meant for debugging a kernel, and should
3584  * never be used in normal (production) code.
3585  *
3586  * trace_array_printk() can be used instead, as it will not add
3587  * noise to the top level tracing buffer.
3588  *
3589  * Note, trace_array_init_printk() must be called on @tr before this
3590  * can be used.
3591  */
3592 __printf(3, 0)
3593 int trace_array_printk(struct trace_array *tr,
3594                        unsigned long ip, const char *fmt, ...)
3595 {
3596         int ret;
3597         va_list ap;
3598
3599         if (!tr)
3600                 return -ENOENT;
3601
3602         /* This is only allowed for created instances */
3603         if (tr == &global_trace)
3604                 return 0;
3605
3606         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3607                 return 0;
3608
3609         va_start(ap, fmt);
3610         ret = trace_array_vprintk(tr, ip, fmt, ap);
3611         va_end(ap);
3612         return ret;
3613 }
3614 EXPORT_SYMBOL_GPL(trace_array_printk);
3615
3616 /**
3617  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3618  * @tr: The trace array to initialize the buffers for
3619  *
3620  * As trace_array_printk() only writes into instances, calls to it are
3621  * OK to leave in the kernel (unlike trace_printk()). This needs to be
3622  * called before trace_array_printk() can be used on a trace_array.
3623  */
3624 int trace_array_init_printk(struct trace_array *tr)
3625 {
3626         if (!tr)
3627                 return -ENOENT;
3628
3629         /* This is only allowed for created instances */
3630         if (tr == &global_trace)
3631                 return -EINVAL;
3632
3633         return alloc_percpu_trace_buffer();
3634 }
3635 EXPORT_SYMBOL_GPL(trace_array_init_printk);
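
/*
 * Illustrative sketch of how a subsystem might combine the two helpers
 * above from its own module code. The "my_subsys" instance name, the
 * error handling and the message are assumptions made for the example
 * only:
 *
 *      struct trace_array *tr;
 *
 *      tr = trace_array_get_by_name("my_subsys");
 *      if (!tr)
 *              return -ENOMEM;
 *
 *      if (trace_array_init_printk(tr))
 *              return -ENOMEM;
 *
 *      trace_array_printk(tr, _THIS_IP_, "widget %d ready\n", id);
 */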
3636
3637 __printf(3, 4)
3638 int trace_array_printk_buf(struct trace_buffer *buffer,
3639                            unsigned long ip, const char *fmt, ...)
3640 {
3641         int ret;
3642         va_list ap;
3643
3644         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3645                 return 0;
3646
3647         va_start(ap, fmt);
3648         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3649         va_end(ap);
3650         return ret;
3651 }
3652
3653 __printf(2, 0)
3654 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3655 {
3656         return trace_array_vprintk(&global_trace, ip, fmt, args);
3657 }
3658 EXPORT_SYMBOL_GPL(trace_vprintk);
3659
3660 static void trace_iterator_increment(struct trace_iterator *iter)
3661 {
3662         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3663
3664         iter->idx++;
3665         if (buf_iter)
3666                 ring_buffer_iter_advance(buf_iter);
3667 }
3668
3669 static struct trace_entry *
3670 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3671                 unsigned long *lost_events)
3672 {
3673         struct ring_buffer_event *event;
3674         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3675
3676         if (buf_iter) {
3677                 event = ring_buffer_iter_peek(buf_iter, ts);
3678                 if (lost_events)
3679                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3680                                 (unsigned long)-1 : 0;
3681         } else {
3682                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3683                                          lost_events);
3684         }
3685
3686         if (event) {
3687                 iter->ent_size = ring_buffer_event_length(event);
3688                 return ring_buffer_event_data(event);
3689         }
3690         iter->ent_size = 0;
3691         return NULL;
3692 }
3693
3694 static struct trace_entry *
3695 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3696                   unsigned long *missing_events, u64 *ent_ts)
3697 {
3698         struct trace_buffer *buffer = iter->array_buffer->buffer;
3699         struct trace_entry *ent, *next = NULL;
3700         unsigned long lost_events = 0, next_lost = 0;
3701         int cpu_file = iter->cpu_file;
3702         u64 next_ts = 0, ts;
3703         int next_cpu = -1;
3704         int next_size = 0;
3705         int cpu;
3706
3707         /*
3708          * If we are in a per_cpu trace file, don't bother iterating over
3709          * all CPUs; peek at that CPU directly.
3710          */
3711         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3712                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3713                         return NULL;
3714                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3715                 if (ent_cpu)
3716                         *ent_cpu = cpu_file;
3717
3718                 return ent;
3719         }
3720
3721         for_each_tracing_cpu(cpu) {
3722
3723                 if (ring_buffer_empty_cpu(buffer, cpu))
3724                         continue;
3725
3726                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3727
3728                 /*
3729                  * Pick the entry with the smallest timestamp:
3730                  */
3731                 if (ent && (!next || ts < next_ts)) {
3732                         next = ent;
3733                         next_cpu = cpu;
3734                         next_ts = ts;
3735                         next_lost = lost_events;
3736                         next_size = iter->ent_size;
3737                 }
3738         }
3739
3740         iter->ent_size = next_size;
3741
3742         if (ent_cpu)
3743                 *ent_cpu = next_cpu;
3744
3745         if (ent_ts)
3746                 *ent_ts = next_ts;
3747
3748         if (missing_events)
3749                 *missing_events = next_lost;
3750
3751         return next;
3752 }
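
/*
 * Worked example (hypothetical timestamps): if CPU0's next entry has
 * ts=1050 and CPU1's has ts=1007, __find_next_entry() returns the CPU1
 * entry first, so readers of the "trace" file see one stream merged in
 * time order across all per-CPU buffers.
 */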
3753
3754 #define STATIC_FMT_BUF_SIZE     128
3755 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3756
3757 char *trace_iter_expand_format(struct trace_iterator *iter)
3758 {
3759         char *tmp;
3760
3761         /*
3762          * iter->tr is NULL when used with tp_printk, which means this
3763          * can be called in a context where krealloc() is not safe.
3764          */
3765         if (!iter->tr || iter->fmt == static_fmt_buf)
3766                 return NULL;
3767
3768         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3769                        GFP_KERNEL);
3770         if (tmp) {
3771                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3772                 iter->fmt = tmp;
3773         }
3774
3775         return tmp;
3776 }
3777
3778 /* Returns true if the string is safe to dereference from an event */
3779 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3780                            bool star, int len)
3781 {
3782         unsigned long addr = (unsigned long)str;
3783         struct trace_event *trace_event;
3784         struct trace_event_call *event;
3785
3786         /* Ignore strings with no length */
3787         if (star && !len)
3788                 return true;
3789
3790         /* OK if part of the event data */
3791         if ((addr >= (unsigned long)iter->ent) &&
3792             (addr < (unsigned long)iter->ent + iter->ent_size))
3793                 return true;
3794
3795         /* OK if part of the temp seq buffer */
3796         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3797             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3798                 return true;
3799
3800         /* Core rodata can not be freed */
3801         if (is_kernel_rodata(addr))
3802                 return true;
3803
3804         if (trace_is_tracepoint_string(str))
3805                 return true;
3806
3807         /*
3808          * Now this could be a module event, referencing core module
3809          * data, which is OK.
3810          */
3811         if (!iter->ent)
3812                 return false;
3813
3814         trace_event = ftrace_find_event(iter->ent->type);
3815         if (!trace_event)
3816                 return false;
3817
3818         event = container_of(trace_event, struct trace_event_call, event);
3819         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3820                 return false;
3821
3822         /* Would rather have rodata, but this will suffice */
3823         if (within_module_core(addr, event->module))
3824                 return true;
3825
3826         return false;
3827 }
3828
3829 static const char *show_buffer(struct trace_seq *s)
3830 {
3831         struct seq_buf *seq = &s->seq;
3832
3833         seq_buf_terminate(seq);
3834
3835         return seq->buffer;
3836 }
3837
3838 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3839
3840 static int test_can_verify_check(const char *fmt, ...)
3841 {
3842         char buf[16];
3843         va_list ap;
3844         int ret;
3845
3846         /*
3847          * The verifier depends on vsnprintf() modifying the va_list
3848          * passed to it, i.e. the va_list being passed by reference.
3849          * Some architectures (like x86_32) pass it by value, which means
3850          * that vsnprintf() does not modify the caller's va_list, and the
3851          * verifier would then need to understand every conversion that
3852          * vsnprintf can perform. If it is passed by value, then the
3853          * verifier is disabled.
3854          */
3855         va_start(ap, fmt);
3856         vsnprintf(buf, 16, "%d", ap);
3857         ret = va_arg(ap, int);
3858         va_end(ap);
3859
3860         return ret;
3861 }
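
/*
 * Concrete example of the check above: for test_can_verify_check("%d %d", 0, 1),
 * vsnprintf() consumes the first "%d" (the 0). If the architecture passes
 * the va_list by reference, the following va_arg() returns the second
 * argument (1) and the verifier stays enabled; if it is passed by value,
 * va_arg() sees the untouched list and returns 0, disabling the verifier.
 */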
3862
3863 static void test_can_verify(void)
3864 {
3865         if (!test_can_verify_check("%d %d", 0, 1)) {
3866                 pr_info("trace event string verifier disabled\n");
3867                 static_branch_inc(&trace_no_verify);
3868         }
3869 }
3870
3871 /**
3872  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3873  * @iter: The iterator that holds the seq buffer and the event being printed
3874  * @fmt: The format used to print the event
3875  * @ap: The va_list holding the data to print from @fmt.
3876  *
3877  * This writes the data into the @iter->seq buffer using the data from
3878  * @fmt and @ap. If the format has a %s, then the source of the string
3879  * is examined to make sure it is safe to print; otherwise it will
3880  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3881  * pointer.
3882  */
3883 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3884                          va_list ap)
3885 {
3886         const char *p = fmt;
3887         const char *str;
3888         int i, j;
3889
3890         if (WARN_ON_ONCE(!fmt))
3891                 return;
3892
3893         if (static_branch_unlikely(&trace_no_verify))
3894                 goto print;
3895
3896         /* Don't bother checking when doing a ftrace_dump() */
3897         if (iter->fmt == static_fmt_buf)
3898                 goto print;
3899
3900         while (*p) {
3901                 bool star = false;
3902                 int len = 0;
3903
3904                 j = 0;
3905
3906                 /* We only care about %s and variants */
3907                 for (i = 0; p[i]; i++) {
3908                         if (i + 1 >= iter->fmt_size) {
3909                                 /*
3910                                  * If we can't expand the copy buffer,
3911                                  * just print it.
3912                                  */
3913                                 if (!trace_iter_expand_format(iter))
3914                                         goto print;
3915                         }
3916
3917                         if (p[i] == '\\' && p[i+1]) {
3918                                 i++;
3919                                 continue;
3920                         }
3921                         if (p[i] == '%') {
3922                                 /* Need to test cases like %08.*s */
3923                                 for (j = 1; p[i+j]; j++) {
3924                                         if (isdigit(p[i+j]) ||
3925                                             p[i+j] == '.')
3926                                                 continue;
3927                                         if (p[i+j] == '*') {
3928                                                 star = true;
3929                                                 continue;
3930                                         }
3931                                         break;
3932                                 }
3933                                 if (p[i+j] == 's')
3934                                         break;
3935                                 star = false;
3936                         }
3937                         j = 0;
3938                 }
3939                 /* If no %s found then just print normally */
3940                 if (!p[i])
3941                         break;
3942
3943                 /* Copy up to the %s, and print that */
3944                 strncpy(iter->fmt, p, i);
3945                 iter->fmt[i] = '\0';
3946                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3947
3948                 /*
3949                  * If iter->seq is full, the above call no longer guarantees
3950                  * that ap is in sync with fmt processing, and further calls
3951                  * to va_arg() can return wrong positional arguments.
3952                  *
3953                  * Ensure that ap is no longer used in this case.
3954                  */
3955                 if (iter->seq.full) {
3956                         p = "";
3957                         break;
3958                 }
3959
3960                 if (star)
3961                         len = va_arg(ap, int);
3962
3963                 /* The ap now points to the string data of the %s */
3964                 str = va_arg(ap, const char *);
3965
3966                 /*
3967                  * If you hit this warning, it is likely that the
3968                  * trace event in question used %s on a string that
3969                  * was saved at the time of the event, but may not be
3970                  * around when the trace is read. Use __string(),
3971                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3972                  * instead. See samples/trace_events/trace-events-sample.h
3973                  * for reference.
3974                  */
3975                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3976                               "fmt: '%s' current_buffer: '%s'",
3977                               fmt, show_buffer(&iter->seq))) {
3978                         int ret;
3979
3980                         /* Try to safely read the string */
3981                         if (star) {
3982                                 if (len + 1 > iter->fmt_size)
3983                                         len = iter->fmt_size - 1;
3984                                 if (len < 0)
3985                                         len = 0;
3986                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3987                                 iter->fmt[len] = 0;
3988                                 star = false;
3989                         } else {
3990                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3991                                                                   iter->fmt_size);
3992                         }
3993                         if (ret < 0)
3994                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3995                         else
3996                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3997                                                  str, iter->fmt);
3998                         str = "[UNSAFE-MEMORY]";
3999                         strcpy(iter->fmt, "%s");
4000                 } else {
4001                         strncpy(iter->fmt, p + i, j + 1);
4002                         iter->fmt[j+1] = '\0';
4003                 }
4004                 if (star)
4005                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
4006                 else
4007                         trace_seq_printf(&iter->seq, iter->fmt, str);
4008
4009                 p += i + j + 1;
4010         }
4011  print:
4012         if (*p)
4013                 trace_seq_vprintf(&iter->seq, p, ap);
4014 }
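
/*
 * Illustrative sketch of the pattern that the warning above recommends,
 * using the __string()/__assign_str()/__get_str() helpers so the string
 * is copied into the event itself (event, field and argument names are
 * hypothetical; see samples/trace_events/trace-events-sample.h for the
 * real reference):
 *
 *      TRACE_EVENT(foo_bar,
 *              TP_PROTO(const char *name),
 *              TP_ARGS(name),
 *              TP_STRUCT__entry(
 *                      __string(name, name)
 *              ),
 *              TP_fast_assign(
 *                      __assign_str(name, name);
 *              ),
 *              TP_printk("name=%s", __get_str(name))
 *      );
 */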
4015
4016 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4017 {
4018         const char *p, *new_fmt;
4019         char *q;
4020
4021         if (WARN_ON_ONCE(!fmt))
4022                 return fmt;
4023
4024         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4025                 return fmt;
4026
4027         p = fmt;
4028         new_fmt = q = iter->fmt;
4029         while (*p) {
4030                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4031                         if (!trace_iter_expand_format(iter))
4032                                 return fmt;
4033
4034                         q += iter->fmt - new_fmt;
4035                         new_fmt = iter->fmt;
4036                 }
4037
4038                 *q++ = *p++;
4039
4040                 /* Replace %p with %px */
4041                 if (p[-1] == '%') {
4042                         if (p[0] == '%') {
4043                                 *q++ = *p++;
4044                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4045                                 *q++ = *p++;
4046                                 *q++ = 'x';
4047                         }
4048                 }
4049         }
4050         *q = '\0';
4051
4052         return new_fmt;
4053 }
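
/*
 * Example of the rewriting done above (illustrative format strings):
 * with the hash-ptr option cleared, a format such as
 *
 *      "skb=%p proto=%u caller=%pS"
 *
 * is emitted as
 *
 *      "skb=%px proto=%u caller=%pS"
 *
 * Only a bare "%p" gains the 'x'; extended specifiers like "%pS" and a
 * literal "%%p" are left untouched.
 */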
4054
4055 #define STATIC_TEMP_BUF_SIZE    128
4056 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4057
4058 /* Find the next real entry, without updating the iterator itself */
4059 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4060                                           int *ent_cpu, u64 *ent_ts)
4061 {
4062         /* __find_next_entry will reset ent_size */
4063         int ent_size = iter->ent_size;
4064         struct trace_entry *entry;
4065
4066         /*
4067          * If called from ftrace_dump(), then the iter->temp buffer
4068          * will be the static_temp_buf and not created from kmalloc.
4069          * If the entry size is greater than the buffer, we cannot
4070          * save it. Just return NULL in that case. This is only
4071          * used to add markers when two consecutive events' time
4072          * stamps have a large delta. See trace_print_lat_context().
4073          */
4074         if (iter->temp == static_temp_buf &&
4075             STATIC_TEMP_BUF_SIZE < ent_size)
4076                 return NULL;
4077
4078         /*
4079          * The __find_next_entry() may call peek_next_entry(), which may
4080          * call ring_buffer_peek(), which can make the contents of
4081          * iter->ent undefined. Need to copy iter->ent now.
4082          */
4083         if (iter->ent && iter->ent != iter->temp) {
4084                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4085                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4086                         void *temp;
4087                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4088                         if (!temp)
4089                                 return NULL;
4090                         kfree(iter->temp);
4091                         iter->temp = temp;
4092                         iter->temp_size = iter->ent_size;
4093                 }
4094                 memcpy(iter->temp, iter->ent, iter->ent_size);
4095                 iter->ent = iter->temp;
4096         }
4097         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4098         /* Put back the original ent_size */
4099         iter->ent_size = ent_size;
4100
4101         return entry;
4102 }
4103
4104 /* Find the next real entry, and increment the iterator to the next entry */
4105 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4106 {
4107         iter->ent = __find_next_entry(iter, &iter->cpu,
4108                                       &iter->lost_events, &iter->ts);
4109
4110         if (iter->ent)
4111                 trace_iterator_increment(iter);
4112
4113         return iter->ent ? iter : NULL;
4114 }
4115
4116 static void trace_consume(struct trace_iterator *iter)
4117 {
4118         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4119                             &iter->lost_events);
4120 }
4121
4122 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4123 {
4124         struct trace_iterator *iter = m->private;
4125         int i = (int)*pos;
4126         void *ent;
4127
4128         WARN_ON_ONCE(iter->leftover);
4129
4130         (*pos)++;
4131
4132         /* can't go backwards */
4133         if (iter->idx > i)
4134                 return NULL;
4135
4136         if (iter->idx < 0)
4137                 ent = trace_find_next_entry_inc(iter);
4138         else
4139                 ent = iter;
4140
4141         while (ent && iter->idx < i)
4142                 ent = trace_find_next_entry_inc(iter);
4143
4144         iter->pos = *pos;
4145
4146         return ent;
4147 }
4148
4149 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4150 {
4151         struct ring_buffer_iter *buf_iter;
4152         unsigned long entries = 0;
4153         u64 ts;
4154
4155         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4156
4157         buf_iter = trace_buffer_iter(iter, cpu);
4158         if (!buf_iter)
4159                 return;
4160
4161         ring_buffer_iter_reset(buf_iter);
4162
4163         /*
4164          * With the max latency tracers, we could have the case where
4165          * a reset never took place on a cpu. This shows up as the
4166          * timestamp being before the start of the buffer.
4167          */
4168         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4169                 if (ts >= iter->array_buffer->time_start)
4170                         break;
4171                 entries++;
4172                 ring_buffer_iter_advance(buf_iter);
4173         }
4174
4175         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4176 }
4177
4178 /*
4179  * The current tracer is copied to avoid taking a global lock
4180  * all around.
4181  */
4182 static void *s_start(struct seq_file *m, loff_t *pos)
4183 {
4184         struct trace_iterator *iter = m->private;
4185         struct trace_array *tr = iter->tr;
4186         int cpu_file = iter->cpu_file;
4187         void *p = NULL;
4188         loff_t l = 0;
4189         int cpu;
4190
4191         mutex_lock(&trace_types_lock);
4192         if (unlikely(tr->current_trace != iter->trace)) {
4193                 /* Close iter->trace before switching to the new current tracer */
4194                 if (iter->trace->close)
4195                         iter->trace->close(iter);
4196                 iter->trace = tr->current_trace;
4197                 /* Reopen the new current tracer */
4198                 if (iter->trace->open)
4199                         iter->trace->open(iter);
4200         }
4201         mutex_unlock(&trace_types_lock);
4202
4203 #ifdef CONFIG_TRACER_MAX_TRACE
4204         if (iter->snapshot && iter->trace->use_max_tr)
4205                 return ERR_PTR(-EBUSY);
4206 #endif
4207
4208         if (*pos != iter->pos) {
4209                 iter->ent = NULL;
4210                 iter->cpu = 0;
4211                 iter->idx = -1;
4212
4213                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4214                         for_each_tracing_cpu(cpu)
4215                                 tracing_iter_reset(iter, cpu);
4216                 } else
4217                         tracing_iter_reset(iter, cpu_file);
4218
4219                 iter->leftover = 0;
4220                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4221                         ;
4222
4223         } else {
4224                 /*
4225                  * If we overflowed the seq_file before, then we want
4226                  * to just reuse the trace_seq buffer again.
4227                  */
4228                 if (iter->leftover)
4229                         p = iter;
4230                 else {
4231                         l = *pos - 1;
4232                         p = s_next(m, p, &l);
4233                 }
4234         }
4235
4236         trace_event_read_lock();
4237         trace_access_lock(cpu_file);
4238         return p;
4239 }
4240
4241 static void s_stop(struct seq_file *m, void *p)
4242 {
4243         struct trace_iterator *iter = m->private;
4244
4245 #ifdef CONFIG_TRACER_MAX_TRACE
4246         if (iter->snapshot && iter->trace->use_max_tr)
4247                 return;
4248 #endif
4249
4250         trace_access_unlock(iter->cpu_file);
4251         trace_event_read_unlock();
4252 }
4253
4254 static void
4255 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4256                       unsigned long *entries, int cpu)
4257 {
4258         unsigned long count;
4259
4260         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4261         /*
4262          * If this buffer has skipped entries, then we hold all
4263          * entries for the trace and we need to ignore the
4264          * ones before the time stamp.
4265          */
4266         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4267                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4268                 /* total is the same as the entries */
4269                 *total = count;
4270         } else
4271                 *total = count +
4272                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4273         *entries = count;
4274 }
4275
4276 static void
4277 get_total_entries(struct array_buffer *buf,
4278                   unsigned long *total, unsigned long *entries)
4279 {
4280         unsigned long t, e;
4281         int cpu;
4282
4283         *total = 0;
4284         *entries = 0;
4285
4286         for_each_tracing_cpu(cpu) {
4287                 get_total_entries_cpu(buf, &t, &e, cpu);
4288                 *total += t;
4289                 *entries += e;
4290         }
4291 }
4292
4293 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4294 {
4295         unsigned long total, entries;
4296
4297         if (!tr)
4298                 tr = &global_trace;
4299
4300         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4301
4302         return entries;
4303 }
4304
4305 unsigned long trace_total_entries(struct trace_array *tr)
4306 {
4307         unsigned long total, entries;
4308
4309         if (!tr)
4310                 tr = &global_trace;
4311
4312         get_total_entries(&tr->array_buffer, &total, &entries);
4313
4314         return entries;
4315 }
4316
4317 static void print_lat_help_header(struct seq_file *m)
4318 {
4319         seq_puts(m, "#                    _------=> CPU#            \n"
4320                     "#                   / _-----=> irqs-off/BH-disabled\n"
4321                     "#                  | / _----=> need-resched    \n"
4322                     "#                  || / _---=> hardirq/softirq \n"
4323                     "#                  ||| / _--=> preempt-depth   \n"
4324                     "#                  |||| / _-=> migrate-disable \n"
4325                     "#                  ||||| /     delay           \n"
4326                     "#  cmd     pid     |||||| time  |   caller     \n"
4327                     "#     \\   /        ||||||  \\    |    /       \n");
4328 }
4329
4330 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4331 {
4332         unsigned long total;
4333         unsigned long entries;
4334
4335         get_total_entries(buf, &total, &entries);
4336         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4337                    entries, total, num_online_cpus());
4338         seq_puts(m, "#\n");
4339 }
4340
4341 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4342                                    unsigned int flags)
4343 {
4344         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4345
4346         print_event_info(buf, m);
4347
4348         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4349         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4350 }
4351
4352 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4353                                        unsigned int flags)
4354 {
4355         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4356         static const char space[] = "            ";
4357         int prec = tgid ? 12 : 2;
4358
4359         print_event_info(buf, m);
4360
4361         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4362         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4363         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4364         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4365         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4366         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4367         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4368         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4369 }
4370
4371 void
4372 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4373 {
4374         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4375         struct array_buffer *buf = iter->array_buffer;
4376         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4377         struct tracer *type = iter->trace;
4378         unsigned long entries;
4379         unsigned long total;
4380         const char *name = type->name;
4381
4382         get_total_entries(buf, &total, &entries);
4383
4384         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4385                    name, UTS_RELEASE);
4386         seq_puts(m, "# -----------------------------------"
4387                  "---------------------------------\n");
4388         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4389                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4390                    nsecs_to_usecs(data->saved_latency),
4391                    entries,
4392                    total,
4393                    buf->cpu,
4394                    preempt_model_none()      ? "server" :
4395                    preempt_model_voluntary() ? "desktop" :
4396                    preempt_model_full()      ? "preempt" :
4397                    preempt_model_rt()        ? "preempt_rt" :
4398                    "unknown",
4399                    /* These are reserved for later use */
4400                    0, 0, 0, 0);
4401 #ifdef CONFIG_SMP
4402         seq_printf(m, " #P:%d)\n", num_online_cpus());
4403 #else
4404         seq_puts(m, ")\n");
4405 #endif
4406         seq_puts(m, "#    -----------------\n");
4407         seq_printf(m, "#    | task: %.16s-%d "
4408                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4409                    data->comm, data->pid,
4410                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4411                    data->policy, data->rt_priority);
4412         seq_puts(m, "#    -----------------\n");
4413
4414         if (data->critical_start) {
4415                 seq_puts(m, "#  => started at: ");
4416                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4417                 trace_print_seq(m, &iter->seq);
4418                 seq_puts(m, "\n#  => ended at:   ");
4419                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4420                 trace_print_seq(m, &iter->seq);
4421                 seq_puts(m, "\n#\n");
4422         }
4423
4424         seq_puts(m, "#\n");
4425 }
4426
4427 static void test_cpu_buff_start(struct trace_iterator *iter)
4428 {
4429         struct trace_seq *s = &iter->seq;
4430         struct trace_array *tr = iter->tr;
4431
4432         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4433                 return;
4434
4435         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4436                 return;
4437
4438         if (cpumask_available(iter->started) &&
4439             cpumask_test_cpu(iter->cpu, iter->started))
4440                 return;
4441
4442         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4443                 return;
4444
4445         if (cpumask_available(iter->started))
4446                 cpumask_set_cpu(iter->cpu, iter->started);
4447
4448         /* Don't print the "buffer started" message for the first entry of the trace */
4449         if (iter->idx > 1)
4450                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4451                                 iter->cpu);
4452 }
4453
4454 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4455 {
4456         struct trace_array *tr = iter->tr;
4457         struct trace_seq *s = &iter->seq;
4458         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4459         struct trace_entry *entry;
4460         struct trace_event *event;
4461
4462         entry = iter->ent;
4463
4464         test_cpu_buff_start(iter);
4465
4466         event = ftrace_find_event(entry->type);
4467
4468         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4469                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4470                         trace_print_lat_context(iter);
4471                 else
4472                         trace_print_context(iter);
4473         }
4474
4475         if (trace_seq_has_overflowed(s))
4476                 return TRACE_TYPE_PARTIAL_LINE;
4477
4478         if (event) {
4479                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4480                         return print_event_fields(iter, event);
4481                 return event->funcs->trace(iter, sym_flags, event);
4482         }
4483
4484         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4485
4486         return trace_handle_return(s);
4487 }
4488
4489 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4490 {
4491         struct trace_array *tr = iter->tr;
4492         struct trace_seq *s = &iter->seq;
4493         struct trace_entry *entry;
4494         struct trace_event *event;
4495
4496         entry = iter->ent;
4497
4498         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4499                 trace_seq_printf(s, "%d %d %llu ",
4500                                  entry->pid, iter->cpu, iter->ts);
4501
4502         if (trace_seq_has_overflowed(s))
4503                 return TRACE_TYPE_PARTIAL_LINE;
4504
4505         event = ftrace_find_event(entry->type);
4506         if (event)
4507                 return event->funcs->raw(iter, 0, event);
4508
4509         trace_seq_printf(s, "%d ?\n", entry->type);
4510
4511         return trace_handle_return(s);
4512 }
4513
4514 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4515 {
4516         struct trace_array *tr = iter->tr;
4517         struct trace_seq *s = &iter->seq;
4518         unsigned char newline = '\n';
4519         struct trace_entry *entry;
4520         struct trace_event *event;
4521
4522         entry = iter->ent;
4523
4524         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4525                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4526                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4527                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4528                 if (trace_seq_has_overflowed(s))
4529                         return TRACE_TYPE_PARTIAL_LINE;
4530         }
4531
4532         event = ftrace_find_event(entry->type);
4533         if (event) {
4534                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4535                 if (ret != TRACE_TYPE_HANDLED)
4536                         return ret;
4537         }
4538
4539         SEQ_PUT_FIELD(s, newline);
4540
4541         return trace_handle_return(s);
4542 }
4543
4544 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4545 {
4546         struct trace_array *tr = iter->tr;
4547         struct trace_seq *s = &iter->seq;
4548         struct trace_entry *entry;
4549         struct trace_event *event;
4550
4551         entry = iter->ent;
4552
4553         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4554                 SEQ_PUT_FIELD(s, entry->pid);
4555                 SEQ_PUT_FIELD(s, iter->cpu);
4556                 SEQ_PUT_FIELD(s, iter->ts);
4557                 if (trace_seq_has_overflowed(s))
4558                         return TRACE_TYPE_PARTIAL_LINE;
4559         }
4560
4561         event = ftrace_find_event(entry->type);
4562         return event ? event->funcs->binary(iter, 0, event) :
4563                 TRACE_TYPE_HANDLED;
4564 }
4565
4566 int trace_empty(struct trace_iterator *iter)
4567 {
4568         struct ring_buffer_iter *buf_iter;
4569         int cpu;
4570
4571         /* If we are looking at one CPU buffer, only check that one */
4572         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4573                 cpu = iter->cpu_file;
4574                 buf_iter = trace_buffer_iter(iter, cpu);
4575                 if (buf_iter) {
4576                         if (!ring_buffer_iter_empty(buf_iter))
4577                                 return 0;
4578                 } else {
4579                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4580                                 return 0;
4581                 }
4582                 return 1;
4583         }
4584
4585         for_each_tracing_cpu(cpu) {
4586                 buf_iter = trace_buffer_iter(iter, cpu);
4587                 if (buf_iter) {
4588                         if (!ring_buffer_iter_empty(buf_iter))
4589                                 return 0;
4590                 } else {
4591                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4592                                 return 0;
4593                 }
4594         }
4595
4596         return 1;
4597 }
4598
4599 /*  Called with trace_event_read_lock() held. */
4600 enum print_line_t print_trace_line(struct trace_iterator *iter)
4601 {
4602         struct trace_array *tr = iter->tr;
4603         unsigned long trace_flags = tr->trace_flags;
4604         enum print_line_t ret;
4605
4606         if (iter->lost_events) {
4607                 if (iter->lost_events == (unsigned long)-1)
4608                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4609                                          iter->cpu);
4610                 else
4611                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4612                                          iter->cpu, iter->lost_events);
4613                 if (trace_seq_has_overflowed(&iter->seq))
4614                         return TRACE_TYPE_PARTIAL_LINE;
4615         }
4616
4617         if (iter->trace && iter->trace->print_line) {
4618                 ret = iter->trace->print_line(iter);
4619                 if (ret != TRACE_TYPE_UNHANDLED)
4620                         return ret;
4621         }
4622
4623         if (iter->ent->type == TRACE_BPUTS &&
4624                         trace_flags & TRACE_ITER_PRINTK &&
4625                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4626                 return trace_print_bputs_msg_only(iter);
4627
4628         if (iter->ent->type == TRACE_BPRINT &&
4629                         trace_flags & TRACE_ITER_PRINTK &&
4630                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4631                 return trace_print_bprintk_msg_only(iter);
4632
4633         if (iter->ent->type == TRACE_PRINT &&
4634                         trace_flags & TRACE_ITER_PRINTK &&
4635                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4636                 return trace_print_printk_msg_only(iter);
4637
4638         if (trace_flags & TRACE_ITER_BIN)
4639                 return print_bin_fmt(iter);
4640
4641         if (trace_flags & TRACE_ITER_HEX)
4642                 return print_hex_fmt(iter);
4643
4644         if (trace_flags & TRACE_ITER_RAW)
4645                 return print_raw_fmt(iter);
4646
4647         return print_trace_fmt(iter);
4648 }
4649
4650 void trace_latency_header(struct seq_file *m)
4651 {
4652         struct trace_iterator *iter = m->private;
4653         struct trace_array *tr = iter->tr;
4654
4655         /* print nothing if the buffers are empty */
4656         if (trace_empty(iter))
4657                 return;
4658
4659         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4660                 print_trace_header(m, iter);
4661
4662         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4663                 print_lat_help_header(m);
4664 }
4665
4666 void trace_default_header(struct seq_file *m)
4667 {
4668         struct trace_iterator *iter = m->private;
4669         struct trace_array *tr = iter->tr;
4670         unsigned long trace_flags = tr->trace_flags;
4671
4672         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4673                 return;
4674
4675         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4676                 /* print nothing if the buffers are empty */
4677                 if (trace_empty(iter))
4678                         return;
4679                 print_trace_header(m, iter);
4680                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4681                         print_lat_help_header(m);
4682         } else {
4683                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4684                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4685                                 print_func_help_header_irq(iter->array_buffer,
4686                                                            m, trace_flags);
4687                         else
4688                                 print_func_help_header(iter->array_buffer, m,
4689                                                        trace_flags);
4690                 }
4691         }
4692 }
4693
4694 static void test_ftrace_alive(struct seq_file *m)
4695 {
4696         if (!ftrace_is_dead())
4697                 return;
4698         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4699                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4700 }
4701
4702 #ifdef CONFIG_TRACER_MAX_TRACE
4703 static void show_snapshot_main_help(struct seq_file *m)
4704 {
4705         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4706                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4707                     "#                      Takes a snapshot of the main buffer.\n"
4708                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4709                     "#                      (Doesn't have to be '2'; works with any number that\n"
4710                     "#                       is not a '0' or '1')\n");
4711 }
4712
4713 static void show_snapshot_percpu_help(struct seq_file *m)
4714 {
4715         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4716 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4717         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4718                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4719 #else
4720         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4721                     "#                     Must use main snapshot file to allocate.\n");
4722 #endif
4723         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4724                     "#                      (Doesn't have to be '2', works with any number that\n"
4725                     "#                       is not a '0' or '1')\n");
4726 }
4727
4728 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4729 {
4730         if (iter->tr->allocated_snapshot)
4731                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4732         else
4733                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4734
4735         seq_puts(m, "# Snapshot commands:\n");
4736         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4737                 show_snapshot_main_help(m);
4738         else
4739                 show_snapshot_percpu_help(m);
4740 }
4741 #else
4742 /* Should never be called */
4743 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4744 #endif
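/*
 * A minimal userspace sketch (not part of trace.c) that drives the "snapshot"
 * file exactly as the help text above describes: '1' allocates and takes a
 * snapshot, '2' clears it, '0' frees it.  The tracefs mount point
 * /sys/kernel/tracing is an assumption; adjust if tracefs is mounted elsewhere.
 */
#if 0	/* illustrative only, never compiled into the kernel */
#include <stdio.h>

static int snapshot_write(const char *val)
{
	FILE *f = fopen("/sys/kernel/tracing/snapshot", "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	snapshot_write("1");	/* allocate (if needed) and take a snapshot */
	snapshot_write("2");	/* clear the snapshot buffer, keep it allocated */
	snapshot_write("0");	/* free the snapshot buffer */
	return 0;
}
#endif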
4745
4746 static int s_show(struct seq_file *m, void *v)
4747 {
4748         struct trace_iterator *iter = v;
4749         int ret;
4750
4751         if (iter->ent == NULL) {
4752                 if (iter->tr) {
4753                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4754                         seq_puts(m, "#\n");
4755                         test_ftrace_alive(m);
4756                 }
4757                 if (iter->snapshot && trace_empty(iter))
4758                         print_snapshot_help(m, iter);
4759                 else if (iter->trace && iter->trace->print_header)
4760                         iter->trace->print_header(m);
4761                 else
4762                         trace_default_header(m);
4763
4764         } else if (iter->leftover) {
4765                 /*
4766                  * If we filled the seq_file buffer earlier, we
4767                  * want to just show it now.
4768                  */
4769                 ret = trace_print_seq(m, &iter->seq);
4770
4771                 /* ret should this time be zero, but you never know */
4772                 iter->leftover = ret;
4773
4774         } else {
4775                 print_trace_line(iter);
4776                 ret = trace_print_seq(m, &iter->seq);
4777                 /*
4778                  * If we overflow the seq_file buffer, then it will
4779                  * ask us for this data again at start up.
4780                  * Use that instead.
4781                  *  ret is 0 if seq_file write succeeded.
4782                  *        -1 otherwise.
4783                  */
4784                 iter->leftover = ret;
4785         }
4786
4787         return 0;
4788 }
4789
4790 /*
4791  * Should be used after trace_array_get(); trace_types_lock
4792  * ensures that i_cdev was already initialized.
4793  */
4794 static inline int tracing_get_cpu(struct inode *inode)
4795 {
4796         if (inode->i_cdev) /* See trace_create_cpu_file() */
4797                 return (long)inode->i_cdev - 1;
4798         return RING_BUFFER_ALL_CPUS;
4799 }
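/*
 * A small standalone sketch (not part of trace.c) of the encoding that
 * tracing_get_cpu() decodes: the per-cpu files appear to store cpu + 1 in
 * i_cdev (see trace_create_cpu_file()) so that a NULL cookie still means
 * "all CPUs".  The helper names below are hypothetical.
 */
#if 0	/* illustrative only */
#include <stdio.h>

#define ALL_CPUS	(-1L)

static void *encode_cpu(long cpu)
{
	return (void *)(cpu + 1);		/* never NULL, even for cpu 0 */
}

static long decode_cpu(void *cookie)
{
	return cookie ? (long)cookie - 1 : ALL_CPUS;
}

int main(void)
{
	printf("cpu 3 -> %ld, NULL -> %ld\n",
	       decode_cpu(encode_cpu(3)), decode_cpu(NULL));
	return 0;
}
#endif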
4800
4801 static const struct seq_operations tracer_seq_ops = {
4802         .start          = s_start,
4803         .next           = s_next,
4804         .stop           = s_stop,
4805         .show           = s_show,
4806 };
4807
4808 /*
4809  * Note, as iter itself can be allocated and freed in different
4810  * ways, this function is only used to free its content, and not
4811  * the iterator itself. The only requirement for all the allocations
4812  * is that they must zero all fields (kzalloc), as freeing works with
4813  * either allocated content or NULL.
4814  */
4815 static void free_trace_iter_content(struct trace_iterator *iter)
4816 {
4817         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4818         if (iter->fmt != static_fmt_buf)
4819                 kfree(iter->fmt);
4820
4821         kfree(iter->temp);
4822         kfree(iter->buffer_iter);
4823         mutex_destroy(&iter->mutex);
4824         free_cpumask_var(iter->started);
4825 }
4826
4827 static struct trace_iterator *
4828 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4829 {
4830         struct trace_array *tr = inode->i_private;
4831         struct trace_iterator *iter;
4832         int cpu;
4833
4834         if (tracing_disabled)
4835                 return ERR_PTR(-ENODEV);
4836
4837         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4838         if (!iter)
4839                 return ERR_PTR(-ENOMEM);
4840
4841         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4842                                     GFP_KERNEL);
4843         if (!iter->buffer_iter)
4844                 goto release;
4845
4846         /*
4847          * trace_find_next_entry() may need to save off iter->ent.
4848          * It will place it into the iter->temp buffer. As most
4849          * events are less than 128 bytes, allocate a buffer of that size.
4850          * If one is greater, then trace_find_next_entry() will
4851          * allocate a new buffer to adjust for the bigger iter->ent.
4852          * It's not critical if it fails to get allocated here.
4853          */
4854         iter->temp = kmalloc(128, GFP_KERNEL);
4855         if (iter->temp)
4856                 iter->temp_size = 128;
4857
4858         /*
4859          * trace_event_printf() may need to modify given format
4860          * string to replace %p with %px so that it shows real address
4861          * instead of a hash value. However, that is only for event
4862          * tracing; other tracers may not need it. Defer the allocation
4863          * until it is needed.
4864          */
4865         iter->fmt = NULL;
4866         iter->fmt_size = 0;
4867
4868         mutex_lock(&trace_types_lock);
4869         iter->trace = tr->current_trace;
4870
4871         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4872                 goto fail;
4873
4874         iter->tr = tr;
4875
4876 #ifdef CONFIG_TRACER_MAX_TRACE
4877         /* Currently only the top directory has a snapshot */
4878         if (tr->current_trace->print_max || snapshot)
4879                 iter->array_buffer = &tr->max_buffer;
4880         else
4881 #endif
4882                 iter->array_buffer = &tr->array_buffer;
4883         iter->snapshot = snapshot;
4884         iter->pos = -1;
4885         iter->cpu_file = tracing_get_cpu(inode);
4886         mutex_init(&iter->mutex);
4887
4888         /* Notify the tracer early; before we stop tracing. */
4889         if (iter->trace->open)
4890                 iter->trace->open(iter);
4891
4892         /* Annotate start of buffers if we had overruns */
4893         if (ring_buffer_overruns(iter->array_buffer->buffer))
4894                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4895
4896         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4897         if (trace_clocks[tr->clock_id].in_ns)
4898                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4899
4900         /*
4901          * If pause-on-trace is enabled, then stop the trace while
4902          * dumping, unless this is the "snapshot" file
4903          */
4904         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4905                 tracing_stop_tr(tr);
4906
4907         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4908                 for_each_tracing_cpu(cpu) {
4909                         iter->buffer_iter[cpu] =
4910                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4911                                                          cpu, GFP_KERNEL);
4912                 }
4913                 ring_buffer_read_prepare_sync();
4914                 for_each_tracing_cpu(cpu) {
4915                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4916                         tracing_iter_reset(iter, cpu);
4917                 }
4918         } else {
4919                 cpu = iter->cpu_file;
4920                 iter->buffer_iter[cpu] =
4921                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4922                                                  cpu, GFP_KERNEL);
4923                 ring_buffer_read_prepare_sync();
4924                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4925                 tracing_iter_reset(iter, cpu);
4926         }
4927
4928         mutex_unlock(&trace_types_lock);
4929
4930         return iter;
4931
4932  fail:
4933         mutex_unlock(&trace_types_lock);
4934         free_trace_iter_content(iter);
4935 release:
4936         seq_release_private(inode, file);
4937         return ERR_PTR(-ENOMEM);
4938 }
4939
4940 int tracing_open_generic(struct inode *inode, struct file *filp)
4941 {
4942         int ret;
4943
4944         ret = tracing_check_open_get_tr(NULL);
4945         if (ret)
4946                 return ret;
4947
4948         filp->private_data = inode->i_private;
4949         return 0;
4950 }
4951
4952 bool tracing_is_disabled(void)
4953 {
4954         return tracing_disabled;
4955 }
4956
4957 /*
4958  * Open and update trace_array ref count.
4959  * Must have the current trace_array passed to it.
4960  */
4961 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4962 {
4963         struct trace_array *tr = inode->i_private;
4964         int ret;
4965
4966         ret = tracing_check_open_get_tr(tr);
4967         if (ret)
4968                 return ret;
4969
4970         filp->private_data = inode->i_private;
4971
4972         return 0;
4973 }
4974
4975 /*
4976  * The private pointer of the inode is the trace_event_file.
4977  * Update the tr ref count associated to it.
4978  */
4979 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4980 {
4981         struct trace_event_file *file = inode->i_private;
4982         int ret;
4983
4984         ret = tracing_check_open_get_tr(file->tr);
4985         if (ret)
4986                 return ret;
4987
4988         mutex_lock(&event_mutex);
4989
4990         /* Fail if the file is marked for removal */
4991         if (file->flags & EVENT_FILE_FL_FREED) {
4992                 trace_array_put(file->tr);
4993                 ret = -ENODEV;
4994         } else {
4995                 event_file_get(file);
4996         }
4997
4998         mutex_unlock(&event_mutex);
4999         if (ret)
5000                 return ret;
5001
5002         filp->private_data = inode->i_private;
5003
5004         return 0;
5005 }
5006
5007 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5008 {
5009         struct trace_event_file *file = inode->i_private;
5010
5011         trace_array_put(file->tr);
5012         event_file_put(file);
5013
5014         return 0;
5015 }
5016
5017 static int tracing_mark_open(struct inode *inode, struct file *filp)
5018 {
5019         stream_open(inode, filp);
5020         return tracing_open_generic_tr(inode, filp);
5021 }
5022
5023 static int tracing_release(struct inode *inode, struct file *file)
5024 {
5025         struct trace_array *tr = inode->i_private;
5026         struct seq_file *m = file->private_data;
5027         struct trace_iterator *iter;
5028         int cpu;
5029
5030         if (!(file->f_mode & FMODE_READ)) {
5031                 trace_array_put(tr);
5032                 return 0;
5033         }
5034
5035         /* Writes do not use seq_file */
5036         iter = m->private;
5037         mutex_lock(&trace_types_lock);
5038
5039         for_each_tracing_cpu(cpu) {
5040                 if (iter->buffer_iter[cpu])
5041                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5042         }
5043
5044         if (iter->trace && iter->trace->close)
5045                 iter->trace->close(iter);
5046
5047         if (!iter->snapshot && tr->stop_count)
5048                 /* reenable tracing if it was previously enabled */
5049                 tracing_start_tr(tr);
5050
5051         __trace_array_put(tr);
5052
5053         mutex_unlock(&trace_types_lock);
5054
5055         free_trace_iter_content(iter);
5056         seq_release_private(inode, file);
5057
5058         return 0;
5059 }
5060
5061 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5062 {
5063         struct trace_array *tr = inode->i_private;
5064
5065         trace_array_put(tr);
5066         return 0;
5067 }
5068
5069 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5070 {
5071         struct trace_array *tr = inode->i_private;
5072
5073         trace_array_put(tr);
5074
5075         return single_release(inode, file);
5076 }
5077
5078 static int tracing_open(struct inode *inode, struct file *file)
5079 {
5080         struct trace_array *tr = inode->i_private;
5081         struct trace_iterator *iter;
5082         int ret;
5083
5084         ret = tracing_check_open_get_tr(tr);
5085         if (ret)
5086                 return ret;
5087
5088         /* If this file was open for write, then erase contents */
5089         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5090                 int cpu = tracing_get_cpu(inode);
5091                 struct array_buffer *trace_buf = &tr->array_buffer;
5092
5093 #ifdef CONFIG_TRACER_MAX_TRACE
5094                 if (tr->current_trace->print_max)
5095                         trace_buf = &tr->max_buffer;
5096 #endif
5097
5098                 if (cpu == RING_BUFFER_ALL_CPUS)
5099                         tracing_reset_online_cpus(trace_buf);
5100                 else
5101                         tracing_reset_cpu(trace_buf, cpu);
5102         }
5103
5104         if (file->f_mode & FMODE_READ) {
5105                 iter = __tracing_open(inode, file, false);
5106                 if (IS_ERR(iter))
5107                         ret = PTR_ERR(iter);
5108                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5109                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5110         }
5111
5112         if (ret < 0)
5113                 trace_array_put(tr);
5114
5115         return ret;
5116 }
5117
5118 /*
5119  * Some tracers are not suitable for instance buffers.
5120  * A tracer is always available for the global array (toplevel),
5121  * and otherwise only if it explicitly allows instances.
5122  */
5123 static bool
5124 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5125 {
5126         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5127 }
5128
5129 /* Find the next tracer that this trace array may use */
5130 static struct tracer *
5131 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5132 {
5133         while (t && !trace_ok_for_array(t, tr))
5134                 t = t->next;
5135
5136         return t;
5137 }
5138
5139 static void *
5140 t_next(struct seq_file *m, void *v, loff_t *pos)
5141 {
5142         struct trace_array *tr = m->private;
5143         struct tracer *t = v;
5144
5145         (*pos)++;
5146
5147         if (t)
5148                 t = get_tracer_for_array(tr, t->next);
5149
5150         return t;
5151 }
5152
5153 static void *t_start(struct seq_file *m, loff_t *pos)
5154 {
5155         struct trace_array *tr = m->private;
5156         struct tracer *t;
5157         loff_t l = 0;
5158
5159         mutex_lock(&trace_types_lock);
5160
5161         t = get_tracer_for_array(tr, trace_types);
5162         for (; t && l < *pos; t = t_next(m, t, &l))
5163                 ;
5164
5165         return t;
5166 }
5167
5168 static void t_stop(struct seq_file *m, void *p)
5169 {
5170         mutex_unlock(&trace_types_lock);
5171 }
5172
5173 static int t_show(struct seq_file *m, void *v)
5174 {
5175         struct tracer *t = v;
5176
5177         if (!t)
5178                 return 0;
5179
5180         seq_puts(m, t->name);
5181         if (t->next)
5182                 seq_putc(m, ' ');
5183         else
5184                 seq_putc(m, '\n');
5185
5186         return 0;
5187 }
5188
5189 static const struct seq_operations show_traces_seq_ops = {
5190         .start          = t_start,
5191         .next           = t_next,
5192         .stop           = t_stop,
5193         .show           = t_show,
5194 };
5195
5196 static int show_traces_open(struct inode *inode, struct file *file)
5197 {
5198         struct trace_array *tr = inode->i_private;
5199         struct seq_file *m;
5200         int ret;
5201
5202         ret = tracing_check_open_get_tr(tr);
5203         if (ret)
5204                 return ret;
5205
5206         ret = seq_open(file, &show_traces_seq_ops);
5207         if (ret) {
5208                 trace_array_put(tr);
5209                 return ret;
5210         }
5211
5212         m = file->private_data;
5213         m->private = tr;
5214
5215         return 0;
5216 }
5217
5218 static int show_traces_release(struct inode *inode, struct file *file)
5219 {
5220         struct trace_array *tr = inode->i_private;
5221
5222         trace_array_put(tr);
5223         return seq_release(inode, file);
5224 }
5225
5226 static ssize_t
5227 tracing_write_stub(struct file *filp, const char __user *ubuf,
5228                    size_t count, loff_t *ppos)
5229 {
5230         return count;
5231 }
5232
5233 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5234 {
5235         int ret;
5236
5237         if (file->f_mode & FMODE_READ)
5238                 ret = seq_lseek(file, offset, whence);
5239         else
5240                 file->f_pos = ret = 0;
5241
5242         return ret;
5243 }
5244
5245 static const struct file_operations tracing_fops = {
5246         .open           = tracing_open,
5247         .read           = seq_read,
5248         .read_iter      = seq_read_iter,
5249         .splice_read    = copy_splice_read,
5250         .write          = tracing_write_stub,
5251         .llseek         = tracing_lseek,
5252         .release        = tracing_release,
5253 };
5254
5255 static const struct file_operations show_traces_fops = {
5256         .open           = show_traces_open,
5257         .read           = seq_read,
5258         .llseek         = seq_lseek,
5259         .release        = show_traces_release,
5260 };
5261
5262 static ssize_t
5263 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5264                      size_t count, loff_t *ppos)
5265 {
5266         struct trace_array *tr = file_inode(filp)->i_private;
5267         char *mask_str;
5268         int len;
5269
5270         len = snprintf(NULL, 0, "%*pb\n",
5271                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5272         mask_str = kmalloc(len, GFP_KERNEL);
5273         if (!mask_str)
5274                 return -ENOMEM;
5275
5276         len = snprintf(mask_str, len, "%*pb\n",
5277                        cpumask_pr_args(tr->tracing_cpumask));
5278         if (len >= count) {
5279                 count = -EINVAL;
5280                 goto out_err;
5281         }
5282         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5283
5284 out_err:
5285         kfree(mask_str);
5286
5287         return count;
5288 }
5289
5290 int tracing_set_cpumask(struct trace_array *tr,
5291                         cpumask_var_t tracing_cpumask_new)
5292 {
5293         int cpu;
5294
5295         if (!tr)
5296                 return -EINVAL;
5297
5298         local_irq_disable();
5299         arch_spin_lock(&tr->max_lock);
5300         for_each_tracing_cpu(cpu) {
5301                 /*
5302                  * Increase/decrease the disabled counter if we are
5303                  * about to flip a bit in the cpumask:
5304                  */
5305                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5306                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5307                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5308                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5309 #ifdef CONFIG_TRACER_MAX_TRACE
5310                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5311 #endif
5312                 }
5313                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5314                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5315                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5316                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5317 #ifdef CONFIG_TRACER_MAX_TRACE
5318                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5319 #endif
5320                 }
5321         }
5322         arch_spin_unlock(&tr->max_lock);
5323         local_irq_enable();
5324
5325         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5326
5327         return 0;
5328 }
5329
5330 static ssize_t
5331 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5332                       size_t count, loff_t *ppos)
5333 {
5334         struct trace_array *tr = file_inode(filp)->i_private;
5335         cpumask_var_t tracing_cpumask_new;
5336         int err;
5337
5338         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5339                 return -ENOMEM;
5340
5341         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5342         if (err)
5343                 goto err_free;
5344
5345         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5346         if (err)
5347                 goto err_free;
5348
5349         free_cpumask_var(tracing_cpumask_new);
5350
5351         return count;
5352
5353 err_free:
5354         free_cpumask_var(tracing_cpumask_new);
5355
5356         return err;
5357 }
5358
5359 static const struct file_operations tracing_cpumask_fops = {
5360         .open           = tracing_open_generic_tr,
5361         .read           = tracing_cpumask_read,
5362         .write          = tracing_cpumask_write,
5363         .release        = tracing_release_generic_tr,
5364         .llseek         = generic_file_llseek,
5365 };
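/*
 * A minimal userspace sketch (not part of trace.c) of the tracing_cpumask
 * interface implemented above.  The file contains a hex CPU bitmap (the
 * "%*pb" format); writing a new mask limits which CPUs are traced.  The
 * tracefs path and the example mask values are assumptions.
 */
#if 0	/* illustrative only */
#include <stdio.h>

int main(void)
{
	char mask[256];
	FILE *f = fopen("/sys/kernel/tracing/tracing_cpumask", "r");

	if (!f)
		return 1;
	if (fgets(mask, sizeof(mask), f))
		printf("current mask: %s", mask);	/* e.g. "f\n" for CPUs 0-3 */
	fclose(f);

	f = fopen("/sys/kernel/tracing/tracing_cpumask", "w");
	if (!f)
		return 1;
	fputs("3\n", f);	/* restrict tracing to CPUs 0 and 1 */
	return fclose(f) ? 1 : 0;
}
#endif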
5366
5367 static int tracing_trace_options_show(struct seq_file *m, void *v)
5368 {
5369         struct tracer_opt *trace_opts;
5370         struct trace_array *tr = m->private;
5371         u32 tracer_flags;
5372         int i;
5373
5374         mutex_lock(&trace_types_lock);
5375         tracer_flags = tr->current_trace->flags->val;
5376         trace_opts = tr->current_trace->flags->opts;
5377
5378         for (i = 0; trace_options[i]; i++) {
5379                 if (tr->trace_flags & (1 << i))
5380                         seq_printf(m, "%s\n", trace_options[i]);
5381                 else
5382                         seq_printf(m, "no%s\n", trace_options[i]);
5383         }
5384
5385         for (i = 0; trace_opts[i].name; i++) {
5386                 if (tracer_flags & trace_opts[i].bit)
5387                         seq_printf(m, "%s\n", trace_opts[i].name);
5388                 else
5389                         seq_printf(m, "no%s\n", trace_opts[i].name);
5390         }
5391         mutex_unlock(&trace_types_lock);
5392
5393         return 0;
5394 }
5395
5396 static int __set_tracer_option(struct trace_array *tr,
5397                                struct tracer_flags *tracer_flags,
5398                                struct tracer_opt *opts, int neg)
5399 {
5400         struct tracer *trace = tracer_flags->trace;
5401         int ret;
5402
5403         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5404         if (ret)
5405                 return ret;
5406
5407         if (neg)
5408                 tracer_flags->val &= ~opts->bit;
5409         else
5410                 tracer_flags->val |= opts->bit;
5411         return 0;
5412 }
5413
5414 /* Try to assign a tracer specific option */
5415 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5416 {
5417         struct tracer *trace = tr->current_trace;
5418         struct tracer_flags *tracer_flags = trace->flags;
5419         struct tracer_opt *opts = NULL;
5420         int i;
5421
5422         for (i = 0; tracer_flags->opts[i].name; i++) {
5423                 opts = &tracer_flags->opts[i];
5424
5425                 if (strcmp(cmp, opts->name) == 0)
5426                         return __set_tracer_option(tr, trace->flags, opts, neg);
5427         }
5428
5429         return -EINVAL;
5430 }
5431
5432 /* Some tracers require overwrite to stay enabled */
5433 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5434 {
5435         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5436                 return -1;
5437
5438         return 0;
5439 }
5440
5441 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5442 {
5443         int *map;
5444
5445         if ((mask == TRACE_ITER_RECORD_TGID) ||
5446             (mask == TRACE_ITER_RECORD_CMD))
5447                 lockdep_assert_held(&event_mutex);
5448
5449         /* do nothing if flag is already set */
5450         if (!!(tr->trace_flags & mask) == !!enabled)
5451                 return 0;
5452
5453         /* Give the tracer a chance to approve the change */
5454         if (tr->current_trace->flag_changed)
5455                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5456                         return -EINVAL;
5457
5458         if (enabled)
5459                 tr->trace_flags |= mask;
5460         else
5461                 tr->trace_flags &= ~mask;
5462
5463         if (mask == TRACE_ITER_RECORD_CMD)
5464                 trace_event_enable_cmd_record(enabled);
5465
5466         if (mask == TRACE_ITER_RECORD_TGID) {
5467                 if (!tgid_map) {
5468                         tgid_map_max = pid_max;
5469                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5470                                        GFP_KERNEL);
5471
5472                         /*
5473                          * Pairs with smp_load_acquire() in
5474                          * trace_find_tgid_ptr() to ensure that if it observes
5475                          * the tgid_map we just allocated then it also observes
5476                          * the corresponding tgid_map_max value.
5477                          */
5478                         smp_store_release(&tgid_map, map);
5479                 }
5480                 if (!tgid_map) {
5481                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5482                         return -ENOMEM;
5483                 }
5484
5485                 trace_event_enable_tgid_record(enabled);
5486         }
5487
5488         if (mask == TRACE_ITER_EVENT_FORK)
5489                 trace_event_follow_fork(tr, enabled);
5490
5491         if (mask == TRACE_ITER_FUNC_FORK)
5492                 ftrace_pid_follow_fork(tr, enabled);
5493
5494         if (mask == TRACE_ITER_OVERWRITE) {
5495                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5496 #ifdef CONFIG_TRACER_MAX_TRACE
5497                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5498 #endif
5499         }
5500
5501         if (mask == TRACE_ITER_PRINTK) {
5502                 trace_printk_start_stop_comm(enabled);
5503                 trace_printk_control(enabled);
5504         }
5505
5506         return 0;
5507 }
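/*
 * A standalone C11 sketch (not part of trace.c) of the publish/consume
 * pattern used for tgid_map above: the size is written before the pointer is
 * store-released, so any reader that load-acquires the pointer also sees a
 * matching size.  The names below are hypothetical userspace analogues.
 */
#if 0	/* illustrative only */
#include <stdatomic.h>
#include <stdlib.h>

static int map_max;
static int *_Atomic map;			/* plays the role of tgid_map */

static void publish_map(int max)
{
	int *m = calloc(max + 1, sizeof(*m));

	if (!m)
		return;
	map_max = max;				/* written first ... */
	atomic_store_explicit(&map, m,		/* ... then released */
			      memory_order_release);
}

static int *find_ptr(int idx)
{
	/* pairs with the release store in publish_map() */
	int *m = atomic_load_explicit(&map, memory_order_acquire);

	return (m && idx <= map_max) ? &m[idx] : NULL;
}
#endif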
5508
5509 int trace_set_options(struct trace_array *tr, char *option)
5510 {
5511         char *cmp;
5512         int neg = 0;
5513         int ret;
5514         size_t orig_len = strlen(option);
5515         int len;
5516
5517         cmp = strstrip(option);
5518
5519         len = str_has_prefix(cmp, "no");
5520         if (len)
5521                 neg = 1;
5522
5523         cmp += len;
5524
5525         mutex_lock(&event_mutex);
5526         mutex_lock(&trace_types_lock);
5527
5528         ret = match_string(trace_options, -1, cmp);
5529         /* If no option could be set, test the specific tracer options */
5530         if (ret < 0)
5531                 ret = set_tracer_option(tr, cmp, neg);
5532         else
5533                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5534
5535         mutex_unlock(&trace_types_lock);
5536         mutex_unlock(&event_mutex);
5537
5538         /*
5539          * If the first trailing whitespace is replaced with '\0' by strstrip,
5540          * turn it back into a space.
5541          */
5542         if (orig_len > strlen(option))
5543                 option[strlen(option)] = ' ';
5544
5545         return ret;
5546 }
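/*
 * A minimal userspace sketch (not part of trace.c) exercising the
 * trace_options file that trace_set_options() backs.  Prefixing an option
 * with "no" clears it, as parsed above.  The tracefs path and the particular
 * option name used here ("overwrite") are assumptions about the target system.
 */
#if 0	/* illustrative only */
#include <stdio.h>

static int write_option(const char *opt)
{
	FILE *f = fopen("/sys/kernel/tracing/trace_options", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", opt);
	return fclose(f);
}

int main(void)
{
	write_option("overwrite");	/* set the flag */
	write_option("nooverwrite");	/* the "no" prefix clears it again */
	return 0;
}
#endif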
5547
5548 static void __init apply_trace_boot_options(void)
5549 {
5550         char *buf = trace_boot_options_buf;
5551         char *option;
5552
5553         while (true) {
5554                 option = strsep(&buf, ",");
5555
5556                 if (!option)
5557                         break;
5558
5559                 if (*option)
5560                         trace_set_options(&global_trace, option);
5561
5562                 /* Put back the comma to allow this to be called again */
5563                 if (buf)
5564                         *(buf - 1) = ',';
5565         }
5566 }
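/*
 * A standalone sketch (not part of trace.c) of the parsing pattern used by
 * apply_trace_boot_options() above: strsep() walks a comma-separated list in
 * place, and the comma is written back afterwards so the buffer can be parsed
 * again later.  The option names are placeholders.
 */
#if 0	/* illustrative only; strsep() is a glibc/BSD extension */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char opts[] = "optionA,nooptionB,optionC";
	char *buf = opts, *option;

	while ((option = strsep(&buf, ","))) {
		if (*option)
			printf("applying: %s\n", option);
		/* put the comma back to keep the original buffer intact */
		if (buf)
			*(buf - 1) = ',';
	}
	return 0;
}
#endif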
5567
5568 static ssize_t
5569 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5570                         size_t cnt, loff_t *ppos)
5571 {
5572         struct seq_file *m = filp->private_data;
5573         struct trace_array *tr = m->private;
5574         char buf[64];
5575         int ret;
5576
5577         if (cnt >= sizeof(buf))
5578                 return -EINVAL;
5579
5580         if (copy_from_user(buf, ubuf, cnt))
5581                 return -EFAULT;
5582
5583         buf[cnt] = 0;
5584
5585         ret = trace_set_options(tr, buf);
5586         if (ret < 0)
5587                 return ret;
5588
5589         *ppos += cnt;
5590
5591         return cnt;
5592 }
5593
5594 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5595 {
5596         struct trace_array *tr = inode->i_private;
5597         int ret;
5598
5599         ret = tracing_check_open_get_tr(tr);
5600         if (ret)
5601                 return ret;
5602
5603         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5604         if (ret < 0)
5605                 trace_array_put(tr);
5606
5607         return ret;
5608 }
5609
5610 static const struct file_operations tracing_iter_fops = {
5611         .open           = tracing_trace_options_open,
5612         .read           = seq_read,
5613         .llseek         = seq_lseek,
5614         .release        = tracing_single_release_tr,
5615         .write          = tracing_trace_options_write,
5616 };
5617
5618 static const char readme_msg[] =
5619         "tracing mini-HOWTO:\n\n"
5620         "# echo 0 > tracing_on : quick way to disable tracing\n"
5621         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5622         " Important files:\n"
5623         "  trace\t\t\t- The static contents of the buffer\n"
5624         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5625         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5626         "  current_tracer\t- function and latency tracers\n"
5627         "  available_tracers\t- list of configured tracers for current_tracer\n"
5628         "  error_log\t- error log for failed commands (that support it)\n"
5629         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5630         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5631         "  trace_clock\t\t- change the clock used to order events\n"
5632         "       local:   Per cpu clock but may not be synced across CPUs\n"
5633         "      global:   Synced across CPUs but slows tracing down.\n"
5634         "     counter:   Not a clock, but just an increment\n"
5635         "      uptime:   Jiffy counter from time of boot\n"
5636         "        perf:   Same clock that perf events use\n"
5637 #ifdef CONFIG_X86_64
5638         "     x86-tsc:   TSC cycle counter\n"
5639 #endif
5640         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5641         "       delta:   Delta difference against a buffer-wide timestamp\n"
5642         "    absolute:   Absolute (standalone) timestamp\n"
5643         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5644         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5645         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5646         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5647         "\t\t\t  Remove sub-buffer with rmdir\n"
5648         "  trace_options\t\t- Set format or modify how tracing happens\n"
5649         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5650         "\t\t\t  option name\n"
5651         "  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs to store\n"
5652 #ifdef CONFIG_DYNAMIC_FTRACE
5653         "\n  available_filter_functions - list of functions that can be filtered on\n"
5654         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5655         "\t\t\t  functions\n"
5656         "\t     accepts: func_full_name or glob-matching-pattern\n"
5657         "\t     modules: Can select a group via module\n"
5658         "\t      Format: :mod:<module-name>\n"
5659         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5660         "\t    triggers: a command to perform when function is hit\n"
5661         "\t      Format: <function>:<trigger>[:count]\n"
5662         "\t     trigger: traceon, traceoff\n"
5663         "\t\t      enable_event:<system>:<event>\n"
5664         "\t\t      disable_event:<system>:<event>\n"
5665 #ifdef CONFIG_STACKTRACE
5666         "\t\t      stacktrace\n"
5667 #endif
5668 #ifdef CONFIG_TRACER_SNAPSHOT
5669         "\t\t      snapshot\n"
5670 #endif
5671         "\t\t      dump\n"
5672         "\t\t      cpudump\n"
5673         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5674         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5675         "\t     The first one will disable tracing every time do_fault is hit\n"
5676         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5677         "\t       The first time do_trap is hit and it disables tracing, the\n"
5678         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5679         "\t       the counter will not decrement. It only decrements when the\n"
5680         "\t       trigger did work\n"
5681         "\t     To remove trigger without count:\n"
5682         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5683         "\t     To remove trigger with a count:\n"
5684         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5685         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5686         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5687         "\t    modules: Can select a group via module command :mod:\n"
5688         "\t    Does not accept triggers\n"
5689 #endif /* CONFIG_DYNAMIC_FTRACE */
5690 #ifdef CONFIG_FUNCTION_TRACER
5691         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5692         "\t\t    (function)\n"
5693         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5694         "\t\t    (function)\n"
5695 #endif
5696 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5697         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5698         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5699         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5700 #endif
5701 #ifdef CONFIG_TRACER_SNAPSHOT
5702         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5703         "\t\t\t  snapshot buffer. Read the contents for more\n"
5704         "\t\t\t  information\n"
5705 #endif
5706 #ifdef CONFIG_STACK_TRACER
5707         "  stack_trace\t\t- Shows the max stack trace when active\n"
5708         "  stack_max_size\t- Shows current max stack size that was traced\n"
5709         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5710         "\t\t\t  new trace)\n"
5711 #ifdef CONFIG_DYNAMIC_FTRACE
5712         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5713         "\t\t\t  traces\n"
5714 #endif
5715 #endif /* CONFIG_STACK_TRACER */
5716 #ifdef CONFIG_DYNAMIC_EVENTS
5717         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5718         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5719 #endif
5720 #ifdef CONFIG_KPROBE_EVENTS
5721         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5722         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5723 #endif
5724 #ifdef CONFIG_UPROBE_EVENTS
5725         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5726         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5727 #endif
5728 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5729     defined(CONFIG_FPROBE_EVENTS)
5730         "\t  accepts: event-definitions (one definition per line)\n"
5731 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5732         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5733         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5734 #endif
5735 #ifdef CONFIG_FPROBE_EVENTS
5736         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5737         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5738 #endif
5739 #ifdef CONFIG_HIST_TRIGGERS
5740         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5741 #endif
5742         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5743         "\t           -:[<group>/][<event>]\n"
5744 #ifdef CONFIG_KPROBE_EVENTS
5745         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5746   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5747 #endif
5748 #ifdef CONFIG_UPROBE_EVENTS
5749   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5750 #endif
5751         "\t     args: <name>=fetcharg[:type]\n"
5752         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5753 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5754 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5755         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5756         "\t           <argname>[->field[->field|.field...]],\n"
5757 #else
5758         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5759 #endif
5760 #else
5761         "\t           $stack<index>, $stack, $retval, $comm,\n"
5762 #endif
5763         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5764         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5765         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5766         "\t           symstr, <type>\\[<array-size>\\]\n"
5767 #ifdef CONFIG_HIST_TRIGGERS
5768         "\t    field: <stype> <name>;\n"
5769         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5770         "\t           [unsigned] char/int/long\n"
5771 #endif
5772         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5773         "\t            of the <attached-group>/<attached-event>.\n"
5774 #endif
5775         "  events/\t\t- Directory containing all trace event subsystems:\n"
5776         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5777         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5778         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5779         "\t\t\t  events\n"
5780         "      filter\t\t- If set, only events passing filter are traced\n"
5781         "  events/<system>/<event>/\t- Directory containing control files for\n"
5782         "\t\t\t  <event>:\n"
5783         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5784         "      filter\t\t- If set, only events passing filter are traced\n"
5785         "      trigger\t\t- If set, a command to perform when event is hit\n"
5786         "\t    Format: <trigger>[:count][if <filter>]\n"
5787         "\t   trigger: traceon, traceoff\n"
5788         "\t            enable_event:<system>:<event>\n"
5789         "\t            disable_event:<system>:<event>\n"
5790 #ifdef CONFIG_HIST_TRIGGERS
5791         "\t            enable_hist:<system>:<event>\n"
5792         "\t            disable_hist:<system>:<event>\n"
5793 #endif
5794 #ifdef CONFIG_STACKTRACE
5795         "\t\t    stacktrace\n"
5796 #endif
5797 #ifdef CONFIG_TRACER_SNAPSHOT
5798         "\t\t    snapshot\n"
5799 #endif
5800 #ifdef CONFIG_HIST_TRIGGERS
5801         "\t\t    hist (see below)\n"
5802 #endif
5803         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5804         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5805         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5806         "\t                  events/block/block_unplug/trigger\n"
5807         "\t   The first disables tracing every time block_unplug is hit.\n"
5808         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5809         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5810         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5811         "\t   Like function triggers, the counter is only decremented if it\n"
5812         "\t    enabled or disabled tracing.\n"
5813         "\t   To remove a trigger without a count:\n"
5814         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5815         "\t   To remove a trigger with a count:\n"
5816         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5817         "\t   Filters can be ignored when removing a trigger.\n"
5818 #ifdef CONFIG_HIST_TRIGGERS
5819         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5820         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5821         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5822         "\t            [:values=<field1[,field2,...]>]\n"
5823         "\t            [:sort=<field1[,field2,...]>]\n"
5824         "\t            [:size=#entries]\n"
5825         "\t            [:pause][:continue][:clear]\n"
5826         "\t            [:name=histname1]\n"
5827         "\t            [:nohitcount]\n"
5828         "\t            [:<handler>.<action>]\n"
5829         "\t            [if <filter>]\n\n"
5830         "\t    Note, special fields can be used as well:\n"
5831         "\t            common_timestamp - to record current timestamp\n"
5832         "\t            common_cpu - to record the CPU the event happened on\n"
5833         "\n"
5834         "\t    A hist trigger variable can be:\n"
5835         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5836         "\t        - a reference to another variable e.g. y=$x,\n"
5837         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5838         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5839         "\n"
5840         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5841         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5842         "\t    variable reference, field or numeric literal.\n"
5843         "\n"
5844         "\t    When a matching event is hit, an entry is added to a hash\n"
5845         "\t    table using the key(s) and value(s) named, and the value of a\n"
5846         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5847         "\t    correspond to fields in the event's format description.  Keys\n"
5848         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5849         "\t    Compound keys consisting of up to two fields can be specified\n"
5850         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5851         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5852         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5853         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5854         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5855         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5856         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5857         "\t    its histogram data will be shared with other triggers of the\n"
5858         "\t    same name, and trigger hits will update this common data.\n\n"
5859         "\t    Reading the 'hist' file for the event will dump the hash\n"
5860         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5861         "\t    triggers attached to an event, there will be a table for each\n"
5862         "\t    trigger in the output.  The table displayed for a named\n"
5863         "\t    trigger will be the same as any other instance having the\n"
5864         "\t    same name.  The default format used to display a given field\n"
5865         "\t    can be modified by appending any of the following modifiers\n"
5866         "\t    to the field name, as applicable:\n\n"
5867         "\t            .hex        display a number as a hex value\n"
5868         "\t            .sym        display an address as a symbol\n"
5869         "\t            .sym-offset display an address as a symbol and offset\n"
5870         "\t            .execname   display a common_pid as a program name\n"
5871         "\t            .syscall    display a syscall id as a syscall name\n"
5872         "\t            .log2       display log2 value rather than raw number\n"
5873         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5874         "\t            .usecs      display a common_timestamp in microseconds\n"
5875         "\t            .percent    display a number as a percentage value\n"
5876         "\t            .graph      display a bar-graph of a value\n\n"
5877         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5878         "\t    trigger or to start a hist trigger but not log any events\n"
5879         "\t    until told to do so.  'continue' can be used to start or\n"
5880         "\t    restart a paused hist trigger.\n\n"
5881         "\t    The 'clear' parameter will clear the contents of a running\n"
5882         "\t    hist trigger and leave its current paused/active state\n"
5883         "\t    unchanged.\n\n"
5884         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5885         "\t    raw hitcount in the histogram.\n\n"
5886         "\t    The enable_hist and disable_hist triggers can be used to\n"
5887         "\t    have one event conditionally start and stop another event's\n"
5888         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5889         "\t    the enable_event and disable_event triggers.\n\n"
5890         "\t    Hist trigger handlers and actions are executed whenever a\n"
5891         "\t    histogram entry is added or updated.  They take the form:\n\n"
5892         "\t        <handler>.<action>\n\n"
5893         "\t    The available handlers are:\n\n"
5894         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5895         "\t        onmax(var)               - invoke if var exceeds current max\n"
5896         "\t        onchange(var)            - invoke action if var changes\n\n"
5897         "\t    The available actions are:\n\n"
5898         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5899         "\t        save(field,...)                      - save current event fields\n"
5900 #ifdef CONFIG_TRACER_SNAPSHOT
5901         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5902 #endif
5903 #ifdef CONFIG_SYNTH_EVENTS
5904         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5905         "\t  Write into this file to define/undefine new synthetic events.\n"
5906         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5907 #endif
5908 #endif
5909 ;
5910
5911 static ssize_t
5912 tracing_readme_read(struct file *filp, char __user *ubuf,
5913                        size_t cnt, loff_t *ppos)
5914 {
5915         return simple_read_from_buffer(ubuf, cnt, ppos,
5916                                         readme_msg, strlen(readme_msg));
5917 }
5918
5919 static const struct file_operations tracing_readme_fops = {
5920         .open           = tracing_open_generic,
5921         .read           = tracing_readme_read,
5922         .llseek         = generic_file_llseek,
5923 };
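/*
 * A minimal userspace sketch (not part of trace.c) performing two of the
 * commands documented in the mini-HOWTO above: setting a function filter with
 * a trigger, and attaching a counted trigger to an event.  The tracefs path,
 * the "do_fault" symbol and the block_unplug event are taken from the examples
 * in the README text and may not exist on every kernel.
 */
#if 0	/* illustrative only */
#include <stdio.h>

static int write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* "echo do_fault:traceoff > set_ftrace_filter" */
	write_file("/sys/kernel/tracing/set_ftrace_filter", "do_fault:traceoff");
	/* "echo traceoff:3 > events/block/block_unplug/trigger" */
	write_file("/sys/kernel/tracing/events/block/block_unplug/trigger",
		   "traceoff:3");
	return 0;
}
#endif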
5924
5925 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5926 {
5927         int pid = ++(*pos);
5928
5929         return trace_find_tgid_ptr(pid);
5930 }
5931
5932 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5933 {
5934         int pid = *pos;
5935
5936         return trace_find_tgid_ptr(pid);
5937 }
5938
5939 static void saved_tgids_stop(struct seq_file *m, void *v)
5940 {
5941 }
5942
5943 static int saved_tgids_show(struct seq_file *m, void *v)
5944 {
5945         int *entry = (int *)v;
5946         int pid = entry - tgid_map;
5947         int tgid = *entry;
5948
5949         if (tgid == 0)
5950                 return SEQ_SKIP;
5951
5952         seq_printf(m, "%d %d\n", pid, tgid);
5953         return 0;
5954 }
5955
5956 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5957         .start          = saved_tgids_start,
5958         .stop           = saved_tgids_stop,
5959         .next           = saved_tgids_next,
5960         .show           = saved_tgids_show,
5961 };
5962
5963 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5964 {
5965         int ret;
5966
5967         ret = tracing_check_open_get_tr(NULL);
5968         if (ret)
5969                 return ret;
5970
5971         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5972 }
5973
5974
5975 static const struct file_operations tracing_saved_tgids_fops = {
5976         .open           = tracing_saved_tgids_open,
5977         .read           = seq_read,
5978         .llseek         = seq_lseek,
5979         .release        = seq_release,
5980 };
5981
5982 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5983 {
5984         unsigned int *ptr = v;
5985
5986         if (*pos || m->count)
5987                 ptr++;
5988
5989         (*pos)++;
5990
5991         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5992              ptr++) {
5993                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5994                         continue;
5995
5996                 return ptr;
5997         }
5998
5999         return NULL;
6000 }
6001
6002 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6003 {
6004         void *v;
6005         loff_t l = 0;
6006
6007         preempt_disable();
6008         arch_spin_lock(&trace_cmdline_lock);
6009
6010         v = &savedcmd->map_cmdline_to_pid[0];
6011         while (l <= *pos) {
6012                 v = saved_cmdlines_next(m, v, &l);
6013                 if (!v)
6014                         return NULL;
6015         }
6016
6017         return v;
6018 }
6019
6020 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6021 {
6022         arch_spin_unlock(&trace_cmdline_lock);
6023         preempt_enable();
6024 }
6025
6026 static int saved_cmdlines_show(struct seq_file *m, void *v)
6027 {
6028         char buf[TASK_COMM_LEN];
6029         unsigned int *pid = v;
6030
6031         __trace_find_cmdline(*pid, buf);
6032         seq_printf(m, "%d %s\n", *pid, buf);
6033         return 0;
6034 }
6035
6036 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6037         .start          = saved_cmdlines_start,
6038         .next           = saved_cmdlines_next,
6039         .stop           = saved_cmdlines_stop,
6040         .show           = saved_cmdlines_show,
6041 };
6042
6043 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6044 {
6045         int ret;
6046
6047         ret = tracing_check_open_get_tr(NULL);
6048         if (ret)
6049                 return ret;
6050
6051         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6052 }
6053
6054 static const struct file_operations tracing_saved_cmdlines_fops = {
6055         .open           = tracing_saved_cmdlines_open,
6056         .read           = seq_read,
6057         .llseek         = seq_lseek,
6058         .release        = seq_release,
6059 };
6060
6061 static ssize_t
6062 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6063                                  size_t cnt, loff_t *ppos)
6064 {
6065         char buf[64];
6066         int r;
6067
6068         preempt_disable();
6069         arch_spin_lock(&trace_cmdline_lock);
6070         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6071         arch_spin_unlock(&trace_cmdline_lock);
6072         preempt_enable();
6073
6074         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6075 }
6076
6077 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6078 {
6079         kfree(s->saved_cmdlines);
6080         kfree(s->map_cmdline_to_pid);
6081         kfree(s);
6082 }
6083
6084 static int tracing_resize_saved_cmdlines(unsigned int val)
6085 {
6086         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6087
6088         s = kmalloc(sizeof(*s), GFP_KERNEL);
6089         if (!s)
6090                 return -ENOMEM;
6091
6092         if (allocate_cmdlines_buffer(val, s) < 0) {
6093                 kfree(s);
6094                 return -ENOMEM;
6095         }
6096
6097         preempt_disable();
6098         arch_spin_lock(&trace_cmdline_lock);
6099         savedcmd_temp = savedcmd;
6100         savedcmd = s;
6101         arch_spin_unlock(&trace_cmdline_lock);
6102         preempt_enable();
6103         free_saved_cmdlines_buffer(savedcmd_temp);
6104
6105         return 0;
6106 }
6107
6108 static ssize_t
6109 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6110                                   size_t cnt, loff_t *ppos)
6111 {
6112         unsigned long val;
6113         int ret;
6114
6115         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6116         if (ret)
6117                 return ret;
6118
6119         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6120         if (!val || val > PID_MAX_DEFAULT)
6121                 return -EINVAL;
6122
6123         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6124         if (ret < 0)
6125                 return ret;
6126
6127         *ppos += cnt;
6128
6129         return cnt;
6130 }
6131
6132 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6133         .open           = tracing_open_generic,
6134         .read           = tracing_saved_cmdlines_size_read,
6135         .write          = tracing_saved_cmdlines_size_write,
6136 };
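/*
 * A minimal userspace sketch (not part of trace.c) of the saved_cmdlines_size
 * file handled above: reading reports how many comm entries are cached,
 * writing resizes the cache (1..PID_MAX_DEFAULT).  The tracefs path and the
 * value 1024 are assumptions.
 */
#if 0	/* illustrative only */
#include <stdio.h>

int main(void)
{
	unsigned int num = 0;
	const char *path = "/sys/kernel/tracing/saved_cmdlines_size";
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;
	if (fscanf(f, "%u", &num) == 1)
		printf("currently caching %u comms\n", num);
	fclose(f);

	f = fopen(path, "w");
	if (!f)
		return 1;
	fputs("1024\n", f);	/* grow the pid -> comm cache */
	return fclose(f) ? 1 : 0;
}
#endif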
6137
6138 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6139 static union trace_eval_map_item *
6140 update_eval_map(union trace_eval_map_item *ptr)
6141 {
6142         if (!ptr->map.eval_string) {
6143                 if (ptr->tail.next) {
6144                         ptr = ptr->tail.next;
6145                         /* Set ptr to the next real item (skip head) */
6146                         ptr++;
6147                 } else
6148                         return NULL;
6149         }
6150         return ptr;
6151 }
6152
6153 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6154 {
6155         union trace_eval_map_item *ptr = v;
6156
6157         /*
6158          * Paranoid! If ptr points to end, we don't want to increment past it.
6159          * This really should never happen.
6160          */
6161         (*pos)++;
6162         ptr = update_eval_map(ptr);
6163         if (WARN_ON_ONCE(!ptr))
6164                 return NULL;
6165
6166         ptr++;
6167         ptr = update_eval_map(ptr);
6168
6169         return ptr;
6170 }
6171
6172 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6173 {
6174         union trace_eval_map_item *v;
6175         loff_t l = 0;
6176
6177         mutex_lock(&trace_eval_mutex);
6178
6179         v = trace_eval_maps;
6180         if (v)
6181                 v++;
6182
6183         while (v && l < *pos) {
6184                 v = eval_map_next(m, v, &l);
6185         }
6186
6187         return v;
6188 }
6189
6190 static void eval_map_stop(struct seq_file *m, void *v)
6191 {
6192         mutex_unlock(&trace_eval_mutex);
6193 }
6194
6195 static int eval_map_show(struct seq_file *m, void *v)
6196 {
6197         union trace_eval_map_item *ptr = v;
6198
6199         seq_printf(m, "%s %ld (%s)\n",
6200                    ptr->map.eval_string, ptr->map.eval_value,
6201                    ptr->map.system);
6202
6203         return 0;
6204 }
6205
6206 static const struct seq_operations tracing_eval_map_seq_ops = {
6207         .start          = eval_map_start,
6208         .next           = eval_map_next,
6209         .stop           = eval_map_stop,
6210         .show           = eval_map_show,
6211 };
6212
6213 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6214 {
6215         int ret;
6216
6217         ret = tracing_check_open_get_tr(NULL);
6218         if (ret)
6219                 return ret;
6220
6221         return seq_open(filp, &tracing_eval_map_seq_ops);
6222 }
6223
6224 static const struct file_operations tracing_eval_map_fops = {
6225         .open           = tracing_eval_map_open,
6226         .read           = seq_read,
6227         .llseek         = seq_lseek,
6228         .release        = seq_release,
6229 };
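
/*
 * For reference, eval_map_show() above emits one "NAME VALUE (system)" line
 * per mapping, so reading the "eval_map" file produces output of the form
 * below. The names and values shown here are purely illustrative and depend
 * on which events define eval maps on a given kernel.
 *
 *	HI_SOFTIRQ 0 (irq)
 *	TIMER_SOFTIRQ 1 (irq)
 */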
6230
6231 static inline union trace_eval_map_item *
6232 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6233 {
6234         /* Return tail of array given the head */
6235         return ptr + ptr->head.length + 1;
6236 }
6237
6238 static void
6239 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6240                            int len)
6241 {
6242         struct trace_eval_map **stop;
6243         struct trace_eval_map **map;
6244         union trace_eval_map_item *map_array;
6245         union trace_eval_map_item *ptr;
6246
6247         stop = start + len;
6248
6249         /*
6250          * The trace_eval_maps contains the map plus a head and tail item,
6251          * where the head holds the module and length of array, and the
6252          * tail holds a pointer to the next list.
6253          */
6254         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6255         if (!map_array) {
6256                 pr_warn("Unable to allocate trace eval mapping\n");
6257                 return;
6258         }
6259
6260         mutex_lock(&trace_eval_mutex);
6261
6262         if (!trace_eval_maps)
6263                 trace_eval_maps = map_array;
6264         else {
6265                 ptr = trace_eval_maps;
6266                 for (;;) {
6267                         ptr = trace_eval_jmp_to_tail(ptr);
6268                         if (!ptr->tail.next)
6269                                 break;
6270                         ptr = ptr->tail.next;
6271
6272                 }
6273                 ptr->tail.next = map_array;
6274         }
6275         map_array->head.mod = mod;
6276         map_array->head.length = len;
6277         map_array++;
6278
6279         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6280                 map_array->map = **map;
6281                 map_array++;
6282         }
6283         memset(map_array, 0, sizeof(*map_array));
6284
6285         mutex_unlock(&trace_eval_mutex);
6286 }
6287
6288 static void trace_create_eval_file(struct dentry *d_tracer)
6289 {
6290         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6291                           NULL, &tracing_eval_map_fops);
6292 }
6293
6294 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6295 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6296 static inline void trace_insert_eval_map_file(struct module *mod,
6297                               struct trace_eval_map **start, int len) { }
6298 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6299
6300 static void trace_insert_eval_map(struct module *mod,
6301                                   struct trace_eval_map **start, int len)
6302 {
6303         struct trace_eval_map **map;
6304
6305         if (len <= 0)
6306                 return;
6307
6308         map = start;
6309
6310         trace_event_eval_update(map, len);
6311
6312         trace_insert_eval_map_file(mod, start, len);
6313 }
6314
6315 static ssize_t
6316 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6317                        size_t cnt, loff_t *ppos)
6318 {
6319         struct trace_array *tr = filp->private_data;
6320         char buf[MAX_TRACER_SIZE+2];
6321         int r;
6322
6323         mutex_lock(&trace_types_lock);
6324         r = sprintf(buf, "%s\n", tr->current_trace->name);
6325         mutex_unlock(&trace_types_lock);
6326
6327         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6328 }
6329
6330 int tracer_init(struct tracer *t, struct trace_array *tr)
6331 {
6332         tracing_reset_online_cpus(&tr->array_buffer);
6333         return t->init(tr);
6334 }
6335
6336 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6337 {
6338         int cpu;
6339
6340         for_each_tracing_cpu(cpu)
6341                 per_cpu_ptr(buf->data, cpu)->entries = val;
6342 }
6343
6344 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6345 {
6346         if (cpu == RING_BUFFER_ALL_CPUS) {
6347                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6348         } else {
6349                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6350         }
6351 }
6352
6353 #ifdef CONFIG_TRACER_MAX_TRACE
6354 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6355 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6356                                         struct array_buffer *size_buf, int cpu_id)
6357 {
6358         int cpu, ret = 0;
6359
6360         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6361                 for_each_tracing_cpu(cpu) {
6362                         ret = ring_buffer_resize(trace_buf->buffer,
6363                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6364                         if (ret < 0)
6365                                 break;
6366                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6367                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6368                 }
6369         } else {
6370                 ret = ring_buffer_resize(trace_buf->buffer,
6371                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6372                 if (ret == 0)
6373                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6374                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6375         }
6376
6377         return ret;
6378 }
6379 #endif /* CONFIG_TRACER_MAX_TRACE */
6380
6381 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6382                                         unsigned long size, int cpu)
6383 {
6384         int ret;
6385
6386         /*
6387          * If the kernel or the user changes the size of the ring buffer,
6388          * we use the size that was given, and we can forget about
6389          * expanding it later.
6390          */
6391         ring_buffer_expanded = true;
6392
6393         /* May be called before buffers are initialized */
6394         if (!tr->array_buffer.buffer)
6395                 return 0;
6396
6397         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6398         if (ret < 0)
6399                 return ret;
6400
6401 #ifdef CONFIG_TRACER_MAX_TRACE
6402         if (!tr->current_trace->use_max_tr)
6403                 goto out;
6404
6405         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6406         if (ret < 0) {
6407                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6408                                                      &tr->array_buffer, cpu);
6409                 if (r < 0) {
6410                         /*
6411                          * AARGH! We are left with a max buffer of a
6412                          * different size!
6413                          * The max buffer is our "snapshot" buffer.
6414                          * When a tracer needs a snapshot (one of the
6415                          * latency tracers), it swaps the max buffer
6416                          * with the saved snapshot. We succeeded in
6417                          * updating the size of the main buffer, but
6418                          * failed to update the size of the max buffer.
6419                          * Then, when we tried to reset the main buffer
6420                          * to its original size, we failed there too.
6421                          * This is very unlikely to happen, but if it
6422                          * does, warn and disable all tracing.
6423                          */
6424                         WARN_ON(1);
6425                         tracing_disabled = 1;
6426                 }
6427                 return ret;
6428         }
6429
6430         update_buffer_entries(&tr->max_buffer, cpu);
6431
6432  out:
6433 #endif /* CONFIG_TRACER_MAX_TRACE */
6434
6435         update_buffer_entries(&tr->array_buffer, cpu);
6436
6437         return ret;
6438 }
6439
6440 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6441                                   unsigned long size, int cpu_id)
6442 {
6443         int ret;
6444
6445         mutex_lock(&trace_types_lock);
6446
6447         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6448                 /* make sure this cpu is enabled in the mask */
6449                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6450                         ret = -EINVAL;
6451                         goto out;
6452                 }
6453         }
6454
6455         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6456         if (ret < 0)
6457                 ret = -ENOMEM;
6458
6459 out:
6460         mutex_unlock(&trace_types_lock);
6461
6462         return ret;
6463 }
6464
6465
6466 /**
6467  * tracing_update_buffers - used by tracing facility to expand ring buffers
6468  *
6469  * To save memory on systems where tracing is configured in but never
6470  * used, the ring buffers are initially set to a minimum size. Once a
6471  * user starts to use the tracing facility, they need to grow to their
6472  * default size.
6473  *
6474  * This function is to be called when a tracer is about to be used.
6475  */
6476 int tracing_update_buffers(void)
6477 {
6478         int ret = 0;
6479
6480         mutex_lock(&trace_types_lock);
6481         if (!ring_buffer_expanded)
6482                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6483                                                 RING_BUFFER_ALL_CPUS);
6484         mutex_unlock(&trace_types_lock);
6485
6486         return ret;
6487 }
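
/*
 * Typical in-kernel usage, mirroring how callers later in this file (such as
 * the snapshot write handler) use it: a handler that is about to enable
 * tracing first makes sure the ring buffers have been expanded to their
 * default size.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */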
6488
6489 struct trace_option_dentry;
6490
6491 static void
6492 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6493
6494 /*
6495  * Used to clear out the tracer before deletion of an instance.
6496  * Must have trace_types_lock held.
6497  */
6498 static void tracing_set_nop(struct trace_array *tr)
6499 {
6500         if (tr->current_trace == &nop_trace)
6501                 return;
6502
6503         tr->current_trace->enabled--;
6504
6505         if (tr->current_trace->reset)
6506                 tr->current_trace->reset(tr);
6507
6508         tr->current_trace = &nop_trace;
6509 }
6510
6511 static bool tracer_options_updated;
6512
6513 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6514 {
6515         /* Only enable if the directory has been created already. */
6516         if (!tr->dir)
6517                 return;
6518
6519         /* Only create trace option files after update_tracer_options() has finished */
6520         if (!tracer_options_updated)
6521                 return;
6522
6523         create_trace_option_files(tr, t);
6524 }
6525
6526 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6527 {
6528         struct tracer *t;
6529 #ifdef CONFIG_TRACER_MAX_TRACE
6530         bool had_max_tr;
6531 #endif
6532         int ret = 0;
6533
6534         mutex_lock(&trace_types_lock);
6535
6536         if (!ring_buffer_expanded) {
6537                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6538                                                 RING_BUFFER_ALL_CPUS);
6539                 if (ret < 0)
6540                         goto out;
6541                 ret = 0;
6542         }
6543
6544         for (t = trace_types; t; t = t->next) {
6545                 if (strcmp(t->name, buf) == 0)
6546                         break;
6547         }
6548         if (!t) {
6549                 ret = -EINVAL;
6550                 goto out;
6551         }
6552         if (t == tr->current_trace)
6553                 goto out;
6554
6555 #ifdef CONFIG_TRACER_SNAPSHOT
6556         if (t->use_max_tr) {
6557                 local_irq_disable();
6558                 arch_spin_lock(&tr->max_lock);
6559                 if (tr->cond_snapshot)
6560                         ret = -EBUSY;
6561                 arch_spin_unlock(&tr->max_lock);
6562                 local_irq_enable();
6563                 if (ret)
6564                         goto out;
6565         }
6566 #endif
6567         /* Some tracers won't work on kernel command line */
6568         if (system_state < SYSTEM_RUNNING && t->noboot) {
6569                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6570                         t->name);
6571                 goto out;
6572         }
6573
6574         /* Some tracers are only allowed for the top level buffer */
6575         if (!trace_ok_for_array(t, tr)) {
6576                 ret = -EINVAL;
6577                 goto out;
6578         }
6579
6580         /* If trace pipe files are being read, we can't change the tracer */
6581         if (tr->trace_ref) {
6582                 ret = -EBUSY;
6583                 goto out;
6584         }
6585
6586         trace_branch_disable();
6587
6588         tr->current_trace->enabled--;
6589
6590         if (tr->current_trace->reset)
6591                 tr->current_trace->reset(tr);
6592
6593 #ifdef CONFIG_TRACER_MAX_TRACE
6594         had_max_tr = tr->current_trace->use_max_tr;
6595
6596         /* Current trace needs to be nop_trace before synchronize_rcu */
6597         tr->current_trace = &nop_trace;
6598
6599         if (had_max_tr && !t->use_max_tr) {
6600                 /*
6601                  * We need to make sure that update_max_tr() sees that
6602                  * current_trace changed to nop_trace to keep it from
6603                  * swapping the buffers after we resize it.
6604                  * update_max_tr() is called with interrupts disabled,
6605                  * so a synchronize_rcu() is sufficient.
6606                  */
6607                 synchronize_rcu();
6608                 free_snapshot(tr);
6609         }
6610
6611         if (t->use_max_tr && !tr->allocated_snapshot) {
6612                 ret = tracing_alloc_snapshot_instance(tr);
6613                 if (ret < 0)
6614                         goto out;
6615         }
6616 #else
6617         tr->current_trace = &nop_trace;
6618 #endif
6619
6620         if (t->init) {
6621                 ret = tracer_init(t, tr);
6622                 if (ret)
6623                         goto out;
6624         }
6625
6626         tr->current_trace = t;
6627         tr->current_trace->enabled++;
6628         trace_branch_enable(tr);
6629  out:
6630         mutex_unlock(&trace_types_lock);
6631
6632         return ret;
6633 }
6634
6635 static ssize_t
6636 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6637                         size_t cnt, loff_t *ppos)
6638 {
6639         struct trace_array *tr = filp->private_data;
6640         char buf[MAX_TRACER_SIZE+1];
6641         char *name;
6642         size_t ret;
6643         int err;
6644
6645         ret = cnt;
6646
6647         if (cnt > MAX_TRACER_SIZE)
6648                 cnt = MAX_TRACER_SIZE;
6649
6650         if (copy_from_user(buf, ubuf, cnt))
6651                 return -EFAULT;
6652
6653         buf[cnt] = 0;
6654
6655         name = strim(buf);
6656
6657         err = tracing_set_tracer(tr, name);
6658         if (err)
6659                 return err;
6660
6661         *ppos += ret;
6662
6663         return ret;
6664 }
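
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): switching tracers amounts to writing a tracer name into the
 * "current_tracer" file, which ends up in tracing_set_trace_write() above.
 * The "nop" tracer is always registered; any name listed in the
 * "available_tracers" file can be used instead.
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "nop", 3);
 *		close(fd);
 *	}
 */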
6665
6666 static ssize_t
6667 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6668                    size_t cnt, loff_t *ppos)
6669 {
6670         char buf[64];
6671         int r;
6672
6673         r = snprintf(buf, sizeof(buf), "%ld\n",
6674                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6675         if (r > sizeof(buf))
6676                 r = sizeof(buf);
6677         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6678 }
6679
6680 static ssize_t
6681 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6682                     size_t cnt, loff_t *ppos)
6683 {
6684         unsigned long val;
6685         int ret;
6686
6687         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6688         if (ret)
6689                 return ret;
6690
6691         *ptr = val * 1000;
6692
6693         return cnt;
6694 }
6695
6696 static ssize_t
6697 tracing_thresh_read(struct file *filp, char __user *ubuf,
6698                     size_t cnt, loff_t *ppos)
6699 {
6700         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6701 }
6702
6703 static ssize_t
6704 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6705                      size_t cnt, loff_t *ppos)
6706 {
6707         struct trace_array *tr = filp->private_data;
6708         int ret;
6709
6710         mutex_lock(&trace_types_lock);
6711         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6712         if (ret < 0)
6713                 goto out;
6714
6715         if (tr->current_trace->update_thresh) {
6716                 ret = tr->current_trace->update_thresh(tr);
6717                 if (ret < 0)
6718                         goto out;
6719         }
6720
6721         ret = cnt;
6722 out:
6723         mutex_unlock(&trace_types_lock);
6724
6725         return ret;
6726 }
6727
6728 #ifdef CONFIG_TRACER_MAX_TRACE
6729
6730 static ssize_t
6731 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6732                      size_t cnt, loff_t *ppos)
6733 {
6734         struct trace_array *tr = filp->private_data;
6735
6736         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6737 }
6738
6739 static ssize_t
6740 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6741                       size_t cnt, loff_t *ppos)
6742 {
6743         struct trace_array *tr = filp->private_data;
6744
6745         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6746 }
6747
6748 #endif
6749
6750 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6751 {
6752         if (cpu == RING_BUFFER_ALL_CPUS) {
6753                 if (cpumask_empty(tr->pipe_cpumask)) {
6754                         cpumask_setall(tr->pipe_cpumask);
6755                         return 0;
6756                 }
6757         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6758                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6759                 return 0;
6760         }
6761         return -EBUSY;
6762 }
6763
6764 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6765 {
6766         if (cpu == RING_BUFFER_ALL_CPUS) {
6767                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6768                 cpumask_clear(tr->pipe_cpumask);
6769         } else {
6770                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6771                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6772         }
6773 }
6774
6775 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6776 {
6777         struct trace_array *tr = inode->i_private;
6778         struct trace_iterator *iter;
6779         int cpu;
6780         int ret;
6781
6782         ret = tracing_check_open_get_tr(tr);
6783         if (ret)
6784                 return ret;
6785
6786         mutex_lock(&trace_types_lock);
6787         cpu = tracing_get_cpu(inode);
6788         ret = open_pipe_on_cpu(tr, cpu);
6789         if (ret)
6790                 goto fail_pipe_on_cpu;
6791
6792         /* create a buffer to store the information to pass to userspace */
6793         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6794         if (!iter) {
6795                 ret = -ENOMEM;
6796                 goto fail_alloc_iter;
6797         }
6798
6799         trace_seq_init(&iter->seq);
6800         iter->trace = tr->current_trace;
6801
6802         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6803                 ret = -ENOMEM;
6804                 goto fail;
6805         }
6806
6807         /* trace pipe does not show start of buffer */
6808         cpumask_setall(iter->started);
6809
6810         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6811                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6812
6813         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6814         if (trace_clocks[tr->clock_id].in_ns)
6815                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6816
6817         iter->tr = tr;
6818         iter->array_buffer = &tr->array_buffer;
6819         iter->cpu_file = cpu;
6820         mutex_init(&iter->mutex);
6821         filp->private_data = iter;
6822
6823         if (iter->trace->pipe_open)
6824                 iter->trace->pipe_open(iter);
6825
6826         nonseekable_open(inode, filp);
6827
6828         tr->trace_ref++;
6829
6830         mutex_unlock(&trace_types_lock);
6831         return ret;
6832
6833 fail:
6834         kfree(iter);
6835 fail_alloc_iter:
6836         close_pipe_on_cpu(tr, cpu);
6837 fail_pipe_on_cpu:
6838         __trace_array_put(tr);
6839         mutex_unlock(&trace_types_lock);
6840         return ret;
6841 }
6842
6843 static int tracing_release_pipe(struct inode *inode, struct file *file)
6844 {
6845         struct trace_iterator *iter = file->private_data;
6846         struct trace_array *tr = inode->i_private;
6847
6848         mutex_lock(&trace_types_lock);
6849
6850         tr->trace_ref--;
6851
6852         if (iter->trace->pipe_close)
6853                 iter->trace->pipe_close(iter);
6854         close_pipe_on_cpu(tr, iter->cpu_file);
6855         mutex_unlock(&trace_types_lock);
6856
6857         free_trace_iter_content(iter);
6858         kfree(iter);
6859
6860         trace_array_put(tr);
6861
6862         return 0;
6863 }
6864
6865 static __poll_t
6866 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6867 {
6868         struct trace_array *tr = iter->tr;
6869
6870         /* Iterators are static; they should be either filled or empty */
6871         if (trace_buffer_iter(iter, iter->cpu_file))
6872                 return EPOLLIN | EPOLLRDNORM;
6873
6874         if (tr->trace_flags & TRACE_ITER_BLOCK)
6875                 /*
6876                  * Always select as readable when in blocking mode
6877                  */
6878                 return EPOLLIN | EPOLLRDNORM;
6879         else
6880                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6881                                              filp, poll_table, iter->tr->buffer_percent);
6882 }
6883
6884 static __poll_t
6885 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6886 {
6887         struct trace_iterator *iter = filp->private_data;
6888
6889         return trace_poll(iter, filp, poll_table);
6890 }
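
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h>, <poll.h> and
 * <unistd.h> included): a reader can poll() the "trace_pipe" file, which is
 * serviced by tracing_poll_pipe() above, and only read once data is
 * available.
 *
 *	char buf[4096];
 *	struct pollfd pfd = {
 *		.fd	= open("/sys/kernel/tracing/trace_pipe", O_RDONLY),
 *		.events	= POLLIN,
 *	};
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		read(pfd.fd, buf, sizeof(buf));
 */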
6891
6892 /* Must be called with iter->mutex held. */
6893 static int tracing_wait_pipe(struct file *filp)
6894 {
6895         struct trace_iterator *iter = filp->private_data;
6896         int ret;
6897
6898         while (trace_empty(iter)) {
6899
6900                 if ((filp->f_flags & O_NONBLOCK)) {
6901                         return -EAGAIN;
6902                 }
6903
6904                 /*
6905                  * We block until we have read something and tracing is
6906                  * disabled. We still block if tracing is disabled but we
6907                  * have never read anything. This allows a user to cat this
6908                  * file, and then enable tracing. But after we have read
6909                  * something, we give an EOF when tracing is again disabled.
6910                  *
6911                  * iter->pos will be 0 if we haven't read anything.
6912                  */
6913                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6914                         break;
6915
6916                 mutex_unlock(&iter->mutex);
6917
6918                 ret = wait_on_pipe(iter, 0);
6919
6920                 mutex_lock(&iter->mutex);
6921
6922                 if (ret)
6923                         return ret;
6924         }
6925
6926         return 1;
6927 }
6928
6929 /*
6930  * Consumer reader.
6931  */
6932 static ssize_t
6933 tracing_read_pipe(struct file *filp, char __user *ubuf,
6934                   size_t cnt, loff_t *ppos)
6935 {
6936         struct trace_iterator *iter = filp->private_data;
6937         ssize_t sret;
6938
6939         /*
6940          * Avoid more than one consumer on a single file descriptor.
6941          * This is just a matter of trace coherency; the ring buffer
6942          * itself is protected.
6943          */
6944         mutex_lock(&iter->mutex);
6945
6946         /* return any leftover data */
6947         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6948         if (sret != -EBUSY)
6949                 goto out;
6950
6951         trace_seq_init(&iter->seq);
6952
6953         if (iter->trace->read) {
6954                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6955                 if (sret)
6956                         goto out;
6957         }
6958
6959 waitagain:
6960         sret = tracing_wait_pipe(filp);
6961         if (sret <= 0)
6962                 goto out;
6963
6964         /* stop when tracing is finished */
6965         if (trace_empty(iter)) {
6966                 sret = 0;
6967                 goto out;
6968         }
6969
6970         if (cnt >= PAGE_SIZE)
6971                 cnt = PAGE_SIZE - 1;
6972
6973         /* reset all but tr, trace, and overruns */
6974         trace_iterator_reset(iter);
6975         cpumask_clear(iter->started);
6976         trace_seq_init(&iter->seq);
6977
6978         trace_event_read_lock();
6979         trace_access_lock(iter->cpu_file);
6980         while (trace_find_next_entry_inc(iter) != NULL) {
6981                 enum print_line_t ret;
6982                 int save_len = iter->seq.seq.len;
6983
6984                 ret = print_trace_line(iter);
6985                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6986                         /*
6987                          * If one print_trace_line() fills the entire trace_seq in one
6988                          * shot, trace_seq_to_user() will return -EBUSY because
6989                          * save_len == 0. In this case, consume the event; otherwise the
6990                          * loop will peek at it again next time, resulting in an infinite loop.
6991                          */
6992                         if (save_len == 0) {
6993                                 iter->seq.full = 0;
6994                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6995                                 trace_consume(iter);
6996                                 break;
6997                         }
6998
6999                         /* In other cases, don't print partial lines */
7000                         iter->seq.seq.len = save_len;
7001                         break;
7002                 }
7003                 if (ret != TRACE_TYPE_NO_CONSUME)
7004                         trace_consume(iter);
7005
7006                 if (trace_seq_used(&iter->seq) >= cnt)
7007                         break;
7008
7009                 /*
7010                  * The full flag being set means we reached the trace_seq buffer size
7011                  * and should have left via the partial-output condition above.
7012                  * One of the trace_seq_* functions is not being used properly.
7013                  */
7014                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7015                           iter->ent->type);
7016         }
7017         trace_access_unlock(iter->cpu_file);
7018         trace_event_read_unlock();
7019
7020         /* Now copy what we have to the user */
7021         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7022         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
7023                 trace_seq_init(&iter->seq);
7024
7025         /*
7026          * If there was nothing to send to user, in spite of consuming trace
7027          * entries, go back to wait for more entries.
7028          */
7029         if (sret == -EBUSY)
7030                 goto waitagain;
7031
7032 out:
7033         mutex_unlock(&iter->mutex);
7034
7035         return sret;
7036 }
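
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h>, <stdio.h> and
 * <unistd.h> included): "trace_pipe" is a consuming reader, so a loop like
 * the one below streams events as they arrive and removes them from the
 * ring buffer via tracing_read_pipe() above.
 *
 *	char buf[4096];
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		fwrite(buf, 1, n, stdout);
 */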
7037
7038 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7039                                      unsigned int idx)
7040 {
7041         __free_page(spd->pages[idx]);
7042 }
7043
7044 static size_t
7045 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7046 {
7047         size_t count;
7048         int save_len;
7049         int ret;
7050
7051         /* Seq buffer is page-sized, exactly what we need. */
7052         for (;;) {
7053                 save_len = iter->seq.seq.len;
7054                 ret = print_trace_line(iter);
7055
7056                 if (trace_seq_has_overflowed(&iter->seq)) {
7057                         iter->seq.seq.len = save_len;
7058                         break;
7059                 }
7060
7061                 /*
7062                  * This should not be hit, because a partial line should only
7063                  * be returned if iter->seq overflowed. But check it
7064                  * anyway to be safe.
7065                  */
7066                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7067                         iter->seq.seq.len = save_len;
7068                         break;
7069                 }
7070
7071                 count = trace_seq_used(&iter->seq) - save_len;
7072                 if (rem < count) {
7073                         rem = 0;
7074                         iter->seq.seq.len = save_len;
7075                         break;
7076                 }
7077
7078                 if (ret != TRACE_TYPE_NO_CONSUME)
7079                         trace_consume(iter);
7080                 rem -= count;
7081                 if (!trace_find_next_entry_inc(iter))   {
7082                         rem = 0;
7083                         iter->ent = NULL;
7084                         break;
7085                 }
7086         }
7087
7088         return rem;
7089 }
7090
7091 static ssize_t tracing_splice_read_pipe(struct file *filp,
7092                                         loff_t *ppos,
7093                                         struct pipe_inode_info *pipe,
7094                                         size_t len,
7095                                         unsigned int flags)
7096 {
7097         struct page *pages_def[PIPE_DEF_BUFFERS];
7098         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7099         struct trace_iterator *iter = filp->private_data;
7100         struct splice_pipe_desc spd = {
7101                 .pages          = pages_def,
7102                 .partial        = partial_def,
7103                 .nr_pages       = 0, /* This gets updated below. */
7104                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7105                 .ops            = &default_pipe_buf_ops,
7106                 .spd_release    = tracing_spd_release_pipe,
7107         };
7108         ssize_t ret;
7109         size_t rem;
7110         unsigned int i;
7111
7112         if (splice_grow_spd(pipe, &spd))
7113                 return -ENOMEM;
7114
7115         mutex_lock(&iter->mutex);
7116
7117         if (iter->trace->splice_read) {
7118                 ret = iter->trace->splice_read(iter, filp,
7119                                                ppos, pipe, len, flags);
7120                 if (ret)
7121                         goto out_err;
7122         }
7123
7124         ret = tracing_wait_pipe(filp);
7125         if (ret <= 0)
7126                 goto out_err;
7127
7128         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7129                 ret = -EFAULT;
7130                 goto out_err;
7131         }
7132
7133         trace_event_read_lock();
7134         trace_access_lock(iter->cpu_file);
7135
7136         /* Fill as many pages as possible. */
7137         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7138                 spd.pages[i] = alloc_page(GFP_KERNEL);
7139                 if (!spd.pages[i])
7140                         break;
7141
7142                 rem = tracing_fill_pipe_page(rem, iter);
7143
7144                 /* Copy the data into the page, so we can start over. */
7145                 ret = trace_seq_to_buffer(&iter->seq,
7146                                           page_address(spd.pages[i]),
7147                                           trace_seq_used(&iter->seq));
7148                 if (ret < 0) {
7149                         __free_page(spd.pages[i]);
7150                         break;
7151                 }
7152                 spd.partial[i].offset = 0;
7153                 spd.partial[i].len = trace_seq_used(&iter->seq);
7154
7155                 trace_seq_init(&iter->seq);
7156         }
7157
7158         trace_access_unlock(iter->cpu_file);
7159         trace_event_read_unlock();
7160         mutex_unlock(&iter->mutex);
7161
7162         spd.nr_pages = i;
7163
7164         if (i)
7165                 ret = splice_to_pipe(pipe, &spd);
7166         else
7167                 ret = 0;
7168 out:
7169         splice_shrink_spd(&spd);
7170         return ret;
7171
7172 out_err:
7173         mutex_unlock(&iter->mutex);
7174         goto out;
7175 }
7176
7177 static ssize_t
7178 tracing_entries_read(struct file *filp, char __user *ubuf,
7179                      size_t cnt, loff_t *ppos)
7180 {
7181         struct inode *inode = file_inode(filp);
7182         struct trace_array *tr = inode->i_private;
7183         int cpu = tracing_get_cpu(inode);
7184         char buf[64];
7185         int r = 0;
7186         ssize_t ret;
7187
7188         mutex_lock(&trace_types_lock);
7189
7190         if (cpu == RING_BUFFER_ALL_CPUS) {
7191                 int cpu, buf_size_same;
7192                 unsigned long size;
7193
7194                 size = 0;
7195                 buf_size_same = 1;
7196                 /* check if all cpu sizes are same */
7197                 for_each_tracing_cpu(cpu) {
7198                         /* fill in the size from first enabled cpu */
7199                         if (size == 0)
7200                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7201                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7202                                 buf_size_same = 0;
7203                                 break;
7204                         }
7205                 }
7206
7207                 if (buf_size_same) {
7208                         if (!ring_buffer_expanded)
7209                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7210                                             size >> 10,
7211                                             trace_buf_size >> 10);
7212                         else
7213                                 r = sprintf(buf, "%lu\n", size >> 10);
7214                 } else
7215                         r = sprintf(buf, "X\n");
7216         } else
7217                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7218
7219         mutex_unlock(&trace_types_lock);
7220
7221         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7222         return ret;
7223 }
7224
7225 static ssize_t
7226 tracing_entries_write(struct file *filp, const char __user *ubuf,
7227                       size_t cnt, loff_t *ppos)
7228 {
7229         struct inode *inode = file_inode(filp);
7230         struct trace_array *tr = inode->i_private;
7231         unsigned long val;
7232         int ret;
7233
7234         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7235         if (ret)
7236                 return ret;
7237
7238         /* must have at least 1 entry */
7239         if (!val)
7240                 return -EINVAL;
7241
7242         /* value is in KB */
7243         val <<= 10;
7244         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7245         if (ret < 0)
7246                 return ret;
7247
7248         *ppos += cnt;
7249
7250         return cnt;
7251 }
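
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): the value written here is in kilobytes and applies per CPU, so
 * the snippet below asks for roughly 4 MB of ring buffer on every CPU. The
 * same write through per_cpu/cpuN/buffer_size_kb resizes a single CPU.
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096", 4);
 *		close(fd);
 *	}
 */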
7252
7253 static ssize_t
7254 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7255                                 size_t cnt, loff_t *ppos)
7256 {
7257         struct trace_array *tr = filp->private_data;
7258         char buf[64];
7259         int r, cpu;
7260         unsigned long size = 0, expanded_size = 0;
7261
7262         mutex_lock(&trace_types_lock);
7263         for_each_tracing_cpu(cpu) {
7264                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7265                 if (!ring_buffer_expanded)
7266                         expanded_size += trace_buf_size >> 10;
7267         }
7268         if (ring_buffer_expanded)
7269                 r = sprintf(buf, "%lu\n", size);
7270         else
7271                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7272         mutex_unlock(&trace_types_lock);
7273
7274         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7275 }
7276
7277 static ssize_t
7278 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7279                           size_t cnt, loff_t *ppos)
7280 {
7281         /*
7282          * There is no need to read what the user has written; this function
7283          * exists just so that there is no error when "echo" is used.
7284          */
7285
7286         *ppos += cnt;
7287
7288         return cnt;
7289 }
7290
7291 static int
7292 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7293 {
7294         struct trace_array *tr = inode->i_private;
7295
7296         /* disable tracing ? */
7297         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7298                 tracer_tracing_off(tr);
7299         /* resize the ring buffer to 0 */
7300         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7301
7302         trace_array_put(tr);
7303
7304         return 0;
7305 }
7306
7307 static ssize_t
7308 tracing_mark_write(struct file *filp, const char __user *ubuf,
7309                                         size_t cnt, loff_t *fpos)
7310 {
7311         struct trace_array *tr = filp->private_data;
7312         struct ring_buffer_event *event;
7313         enum event_trigger_type tt = ETT_NONE;
7314         struct trace_buffer *buffer;
7315         struct print_entry *entry;
7316         ssize_t written;
7317         int size;
7318         int len;
7319
7320 /* Used in tracing_mark_raw_write() as well */
7321 #define FAULTED_STR "<faulted>"
7322 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7323
7324         if (tracing_disabled)
7325                 return -EINVAL;
7326
7327         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7328                 return -EINVAL;
7329
7330         if (cnt > TRACE_BUF_SIZE)
7331                 cnt = TRACE_BUF_SIZE;
7332
7333         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7334
7335         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7336
7337         /* If less than "<faulted>", then make sure we can still add that */
7338         if (cnt < FAULTED_SIZE)
7339                 size += FAULTED_SIZE - cnt;
7340
7341         buffer = tr->array_buffer.buffer;
7342         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7343                                             tracing_gen_ctx());
7344         if (unlikely(!event))
7345                 /* Ring buffer disabled, return as if not open for write */
7346                 return -EBADF;
7347
7348         entry = ring_buffer_event_data(event);
7349         entry->ip = _THIS_IP_;
7350
7351         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7352         if (len) {
7353                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7354                 cnt = FAULTED_SIZE;
7355                 written = -EFAULT;
7356         } else
7357                 written = cnt;
7358
7359         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7360                 /* do not add \n before testing triggers, but add \0 */
7361                 entry->buf[cnt] = '\0';
7362                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7363         }
7364
7365         if (entry->buf[cnt - 1] != '\n') {
7366                 entry->buf[cnt] = '\n';
7367                 entry->buf[cnt + 1] = '\0';
7368         } else
7369                 entry->buf[cnt] = '\0';
7370
7371         if (static_branch_unlikely(&trace_marker_exports_enabled))
7372                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7373         __buffer_unlock_commit(buffer, event);
7374
7375         if (tt)
7376                 event_triggers_post_call(tr->trace_marker_file, tt);
7377
7378         return written;
7379 }
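
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): writes to the "trace_marker" file land in the ring buffer via
 * tracing_mark_write() above, which is a cheap way to correlate application
 * events with kernel traces.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "frame start\n", 12);
 *		close(fd);
 *	}
 */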
7380
7381 /* Limit it for now to 3K (including tag) */
7382 #define RAW_DATA_MAX_SIZE (1024*3)
7383
7384 static ssize_t
7385 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7386                                         size_t cnt, loff_t *fpos)
7387 {
7388         struct trace_array *tr = filp->private_data;
7389         struct ring_buffer_event *event;
7390         struct trace_buffer *buffer;
7391         struct raw_data_entry *entry;
7392         ssize_t written;
7393         int size;
7394         int len;
7395
7396 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7397
7398         if (tracing_disabled)
7399                 return -EINVAL;
7400
7401         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7402                 return -EINVAL;
7403
7404         /* The marker must at least have a tag id */
7405         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7406                 return -EINVAL;
7407
7408         if (cnt > TRACE_BUF_SIZE)
7409                 cnt = TRACE_BUF_SIZE;
7410
7411         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7412
7413         size = sizeof(*entry) + cnt;
7414         if (cnt < FAULT_SIZE_ID)
7415                 size += FAULT_SIZE_ID - cnt;
7416
7417         buffer = tr->array_buffer.buffer;
7418         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7419                                             tracing_gen_ctx());
7420         if (!event)
7421                 /* Ring buffer disabled, return as if not open for write */
7422                 return -EBADF;
7423
7424         entry = ring_buffer_event_data(event);
7425
7426         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7427         if (len) {
7428                 entry->id = -1;
7429                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7430                 written = -EFAULT;
7431         } else
7432                 written = cnt;
7433
7434         __buffer_unlock_commit(buffer, event);
7435
7436         return written;
7437 }
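
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): as enforced by the size check above, writes to
 * "trace_marker_raw" must start with an integer tag id followed by the
 * binary payload. The tag value and payload below are made up.
 *
 *	struct {
 *		int	id;
 *		char	payload[8];
 *	} raw = { .id = 42, .payload = "rawdata" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &raw, sizeof(raw));
 *		close(fd);
 *	}
 */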
7438
7439 static int tracing_clock_show(struct seq_file *m, void *v)
7440 {
7441         struct trace_array *tr = m->private;
7442         int i;
7443
7444         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7445                 seq_printf(m,
7446                         "%s%s%s%s", i ? " " : "",
7447                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7448                         i == tr->clock_id ? "]" : "");
7449         seq_putc(m, '\n');
7450
7451         return 0;
7452 }
7453
7454 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7455 {
7456         int i;
7457
7458         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7459                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7460                         break;
7461         }
7462         if (i == ARRAY_SIZE(trace_clocks))
7463                 return -EINVAL;
7464
7465         mutex_lock(&trace_types_lock);
7466
7467         tr->clock_id = i;
7468
7469         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7470
7471         /*
7472          * New clock may not be consistent with the previous clock.
7473          * Reset the buffer so that it doesn't have incomparable timestamps.
7474          */
7475         tracing_reset_online_cpus(&tr->array_buffer);
7476
7477 #ifdef CONFIG_TRACER_MAX_TRACE
7478         if (tr->max_buffer.buffer)
7479                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7480         tracing_reset_online_cpus(&tr->max_buffer);
7481 #endif
7482
7483         mutex_unlock(&trace_types_lock);
7484
7485         return 0;
7486 }
7487
7488 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7489                                    size_t cnt, loff_t *fpos)
7490 {
7491         struct seq_file *m = filp->private_data;
7492         struct trace_array *tr = m->private;
7493         char buf[64];
7494         const char *clockstr;
7495         int ret;
7496
7497         if (cnt >= sizeof(buf))
7498                 return -EINVAL;
7499
7500         if (copy_from_user(buf, ubuf, cnt))
7501                 return -EFAULT;
7502
7503         buf[cnt] = 0;
7504
7505         clockstr = strstrip(buf);
7506
7507         ret = tracing_set_clock(tr, clockstr);
7508         if (ret)
7509                 return ret;
7510
7511         *fpos += cnt;
7512
7513         return cnt;
7514 }
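
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): the trace clock is selected by writing one of the names shown
 * when reading the "trace_clock" file (for example "local" or "global").
 * Note that tracing_set_clock() above resets the buffer on a clock change.
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "global", 6);
 *		close(fd);
 *	}
 */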
7515
7516 static int tracing_clock_open(struct inode *inode, struct file *file)
7517 {
7518         struct trace_array *tr = inode->i_private;
7519         int ret;
7520
7521         ret = tracing_check_open_get_tr(tr);
7522         if (ret)
7523                 return ret;
7524
7525         ret = single_open(file, tracing_clock_show, inode->i_private);
7526         if (ret < 0)
7527                 trace_array_put(tr);
7528
7529         return ret;
7530 }
7531
7532 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7533 {
7534         struct trace_array *tr = m->private;
7535
7536         mutex_lock(&trace_types_lock);
7537
7538         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7539                 seq_puts(m, "delta [absolute]\n");
7540         else
7541                 seq_puts(m, "[delta] absolute\n");
7542
7543         mutex_unlock(&trace_types_lock);
7544
7545         return 0;
7546 }
7547
7548 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7549 {
7550         struct trace_array *tr = inode->i_private;
7551         int ret;
7552
7553         ret = tracing_check_open_get_tr(tr);
7554         if (ret)
7555                 return ret;
7556
7557         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7558         if (ret < 0)
7559                 trace_array_put(tr);
7560
7561         return ret;
7562 }
7563
7564 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7565 {
7566         if (rbe == this_cpu_read(trace_buffered_event))
7567                 return ring_buffer_time_stamp(buffer);
7568
7569         return ring_buffer_event_time_stamp(buffer, rbe);
7570 }
7571
7572 /*
7573  * Set or disable using the per CPU trace_buffer_event when possible.
7574  */
7575 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7576 {
7577         int ret = 0;
7578
7579         mutex_lock(&trace_types_lock);
7580
7581         if (set && tr->no_filter_buffering_ref++)
7582                 goto out;
7583
7584         if (!set) {
7585                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7586                         ret = -EINVAL;
7587                         goto out;
7588                 }
7589
7590                 --tr->no_filter_buffering_ref;
7591         }
7592  out:
7593         mutex_unlock(&trace_types_lock);
7594
7595         return ret;
7596 }
7597
7598 struct ftrace_buffer_info {
7599         struct trace_iterator   iter;
7600         void                    *spare;
7601         unsigned int            spare_cpu;
7602         unsigned int            read;
7603 };
7604
7605 #ifdef CONFIG_TRACER_SNAPSHOT
7606 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7607 {
7608         struct trace_array *tr = inode->i_private;
7609         struct trace_iterator *iter;
7610         struct seq_file *m;
7611         int ret;
7612
7613         ret = tracing_check_open_get_tr(tr);
7614         if (ret)
7615                 return ret;
7616
7617         if (file->f_mode & FMODE_READ) {
7618                 iter = __tracing_open(inode, file, true);
7619                 if (IS_ERR(iter))
7620                         ret = PTR_ERR(iter);
7621         } else {
7622                 /* Writes still need the seq_file to hold the private data */
7623                 ret = -ENOMEM;
7624                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7625                 if (!m)
7626                         goto out;
7627                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7628                 if (!iter) {
7629                         kfree(m);
7630                         goto out;
7631                 }
7632                 ret = 0;
7633
7634                 iter->tr = tr;
7635                 iter->array_buffer = &tr->max_buffer;
7636                 iter->cpu_file = tracing_get_cpu(inode);
7637                 m->private = iter;
7638                 file->private_data = m;
7639         }
7640 out:
7641         if (ret < 0)
7642                 trace_array_put(tr);
7643
7644         return ret;
7645 }
7646
7647 static void tracing_swap_cpu_buffer(void *tr)
7648 {
7649         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7650 }
7651
7652 static ssize_t
7653 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7654                        loff_t *ppos)
7655 {
7656         struct seq_file *m = filp->private_data;
7657         struct trace_iterator *iter = m->private;
7658         struct trace_array *tr = iter->tr;
7659         unsigned long val;
7660         int ret;
7661
7662         ret = tracing_update_buffers();
7663         if (ret < 0)
7664                 return ret;
7665
7666         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7667         if (ret)
7668                 return ret;
7669
7670         mutex_lock(&trace_types_lock);
7671
7672         if (tr->current_trace->use_max_tr) {
7673                 ret = -EBUSY;
7674                 goto out;
7675         }
7676
7677         local_irq_disable();
7678         arch_spin_lock(&tr->max_lock);
7679         if (tr->cond_snapshot)
7680                 ret = -EBUSY;
7681         arch_spin_unlock(&tr->max_lock);
7682         local_irq_enable();
7683         if (ret)
7684                 goto out;
7685
7686         switch (val) {
7687         case 0:
7688                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7689                         ret = -EINVAL;
7690                         break;
7691                 }
7692                 if (tr->allocated_snapshot)
7693                         free_snapshot(tr);
7694                 break;
7695         case 1:
7696 /* Only allow per-cpu swap if the ring buffer supports it */
7697 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7698                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7699                         ret = -EINVAL;
7700                         break;
7701                 }
7702 #endif
7703                 if (tr->allocated_snapshot)
7704                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7705                                         &tr->array_buffer, iter->cpu_file);
7706                 else
7707                         ret = tracing_alloc_snapshot_instance(tr);
7708                 if (ret < 0)
7709                         break;
7710                 /* Now, we're going to swap */
7711                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7712                         local_irq_disable();
7713                         update_max_tr(tr, current, smp_processor_id(), NULL);
7714                         local_irq_enable();
7715                 } else {
7716                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7717                                                  (void *)tr, 1);
7718                 }
7719                 break;
7720         default:
7721                 if (tr->allocated_snapshot) {
7722                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7723                                 tracing_reset_online_cpus(&tr->max_buffer);
7724                         else
7725                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7726                 }
7727                 break;
7728         }
7729
7730         if (ret >= 0) {
7731                 *ppos += cnt;
7732                 ret = cnt;
7733         }
7734 out:
7735         mutex_unlock(&trace_types_lock);
7736         return ret;
7737 }
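
/*
 * Illustrative userspace sketch (not part of the kernel build, and assuming
 * tracefs is mounted at /sys/kernel/tracing with <fcntl.h> and <unistd.h>
 * included): as handled by the switch above, writing "0" to the "snapshot"
 * file frees the snapshot buffer, "1" allocates it if needed and takes a
 * snapshot, and any other number clears the snapshot contents.
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */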
7738
7739 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7740 {
7741         struct seq_file *m = file->private_data;
7742         int ret;
7743
7744         ret = tracing_release(inode, file);
7745
7746         if (file->f_mode & FMODE_READ)
7747                 return ret;
7748
7749         /* If write only, the seq_file is just a stub */
7750         if (m)
7751                 kfree(m->private);
7752         kfree(m);
7753
7754         return 0;
7755 }
7756
7757 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7758 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7759                                     size_t count, loff_t *ppos);
7760 static int tracing_buffers_release(struct inode *inode, struct file *file);
7761 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7762                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7763
7764 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7765 {
7766         struct ftrace_buffer_info *info;
7767         int ret;
7768
7769         /* The following checks for tracefs lockdown */
7770         ret = tracing_buffers_open(inode, filp);
7771         if (ret < 0)
7772                 return ret;
7773
7774         info = filp->private_data;
7775
7776         if (info->iter.trace->use_max_tr) {
7777                 tracing_buffers_release(inode, filp);
7778                 return -EBUSY;
7779         }
7780
7781         info->iter.snapshot = true;
7782         info->iter.array_buffer = &info->iter.tr->max_buffer;
7783
7784         return ret;
7785 }
7786
7787 #endif /* CONFIG_TRACER_SNAPSHOT */
7788
7789
7790 static const struct file_operations tracing_thresh_fops = {
7791         .open           = tracing_open_generic,
7792         .read           = tracing_thresh_read,
7793         .write          = tracing_thresh_write,
7794         .llseek         = generic_file_llseek,
7795 };
7796
7797 #ifdef CONFIG_TRACER_MAX_TRACE
7798 static const struct file_operations tracing_max_lat_fops = {
7799         .open           = tracing_open_generic_tr,
7800         .read           = tracing_max_lat_read,
7801         .write          = tracing_max_lat_write,
7802         .llseek         = generic_file_llseek,
7803         .release        = tracing_release_generic_tr,
7804 };
7805 #endif
7806
7807 static const struct file_operations set_tracer_fops = {
7808         .open           = tracing_open_generic_tr,
7809         .read           = tracing_set_trace_read,
7810         .write          = tracing_set_trace_write,
7811         .llseek         = generic_file_llseek,
7812         .release        = tracing_release_generic_tr,
7813 };
7814
7815 static const struct file_operations tracing_pipe_fops = {
7816         .open           = tracing_open_pipe,
7817         .poll           = tracing_poll_pipe,
7818         .read           = tracing_read_pipe,
7819         .splice_read    = tracing_splice_read_pipe,
7820         .release        = tracing_release_pipe,
7821         .llseek         = no_llseek,
7822 };
7823
7824 static const struct file_operations tracing_entries_fops = {
7825         .open           = tracing_open_generic_tr,
7826         .read           = tracing_entries_read,
7827         .write          = tracing_entries_write,
7828         .llseek         = generic_file_llseek,
7829         .release        = tracing_release_generic_tr,
7830 };
7831
7832 static const struct file_operations tracing_total_entries_fops = {
7833         .open           = tracing_open_generic_tr,
7834         .read           = tracing_total_entries_read,
7835         .llseek         = generic_file_llseek,
7836         .release        = tracing_release_generic_tr,
7837 };
7838
7839 static const struct file_operations tracing_free_buffer_fops = {
7840         .open           = tracing_open_generic_tr,
7841         .write          = tracing_free_buffer_write,
7842         .release        = tracing_free_buffer_release,
7843 };
7844
7845 static const struct file_operations tracing_mark_fops = {
7846         .open           = tracing_mark_open,
7847         .write          = tracing_mark_write,
7848         .release        = tracing_release_generic_tr,
7849 };
7850
7851 static const struct file_operations tracing_mark_raw_fops = {
7852         .open           = tracing_mark_open,
7853         .write          = tracing_mark_raw_write,
7854         .release        = tracing_release_generic_tr,
7855 };
7856
7857 static const struct file_operations trace_clock_fops = {
7858         .open           = tracing_clock_open,
7859         .read           = seq_read,
7860         .llseek         = seq_lseek,
7861         .release        = tracing_single_release_tr,
7862         .write          = tracing_clock_write,
7863 };
7864
7865 static const struct file_operations trace_time_stamp_mode_fops = {
7866         .open           = tracing_time_stamp_mode_open,
7867         .read           = seq_read,
7868         .llseek         = seq_lseek,
7869         .release        = tracing_single_release_tr,
7870 };
7871
7872 #ifdef CONFIG_TRACER_SNAPSHOT
7873 static const struct file_operations snapshot_fops = {
7874         .open           = tracing_snapshot_open,
7875         .read           = seq_read,
7876         .write          = tracing_snapshot_write,
7877         .llseek         = tracing_lseek,
7878         .release        = tracing_snapshot_release,
7879 };
7880
7881 static const struct file_operations snapshot_raw_fops = {
7882         .open           = snapshot_raw_open,
7883         .read           = tracing_buffers_read,
7884         .release        = tracing_buffers_release,
7885         .splice_read    = tracing_buffers_splice_read,
7886         .llseek         = no_llseek,
7887 };
7888
7889 #endif /* CONFIG_TRACER_SNAPSHOT */
7890
7891 /*
7892  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7893  * @filp: The active open file structure
7894  * @ubuf: The userspace provided buffer containing the value to write
7895  * @cnt: The number of bytes to read from the user buffer
7896  * @ppos: The current "file" position
7897  *
7898  * This function implements the write interface for a struct trace_min_max_param.
7899  * The filp->private_data must point to a trace_min_max_param structure that
7900  * defines where to write the value, the min and the max acceptable values,
7901  * and a lock to protect the write.
7902  */
7903 static ssize_t
7904 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7905 {
7906         struct trace_min_max_param *param = filp->private_data;
7907         u64 val;
7908         int err;
7909
7910         if (!param)
7911                 return -EFAULT;
7912
7913         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7914         if (err)
7915                 return err;
7916
7917         if (param->lock)
7918                 mutex_lock(param->lock);
7919
7920         if (param->min && val < *param->min)
7921                 err = -EINVAL;
7922
7923         if (param->max && val > *param->max)
7924                 err = -EINVAL;
7925
7926         if (!err)
7927                 *param->val = val;
7928
7929         if (param->lock)
7930                 mutex_unlock(param->lock);
7931
7932         if (err)
7933                 return err;
7934
7935         return cnt;
7936 }
7937
7938 /*
7939  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7940  * @filp: The active open file structure
7941  * @ubuf: The userspace provided buffer to read value into
7942  * @cnt: The maximum number of bytes to read
7943  * @ppos: The current "file" position
7944  *
7945  * This function implements the read interface for a struct trace_min_max_param.
7946  * The filp->private_data must point to a trace_min_max_param struct with valid
7947  * data.
7948  */
7949 static ssize_t
7950 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7951 {
7952         struct trace_min_max_param *param = filp->private_data;
7953         char buf[U64_STR_SIZE];
7954         int len;
7955         u64 val;
7956
7957         if (!param)
7958                 return -EFAULT;
7959
7960         val = *param->val;
7961
7962         if (cnt > sizeof(buf))
7963                 cnt = sizeof(buf);
7964
7965         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7966
7967         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7968 }
7969
7970 const struct file_operations trace_min_max_fops = {
7971         .open           = tracing_open_generic,
7972         .read           = trace_min_max_read,
7973         .write          = trace_min_max_write,
7974 };
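
/*
 * A minimal sketch (illustrative only, not part of this file) of how a
 * caller might wire the two helpers above to a tracefs file.  The "foo"
 * names and "parent" dentry are hypothetical; struct trace_min_max_param,
 * TRACE_MODE_WRITE and trace_create_file() are the interfaces already
 * used in this file.
 */
#if 0
static u64 foo_val;
static u64 foo_min = 1;
static u64 foo_max = 100;
static DEFINE_MUTEX(foo_lock);

static struct trace_min_max_param foo_param = {
	.lock	= &foo_lock,
	.val	= &foo_val,
	.min	= &foo_min,
	.max	= &foo_max,
};

	/* e.g. from an init path; writes outside [1, 100] then fail with -EINVAL */
	trace_create_file("foo", TRACE_MODE_WRITE, parent, &foo_param,
			  &trace_min_max_fops);
#endif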
7975
7976 #define TRACING_LOG_ERRS_MAX    8
7977 #define TRACING_LOG_LOC_MAX     128
7978
7979 #define CMD_PREFIX "  Command: "
7980
7981 struct err_info {
7982         const char      **errs; /* ptr to loc-specific array of err strings */
7983         u8              type;   /* index into errs -> specific err string */
7984         u16             pos;    /* caret position */
7985         u64             ts;
7986 };
7987
7988 struct tracing_log_err {
7989         struct list_head        list;
7990         struct err_info         info;
7991         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7992         char                    *cmd;                     /* what caused err */
7993 };
7994
7995 static DEFINE_MUTEX(tracing_err_log_lock);
7996
7997 static struct tracing_log_err *alloc_tracing_log_err(int len)
7998 {
7999         struct tracing_log_err *err;
8000
8001         err = kzalloc(sizeof(*err), GFP_KERNEL);
8002         if (!err)
8003                 return ERR_PTR(-ENOMEM);
8004
8005         err->cmd = kzalloc(len, GFP_KERNEL);
8006         if (!err->cmd) {
8007                 kfree(err);
8008                 return ERR_PTR(-ENOMEM);
8009         }
8010
8011         return err;
8012 }
8013
8014 static void free_tracing_log_err(struct tracing_log_err *err)
8015 {
8016         kfree(err->cmd);
8017         kfree(err);
8018 }
8019
8020 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8021                                                    int len)
8022 {
8023         struct tracing_log_err *err;
8024         char *cmd;
8025
8026         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8027                 err = alloc_tracing_log_err(len);
8028                 if (PTR_ERR(err) != -ENOMEM)
8029                         tr->n_err_log_entries++;
8030
8031                 return err;
8032         }
8033         cmd = kzalloc(len, GFP_KERNEL);
8034         if (!cmd)
8035                 return ERR_PTR(-ENOMEM);
8036         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8037         kfree(err->cmd);
8038         err->cmd = cmd;
8039         list_del(&err->list);
8040
8041         return err;
8042 }
8043
8044 /**
8045  * err_pos - find the position of a string within a command for error careting
8046  * @cmd: The tracing command that caused the error
8047  * @str: The string to position the caret at within @cmd
8048  *
8049  * Finds the position of the first occurrence of @str within @cmd.  The
8050  * return value can be passed to tracing_log_err() for caret placement
8051  * within @cmd.
8052  *
8053  * Returns the index within @cmd of the first occurrence of @str or 0
8054  * if @str was not found.
8055  */
8056 unsigned int err_pos(char *cmd, const char *str)
8057 {
8058         char *found;
8059
8060         if (WARN_ON(!strlen(cmd)))
8061                 return 0;
8062
8063         found = strstr(cmd, str);
8064         if (found)
8065                 return found - cmd;
8066
8067         return 0;
8068 }
8069
8070 /**
8071  * tracing_log_err - write an error to the tracing error log
8072  * @tr: The associated trace array for the error (NULL for top level array)
8073  * @loc: A string describing where the error occurred
8074  * @cmd: The tracing command that caused the error
8075  * @errs: The array of loc-specific static error strings
8076  * @type: The index into errs[], which produces the specific static err string
8077  * @pos: The position the caret should be placed in the cmd
8078  *
8079  * Writes an error into tracing/error_log of the form:
8080  *
8081  * <loc>: error: <text>
8082  *   Command: <cmd>
8083  *              ^
8084  *
8085  * tracing/error_log is a small log file containing the last
8086  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8087  * unless there has been a tracing error, and the error log can be
8088  * cleared, and its memory freed, by writing the empty string to it in
8089  * truncation mode, i.e. echo > tracing/error_log.
8090  *
8091  * NOTE: the @errs array and the @type param are used to
8092  * produce a static error string - this string is not copied and saved
8093  * when the error is logged - only a pointer to it is saved.  See
8094  * existing callers for examples of how static strings are typically
8095  * defined for use with tracing_log_err().
8096  */
8097 void tracing_log_err(struct trace_array *tr,
8098                      const char *loc, const char *cmd,
8099                      const char **errs, u8 type, u16 pos)
8100 {
8101         struct tracing_log_err *err;
8102         int len = 0;
8103
8104         if (!tr)
8105                 tr = &global_trace;
8106
8107         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8108
8109         mutex_lock(&tracing_err_log_lock);
8110         err = get_tracing_log_err(tr, len);
8111         if (PTR_ERR(err) == -ENOMEM) {
8112                 mutex_unlock(&tracing_err_log_lock);
8113                 return;
8114         }
8115
8116         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8117         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8118
8119         err->info.errs = errs;
8120         err->info.type = type;
8121         err->info.pos = pos;
8122         err->info.ts = local_clock();
8123
8124         list_add_tail(&err->list, &tr->err_log);
8125         mutex_unlock(&tracing_err_log_lock);
8126 }
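
/*
 * A minimal sketch of a hypothetical caller (illustrative only, not part
 * of this file); see existing callers elsewhere in the tracing code for
 * the real pattern.  The FOO_* indices and foo_errs[] are made up, while
 * tracing_log_err() and err_pos() are the helpers defined above.
 */
#if 0
#define FOO_ERR_INVAL	0
#define FOO_ERR_DUPE	1

static const char *foo_errs[] = {
	"Invalid argument",	/* FOO_ERR_INVAL */
	"Duplicate name",	/* FOO_ERR_DUPE */
};

static void foo_report_parse_error(struct trace_array *tr, char *cmd,
				   const char *bad_token)
{
	/* The caret is placed under the first occurrence of bad_token in cmd */
	tracing_log_err(tr, "foo: parse", cmd, foo_errs,
			FOO_ERR_INVAL, err_pos(cmd, bad_token));
}
#endif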
8127
8128 static void clear_tracing_err_log(struct trace_array *tr)
8129 {
8130         struct tracing_log_err *err, *next;
8131
8132         mutex_lock(&tracing_err_log_lock);
8133         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8134                 list_del(&err->list);
8135                 free_tracing_log_err(err);
8136         }
8137
8138         tr->n_err_log_entries = 0;
8139         mutex_unlock(&tracing_err_log_lock);
8140 }
8141
8142 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8143 {
8144         struct trace_array *tr = m->private;
8145
8146         mutex_lock(&tracing_err_log_lock);
8147
8148         return seq_list_start(&tr->err_log, *pos);
8149 }
8150
8151 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8152 {
8153         struct trace_array *tr = m->private;
8154
8155         return seq_list_next(v, &tr->err_log, pos);
8156 }
8157
8158 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8159 {
8160         mutex_unlock(&tracing_err_log_lock);
8161 }
8162
8163 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8164 {
8165         u16 i;
8166
8167         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8168                 seq_putc(m, ' ');
8169         for (i = 0; i < pos; i++)
8170                 seq_putc(m, ' ');
8171         seq_puts(m, "^\n");
8172 }
8173
8174 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8175 {
8176         struct tracing_log_err *err = v;
8177
8178         if (err) {
8179                 const char *err_text = err->info.errs[err->info.type];
8180                 u64 sec = err->info.ts;
8181                 u32 nsec;
8182
8183                 nsec = do_div(sec, NSEC_PER_SEC);
8184                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8185                            err->loc, err_text);
8186                 seq_printf(m, "%s", err->cmd);
8187                 tracing_err_log_show_pos(m, err->info.pos);
8188         }
8189
8190         return 0;
8191 }
8192
8193 static const struct seq_operations tracing_err_log_seq_ops = {
8194         .start  = tracing_err_log_seq_start,
8195         .next   = tracing_err_log_seq_next,
8196         .stop   = tracing_err_log_seq_stop,
8197         .show   = tracing_err_log_seq_show
8198 };
8199
8200 static int tracing_err_log_open(struct inode *inode, struct file *file)
8201 {
8202         struct trace_array *tr = inode->i_private;
8203         int ret = 0;
8204
8205         ret = tracing_check_open_get_tr(tr);
8206         if (ret)
8207                 return ret;
8208
8209         /* If this file was opened for write, then erase contents */
8210         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8211                 clear_tracing_err_log(tr);
8212
8213         if (file->f_mode & FMODE_READ) {
8214                 ret = seq_open(file, &tracing_err_log_seq_ops);
8215                 if (!ret) {
8216                         struct seq_file *m = file->private_data;
8217                         m->private = tr;
8218                 } else {
8219                         trace_array_put(tr);
8220                 }
8221         }
8222         return ret;
8223 }
8224
8225 static ssize_t tracing_err_log_write(struct file *file,
8226                                      const char __user *buffer,
8227                                      size_t count, loff_t *ppos)
8228 {
8229         return count;
8230 }
8231
8232 static int tracing_err_log_release(struct inode *inode, struct file *file)
8233 {
8234         struct trace_array *tr = inode->i_private;
8235
8236         trace_array_put(tr);
8237
8238         if (file->f_mode & FMODE_READ)
8239                 seq_release(inode, file);
8240
8241         return 0;
8242 }
8243
8244 static const struct file_operations tracing_err_log_fops = {
8245         .open           = tracing_err_log_open,
8246         .write          = tracing_err_log_write,
8247         .read           = seq_read,
8248         .llseek         = tracing_lseek,
8249         .release        = tracing_err_log_release,
8250 };
8251
8252 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8253 {
8254         struct trace_array *tr = inode->i_private;
8255         struct ftrace_buffer_info *info;
8256         int ret;
8257
8258         ret = tracing_check_open_get_tr(tr);
8259         if (ret)
8260                 return ret;
8261
8262         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8263         if (!info) {
8264                 trace_array_put(tr);
8265                 return -ENOMEM;
8266         }
8267
8268         mutex_lock(&trace_types_lock);
8269
8270         info->iter.tr           = tr;
8271         info->iter.cpu_file     = tracing_get_cpu(inode);
8272         info->iter.trace        = tr->current_trace;
8273         info->iter.array_buffer = &tr->array_buffer;
8274         info->spare             = NULL;
8275         /* Force reading ring buffer for first read */
8276         info->read              = (unsigned int)-1;
8277
8278         filp->private_data = info;
8279
8280         tr->trace_ref++;
8281
8282         mutex_unlock(&trace_types_lock);
8283
8284         ret = nonseekable_open(inode, filp);
8285         if (ret < 0)
8286                 trace_array_put(tr);
8287
8288         return ret;
8289 }
8290
8291 static __poll_t
8292 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8293 {
8294         struct ftrace_buffer_info *info = filp->private_data;
8295         struct trace_iterator *iter = &info->iter;
8296
8297         return trace_poll(iter, filp, poll_table);
8298 }
8299
8300 static ssize_t
8301 tracing_buffers_read(struct file *filp, char __user *ubuf,
8302                      size_t count, loff_t *ppos)
8303 {
8304         struct ftrace_buffer_info *info = filp->private_data;
8305         struct trace_iterator *iter = &info->iter;
8306         ssize_t ret = 0;
8307         ssize_t size;
8308
8309         if (!count)
8310                 return 0;
8311
8312 #ifdef CONFIG_TRACER_MAX_TRACE
8313         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8314                 return -EBUSY;
8315 #endif
8316
8317         if (!info->spare) {
8318                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8319                                                           iter->cpu_file);
8320                 if (IS_ERR(info->spare)) {
8321                         ret = PTR_ERR(info->spare);
8322                         info->spare = NULL;
8323                 } else {
8324                         info->spare_cpu = iter->cpu_file;
8325                 }
8326         }
8327         if (!info->spare)
8328                 return ret;
8329
8330         /* Do we have previous read data to read? */
8331         if (info->read < PAGE_SIZE)
8332                 goto read;
8333
8334  again:
8335         trace_access_lock(iter->cpu_file);
8336         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8337                                     &info->spare,
8338                                     count,
8339                                     iter->cpu_file, 0);
8340         trace_access_unlock(iter->cpu_file);
8341
8342         if (ret < 0) {
8343                 if (trace_empty(iter)) {
8344                         if ((filp->f_flags & O_NONBLOCK))
8345                                 return -EAGAIN;
8346
8347                         ret = wait_on_pipe(iter, 0);
8348                         if (ret)
8349                                 return ret;
8350
8351                         goto again;
8352                 }
8353                 return 0;
8354         }
8355
8356         info->read = 0;
8357  read:
8358         size = PAGE_SIZE - info->read;
8359         if (size > count)
8360                 size = count;
8361
8362         ret = copy_to_user(ubuf, info->spare + info->read, size);
8363         if (ret == size)
8364                 return -EFAULT;
8365
8366         size -= ret;
8367
8368         *ppos += size;
8369         info->read += size;
8370
8371         return size;
8372 }
8373
8374 static int tracing_buffers_release(struct inode *inode, struct file *file)
8375 {
8376         struct ftrace_buffer_info *info = file->private_data;
8377         struct trace_iterator *iter = &info->iter;
8378
8379         mutex_lock(&trace_types_lock);
8380
8381         iter->tr->trace_ref--;
8382
8383         __trace_array_put(iter->tr);
8384
8385         iter->wait_index++;
8386         /* Make sure the waiters see the new wait_index */
8387         smp_wmb();
8388
8389         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8390
8391         if (info->spare)
8392                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8393                                            info->spare_cpu, info->spare);
8394         kvfree(info);
8395
8396         mutex_unlock(&trace_types_lock);
8397
8398         return 0;
8399 }
8400
8401 struct buffer_ref {
8402         struct trace_buffer     *buffer;
8403         void                    *page;
8404         int                     cpu;
8405         refcount_t              refcount;
8406 };
8407
8408 static void buffer_ref_release(struct buffer_ref *ref)
8409 {
8410         if (!refcount_dec_and_test(&ref->refcount))
8411                 return;
8412         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8413         kfree(ref);
8414 }
8415
8416 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8417                                     struct pipe_buffer *buf)
8418 {
8419         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8420
8421         buffer_ref_release(ref);
8422         buf->private = 0;
8423 }
8424
8425 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8426                                 struct pipe_buffer *buf)
8427 {
8428         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8429
8430         if (refcount_read(&ref->refcount) > INT_MAX/2)
8431                 return false;
8432
8433         refcount_inc(&ref->refcount);
8434         return true;
8435 }
8436
8437 /* Pipe buffer operations for a buffer. */
8438 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8439         .release                = buffer_pipe_buf_release,
8440         .get                    = buffer_pipe_buf_get,
8441 };
8442
8443 /*
8444  * Callback from splice_to_pipe(), if we need to release some pages
8445  * at the end of the spd in case we errored out while filling the pipe.
8446  */
8447 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8448 {
8449         struct buffer_ref *ref =
8450                 (struct buffer_ref *)spd->partial[i].private;
8451
8452         buffer_ref_release(ref);
8453         spd->partial[i].private = 0;
8454 }
8455
8456 static ssize_t
8457 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8458                             struct pipe_inode_info *pipe, size_t len,
8459                             unsigned int flags)
8460 {
8461         struct ftrace_buffer_info *info = file->private_data;
8462         struct trace_iterator *iter = &info->iter;
8463         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8464         struct page *pages_def[PIPE_DEF_BUFFERS];
8465         struct splice_pipe_desc spd = {
8466                 .pages          = pages_def,
8467                 .partial        = partial_def,
8468                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8469                 .ops            = &buffer_pipe_buf_ops,
8470                 .spd_release    = buffer_spd_release,
8471         };
8472         struct buffer_ref *ref;
8473         int entries, i;
8474         ssize_t ret = 0;
8475
8476 #ifdef CONFIG_TRACER_MAX_TRACE
8477         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8478                 return -EBUSY;
8479 #endif
8480
8481         if (*ppos & (PAGE_SIZE - 1))
8482                 return -EINVAL;
8483
8484         if (len & (PAGE_SIZE - 1)) {
8485                 if (len < PAGE_SIZE)
8486                         return -EINVAL;
8487                 len &= PAGE_MASK;
8488         }
8489
8490         if (splice_grow_spd(pipe, &spd))
8491                 return -ENOMEM;
8492
8493  again:
8494         trace_access_lock(iter->cpu_file);
8495         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8496
8497         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8498                 struct page *page;
8499                 int r;
8500
8501                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8502                 if (!ref) {
8503                         ret = -ENOMEM;
8504                         break;
8505                 }
8506
8507                 refcount_set(&ref->refcount, 1);
8508                 ref->buffer = iter->array_buffer->buffer;
8509                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8510                 if (IS_ERR(ref->page)) {
8511                         ret = PTR_ERR(ref->page);
8512                         ref->page = NULL;
8513                         kfree(ref);
8514                         break;
8515                 }
8516                 ref->cpu = iter->cpu_file;
8517
8518                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8519                                           len, iter->cpu_file, 1);
8520                 if (r < 0) {
8521                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8522                                                    ref->page);
8523                         kfree(ref);
8524                         break;
8525                 }
8526
8527                 page = virt_to_page(ref->page);
8528
8529                 spd.pages[i] = page;
8530                 spd.partial[i].len = PAGE_SIZE;
8531                 spd.partial[i].offset = 0;
8532                 spd.partial[i].private = (unsigned long)ref;
8533                 spd.nr_pages++;
8534                 *ppos += PAGE_SIZE;
8535
8536                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8537         }
8538
8539         trace_access_unlock(iter->cpu_file);
8540         spd.nr_pages = i;
8541
8542         /* did we read anything? */
8543         if (!spd.nr_pages) {
8544                 long wait_index;
8545
8546                 if (ret)
8547                         goto out;
8548
8549                 ret = -EAGAIN;
8550                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8551                         goto out;
8552
8553                 wait_index = READ_ONCE(iter->wait_index);
8554
8555                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8556                 if (ret)
8557                         goto out;
8558
8559                 /* No need to wait after waking up when tracing is off */
8560                 if (!tracer_tracing_is_on(iter->tr))
8561                         goto out;
8562
8563                 /* Make sure we see the new wait_index */
8564                 smp_rmb();
8565                 if (wait_index != iter->wait_index)
8566                         goto out;
8567
8568                 goto again;
8569         }
8570
8571         ret = splice_to_pipe(pipe, &spd);
8572 out:
8573         splice_shrink_spd(&spd);
8574
8575         return ret;
8576 }
8577
8578 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8579 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8580 {
8581         struct ftrace_buffer_info *info = file->private_data;
8582         struct trace_iterator *iter = &info->iter;
8583
8584         if (cmd)
8585                 return -ENOIOCTLCMD;
8586
8587         mutex_lock(&trace_types_lock);
8588
8589         iter->wait_index++;
8590         /* Make sure the waiters see the new wait_index */
8591         smp_wmb();
8592
8593         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8594
8595         mutex_unlock(&trace_types_lock);
8596         return 0;
8597 }
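
/*
 * From user space, the wake-up above is triggered with a plain ioctl(fd, 0)
 * on an open trace_pipe_raw file.  A minimal sketch (illustrative only; the
 * tracefs mount point and CPU path below are examples):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	ioctl(fd, 0);		// cmd 0: wake up all waiters on this buffer
 *	close(fd);
 */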
8598
8599 static const struct file_operations tracing_buffers_fops = {
8600         .open           = tracing_buffers_open,
8601         .read           = tracing_buffers_read,
8602         .poll           = tracing_buffers_poll,
8603         .release        = tracing_buffers_release,
8604         .splice_read    = tracing_buffers_splice_read,
8605         .unlocked_ioctl = tracing_buffers_ioctl,
8606         .llseek         = no_llseek,
8607 };
8608
8609 static ssize_t
8610 tracing_stats_read(struct file *filp, char __user *ubuf,
8611                    size_t count, loff_t *ppos)
8612 {
8613         struct inode *inode = file_inode(filp);
8614         struct trace_array *tr = inode->i_private;
8615         struct array_buffer *trace_buf = &tr->array_buffer;
8616         int cpu = tracing_get_cpu(inode);
8617         struct trace_seq *s;
8618         unsigned long cnt;
8619         unsigned long long t;
8620         unsigned long usec_rem;
8621
8622         s = kmalloc(sizeof(*s), GFP_KERNEL);
8623         if (!s)
8624                 return -ENOMEM;
8625
8626         trace_seq_init(s);
8627
8628         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8629         trace_seq_printf(s, "entries: %ld\n", cnt);
8630
8631         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8632         trace_seq_printf(s, "overrun: %ld\n", cnt);
8633
8634         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8635         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8636
8637         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8638         trace_seq_printf(s, "bytes: %ld\n", cnt);
8639
8640         if (trace_clocks[tr->clock_id].in_ns) {
8641                 /* local or global for trace_clock */
8642                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8643                 usec_rem = do_div(t, USEC_PER_SEC);
8644                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8645                                                                 t, usec_rem);
8646
8647                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8648                 usec_rem = do_div(t, USEC_PER_SEC);
8649                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8650         } else {
8651                 /* counter or tsc mode for trace_clock */
8652                 trace_seq_printf(s, "oldest event ts: %llu\n",
8653                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8654
8655                 trace_seq_printf(s, "now ts: %llu\n",
8656                                 ring_buffer_time_stamp(trace_buf->buffer));
8657         }
8658
8659         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8660         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8661
8662         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8663         trace_seq_printf(s, "read events: %ld\n", cnt);
8664
8665         count = simple_read_from_buffer(ubuf, count, ppos,
8666                                         s->buffer, trace_seq_used(s));
8667
8668         kfree(s);
8669
8670         return count;
8671 }
8672
8673 static const struct file_operations tracing_stats_fops = {
8674         .open           = tracing_open_generic_tr,
8675         .read           = tracing_stats_read,
8676         .llseek         = generic_file_llseek,
8677         .release        = tracing_release_generic_tr,
8678 };
8679
8680 #ifdef CONFIG_DYNAMIC_FTRACE
8681
8682 static ssize_t
8683 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8684                   size_t cnt, loff_t *ppos)
8685 {
8686         ssize_t ret;
8687         char *buf;
8688         int r;
8689
8690         /* 256 should be plenty to hold the amount needed */
8691         buf = kmalloc(256, GFP_KERNEL);
8692         if (!buf)
8693                 return -ENOMEM;
8694
8695         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8696                       ftrace_update_tot_cnt,
8697                       ftrace_number_of_pages,
8698                       ftrace_number_of_groups);
8699
8700         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8701         kfree(buf);
8702         return ret;
8703 }
8704
8705 static const struct file_operations tracing_dyn_info_fops = {
8706         .open           = tracing_open_generic,
8707         .read           = tracing_read_dyn_info,
8708         .llseek         = generic_file_llseek,
8709 };
8710 #endif /* CONFIG_DYNAMIC_FTRACE */
8711
8712 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8713 static void
8714 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8715                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8716                 void *data)
8717 {
8718         tracing_snapshot_instance(tr);
8719 }
8720
8721 static void
8722 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8723                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8724                       void *data)
8725 {
8726         struct ftrace_func_mapper *mapper = data;
8727         long *count = NULL;
8728
8729         if (mapper)
8730                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8731
8732         if (count) {
8733
8734                 if (*count <= 0)
8735                         return;
8736
8737                 (*count)--;
8738         }
8739
8740         tracing_snapshot_instance(tr);
8741 }
8742
8743 static int
8744 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8745                       struct ftrace_probe_ops *ops, void *data)
8746 {
8747         struct ftrace_func_mapper *mapper = data;
8748         long *count = NULL;
8749
8750         seq_printf(m, "%ps:", (void *)ip);
8751
8752         seq_puts(m, "snapshot");
8753
8754         if (mapper)
8755                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8756
8757         if (count)
8758                 seq_printf(m, ":count=%ld\n", *count);
8759         else
8760                 seq_puts(m, ":unlimited\n");
8761
8762         return 0;
8763 }
8764
8765 static int
8766 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8767                      unsigned long ip, void *init_data, void **data)
8768 {
8769         struct ftrace_func_mapper *mapper = *data;
8770
8771         if (!mapper) {
8772                 mapper = allocate_ftrace_func_mapper();
8773                 if (!mapper)
8774                         return -ENOMEM;
8775                 *data = mapper;
8776         }
8777
8778         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8779 }
8780
8781 static void
8782 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8783                      unsigned long ip, void *data)
8784 {
8785         struct ftrace_func_mapper *mapper = data;
8786
8787         if (!ip) {
8788                 if (!mapper)
8789                         return;
8790                 free_ftrace_func_mapper(mapper, NULL);
8791                 return;
8792         }
8793
8794         ftrace_func_mapper_remove_ip(mapper, ip);
8795 }
8796
8797 static struct ftrace_probe_ops snapshot_probe_ops = {
8798         .func                   = ftrace_snapshot,
8799         .print                  = ftrace_snapshot_print,
8800 };
8801
8802 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8803         .func                   = ftrace_count_snapshot,
8804         .print                  = ftrace_snapshot_print,
8805         .init                   = ftrace_snapshot_init,
8806         .free                   = ftrace_snapshot_free,
8807 };
8808
8809 static int
8810 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8811                                char *glob, char *cmd, char *param, int enable)
8812 {
8813         struct ftrace_probe_ops *ops;
8814         void *count = (void *)-1;
8815         char *number;
8816         int ret;
8817
8818         if (!tr)
8819                 return -ENODEV;
8820
8821         /* hash funcs only work with set_ftrace_filter */
8822         if (!enable)
8823                 return -EINVAL;
8824
8825         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8826
8827         if (glob[0] == '!')
8828                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8829
8830         if (!param)
8831                 goto out_reg;
8832
8833         number = strsep(&param, ":");
8834
8835         if (!strlen(number))
8836                 goto out_reg;
8837
8838         /*
8839          * We use the callback data field (which is a pointer)
8840          * as our counter.
8841          */
8842         ret = kstrtoul(number, 0, (unsigned long *)&count);
8843         if (ret)
8844                 return ret;
8845
8846  out_reg:
8847         ret = tracing_alloc_snapshot_instance(tr);
8848         if (ret < 0)
8849                 goto out;
8850
8851         ret = register_ftrace_function_probe(glob, tr, ops, count);
8852
8853  out:
8854         return ret < 0 ? ret : 0;
8855 }
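
/*
 * The "snapshot" command registered below is used through the
 * set_ftrace_filter file (relative to the tracefs mount).  Each line below
 * is an independent example:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'schedule:snapshot:1' > set_ftrace_filter	# only the first hit
 *	echo '!schedule:snapshot' >> set_ftrace_filter	# remove the probe
 */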
8856
8857 static struct ftrace_func_command ftrace_snapshot_cmd = {
8858         .name                   = "snapshot",
8859         .func                   = ftrace_trace_snapshot_callback,
8860 };
8861
8862 static __init int register_snapshot_cmd(void)
8863 {
8864         return register_ftrace_command(&ftrace_snapshot_cmd);
8865 }
8866 #else
8867 static inline __init int register_snapshot_cmd(void) { return 0; }
8868 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8869
8870 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8871 {
8872         if (WARN_ON(!tr->dir))
8873                 return ERR_PTR(-ENODEV);
8874
8875         /* Top directory uses NULL as the parent */
8876         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8877                 return NULL;
8878
8879         /* All sub buffers have a descriptor */
8880         return tr->dir;
8881 }
8882
8883 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8884 {
8885         struct dentry *d_tracer;
8886
8887         if (tr->percpu_dir)
8888                 return tr->percpu_dir;
8889
8890         d_tracer = tracing_get_dentry(tr);
8891         if (IS_ERR(d_tracer))
8892                 return NULL;
8893
8894         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8895
8896         MEM_FAIL(!tr->percpu_dir,
8897                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8898
8899         return tr->percpu_dir;
8900 }
8901
8902 static struct dentry *
8903 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8904                       void *data, long cpu, const struct file_operations *fops)
8905 {
8906         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8907
8908         if (ret) /* See tracing_get_cpu() */
8909                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8910         return ret;
8911 }
8912
8913 static void
8914 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8915 {
8916         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8917         struct dentry *d_cpu;
8918         char cpu_dir[30]; /* 30 characters should be more than enough */
8919
8920         if (!d_percpu)
8921                 return;
8922
8923         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8924         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8925         if (!d_cpu) {
8926                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8927                 return;
8928         }
8929
8930         /* per cpu trace_pipe */
8931         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8932                                 tr, cpu, &tracing_pipe_fops);
8933
8934         /* per cpu trace */
8935         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8936                                 tr, cpu, &tracing_fops);
8937
8938         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8939                                 tr, cpu, &tracing_buffers_fops);
8940
8941         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8942                                 tr, cpu, &tracing_stats_fops);
8943
8944         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8945                                 tr, cpu, &tracing_entries_fops);
8946
8947 #ifdef CONFIG_TRACER_SNAPSHOT
8948         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8949                                 tr, cpu, &snapshot_fops);
8950
8951         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8952                                 tr, cpu, &snapshot_raw_fops);
8953 #endif
8954 }
8955
8956 #ifdef CONFIG_FTRACE_SELFTEST
8957 /* Let selftest have access to static functions in this file */
8958 #include "trace_selftest.c"
8959 #endif
8960
8961 static ssize_t
8962 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8963                         loff_t *ppos)
8964 {
8965         struct trace_option_dentry *topt = filp->private_data;
8966         char *buf;
8967
8968         if (topt->flags->val & topt->opt->bit)
8969                 buf = "1\n";
8970         else
8971                 buf = "0\n";
8972
8973         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8974 }
8975
8976 static ssize_t
8977 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8978                          loff_t *ppos)
8979 {
8980         struct trace_option_dentry *topt = filp->private_data;
8981         unsigned long val;
8982         int ret;
8983
8984         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8985         if (ret)
8986                 return ret;
8987
8988         if (val != 0 && val != 1)
8989                 return -EINVAL;
8990
8991         if (!!(topt->flags->val & topt->opt->bit) != val) {
8992                 mutex_lock(&trace_types_lock);
8993                 ret = __set_tracer_option(topt->tr, topt->flags,
8994                                           topt->opt, !val);
8995                 mutex_unlock(&trace_types_lock);
8996                 if (ret)
8997                         return ret;
8998         }
8999
9000         *ppos += cnt;
9001
9002         return cnt;
9003 }
9004
9005 static int tracing_open_options(struct inode *inode, struct file *filp)
9006 {
9007         struct trace_option_dentry *topt = inode->i_private;
9008         int ret;
9009
9010         ret = tracing_check_open_get_tr(topt->tr);
9011         if (ret)
9012                 return ret;
9013
9014         filp->private_data = inode->i_private;
9015         return 0;
9016 }
9017
9018 static int tracing_release_options(struct inode *inode, struct file *file)
9019 {
9020         struct trace_option_dentry *topt = file->private_data;
9021
9022         trace_array_put(topt->tr);
9023         return 0;
9024 }
9025
9026 static const struct file_operations trace_options_fops = {
9027         .open = tracing_open_options,
9028         .read = trace_options_read,
9029         .write = trace_options_write,
9030         .llseek = generic_file_llseek,
9031         .release = tracing_release_options,
9032 };
9033
9034 /*
9035  * In order to pass in both the trace_array descriptor and the index of
9036  * the flag that the trace option file represents, the trace_array
9037  * has a character array of trace_flags_index[], which holds the index
9038  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9039  * The address of this character array is passed to the flag option file
9040  * read/write callbacks.
9041  *
9042  * In order to extract both the index and the trace_array descriptor,
9043  * get_tr_index() uses the following algorithm.
9044  *
9045  *   idx = *ptr;
9046  *
9047  * As the pointer is the address of one of the index bytes, dereferencing
9048  * it yields the index value itself (remember index[1] == 1).
9049  *
9050  * Then, to get the trace_array descriptor, subtracting that index
9051  * from the ptr gets us to the start of the index array itself.
9052  *
9053  *   ptr - idx == &index[0]
9054  *
9055  * Then a simple container_of() from that pointer gets us to the
9056  * trace_array descriptor.
9057  */
9058 static void get_tr_index(void *data, struct trace_array **ptr,
9059                          unsigned int *pindex)
9060 {
9061         *pindex = *(unsigned char *)data;
9062
9063         *ptr = container_of(data - *pindex, struct trace_array,
9064                             trace_flags_index);
9065 }
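
/*
 * A short worked example of the scheme described above (illustrative
 * only): the option file for flag bit 5 is created with
 * data == &tr->trace_flags_index[5], which holds the value 5.  Then:
 *
 *	idx = *(unsigned char *)data;		// == 5
 *	tr  = container_of(data - idx,		// == &tr->trace_flags_index[0]
 *			   struct trace_array, trace_flags_index);
 */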
9066
9067 static ssize_t
9068 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9069                         loff_t *ppos)
9070 {
9071         void *tr_index = filp->private_data;
9072         struct trace_array *tr;
9073         unsigned int index;
9074         char *buf;
9075
9076         get_tr_index(tr_index, &tr, &index);
9077
9078         if (tr->trace_flags & (1 << index))
9079                 buf = "1\n";
9080         else
9081                 buf = "0\n";
9082
9083         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9084 }
9085
9086 static ssize_t
9087 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9088                          loff_t *ppos)
9089 {
9090         void *tr_index = filp->private_data;
9091         struct trace_array *tr;
9092         unsigned int index;
9093         unsigned long val;
9094         int ret;
9095
9096         get_tr_index(tr_index, &tr, &index);
9097
9098         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9099         if (ret)
9100                 return ret;
9101
9102         if (val != 0 && val != 1)
9103                 return -EINVAL;
9104
9105         mutex_lock(&event_mutex);
9106         mutex_lock(&trace_types_lock);
9107         ret = set_tracer_flag(tr, 1 << index, val);
9108         mutex_unlock(&trace_types_lock);
9109         mutex_unlock(&event_mutex);
9110
9111         if (ret < 0)
9112                 return ret;
9113
9114         *ppos += cnt;
9115
9116         return cnt;
9117 }
9118
9119 static const struct file_operations trace_options_core_fops = {
9120         .open = tracing_open_generic,
9121         .read = trace_options_core_read,
9122         .write = trace_options_core_write,
9123         .llseek = generic_file_llseek,
9124 };
9125
9126 struct dentry *trace_create_file(const char *name,
9127                                  umode_t mode,
9128                                  struct dentry *parent,
9129                                  void *data,
9130                                  const struct file_operations *fops)
9131 {
9132         struct dentry *ret;
9133
9134         ret = tracefs_create_file(name, mode, parent, data, fops);
9135         if (!ret)
9136                 pr_warn("Could not create tracefs '%s' entry\n", name);
9137
9138         return ret;
9139 }
9140
9141
9142 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9143 {
9144         struct dentry *d_tracer;
9145
9146         if (tr->options)
9147                 return tr->options;
9148
9149         d_tracer = tracing_get_dentry(tr);
9150         if (IS_ERR(d_tracer))
9151                 return NULL;
9152
9153         tr->options = tracefs_create_dir("options", d_tracer);
9154         if (!tr->options) {
9155                 pr_warn("Could not create tracefs directory 'options'\n");
9156                 return NULL;
9157         }
9158
9159         return tr->options;
9160 }
9161
9162 static void
9163 create_trace_option_file(struct trace_array *tr,
9164                          struct trace_option_dentry *topt,
9165                          struct tracer_flags *flags,
9166                          struct tracer_opt *opt)
9167 {
9168         struct dentry *t_options;
9169
9170         t_options = trace_options_init_dentry(tr);
9171         if (!t_options)
9172                 return;
9173
9174         topt->flags = flags;
9175         topt->opt = opt;
9176         topt->tr = tr;
9177
9178         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9179                                         t_options, topt, &trace_options_fops);
9180
9181 }
9182
9183 static void
9184 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9185 {
9186         struct trace_option_dentry *topts;
9187         struct trace_options *tr_topts;
9188         struct tracer_flags *flags;
9189         struct tracer_opt *opts;
9190         int cnt;
9191         int i;
9192
9193         if (!tracer)
9194                 return;
9195
9196         flags = tracer->flags;
9197
9198         if (!flags || !flags->opts)
9199                 return;
9200
9201         /*
9202          * If this is an instance, only create flags for tracers
9203          * the instance may have.
9204          */
9205         if (!trace_ok_for_array(tracer, tr))
9206                 return;
9207
9208         for (i = 0; i < tr->nr_topts; i++) {
9209                 /* Make sure there are no duplicate flags. */
9210                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9211                         return;
9212         }
9213
9214         opts = flags->opts;
9215
9216         for (cnt = 0; opts[cnt].name; cnt++)
9217                 ;
9218
9219         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9220         if (!topts)
9221                 return;
9222
9223         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9224                             GFP_KERNEL);
9225         if (!tr_topts) {
9226                 kfree(topts);
9227                 return;
9228         }
9229
9230         tr->topts = tr_topts;
9231         tr->topts[tr->nr_topts].tracer = tracer;
9232         tr->topts[tr->nr_topts].topts = topts;
9233         tr->nr_topts++;
9234
9235         for (cnt = 0; opts[cnt].name; cnt++) {
9236                 create_trace_option_file(tr, &topts[cnt], flags,
9237                                          &opts[cnt]);
9238                 MEM_FAIL(topts[cnt].entry == NULL,
9239                           "Failed to create trace option: %s",
9240                           opts[cnt].name);
9241         }
9242 }
9243
9244 static struct dentry *
9245 create_trace_option_core_file(struct trace_array *tr,
9246                               const char *option, long index)
9247 {
9248         struct dentry *t_options;
9249
9250         t_options = trace_options_init_dentry(tr);
9251         if (!t_options)
9252                 return NULL;
9253
9254         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9255                                  (void *)&tr->trace_flags_index[index],
9256                                  &trace_options_core_fops);
9257 }
9258
9259 static void create_trace_options_dir(struct trace_array *tr)
9260 {
9261         struct dentry *t_options;
9262         bool top_level = tr == &global_trace;
9263         int i;
9264
9265         t_options = trace_options_init_dentry(tr);
9266         if (!t_options)
9267                 return;
9268
9269         for (i = 0; trace_options[i]; i++) {
9270                 if (top_level ||
9271                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9272                         create_trace_option_core_file(tr, trace_options[i], i);
9273         }
9274 }
9275
9276 static ssize_t
9277 rb_simple_read(struct file *filp, char __user *ubuf,
9278                size_t cnt, loff_t *ppos)
9279 {
9280         struct trace_array *tr = filp->private_data;
9281         char buf[64];
9282         int r;
9283
9284         r = tracer_tracing_is_on(tr);
9285         r = sprintf(buf, "%d\n", r);
9286
9287         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9288 }
9289
9290 static ssize_t
9291 rb_simple_write(struct file *filp, const char __user *ubuf,
9292                 size_t cnt, loff_t *ppos)
9293 {
9294         struct trace_array *tr = filp->private_data;
9295         struct trace_buffer *buffer = tr->array_buffer.buffer;
9296         unsigned long val;
9297         int ret;
9298
9299         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9300         if (ret)
9301                 return ret;
9302
9303         if (buffer) {
9304                 mutex_lock(&trace_types_lock);
9305                 if (!!val == tracer_tracing_is_on(tr)) {
9306                         val = 0; /* do nothing */
9307                 } else if (val) {
9308                         tracer_tracing_on(tr);
9309                         if (tr->current_trace->start)
9310                                 tr->current_trace->start(tr);
9311                 } else {
9312                         tracer_tracing_off(tr);
9313                         if (tr->current_trace->stop)
9314                                 tr->current_trace->stop(tr);
9315                         /* Wake up any waiters */
9316                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9317                 }
9318                 mutex_unlock(&trace_types_lock);
9319         }
9320
9321         (*ppos)++;
9322
9323         return cnt;
9324 }
9325
9326 static const struct file_operations rb_simple_fops = {
9327         .open           = tracing_open_generic_tr,
9328         .read           = rb_simple_read,
9329         .write          = rb_simple_write,
9330         .release        = tracing_release_generic_tr,
9331         .llseek         = default_llseek,
9332 };
9333
9334 static ssize_t
9335 buffer_percent_read(struct file *filp, char __user *ubuf,
9336                     size_t cnt, loff_t *ppos)
9337 {
9338         struct trace_array *tr = filp->private_data;
9339         char buf[64];
9340         int r;
9341
9342         r = tr->buffer_percent;
9343         r = sprintf(buf, "%d\n", r);
9344
9345         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9346 }
9347
9348 static ssize_t
9349 buffer_percent_write(struct file *filp, const char __user *ubuf,
9350                      size_t cnt, loff_t *ppos)
9351 {
9352         struct trace_array *tr = filp->private_data;
9353         unsigned long val;
9354         int ret;
9355
9356         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9357         if (ret)
9358                 return ret;
9359
9360         if (val > 100)
9361                 return -EINVAL;
9362
9363         tr->buffer_percent = val;
9364
9365         (*ppos)++;
9366
9367         return cnt;
9368 }
9369
9370 static const struct file_operations buffer_percent_fops = {
9371         .open           = tracing_open_generic_tr,
9372         .read           = buffer_percent_read,
9373         .write          = buffer_percent_write,
9374         .release        = tracing_release_generic_tr,
9375         .llseek         = default_llseek,
9376 };
9377
9378 static struct dentry *trace_instance_dir;
9379
9380 static void
9381 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9382
9383 static int
9384 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9385 {
9386         enum ring_buffer_flags rb_flags;
9387
9388         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9389
9390         buf->tr = tr;
9391
9392         buf->buffer = ring_buffer_alloc(size, rb_flags);
9393         if (!buf->buffer)
9394                 return -ENOMEM;
9395
9396         buf->data = alloc_percpu(struct trace_array_cpu);
9397         if (!buf->data) {
9398                 ring_buffer_free(buf->buffer);
9399                 buf->buffer = NULL;
9400                 return -ENOMEM;
9401         }
9402
9403         /* Allocate the first page for all buffers */
9404         set_buffer_entries(&tr->array_buffer,
9405                            ring_buffer_size(tr->array_buffer.buffer, 0));
9406
9407         return 0;
9408 }
9409
9410 static void free_trace_buffer(struct array_buffer *buf)
9411 {
9412         if (buf->buffer) {
9413                 ring_buffer_free(buf->buffer);
9414                 buf->buffer = NULL;
9415                 free_percpu(buf->data);
9416                 buf->data = NULL;
9417         }
9418 }
9419
9420 static int allocate_trace_buffers(struct trace_array *tr, int size)
9421 {
9422         int ret;
9423
9424         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9425         if (ret)
9426                 return ret;
9427
9428 #ifdef CONFIG_TRACER_MAX_TRACE
9429         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9430                                     allocate_snapshot ? size : 1);
9431         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9432                 free_trace_buffer(&tr->array_buffer);
9433                 return -ENOMEM;
9434         }
9435         tr->allocated_snapshot = allocate_snapshot;
9436
9437         allocate_snapshot = false;
9438 #endif
9439
9440         return 0;
9441 }
9442
9443 static void free_trace_buffers(struct trace_array *tr)
9444 {
9445         if (!tr)
9446                 return;
9447
9448         free_trace_buffer(&tr->array_buffer);
9449
9450 #ifdef CONFIG_TRACER_MAX_TRACE
9451         free_trace_buffer(&tr->max_buffer);
9452 #endif
9453 }
9454
9455 static void init_trace_flags_index(struct trace_array *tr)
9456 {
9457         int i;
9458
9459         /* Used by the trace options files */
9460         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9461                 tr->trace_flags_index[i] = i;
9462 }
9463
9464 static void __update_tracer_options(struct trace_array *tr)
9465 {
9466         struct tracer *t;
9467
9468         for (t = trace_types; t; t = t->next)
9469                 add_tracer_options(tr, t);
9470 }
9471
9472 static void update_tracer_options(struct trace_array *tr)
9473 {
9474         mutex_lock(&trace_types_lock);
9475         tracer_options_updated = true;
9476         __update_tracer_options(tr);
9477         mutex_unlock(&trace_types_lock);
9478 }
9479
9480 /* Must have trace_types_lock held */
9481 struct trace_array *trace_array_find(const char *instance)
9482 {
9483         struct trace_array *tr, *found = NULL;
9484
9485         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9486                 if (tr->name && strcmp(tr->name, instance) == 0) {
9487                         found = tr;
9488                         break;
9489                 }
9490         }
9491
9492         return found;
9493 }
9494
9495 struct trace_array *trace_array_find_get(const char *instance)
9496 {
9497         struct trace_array *tr;
9498
9499         mutex_lock(&trace_types_lock);
9500         tr = trace_array_find(instance);
9501         if (tr)
9502                 tr->ref++;
9503         mutex_unlock(&trace_types_lock);
9504
9505         return tr;
9506 }
9507
9508 static int trace_array_create_dir(struct trace_array *tr)
9509 {
9510         int ret;
9511
9512         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9513         if (!tr->dir)
9514                 return -EINVAL;
9515
9516         ret = event_trace_add_tracer(tr->dir, tr);
9517         if (ret) {
9518                 tracefs_remove(tr->dir);
9519                 return ret;
9520         }
9521
9522         init_tracer_tracefs(tr, tr->dir);
9523         __update_tracer_options(tr);
9524
9525         return ret;
9526 }
9527
9528 static struct trace_array *trace_array_create(const char *name)
9529 {
9530         struct trace_array *tr;
9531         int ret;
9532
9533         ret = -ENOMEM;
9534         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9535         if (!tr)
9536                 return ERR_PTR(ret);
9537
9538         tr->name = kstrdup(name, GFP_KERNEL);
9539         if (!tr->name)
9540                 goto out_free_tr;
9541
9542         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9543                 goto out_free_tr;
9544
9545         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9546                 goto out_free_tr;
9547
9548         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9549
9550         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9551
9552         raw_spin_lock_init(&tr->start_lock);
9553
9554         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9555
9556         tr->current_trace = &nop_trace;
9557
9558         INIT_LIST_HEAD(&tr->systems);
9559         INIT_LIST_HEAD(&tr->events);
9560         INIT_LIST_HEAD(&tr->hist_vars);
9561         INIT_LIST_HEAD(&tr->err_log);
9562
9563         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9564                 goto out_free_tr;
9565
9566         if (ftrace_allocate_ftrace_ops(tr) < 0)
9567                 goto out_free_tr;
9568
9569         ftrace_init_trace_array(tr);
9570
9571         init_trace_flags_index(tr);
9572
9573         if (trace_instance_dir) {
9574                 ret = trace_array_create_dir(tr);
9575                 if (ret)
9576                         goto out_free_tr;
9577         } else
9578                 __trace_early_add_events(tr);
9579
9580         list_add(&tr->list, &ftrace_trace_arrays);
9581
9582         tr->ref++;
9583
9584         return tr;
9585
9586  out_free_tr:
9587         ftrace_free_ftrace_ops(tr);
9588         free_trace_buffers(tr);
9589         free_cpumask_var(tr->pipe_cpumask);
9590         free_cpumask_var(tr->tracing_cpumask);
9591         kfree(tr->name);
9592         kfree(tr);
9593
9594         return ERR_PTR(ret);
9595 }
9596
9597 static int instance_mkdir(const char *name)
9598 {
9599         struct trace_array *tr;
9600         int ret;
9601
9602         mutex_lock(&event_mutex);
9603         mutex_lock(&trace_types_lock);
9604
9605         ret = -EEXIST;
9606         if (trace_array_find(name))
9607                 goto out_unlock;
9608
9609         tr = trace_array_create(name);
9610
9611         ret = PTR_ERR_OR_ZERO(tr);
9612
9613 out_unlock:
9614         mutex_unlock(&trace_types_lock);
9615         mutex_unlock(&event_mutex);
9616         return ret;
9617 }
9618
9619 /**
9620  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9621  * @name: The name of the trace array to be looked up/created.
9622  *
9623  * Returns a pointer to the trace array with the given name, or
9624  * NULL if it cannot be created.
9625  *
9626  * NOTE: This function increments the reference counter associated with the
9627  * trace array returned. This makes sure it cannot be freed while in use.
9628  * Use trace_array_put() once the trace array is no longer needed.
9629  * If the trace_array is to be freed, trace_array_destroy() needs to
9630  * be called after the trace_array_put(), or simply let user space delete
9631  * it from the tracefs instances directory. But until the
9632  * trace_array_put() is called, user space cannot delete it.
9633  *
9634  */
9635 struct trace_array *trace_array_get_by_name(const char *name)
9636 {
9637         struct trace_array *tr;
9638
9639         mutex_lock(&event_mutex);
9640         mutex_lock(&trace_types_lock);
9641
9642         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9643                 if (tr->name && strcmp(tr->name, name) == 0)
9644                         goto out_unlock;
9645         }
9646
9647         tr = trace_array_create(name);
9648
9649         if (IS_ERR(tr))
9650                 tr = NULL;
9651 out_unlock:
9652         if (tr)
9653                 tr->ref++;
9654
9655         mutex_unlock(&trace_types_lock);
9656         mutex_unlock(&event_mutex);
9657         return tr;
9658 }
9659 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9660
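/*
 * Minimal sketch of the lifecycle described above, as a loadable module
 * might use it ("my_instance" is a made-up name): look up or create the
 * instance on load, and on unload drop the reference and then remove the
 * instance, in the order the comment above requires.
 */
#if 0	/* example only */
static struct trace_array *my_tr;

static int __init example_instance_init(void)
{
	my_tr = trace_array_get_by_name("my_instance");
	if (!my_tr)
		return -ENOMEM;

	/* The instance now shows up under tracefs instances/my_instance. */
	return 0;
}

static void __exit example_instance_exit(void)
{
	/* Drop our reference first, then destroy the instance. */
	trace_array_put(my_tr);
	trace_array_destroy(my_tr);
}

module_init(example_instance_init);
module_exit(example_instance_exit);
#endif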
9661 static int __remove_instance(struct trace_array *tr)
9662 {
9663         int i;
9664
9665         /* Reference counter for a newly created trace array = 1. */
9666         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9667                 return -EBUSY;
9668
9669         list_del(&tr->list);
9670
9671         /* Disable all the flags that were enabled coming in */
9672         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9673                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9674                         set_tracer_flag(tr, 1 << i, 0);
9675         }
9676
9677         tracing_set_nop(tr);
9678         clear_ftrace_function_probes(tr);
9679         event_trace_del_tracer(tr);
9680         ftrace_clear_pids(tr);
9681         ftrace_destroy_function_files(tr);
9682         tracefs_remove(tr->dir);
9683         free_percpu(tr->last_func_repeats);
9684         free_trace_buffers(tr);
9685         clear_tracing_err_log(tr);
9686
9687         for (i = 0; i < tr->nr_topts; i++) {
9688                 kfree(tr->topts[i].topts);
9689         }
9690         kfree(tr->topts);
9691
9692         free_cpumask_var(tr->pipe_cpumask);
9693         free_cpumask_var(tr->tracing_cpumask);
9694         kfree(tr->name);
9695         kfree(tr);
9696
9697         return 0;
9698 }
9699
9700 int trace_array_destroy(struct trace_array *this_tr)
9701 {
9702         struct trace_array *tr;
9703         int ret;
9704
9705         if (!this_tr)
9706                 return -EINVAL;
9707
9708         mutex_lock(&event_mutex);
9709         mutex_lock(&trace_types_lock);
9710
9711         ret = -ENODEV;
9712
9713         /* Make sure the trace array exists before destroying it. */
9714         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9715                 if (tr == this_tr) {
9716                         ret = __remove_instance(tr);
9717                         break;
9718                 }
9719         }
9720
9721         mutex_unlock(&trace_types_lock);
9722         mutex_unlock(&event_mutex);
9723
9724         return ret;
9725 }
9726 EXPORT_SYMBOL_GPL(trace_array_destroy);
9727
9728 static int instance_rmdir(const char *name)
9729 {
9730         struct trace_array *tr;
9731         int ret;
9732
9733         mutex_lock(&event_mutex);
9734         mutex_lock(&trace_types_lock);
9735
9736         ret = -ENODEV;
9737         tr = trace_array_find(name);
9738         if (tr)
9739                 ret = __remove_instance(tr);
9740
9741         mutex_unlock(&trace_types_lock);
9742         mutex_unlock(&event_mutex);
9743
9744         return ret;
9745 }
9746
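/*
 * instance_mkdir() and instance_rmdir() above back the tracefs "instances"
 * directory, so from user space an instance is created and removed with
 * plain mkdir(2) and rmdir(2).  Illustrative sketch, assuming the default
 * mount point /sys/kernel/tracing:
 */
#if 0	/* example only */
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *dir = "/sys/kernel/tracing/instances/example";

	if (mkdir(dir, 0755) != 0) {	/* ends up in instance_mkdir("example") */
		perror("mkdir");
		return 1;
	}
	/* ... use the per-instance trace, trace_pipe, events/ files ... */
	if (rmdir(dir) != 0) {		/* ends up in instance_rmdir("example") */
		perror("rmdir");
		return 1;
	}
	return 0;
}
#endif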
9747 static __init void create_trace_instances(struct dentry *d_tracer)
9748 {
9749         struct trace_array *tr;
9750
9751         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9752                                                          instance_mkdir,
9753                                                          instance_rmdir);
9754         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9755                 return;
9756
9757         mutex_lock(&event_mutex);
9758         mutex_lock(&trace_types_lock);
9759
9760         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9761                 if (!tr->name)
9762                         continue;
9763                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9764                              "Failed to create instance directory\n"))
9765                         break;
9766         }
9767
9768         mutex_unlock(&trace_types_lock);
9769         mutex_unlock(&event_mutex);
9770 }
9771
9772 static void
9773 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9774 {
9775         struct trace_event_file *file;
9776         int cpu;
9777
9778         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9779                         tr, &show_traces_fops);
9780
9781         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9782                         tr, &set_tracer_fops);
9783
9784         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9785                           tr, &tracing_cpumask_fops);
9786
9787         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9788                           tr, &tracing_iter_fops);
9789
9790         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9791                           tr, &tracing_fops);
9792
9793         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9794                           tr, &tracing_pipe_fops);
9795
9796         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9797                           tr, &tracing_entries_fops);
9798
9799         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9800                           tr, &tracing_total_entries_fops);
9801
9802         trace_create_file("free_buffer", 0200, d_tracer,
9803                           tr, &tracing_free_buffer_fops);
9804
9805         trace_create_file("trace_marker", 0220, d_tracer,
9806                           tr, &tracing_mark_fops);
9807
9808         file = __find_event_file(tr, "ftrace", "print");
9809         if (file && file->ef)
9810                 eventfs_add_file("trigger", TRACE_MODE_WRITE, file->ef,
9811                                   file, &event_trigger_fops);
9812         tr->trace_marker_file = file;
9813
9814         trace_create_file("trace_marker_raw", 0220, d_tracer,
9815                           tr, &tracing_mark_raw_fops);
9816
9817         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9818                           &trace_clock_fops);
9819
9820         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9821                           tr, &rb_simple_fops);
9822
9823         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9824                           &trace_time_stamp_mode_fops);
9825
9826         tr->buffer_percent = 50;
9827
9828         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9829                         tr, &buffer_percent_fops);
9830
9831         create_trace_options_dir(tr);
9832
9833 #ifdef CONFIG_TRACER_MAX_TRACE
9834         trace_create_maxlat_file(tr, d_tracer);
9835 #endif
9836
9837         if (ftrace_create_function_files(tr, d_tracer))
9838                 MEM_FAIL(1, "Could not allocate function filter files");
9839
9840 #ifdef CONFIG_TRACER_SNAPSHOT
9841         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9842                           tr, &snapshot_fops);
9843 #endif
9844
9845         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9846                           tr, &tracing_err_log_fops);
9847
9848         for_each_tracing_cpu(cpu)
9849                 tracing_init_tracefs_percpu(tr, cpu);
9850
9851         ftrace_init_tracefs(tr, d_tracer);
9852 }
9853
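/*
 * One of the per-instance files created above is "trace_marker"; writing
 * to it injects a text marker (a "print" event) into the ring buffer.
 * Illustrative user-space sketch, assuming the default tracefs mount at
 * /sys/kernel/tracing (use instances/<name>/trace_marker for a
 * sub-instance):
 */
#if 0	/* example only */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void trace_mark(const char *msg)
{
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return;
	ret = write(fd, msg, strlen(msg));
	(void)ret;		/* best effort; the marker is advisory */
	close(fd);
}
#endif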
9854 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9855 {
9856         struct vfsmount *mnt;
9857         struct file_system_type *type;
9858
9859         /*
9860          * To maintain backward compatibility for tools that mount
9861          * debugfs to get to the tracing facility, tracefs is automatically
9862          * mounted to the debugfs/tracing directory.
9863          */
9864         type = get_fs_type("tracefs");
9865         if (!type)
9866                 return NULL;
9867         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9868         put_filesystem(type);
9869         if (IS_ERR(mnt))
9870                 return NULL;
9871         mntget(mnt);
9872
9873         return mnt;
9874 }
9875
9876 /**
9877  * tracing_init_dentry - initialize top level trace array
9878  *
9879  * This is called when creating files or directories in the tracing
9880  * directory. It is called via fs_initcall() by any of the boot up code,
9881  * and returns 0 on success or a negative error if tracing is unavailable.
9882  */
9883 int tracing_init_dentry(void)
9884 {
9885         struct trace_array *tr = &global_trace;
9886
9887         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9888                 pr_warn("Tracing disabled due to lockdown\n");
9889                 return -EPERM;
9890         }
9891
9892         /* The top level trace array uses NULL as parent */
9893         if (tr->dir)
9894                 return 0;
9895
9896         if (WARN_ON(!tracefs_initialized()))
9897                 return -ENODEV;
9898
9899         /*
9900          * As there may still be users that expect the tracing
9901          * files to exist in debugfs/tracing, we must automount
9902          * the tracefs file system there, so older tools still
9903          * work with the newer kernel.
9904          */
9905         tr->dir = debugfs_create_automount("tracing", NULL,
9906                                            trace_automount, NULL);
9907
9908         return 0;
9909 }
9910
9911 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9912 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9913
9914 static struct workqueue_struct *eval_map_wq __initdata;
9915 static struct work_struct eval_map_work __initdata;
9916 static struct work_struct tracerfs_init_work __initdata;
9917
9918 static void __init eval_map_work_func(struct work_struct *work)
9919 {
9920         int len;
9921
9922         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9923         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9924 }
9925
9926 static int __init trace_eval_init(void)
9927 {
9928         INIT_WORK(&eval_map_work, eval_map_work_func);
9929
9930         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9931         if (!eval_map_wq) {
9932                 pr_err("Unable to allocate eval_map_wq\n");
9933                 /* Do work here */
9934                 eval_map_work_func(&eval_map_work);
9935                 return -ENOMEM;
9936         }
9937
9938         queue_work(eval_map_wq, &eval_map_work);
9939         return 0;
9940 }
9941
9942 subsys_initcall(trace_eval_init);
9943
9944 static int __init trace_eval_sync(void)
9945 {
9946         /* Make sure the eval map updates are finished */
9947         if (eval_map_wq)
9948                 destroy_workqueue(eval_map_wq);
9949         return 0;
9950 }
9951
9952 late_initcall_sync(trace_eval_sync);
9953
9954
9955 #ifdef CONFIG_MODULES
9956 static void trace_module_add_evals(struct module *mod)
9957 {
9958         if (!mod->num_trace_evals)
9959                 return;
9960
9961         /*
9962          * Modules with bad taint do not have events created, do
9963          * not bother with enums either.
9964          */
9965         if (trace_module_has_bad_taint(mod))
9966                 return;
9967
9968         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9969 }
9970
9971 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9972 static void trace_module_remove_evals(struct module *mod)
9973 {
9974         union trace_eval_map_item *map;
9975         union trace_eval_map_item **last = &trace_eval_maps;
9976
9977         if (!mod->num_trace_evals)
9978                 return;
9979
9980         mutex_lock(&trace_eval_mutex);
9981
9982         map = trace_eval_maps;
9983
9984         while (map) {
9985                 if (map->head.mod == mod)
9986                         break;
9987                 map = trace_eval_jmp_to_tail(map);
9988                 last = &map->tail.next;
9989                 map = map->tail.next;
9990         }
9991         if (!map)
9992                 goto out;
9993
9994         *last = trace_eval_jmp_to_tail(map)->tail.next;
9995         kfree(map);
9996  out:
9997         mutex_unlock(&trace_eval_mutex);
9998 }
9999 #else
10000 static inline void trace_module_remove_evals(struct module *mod) { }
10001 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10002
10003 static int trace_module_notify(struct notifier_block *self,
10004                                unsigned long val, void *data)
10005 {
10006         struct module *mod = data;
10007
10008         switch (val) {
10009         case MODULE_STATE_COMING:
10010                 trace_module_add_evals(mod);
10011                 break;
10012         case MODULE_STATE_GOING:
10013                 trace_module_remove_evals(mod);
10014                 break;
10015         }
10016
10017         return NOTIFY_OK;
10018 }
10019
10020 static struct notifier_block trace_module_nb = {
10021         .notifier_call = trace_module_notify,
10022         .priority = 0,
10023 };
10024 #endif /* CONFIG_MODULES */
10025
10026 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10027 {
10028
10029         event_trace_init();
10030
10031         init_tracer_tracefs(&global_trace, NULL);
10032         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10033
10034         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10035                         &global_trace, &tracing_thresh_fops);
10036
10037         trace_create_file("README", TRACE_MODE_READ, NULL,
10038                         NULL, &tracing_readme_fops);
10039
10040         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10041                         NULL, &tracing_saved_cmdlines_fops);
10042
10043         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10044                           NULL, &tracing_saved_cmdlines_size_fops);
10045
10046         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10047                         NULL, &tracing_saved_tgids_fops);
10048
10049         trace_create_eval_file(NULL);
10050
10051 #ifdef CONFIG_MODULES
10052         register_module_notifier(&trace_module_nb);
10053 #endif
10054
10055 #ifdef CONFIG_DYNAMIC_FTRACE
10056         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10057                         NULL, &tracing_dyn_info_fops);
10058 #endif
10059
10060         create_trace_instances(NULL);
10061
10062         update_tracer_options(&global_trace);
10063 }
10064
10065 static __init int tracer_init_tracefs(void)
10066 {
10067         int ret;
10068
10069         trace_access_lock_init();
10070
10071         ret = tracing_init_dentry();
10072         if (ret)
10073                 return 0;
10074
10075         if (eval_map_wq) {
10076                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10077                 queue_work(eval_map_wq, &tracerfs_init_work);
10078         } else {
10079                 tracer_init_tracefs_work_func(NULL);
10080         }
10081
10082         rv_init_interface();
10083
10084         return 0;
10085 }
10086
10087 fs_initcall(tracer_init_tracefs);
10088
10089 static int trace_die_panic_handler(struct notifier_block *self,
10090                                 unsigned long ev, void *unused);
10091
10092 static struct notifier_block trace_panic_notifier = {
10093         .notifier_call = trace_die_panic_handler,
10094         .priority = INT_MAX - 1,
10095 };
10096
10097 static struct notifier_block trace_die_notifier = {
10098         .notifier_call = trace_die_panic_handler,
10099         .priority = INT_MAX - 1,
10100 };
10101
10102 /*
10103  * The idea is to execute the following die/panic callback early, in order
10104  * to avoid showing irrelevant information in the trace (like other panic
10105  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10106  * warnings get disabled (to prevent potential log flooding).
10107  */
10108 static int trace_die_panic_handler(struct notifier_block *self,
10109                                 unsigned long ev, void *unused)
10110 {
10111         if (!ftrace_dump_on_oops)
10112                 return NOTIFY_DONE;
10113
10114         /* The die notifier requires DIE_OOPS to trigger */
10115         if (self == &trace_die_notifier && ev != DIE_OOPS)
10116                 return NOTIFY_DONE;
10117
10118         ftrace_dump(ftrace_dump_on_oops);
10119
10120         return NOTIFY_DONE;
10121 }
10122
10123 /*
10124  * printk is set to a max of 1024; we really don't need it that big.
10125  * Nothing should be printing 1000 characters anyway.
10126  */
10127 #define TRACE_MAX_PRINT         1000
10128
10129 /*
10130  * Define here KERN_TRACE so that we have one place to modify
10131  * it if we decide to change what log level the ftrace dump
10132  * should be at.
10133  */
10134 #define KERN_TRACE              KERN_EMERG
10135
10136 void
10137 trace_printk_seq(struct trace_seq *s)
10138 {
10139         /* Probably should print a warning here. */
10140         if (s->seq.len >= TRACE_MAX_PRINT)
10141                 s->seq.len = TRACE_MAX_PRINT;
10142
10143         /*
10144          * More paranoid code. Although the buffer size is set to
10145          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10146          * an extra layer of protection.
10147          */
10148         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10149                 s->seq.len = s->seq.size - 1;
10150
10151         /* Should be NUL terminated, but we are paranoid. */
10152         s->buffer[s->seq.len] = 0;
10153
10154         printk(KERN_TRACE "%s", s->buffer);
10155
10156         trace_seq_init(s);
10157 }
10158
10159 void trace_init_global_iter(struct trace_iterator *iter)
10160 {
10161         iter->tr = &global_trace;
10162         iter->trace = iter->tr->current_trace;
10163         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10164         iter->array_buffer = &global_trace.array_buffer;
10165
10166         if (iter->trace && iter->trace->open)
10167                 iter->trace->open(iter);
10168
10169         /* Annotate start of buffers if we had overruns */
10170         if (ring_buffer_overruns(iter->array_buffer->buffer))
10171                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10172
10173         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10174         if (trace_clocks[iter->tr->clock_id].in_ns)
10175                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10176
10177         /* Can not use kmalloc for iter.temp and iter.fmt */
10178         iter->temp = static_temp_buf;
10179         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10180         iter->fmt = static_fmt_buf;
10181         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10182 }
10183
10184 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10185 {
10186         /* use static because iter can be a bit big for the stack */
10187         static struct trace_iterator iter;
10188         static atomic_t dump_running;
10189         struct trace_array *tr = &global_trace;
10190         unsigned int old_userobj;
10191         unsigned long flags;
10192         int cnt = 0, cpu;
10193
10194         /* Only allow one dump user at a time. */
10195         if (atomic_inc_return(&dump_running) != 1) {
10196                 atomic_dec(&dump_running);
10197                 return;
10198         }
10199
10200         /*
10201          * Always turn off tracing when we dump.
10202          * We don't need to show trace output of what happens
10203          * between multiple crashes.
10204          *
10205          * If the user does a sysrq-z, then they can re-enable
10206          * tracing with echo 1 > tracing_on.
10207          */
10208         tracing_off();
10209
10210         local_irq_save(flags);
10211
10212         /* Simulate the iterator */
10213         trace_init_global_iter(&iter);
10214
10215         for_each_tracing_cpu(cpu) {
10216                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10217         }
10218
10219         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10220
10221         /* don't look at user memory in panic mode */
10222         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10223
10224         switch (oops_dump_mode) {
10225         case DUMP_ALL:
10226                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10227                 break;
10228         case DUMP_ORIG:
10229                 iter.cpu_file = raw_smp_processor_id();
10230                 break;
10231         case DUMP_NONE:
10232                 goto out_enable;
10233         default:
10234                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10235                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10236         }
10237
10238         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10239
10240         /* Did function tracer already get disabled? */
10241         if (ftrace_is_dead()) {
10242                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10243                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10244         }
10245
10246         /*
10247          * We need to stop all tracing on all CPUs to read
10248          * the next buffer. This is a bit expensive, but is
10249          * not done often. We print everything we can read,
10250          * and then release the locks again.
10251          */
10252
10253         while (!trace_empty(&iter)) {
10254
10255                 if (!cnt)
10256                         printk(KERN_TRACE "---------------------------------\n");
10257
10258                 cnt++;
10259
10260                 trace_iterator_reset(&iter);
10261                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10262
10263                 if (trace_find_next_entry_inc(&iter) != NULL) {
10264                         int ret;
10265
10266                         ret = print_trace_line(&iter);
10267                         if (ret != TRACE_TYPE_NO_CONSUME)
10268                                 trace_consume(&iter);
10269                 }
10270                 touch_nmi_watchdog();
10271
10272                 trace_printk_seq(&iter.seq);
10273         }
10274
10275         if (!cnt)
10276                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10277         else
10278                 printk(KERN_TRACE "---------------------------------\n");
10279
10280  out_enable:
10281         tr->trace_flags |= old_userobj;
10282
10283         for_each_tracing_cpu(cpu) {
10284                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10285         }
10286         atomic_dec(&dump_running);
10287         local_irq_restore(flags);
10288 }
10289 EXPORT_SYMBOL_GPL(ftrace_dump);
10290
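/*
 * Since ftrace_dump() is exported, other kernel code can dump the ring
 * buffer when it hits a fatal condition.  Hypothetical sketch of a driver
 * error path (my_device_fatal_error() is made up):
 */
#if 0	/* example only */
static void my_device_fatal_error(void)
{
	/* Dump every CPU's buffer; DUMP_ORIG would dump only this CPU. */
	ftrace_dump(DUMP_ALL);
}
#endif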
10291 #define WRITE_BUFSIZE  4096
10292
10293 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10294                                 size_t count, loff_t *ppos,
10295                                 int (*createfn)(const char *))
10296 {
10297         char *kbuf, *buf, *tmp;
10298         int ret = 0;
10299         size_t done = 0;
10300         size_t size;
10301
10302         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10303         if (!kbuf)
10304                 return -ENOMEM;
10305
10306         while (done < count) {
10307                 size = count - done;
10308
10309                 if (size >= WRITE_BUFSIZE)
10310                         size = WRITE_BUFSIZE - 1;
10311
10312                 if (copy_from_user(kbuf, buffer + done, size)) {
10313                         ret = -EFAULT;
10314                         goto out;
10315                 }
10316                 kbuf[size] = '\0';
10317                 buf = kbuf;
10318                 do {
10319                         tmp = strchr(buf, '\n');
10320                         if (tmp) {
10321                                 *tmp = '\0';
10322                                 size = tmp - buf + 1;
10323                         } else {
10324                                 size = strlen(buf);
10325                                 if (done + size < count) {
10326                                         if (buf != kbuf)
10327                                                 break;
10328                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10329                                         pr_warn("Line length is too long: Should be less than %d\n",
10330                                                 WRITE_BUFSIZE - 2);
10331                                         ret = -EINVAL;
10332                                         goto out;
10333                                 }
10334                         }
10335                         done += size;
10336
10337                         /* Remove comments */
10338                         tmp = strchr(buf, '#');
10339
10340                         if (tmp)
10341                                 *tmp = '\0';
10342
10343                         ret = createfn(buf);
10344                         if (ret)
10345                                 goto out;
10346                         buf += size;
10347
10348                 } while (done < count);
10349         }
10350         ret = done;
10351
10352 out:
10353         kfree(kbuf);
10354
10355         return ret;
10356 }
10357
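/*
 * Typical use of trace_parse_run_command(): a tracefs file's ->write()
 * handler hands each newline-separated, '#'-stripped line to a callback.
 * Hypothetical sketch (create_example_cmd() is made up; the kprobe and
 * uprobe event files follow this pattern with their own create functions):
 */
#if 0	/* example only */
static int create_example_cmd(const char *raw_command)
{
	pr_info("example command: %s\n", raw_command);
	return 0;
}

static ssize_t example_cmd_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       create_example_cmd);
}
#endif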
10358 #ifdef CONFIG_TRACER_MAX_TRACE
10359 __init static bool tr_needs_alloc_snapshot(const char *name)
10360 {
10361         char *test;
10362         int len = strlen(name);
10363         bool ret;
10364
10365         if (!boot_snapshot_index)
10366                 return false;
10367
10368         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10369             boot_snapshot_info[len] == '\t')
10370                 return true;
10371
10372         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10373         if (!test)
10374                 return false;
10375
10376         sprintf(test, "\t%s\t", name);
10377         ret = strstr(boot_snapshot_info, test) != NULL;
10378         kfree(test);
10379         return ret;
10380 }
10381
10382 __init static void do_allocate_snapshot(const char *name)
10383 {
10384         if (!tr_needs_alloc_snapshot(name))
10385                 return;
10386
10387         /*
10388          * When allocate_snapshot is set, the next call to
10389          * allocate_trace_buffers() (called by trace_array_get_by_name())
10390          * will allocate the snapshot buffer. That will also clear
10391          * this flag.
10392          */
10393         allocate_snapshot = true;
10394 }
10395 #else
10396 static inline void do_allocate_snapshot(const char *name) { }
10397 #endif
10398
10399 __init static void enable_instances(void)
10400 {
10401         struct trace_array *tr;
10402         char *curr_str;
10403         char *str;
10404         char *tok;
10405
10406         /* A tab is always appended */
10407         boot_instance_info[boot_instance_index - 1] = '\0';
10408         str = boot_instance_info;
10409
10410         while ((curr_str = strsep(&str, "\t"))) {
10411
10412                 tok = strsep(&curr_str, ",");
10413
10414                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10415                         do_allocate_snapshot(tok);
10416
10417                 tr = trace_array_get_by_name(tok);
10418                 if (!tr) {
10419                         pr_warn("Failed to create instance buffer %s\n", tok);
10420                         continue;
10421                 }
10422                 /* Allow user space to delete it */
10423                 trace_array_put(tr);
10424
10425                 while ((tok = strsep(&curr_str, ","))) {
10426                         early_enable_events(tr, tok, true);
10427                 }
10428         }
10429 }
10430
10431 __init static int tracer_alloc_buffers(void)
10432 {
10433         int ring_buf_size;
10434         int ret = -ENOMEM;
10435
10436
10437         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10438                 pr_warn("Tracing disabled due to lockdown\n");
10439                 return -EPERM;
10440         }
10441
10442         /*
10443          * Make sure we don't accidentally add more trace options
10444          * than we have bits for.
10445          */
10446         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10447
10448         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10449                 goto out;
10450
10451         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10452                 goto out_free_buffer_mask;
10453
10454         /* Only allocate trace_printk buffers if a trace_printk exists */
10455         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10456                 /* Must be called before global_trace.buffer is allocated */
10457                 trace_printk_init_buffers();
10458
10459         /* To save memory, keep the ring buffer size to its minimum */
10460         if (ring_buffer_expanded)
10461                 ring_buf_size = trace_buf_size;
10462         else
10463                 ring_buf_size = 1;
10464
10465         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10466         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10467
10468         raw_spin_lock_init(&global_trace.start_lock);
10469
10470         /*
10471          * The prepare callback allocates some memory for the ring buffer. We
10472          * don't free the buffer if the CPU goes down. If we were to free
10473          * the buffer, then the user would lose any trace that was in the
10474          * buffer. The memory will be removed once the "instance" is removed.
10475          */
10476         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10477                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10478                                       NULL);
10479         if (ret < 0)
10480                 goto out_free_cpumask;
10481         /* Used for event triggers */
10482         ret = -ENOMEM;
10483         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10484         if (!temp_buffer)
10485                 goto out_rm_hp_state;
10486
10487         if (trace_create_savedcmd() < 0)
10488                 goto out_free_temp_buffer;
10489
10490         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10491                 goto out_free_savedcmd;
10492
10493         /* TODO: make the number of buffers hot pluggable with CPUS */
10494         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10495                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10496                 goto out_free_pipe_cpumask;
10497         }
10498         if (global_trace.buffer_disabled)
10499                 tracing_off();
10500
10501         if (trace_boot_clock) {
10502                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10503                 if (ret < 0)
10504                         pr_warn("Trace clock %s not defined, going back to default\n",
10505                                 trace_boot_clock);
10506         }
10507
10508         /*
10509          * register_tracer() might reference current_trace, so it
10510          * needs to be set before we register anything. This is
10511          * just a bootstrap of current_trace anyway.
10512          */
10513         global_trace.current_trace = &nop_trace;
10514
10515         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10516
10517         ftrace_init_global_array_ops(&global_trace);
10518
10519         init_trace_flags_index(&global_trace);
10520
10521         register_tracer(&nop_trace);
10522
10523         /* Function tracing may start here (via kernel command line) */
10524         init_function_trace();
10525
10526         /* All seems OK, enable tracing */
10527         tracing_disabled = 0;
10528
10529         atomic_notifier_chain_register(&panic_notifier_list,
10530                                        &trace_panic_notifier);
10531
10532         register_die_notifier(&trace_die_notifier);
10533
10534         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10535
10536         INIT_LIST_HEAD(&global_trace.systems);
10537         INIT_LIST_HEAD(&global_trace.events);
10538         INIT_LIST_HEAD(&global_trace.hist_vars);
10539         INIT_LIST_HEAD(&global_trace.err_log);
10540         list_add(&global_trace.list, &ftrace_trace_arrays);
10541
10542         apply_trace_boot_options();
10543
10544         register_snapshot_cmd();
10545
10546         test_can_verify();
10547
10548         return 0;
10549
10550 out_free_pipe_cpumask:
10551         free_cpumask_var(global_trace.pipe_cpumask);
10552 out_free_savedcmd:
10553         free_saved_cmdlines_buffer(savedcmd);
10554 out_free_temp_buffer:
10555         ring_buffer_free(temp_buffer);
10556 out_rm_hp_state:
10557         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10558 out_free_cpumask:
10559         free_cpumask_var(global_trace.tracing_cpumask);
10560 out_free_buffer_mask:
10561         free_cpumask_var(tracing_buffer_mask);
10562 out:
10563         return ret;
10564 }
10565
10566 void __init ftrace_boot_snapshot(void)
10567 {
10568 #ifdef CONFIG_TRACER_MAX_TRACE
10569         struct trace_array *tr;
10570
10571         if (!snapshot_at_boot)
10572                 return;
10573
10574         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10575                 if (!tr->allocated_snapshot)
10576                         continue;
10577
10578                 tracing_snapshot_instance(tr);
10579                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10580         }
10581 #endif
10582 }
10583
10584 void __init early_trace_init(void)
10585 {
10586         if (tracepoint_printk) {
10587                 tracepoint_print_iter =
10588                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10589                 if (MEM_FAIL(!tracepoint_print_iter,
10590                              "Failed to allocate trace iterator\n"))
10591                         tracepoint_printk = 0;
10592                 else
10593                         static_key_enable(&tracepoint_printk_key.key);
10594         }
10595         tracer_alloc_buffers();
10596
10597         init_events();
10598 }
10599
10600 void __init trace_init(void)
10601 {
10602         trace_event_init();
10603
10604         if (boot_instance_index)
10605                 enable_instances();
10606 }
10607
10608 __init static void clear_boot_tracer(void)
10609 {
10610         /*
10611          * The default bootup tracer name lives in an init section.
10612          * This function is called from a late initcall. If the boot
10613          * tracer was never registered, clear it out, to prevent a
10614          * later registration from accessing the memory that is
10615          * about to be freed.
10616          */
10617         if (!default_bootup_tracer)
10618                 return;
10619
10620         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10621                default_bootup_tracer);
10622         default_bootup_tracer = NULL;
10623 }
10624
10625 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10626 __init static void tracing_set_default_clock(void)
10627 {
10628         /* sched_clock_stable() is determined in late_initcall */
10629         if (!trace_boot_clock && !sched_clock_stable()) {
10630                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10631                         pr_warn("Can not set tracing clock due to lockdown\n");
10632                         return;
10633                 }
10634
10635                 printk(KERN_WARNING
10636                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10637                        "If you want to keep using the local clock, then add:\n"
10638                        "  \"trace_clock=local\"\n"
10639                        "on the kernel command line\n");
10640                 tracing_set_clock(&global_trace, "global");
10641         }
10642 }
10643 #else
10644 static inline void tracing_set_default_clock(void) { }
10645 #endif
10646
10647 __init static int late_trace_init(void)
10648 {
10649         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10650                 static_key_disable(&tracepoint_printk_key.key);
10651                 tracepoint_printk = 0;
10652         }
10653
10654         tracing_set_default_clock();
10655         clear_boot_tracer();
10656         return 0;
10657 }
10658
10659 late_initcall_sync(late_trace_init);