kernel/trace/trace.c (platform/kernel/linux-rpi.git, commit 24a5ea9a2cc04d7cc479ac697a96e09d4a15c6f3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will lurk into the ring-buffer to count the
65  * entries inserted during the selftest although some concurrent
66  * insertions into the ring-buffer such as trace_printk could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 1;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 1;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         /* Ignore the "tp_printk_stop_on_boot" param */
256         if (*str == '_')
257                 return 0;
258
259         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
260                 tracepoint_printk = 1;
261         return 1;
262 }
263 __setup("tp_printk", set_tracepoint_printk);
264
265 static int __init set_tracepoint_printk_stop(char *str)
266 {
267         tracepoint_printk_stop_on_boot = true;
268         return 1;
269 }
270 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
271
272 unsigned long long ns2usecs(u64 nsec)
273 {
274         nsec += 500;
275         do_div(nsec, 1000);
276         return nsec;
277 }
278
279 static void
280 trace_process_export(struct trace_export *export,
281                struct ring_buffer_event *event, int flag)
282 {
283         struct trace_entry *entry;
284         unsigned int size = 0;
285
286         if (export->flags & flag) {
287                 entry = ring_buffer_event_data(event);
288                 size = ring_buffer_event_length(event);
289                 export->write(export, entry, size);
290         }
291 }
292
293 static DEFINE_MUTEX(ftrace_export_lock);
294
295 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
296
297 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
298 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
299 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
300
301 static inline void ftrace_exports_enable(struct trace_export *export)
302 {
303         if (export->flags & TRACE_EXPORT_FUNCTION)
304                 static_branch_inc(&trace_function_exports_enabled);
305
306         if (export->flags & TRACE_EXPORT_EVENT)
307                 static_branch_inc(&trace_event_exports_enabled);
308
309         if (export->flags & TRACE_EXPORT_MARKER)
310                 static_branch_inc(&trace_marker_exports_enabled);
311 }
312
313 static inline void ftrace_exports_disable(struct trace_export *export)
314 {
315         if (export->flags & TRACE_EXPORT_FUNCTION)
316                 static_branch_dec(&trace_function_exports_enabled);
317
318         if (export->flags & TRACE_EXPORT_EVENT)
319                 static_branch_dec(&trace_event_exports_enabled);
320
321         if (export->flags & TRACE_EXPORT_MARKER)
322                 static_branch_dec(&trace_marker_exports_enabled);
323 }
324
325 static void ftrace_exports(struct ring_buffer_event *event, int flag)
326 {
327         struct trace_export *export;
328
329         preempt_disable_notrace();
330
331         export = rcu_dereference_raw_check(ftrace_exports_list);
332         while (export) {
333                 trace_process_export(export, event, flag);
334                 export = rcu_dereference_raw_check(export->next);
335         }
336
337         preempt_enable_notrace();
338 }
339
340 static inline void
341 add_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343         rcu_assign_pointer(export->next, *list);
344         /*
345          * We are entering export into the list but another
346          * CPU might be walking that list. We need to make sure
347          * the export->next pointer is valid before another CPU sees
348          * the export pointer included in the list.
349          */
350         rcu_assign_pointer(*list, export);
351 }
352
353 static inline int
354 rm_trace_export(struct trace_export **list, struct trace_export *export)
355 {
356         struct trace_export **p;
357
358         for (p = list; *p != NULL; p = &(*p)->next)
359                 if (*p == export)
360                         break;
361
362         if (*p != export)
363                 return -1;
364
365         rcu_assign_pointer(*p, (*p)->next);
366
367         return 0;
368 }
369
370 static inline void
371 add_ftrace_export(struct trace_export **list, struct trace_export *export)
372 {
373         ftrace_exports_enable(export);
374
375         add_trace_export(list, export);
376 }
377
378 static inline int
379 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
380 {
381         int ret;
382
383         ret = rm_trace_export(list, export);
384         ftrace_exports_disable(export);
385
386         return ret;
387 }
388
389 int register_ftrace_export(struct trace_export *export)
390 {
391         if (WARN_ON_ONCE(!export->write))
392                 return -1;
393
394         mutex_lock(&ftrace_export_lock);
395
396         add_ftrace_export(&ftrace_exports_list, export);
397
398         mutex_unlock(&ftrace_export_lock);
399
400         return 0;
401 }
402 EXPORT_SYMBOL_GPL(register_ftrace_export);
403
404 int unregister_ftrace_export(struct trace_export *export)
405 {
406         int ret;
407
408         mutex_lock(&ftrace_export_lock);
409
410         ret = rm_ftrace_export(&ftrace_exports_list, export);
411
412         mutex_unlock(&ftrace_export_lock);
413
414         return ret;
415 }
416 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
417
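/*
 * A minimal usage sketch (not part of the original file): a client of this
 * API fills in a struct trace_export from <linux/trace.h> and hooks it into
 * the export list. The callback name and the pr_info() body below are
 * illustrative assumptions, not kernel code.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_info("exporting %u bytes of trace data\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	// on init:  register_ftrace_export(&my_export);
 *	// on exit:  unregister_ftrace_export(&my_export);
 */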
418 /* trace_flags holds trace_options default values */
419 #define TRACE_DEFAULT_FLAGS                                             \
420         (FUNCTION_DEFAULT_FLAGS |                                       \
421          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
422          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
423          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
424          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
425          TRACE_ITER_HASH_PTR)
426
427 /* trace_options that are only supported by global_trace */
428 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
429                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
430
431 /* trace_flags that are default zero for instances */
432 #define ZEROED_TRACE_FLAGS \
433         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
434
435 /*
436  * The global_trace is the descriptor that holds the top-level tracing
437  * buffers for the live tracing.
438  */
439 static struct trace_array global_trace = {
440         .trace_flags = TRACE_DEFAULT_FLAGS,
441 };
442
443 LIST_HEAD(ftrace_trace_arrays);
444
445 int trace_array_get(struct trace_array *this_tr)
446 {
447         struct trace_array *tr;
448         int ret = -ENODEV;
449
450         mutex_lock(&trace_types_lock);
451         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
452                 if (tr == this_tr) {
453                         tr->ref++;
454                         ret = 0;
455                         break;
456                 }
457         }
458         mutex_unlock(&trace_types_lock);
459
460         return ret;
461 }
462
463 static void __trace_array_put(struct trace_array *this_tr)
464 {
465         WARN_ON(!this_tr->ref);
466         this_tr->ref--;
467 }
468
469 /**
470  * trace_array_put - Decrement the reference counter for this trace array.
471  * @this_tr : pointer to the trace array
472  *
473  * NOTE: Use this when we no longer need the trace array returned by
474  * trace_array_get_by_name(). This ensures the trace array can be later
475  * destroyed.
476  *
477  */
478 void trace_array_put(struct trace_array *this_tr)
479 {
480         if (!this_tr)
481                 return;
482
483         mutex_lock(&trace_types_lock);
484         __trace_array_put(this_tr);
485         mutex_unlock(&trace_types_lock);
486 }
487 EXPORT_SYMBOL_GPL(trace_array_put);
488
489 int tracing_check_open_get_tr(struct trace_array *tr)
490 {
491         int ret;
492
493         ret = security_locked_down(LOCKDOWN_TRACEFS);
494         if (ret)
495                 return ret;
496
497         if (tracing_disabled)
498                 return -ENODEV;
499
500         if (tr && trace_array_get(tr) < 0)
501                 return -ENODEV;
502
503         return 0;
504 }
505
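/*
 * Typical open-path pattern for tracefs files (a sketch, not original code;
 * the example_* names are illustrative): the file's ->open() checks the
 * lockdown/tracing state and pins the instance, and the matching
 * ->release() drops the reference.
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int example_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);
 *		return 0;
 *	}
 */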
506 int call_filter_check_discard(struct trace_event_call *call, void *rec,
507                               struct trace_buffer *buffer,
508                               struct ring_buffer_event *event)
509 {
510         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
511             !filter_match_preds(call->filter, rec)) {
512                 __trace_event_discard_commit(buffer, event);
513                 return 1;
514         }
515
516         return 0;
517 }
518
519 /**
520  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
521  * @filtered_pids: The list of pids to check
522  * @search_pid: The PID to find in @filtered_pids
523  *
524  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
525  */
526 bool
527 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
528 {
529         return trace_pid_list_is_set(filtered_pids, search_pid);
530 }
531
532 /**
533  * trace_ignore_this_task - should a task be ignored for tracing
534  * @filtered_pids: The list of pids to check
535  * @filtered_no_pids: The list of pids not to be traced
536  * @task: The task that should be ignored if not filtered
537  *
538  * Checks if @task should be traced or not from @filtered_pids.
539  * Returns true if @task should *NOT* be traced.
540  * Returns false if @task should be traced.
541  */
542 bool
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544                        struct trace_pid_list *filtered_no_pids,
545                        struct task_struct *task)
546 {
547         /*
548          * If filtered_no_pids is not empty, and the task's pid is listed
549          * in filtered_no_pids, then return true.
550          * Otherwise, if filtered_pids is empty, that means we can
551          * trace all tasks. If it has content, then only trace pids
552          * within filtered_pids.
553          */
554
555         return (filtered_pids &&
556                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
557                 (filtered_no_pids &&
558                  trace_find_filtered_pid(filtered_no_pids, task->pid));
559 }
560
561 /**
562  * trace_filter_add_remove_task - Add or remove a task from a pid_list
563  * @pid_list: The list to modify
564  * @self: The current task for fork or NULL for exit
565  * @task: The task to add or remove
566  *
567  * If adding a task, if @self is defined, the task is only added if @self
568  * is also included in @pid_list. This happens on fork and tasks should
569  * only be added when the parent is listed. If @self is NULL, then the
570  * @task pid will be removed from the list, which would happen on exit
571  * of a task.
572  */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574                                   struct task_struct *self,
575                                   struct task_struct *task)
576 {
577         if (!pid_list)
578                 return;
579
580         /* For forks, we only add if the forking task is listed */
581         if (self) {
582                 if (!trace_find_filtered_pid(pid_list, self->pid))
583                         return;
584         }
585
586         /* "self" is set for forks, and NULL for exits */
587         if (self)
588                 trace_pid_list_set(pid_list, task->pid);
589         else
590                 trace_pid_list_clear(pid_list, task->pid);
591 }
592
593 /**
594  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
595  * @pid_list: The pid list to show
596  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
597  * @pos: The position of the file
598  *
599  * This is used by the seq_file "next" operation to iterate the pids
600  * listed in a trace_pid_list structure.
601  *
602  * Returns the pid+1 as we want to display pid of zero, but NULL would
603  * stop the iteration.
604  */
605 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
606 {
607         long pid = (unsigned long)v;
608         unsigned int next;
609
610         (*pos)++;
611
612         /* pid already is +1 of the actual previous bit */
613         if (trace_pid_list_next(pid_list, pid, &next) < 0)
614                 return NULL;
615
616         pid = next;
617
618         /* Return pid + 1 to allow zero to be represented */
619         return (void *)(pid + 1);
620 }
621
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635         unsigned long pid;
636         unsigned int first;
637         loff_t l = 0;
638
639         if (trace_pid_list_first(pid_list, &first) < 0)
640                 return NULL;
641
642         pid = first;
643
644         /* Return pid + 1 so that zero can be the exit value */
645         for (pid++; pid && l < *pos;
646              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
647                 ;
648         return (void *)pid;
649 }
650
651 /**
652  * trace_pid_show - show the current pid in seq_file processing
653  * @m: The seq_file structure to write into
654  * @v: A void pointer of the pid (+1) value to display
655  *
656  * Can be directly used by seq_file operations to display the current
657  * pid value.
658  */
659 int trace_pid_show(struct seq_file *m, void *v)
660 {
661         unsigned long pid = (unsigned long)v - 1;
662
663         seq_printf(m, "%lu\n", pid);
664         return 0;
665 }
666
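/*
 * Sketch of how the trace_pid_start/next/show helpers are typically wired
 * into a seq_file (the p_start/p_next/p_stop names and the file-scope pid
 * list below are illustrative, not part of this file):
 *
 *	static struct trace_pid_list *example_pid_list;
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */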
667 /* 128 should be much more than enough */
668 #define PID_BUF_SIZE            127
669
670 int trace_pid_write(struct trace_pid_list *filtered_pids,
671                     struct trace_pid_list **new_pid_list,
672                     const char __user *ubuf, size_t cnt)
673 {
674         struct trace_pid_list *pid_list;
675         struct trace_parser parser;
676         unsigned long val;
677         int nr_pids = 0;
678         ssize_t read = 0;
679         ssize_t ret;
680         loff_t pos;
681         pid_t pid;
682
683         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
684                 return -ENOMEM;
685
686         /*
687          * Always recreate a new array: the write is an all-or-nothing
688          * operation, so a new array is created whenever the user adds
689          * pids. If the operation fails, then the current list is
690          * not modified.
691          */
692         pid_list = trace_pid_list_alloc();
693         if (!pid_list) {
694                 trace_parser_put(&parser);
695                 return -ENOMEM;
696         }
697
698         if (filtered_pids) {
699                 /* copy the current bits to the new max */
700                 ret = trace_pid_list_first(filtered_pids, &pid);
701                 while (!ret) {
702                         trace_pid_list_set(pid_list, pid);
703                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
704                         nr_pids++;
705                 }
706         }
707
708         ret = 0;
709         while (cnt > 0) {
710
711                 pos = 0;
712
713                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
714                 if (ret < 0)
715                         break;
716
717                 read += ret;
718                 ubuf += ret;
719                 cnt -= ret;
720
721                 if (!trace_parser_loaded(&parser))
722                         break;
723
724                 ret = -EINVAL;
725                 if (kstrtoul(parser.buffer, 0, &val))
726                         break;
727
728                 pid = (pid_t)val;
729
730                 if (trace_pid_list_set(pid_list, pid) < 0) {
731                         ret = -1;
732                         break;
733                 }
734                 nr_pids++;
735
736                 trace_parser_clear(&parser);
737                 ret = 0;
738         }
739         trace_parser_put(&parser);
740
741         if (ret < 0) {
742                 trace_pid_list_free(pid_list);
743                 return ret;
744         }
745
746         if (!nr_pids) {
747                 /* Cleared the list of pids */
748                 trace_pid_list_free(pid_list);
749                 pid_list = NULL;
750         }
751
752         *new_pid_list = pid_list;
753
754         return read;
755 }
756
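/*
 * Illustrative sketch of how a write() handler uses trace_pid_write()
 * (locking and the handling of tr->filtered_pids are simplified here;
 * "some_lock" is a placeholder, and the real users are the set_*_pid
 * file implementations):
 *
 *	struct trace_pid_list *filtered, *new_list;
 *	ssize_t ret;
 *
 *	filtered = rcu_dereference_protected(tr->filtered_pids,
 *					     lockdep_is_held(&some_lock));
 *	ret = trace_pid_write(filtered, &new_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *	rcu_assign_pointer(tr->filtered_pids, new_list);
 *	// the old list is freed after an RCU grace period
 */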
757 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
758 {
759         u64 ts;
760
761         /* Early boot up does not have a buffer yet */
762         if (!buf->buffer)
763                 return trace_clock_local();
764
765         ts = ring_buffer_time_stamp(buf->buffer);
766         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
767
768         return ts;
769 }
770
771 u64 ftrace_now(int cpu)
772 {
773         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
774 }
775
776 /**
777  * tracing_is_enabled - Show if global_trace has been enabled
778  *
779  * Shows if the global trace has been enabled or not. It uses the
780  * mirror flag "buffer_disabled" to be used in fast paths such as for
781  * the irqsoff tracer. But it may be inaccurate due to races. If you
782  * need to know the accurate state, use tracing_is_on() which is a little
783  * slower, but accurate.
784  */
785 int tracing_is_enabled(void)
786 {
787         /*
788          * For quick access (irqsoff uses this in fast path), just
789          * return the mirror variable of the state of the ring buffer.
790          * It's a little racy, but we don't really care.
791          */
792         smp_rmb();
793         return !global_trace.buffer_disabled;
794 }
795
796 /*
797  * trace_buf_size is the size in bytes that is allocated
798  * for a buffer. Note, the number of bytes is always rounded
799  * to page size.
800  *
801  * This number is purposely set to a low number of 16384.
802  * If a dump on oops happens, it will be much appreciated not to
803  * have to wait for all that output. Anyway, this is both boot-time
804  * and run-time configurable.
805  */
806 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
807
808 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
809
810 /* trace_types holds a link list of available tracers. */
811 static struct tracer            *trace_types __read_mostly;
812
813 /*
814  * trace_types_lock is used to protect the trace_types list.
815  */
816 DEFINE_MUTEX(trace_types_lock);
817
818 /*
819  * serialize access to the ring buffer
820  *
821  * The ring buffer serializes readers, but that is only low-level protection.
822  * The validity of the events (returned by ring_buffer_peek() etc.)
823  * is not protected by the ring buffer.
824  *
825  * The content of events may become garbage if we allow another process to
826  * consume these events concurrently:
827  *   A) the page of the consumed events may become a normal page
828  *      (not a reader page) in the ring buffer, and this page will be
829  *      rewritten by the event producer.
830  *   B) The page of the consumed events may become a page for splice_read,
831  *      and this page will be returned to the system.
832  *
833  * These primitives allow multiple processes to access different per-cpu
834  * ring buffers concurrently.
835  *
836  * These primitives don't distinguish read-only and read-consume access.
837  * Multiple read-only accesses are also serialized.
838  */
839
840 #ifdef CONFIG_SMP
841 static DECLARE_RWSEM(all_cpu_access_lock);
842 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
843
844 static inline void trace_access_lock(int cpu)
845 {
846         if (cpu == RING_BUFFER_ALL_CPUS) {
847                 /* gain it for accessing the whole ring buffer. */
848                 down_write(&all_cpu_access_lock);
849         } else {
850                 /* gain it for accessing a cpu ring buffer. */
851
852                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
853                 down_read(&all_cpu_access_lock);
854
855                 /* Secondly block other access to this @cpu ring buffer. */
856                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
857         }
858 }
859
860 static inline void trace_access_unlock(int cpu)
861 {
862         if (cpu == RING_BUFFER_ALL_CPUS) {
863                 up_write(&all_cpu_access_lock);
864         } else {
865                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
866                 up_read(&all_cpu_access_lock);
867         }
868 }
869
870 static inline void trace_access_lock_init(void)
871 {
872         int cpu;
873
874         for_each_possible_cpu(cpu)
875                 mutex_init(&per_cpu(cpu_access_lock, cpu));
876 }
877
878 #else
879
880 static DEFINE_MUTEX(access_lock);
881
882 static inline void trace_access_lock(int cpu)
883 {
884         (void)cpu;
885         mutex_lock(&access_lock);
886 }
887
888 static inline void trace_access_unlock(int cpu)
889 {
890         (void)cpu;
891         mutex_unlock(&access_lock);
892 }
893
894 static inline void trace_access_lock_init(void)
895 {
896 }
897
898 #endif
899
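/*
 * Typical reader-side pattern (a sketch; the real callers are the trace
 * file read/splice paths further down in this file):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	// ... copy or print the event ...
 *	trace_access_unlock(cpu);
 */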
900 #ifdef CONFIG_STACKTRACE
901 static void __ftrace_trace_stack(struct trace_buffer *buffer,
902                                  unsigned int trace_ctx,
903                                  int skip, struct pt_regs *regs);
904 static inline void ftrace_trace_stack(struct trace_array *tr,
905                                       struct trace_buffer *buffer,
906                                       unsigned int trace_ctx,
907                                       int skip, struct pt_regs *regs);
908
909 #else
910 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
911                                         unsigned int trace_ctx,
912                                         int skip, struct pt_regs *regs)
913 {
914 }
915 static inline void ftrace_trace_stack(struct trace_array *tr,
916                                       struct trace_buffer *buffer,
917                                       unsigned long trace_ctx,
918                                       int skip, struct pt_regs *regs)
919 {
920 }
921
922 #endif
923
924 static __always_inline void
925 trace_event_setup(struct ring_buffer_event *event,
926                   int type, unsigned int trace_ctx)
927 {
928         struct trace_entry *ent = ring_buffer_event_data(event);
929
930         tracing_generic_entry_update(ent, type, trace_ctx);
931 }
932
933 static __always_inline struct ring_buffer_event *
934 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
935                           int type,
936                           unsigned long len,
937                           unsigned int trace_ctx)
938 {
939         struct ring_buffer_event *event;
940
941         event = ring_buffer_lock_reserve(buffer, len);
942         if (event != NULL)
943                 trace_event_setup(event, type, trace_ctx);
944
945         return event;
946 }
947
948 void tracer_tracing_on(struct trace_array *tr)
949 {
950         if (tr->array_buffer.buffer)
951                 ring_buffer_record_on(tr->array_buffer.buffer);
952         /*
953          * This flag is looked at when buffers haven't been allocated
954          * yet, or by some tracers (like irqsoff), that just want to
955          * know if the ring buffer has been disabled, but it can handle
956          * races where it gets disabled but we still do a record.
957          * As the check is in the fast path of the tracers, it is more
958          * important to be fast than accurate.
959          */
960         tr->buffer_disabled = 0;
961         /* Make the flag seen by readers */
962         smp_wmb();
963 }
964
965 /**
966  * tracing_on - enable tracing buffers
967  *
968  * This function enables tracing buffers that may have been
969  * disabled with tracing_off.
970  */
971 void tracing_on(void)
972 {
973         tracer_tracing_on(&global_trace);
974 }
975 EXPORT_SYMBOL_GPL(tracing_on);
976
977
978 static __always_inline void
979 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
980 {
981         __this_cpu_write(trace_taskinfo_save, true);
982
983         /* If this is the temp buffer, we need to commit fully */
984         if (this_cpu_read(trace_buffered_event) == event) {
985                 /* Length is in event->array[0] */
986                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
987                 /* Release the temp buffer */
988                 this_cpu_dec(trace_buffered_event_cnt);
989         } else
990                 ring_buffer_unlock_commit(buffer, event);
991 }
992
993 /**
994  * __trace_puts - write a constant string into the trace buffer.
995  * @ip:    The address of the caller
996  * @str:   The constant string to write
997  * @size:  The size of the string.
998  */
999 int __trace_puts(unsigned long ip, const char *str, int size)
1000 {
1001         struct ring_buffer_event *event;
1002         struct trace_buffer *buffer;
1003         struct print_entry *entry;
1004         unsigned int trace_ctx;
1005         int alloc;
1006
1007         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1008                 return 0;
1009
1010         if (unlikely(tracing_selftest_running || tracing_disabled))
1011                 return 0;
1012
1013         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1014
1015         trace_ctx = tracing_gen_ctx();
1016         buffer = global_trace.array_buffer.buffer;
1017         ring_buffer_nest_start(buffer);
1018         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1019                                             trace_ctx);
1020         if (!event) {
1021                 size = 0;
1022                 goto out;
1023         }
1024
1025         entry = ring_buffer_event_data(event);
1026         entry->ip = ip;
1027
1028         memcpy(&entry->buf, str, size);
1029
1030         /* Add a newline if necessary */
1031         if (entry->buf[size - 1] != '\n') {
1032                 entry->buf[size] = '\n';
1033                 entry->buf[size + 1] = '\0';
1034         } else
1035                 entry->buf[size] = '\0';
1036
1037         __buffer_unlock_commit(buffer, event);
1038         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1039  out:
1040         ring_buffer_nest_end(buffer);
1041         return size;
1042 }
1043 EXPORT_SYMBOL_GPL(__trace_puts);
1044
1045 /**
1046  * __trace_bputs - write the pointer to a constant string into trace buffer
1047  * @ip:    The address of the caller
1048  * @str:   The constant string to write to the buffer to
1049  */
1050 int __trace_bputs(unsigned long ip, const char *str)
1051 {
1052         struct ring_buffer_event *event;
1053         struct trace_buffer *buffer;
1054         struct bputs_entry *entry;
1055         unsigned int trace_ctx;
1056         int size = sizeof(struct bputs_entry);
1057         int ret = 0;
1058
1059         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1060                 return 0;
1061
1062         if (unlikely(tracing_selftest_running || tracing_disabled))
1063                 return 0;
1064
1065         trace_ctx = tracing_gen_ctx();
1066         buffer = global_trace.array_buffer.buffer;
1067
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070                                             trace_ctx);
1071         if (!event)
1072                 goto out;
1073
1074         entry = ring_buffer_event_data(event);
1075         entry->ip                       = ip;
1076         entry->str                      = str;
1077
1078         __buffer_unlock_commit(buffer, event);
1079         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1080
1081         ret = 1;
1082  out:
1083         ring_buffer_nest_end(buffer);
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
1087
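/*
 * Note: callers normally reach these two functions through the trace_puts()
 * macro, which selects __trace_bputs() for compile-time constant strings and
 * __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */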
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090                                            void *cond_data)
1091 {
1092         struct tracer *tracer = tr->current_trace;
1093         unsigned long flags;
1094
1095         if (in_nmi()) {
1096                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1098                 return;
1099         }
1100
1101         if (!tr->allocated_snapshot) {
1102                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103                 internal_trace_puts("*** stopping trace here!   ***\n");
1104                 tracing_off();
1105                 return;
1106         }
1107
1108         /* Note, snapshot can not be used when the tracer uses it */
1109         if (tracer->use_max_tr) {
1110                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112                 return;
1113         }
1114
1115         local_irq_save(flags);
1116         update_max_tr(tr, current, smp_processor_id(), cond_data);
1117         local_irq_restore(flags);
1118 }
1119
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122         tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot with either
1133  * a tracing_snapshot_alloc(), or by doing it manually
1134  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, it will stop tracing.
1137  * Basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141         struct trace_array *tr = &global_trace;
1142
1143         tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:         The tracing instance to snapshot
1150  * @cond_data:  The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162         tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:         The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already taken.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182         void *cond_data = NULL;
1183
1184         local_irq_disable();
1185         arch_spin_lock(&tr->max_lock);
1186
1187         if (tr->cond_snapshot)
1188                 cond_data = tr->cond_snapshot->cond_data;
1189
1190         arch_spin_unlock(&tr->max_lock);
1191         local_irq_enable();
1192
1193         return cond_data;
1194 }
1195 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1196
1197 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1198                                         struct array_buffer *size_buf, int cpu_id);
1199 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1200
1201 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1202 {
1203         int ret;
1204
1205         if (!tr->allocated_snapshot) {
1206
1207                 /* allocate spare buffer */
1208                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1209                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1210                 if (ret < 0)
1211                         return ret;
1212
1213                 tr->allocated_snapshot = true;
1214         }
1215
1216         return 0;
1217 }
1218
1219 static void free_snapshot(struct trace_array *tr)
1220 {
1221         /*
1222          * We don't free the ring buffer; instead, we resize it because
1223          * the max_tr ring buffer has some state (e.g. ring->clock) and
1224          * we want to preserve it.
1225          */
1226         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1227         set_buffer_entries(&tr->max_buffer, 1);
1228         tracing_reset_online_cpus(&tr->max_buffer);
1229         tr->allocated_snapshot = false;
1230 }
1231
1232 /**
1233  * tracing_alloc_snapshot - allocate snapshot buffer.
1234  *
1235  * This only allocates the snapshot buffer if it isn't already
1236  * allocated - it doesn't also take a snapshot.
1237  *
1238  * This is meant to be used in cases where the snapshot buffer needs
1239  * to be set up for events that can't sleep but need to be able to
1240  * trigger a snapshot.
1241  */
1242 int tracing_alloc_snapshot(void)
1243 {
1244         struct trace_array *tr = &global_trace;
1245         int ret;
1246
1247         ret = tracing_alloc_snapshot_instance(tr);
1248         WARN_ON(ret < 0);
1249
1250         return ret;
1251 }
1252 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1253
1254 /**
1255  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1256  *
1257  * This is similar to tracing_snapshot(), but it will allocate the
1258  * snapshot buffer if it isn't already allocated. Use this only
1259  * where it is safe to sleep, as the allocation may sleep.
1260  *
1261  * This causes a swap between the snapshot buffer and the current live
1262  * tracing buffer. You can use this to take snapshots of the live
1263  * trace when some condition is triggered, but continue to trace.
1264  */
1265 void tracing_snapshot_alloc(void)
1266 {
1267         int ret;
1268
1269         ret = tracing_alloc_snapshot();
1270         if (ret < 0)
1271                 return;
1272
1273         tracing_snapshot();
1274 }
1275 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1276
1277 /**
1278  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1279  * @tr:         The tracing instance
1280  * @cond_data:  User data to associate with the snapshot
1281  * @update:     Implementation of the cond_snapshot update function
1282  *
1283  * Check whether the conditional snapshot for the given instance has
1284  * already been enabled, or if the current tracer is already using a
1285  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1286  * save the cond_data and update function inside.
1287  *
1288  * Returns 0 if successful, error otherwise.
1289  */
1290 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1291                                  cond_update_fn_t update)
1292 {
1293         struct cond_snapshot *cond_snapshot;
1294         int ret = 0;
1295
1296         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1297         if (!cond_snapshot)
1298                 return -ENOMEM;
1299
1300         cond_snapshot->cond_data = cond_data;
1301         cond_snapshot->update = update;
1302
1303         mutex_lock(&trace_types_lock);
1304
1305         ret = tracing_alloc_snapshot_instance(tr);
1306         if (ret)
1307                 goto fail_unlock;
1308
1309         if (tr->current_trace->use_max_tr) {
1310                 ret = -EBUSY;
1311                 goto fail_unlock;
1312         }
1313
1314         /*
1315          * The cond_snapshot can only change to NULL without the
1316          * trace_types_lock. We don't care if we race with it going
1317          * to NULL, but we want to make sure that it's not set to
1318          * something other than NULL when we get here, which we can
1319          * do safely with only holding the trace_types_lock and not
1320          * having to take the max_lock.
1321          */
1322         if (tr->cond_snapshot) {
1323                 ret = -EBUSY;
1324                 goto fail_unlock;
1325         }
1326
1327         local_irq_disable();
1328         arch_spin_lock(&tr->max_lock);
1329         tr->cond_snapshot = cond_snapshot;
1330         arch_spin_unlock(&tr->max_lock);
1331         local_irq_enable();
1332
1333         mutex_unlock(&trace_types_lock);
1334
1335         return ret;
1336
1337  fail_unlock:
1338         mutex_unlock(&trace_types_lock);
1339         kfree(cond_snapshot);
1340         return ret;
1341 }
1342 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1343
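/*
 * Illustrative flow for conditional snapshots (the my_update() callback and
 * the my_threshold variable are hypothetical, not defined in this file):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return *(unsigned long *)cond_data > 100;
 *	}
 *
 *	// set up:
 *	tracing_snapshot_cond_enable(tr, &my_threshold, my_update);
 *	// at the interesting point; a snapshot is taken only if
 *	// my_update() returns true:
 *	tracing_snapshot_cond(tr, &my_threshold);
 *	// tear down:
 *	tracing_snapshot_cond_disable(tr);
 */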
1344 /**
1345  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1346  * @tr:         The tracing instance
1347  *
1348  * Check whether the conditional snapshot for the given instance is
1349  * enabled; if so, free the cond_snapshot associated with it,
1350  * otherwise return -EINVAL.
1351  *
1352  * Returns 0 if successful, error otherwise.
1353  */
1354 int tracing_snapshot_cond_disable(struct trace_array *tr)
1355 {
1356         int ret = 0;
1357
1358         local_irq_disable();
1359         arch_spin_lock(&tr->max_lock);
1360
1361         if (!tr->cond_snapshot)
1362                 ret = -EINVAL;
1363         else {
1364                 kfree(tr->cond_snapshot);
1365                 tr->cond_snapshot = NULL;
1366         }
1367
1368         arch_spin_unlock(&tr->max_lock);
1369         local_irq_enable();
1370
1371         return ret;
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1374 #else
1375 void tracing_snapshot(void)
1376 {
1377         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot);
1380 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1381 {
1382         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1385 int tracing_alloc_snapshot(void)
1386 {
1387         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1388         return -ENODEV;
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1391 void tracing_snapshot_alloc(void)
1392 {
1393         /* Give warning */
1394         tracing_snapshot();
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1397 void *tracing_cond_snapshot_data(struct trace_array *tr)
1398 {
1399         return NULL;
1400 }
1401 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1402 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1403 {
1404         return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1407 int tracing_snapshot_cond_disable(struct trace_array *tr)
1408 {
1409         return false;
1410 }
1411 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1412 #endif /* CONFIG_TRACER_SNAPSHOT */
1413
1414 void tracer_tracing_off(struct trace_array *tr)
1415 {
1416         if (tr->array_buffer.buffer)
1417                 ring_buffer_record_off(tr->array_buffer.buffer);
1418         /*
1419          * This flag is looked at when buffers haven't been allocated
1420          * yet, or by some tracers (like irqsoff), that just want to
1421          * know if the ring buffer has been disabled, but it can handle
1422          * races where it gets disabled but we still do a record.
1423          * As the check is in the fast path of the tracers, it is more
1424          * important to be fast than accurate.
1425          */
1426         tr->buffer_disabled = 1;
1427         /* Make the flag seen by readers */
1428         smp_wmb();
1429 }
1430
1431 /**
1432  * tracing_off - turn off tracing buffers
1433  *
1434  * This function stops the tracing buffers from recording data.
1435  * It does not disable any overhead the tracers themselves may
1436  * be causing. This function simply causes all recording to
1437  * the ring buffers to fail.
1438  */
1439 void tracing_off(void)
1440 {
1441         tracer_tracing_off(&global_trace);
1442 }
1443 EXPORT_SYMBOL_GPL(tracing_off);
1444
1445 void disable_trace_on_warning(void)
1446 {
1447         if (__disable_trace_on_warning) {
1448                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1449                         "Disabling tracing due to warning\n");
1450                 tracing_off();
1451         }
1452 }
1453
1454 /**
1455  * tracer_tracing_is_on - show real state of ring buffer enabled
1456  * @tr : the trace array to know if ring buffer is enabled
1457  *
1458  * Shows real state of the ring buffer if it is enabled or not.
1459  */
1460 bool tracer_tracing_is_on(struct trace_array *tr)
1461 {
1462         if (tr->array_buffer.buffer)
1463                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1464         return !tr->buffer_disabled;
1465 }
1466
1467 /**
1468  * tracing_is_on - show state of ring buffers enabled
1469  */
1470 int tracing_is_on(void)
1471 {
1472         return tracer_tracing_is_on(&global_trace);
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_is_on);
1475
1476 static int __init set_buf_size(char *str)
1477 {
1478         unsigned long buf_size;
1479
1480         if (!str)
1481                 return 0;
1482         buf_size = memparse(str, &str);
1483         /*
1484          * nr_entries can not be zero and the startup
1485          * tests require some buffer space. Therefore
1486          * ensure we have at least 4096 bytes of buffer.
1487          */
1488         trace_buf_size = max(4096UL, buf_size);
1489         return 1;
1490 }
1491 __setup("trace_buf_size=", set_buf_size);
1492
1493 static int __init set_tracing_thresh(char *str)
1494 {
1495         unsigned long threshold;
1496         int ret;
1497
1498         if (!str)
1499                 return 0;
1500         ret = kstrtoul(str, 0, &threshold);
1501         if (ret < 0)
1502                 return 0;
1503         tracing_thresh = threshold * 1000;
1504         return 1;
1505 }
1506 __setup("tracing_thresh=", set_tracing_thresh);
1507
1508 unsigned long nsecs_to_usecs(unsigned long nsecs)
1509 {
1510         return nsecs / 1000;
1511 }
1512
1513 /*
1514  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1515  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1516  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1517  * of strings in the order that the evals (enum) were defined.
1518  */
1519 #undef C
1520 #define C(a, b) b
1521
1522 /* These must match the bit positions in trace_iterator_flags */
1523 static const char *trace_options[] = {
1524         TRACE_FLAGS
1525         NULL
1526 };
1527
1528 static struct {
1529         u64 (*func)(void);
1530         const char *name;
1531         int in_ns;              /* is this clock in nanoseconds? */
1532 } trace_clocks[] = {
1533         { trace_clock_local,            "local",        1 },
1534         { trace_clock_global,           "global",       1 },
1535         { trace_clock_counter,          "counter",      0 },
1536         { trace_clock_jiffies,          "uptime",       0 },
1537         { trace_clock,                  "perf",         1 },
1538         { ktime_get_mono_fast_ns,       "mono",         1 },
1539         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1540         { ktime_get_boot_fast_ns,       "boot",         1 },
1541         ARCH_TRACE_CLOCKS
1542 };
1543
1544 bool trace_clock_in_ns(struct trace_array *tr)
1545 {
1546         if (trace_clocks[tr->clock_id].in_ns)
1547                 return true;
1548
1549         return false;
1550 }
1551
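/*
 * The clock used by an instance is selected at run time by writing one of
 * the names above to its "trace_clock" file, for example:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */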
1552 /*
1553  * trace_parser_get_init - gets the buffer for trace parser
1554  */
1555 int trace_parser_get_init(struct trace_parser *parser, int size)
1556 {
1557         memset(parser, 0, sizeof(*parser));
1558
1559         parser->buffer = kmalloc(size, GFP_KERNEL);
1560         if (!parser->buffer)
1561                 return 1;
1562
1563         parser->size = size;
1564         return 0;
1565 }
1566
1567 /*
1568  * trace_parser_put - frees the buffer for trace parser
1569  */
1570 void trace_parser_put(struct trace_parser *parser)
1571 {
1572         kfree(parser->buffer);
1573         parser->buffer = NULL;
1574 }
1575
1576 /*
1577  * trace_get_user - reads the user input string separated by  space
1578  * (matched by isspace(ch))
1579  *
1580  * For each string found the 'struct trace_parser' is updated,
1581  * and the function returns.
1582  *
1583  * Returns number of bytes read.
1584  *
1585  * See kernel/trace/trace.h for 'struct trace_parser' details.
1586  */
1587 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1588         size_t cnt, loff_t *ppos)
1589 {
1590         char ch;
1591         size_t read = 0;
1592         ssize_t ret;
1593
1594         if (!*ppos)
1595                 trace_parser_clear(parser);
1596
1597         ret = get_user(ch, ubuf++);
1598         if (ret)
1599                 goto out;
1600
1601         read++;
1602         cnt--;
1603
1604         /*
1605          * If the parser is not finished with the last write,
1606          * continue reading the user input without skipping spaces.
1607          */
1608         if (!parser->cont) {
1609                 /* skip white space */
1610                 while (cnt && isspace(ch)) {
1611                         ret = get_user(ch, ubuf++);
1612                         if (ret)
1613                                 goto out;
1614                         read++;
1615                         cnt--;
1616                 }
1617
1618                 parser->idx = 0;
1619
1620                 /* only spaces were written */
1621                 if (isspace(ch) || !ch) {
1622                         *ppos += read;
1623                         ret = read;
1624                         goto out;
1625                 }
1626         }
1627
1628         /* read the non-space input */
1629         while (cnt && !isspace(ch) && ch) {
1630                 if (parser->idx < parser->size - 1)
1631                         parser->buffer[parser->idx++] = ch;
1632                 else {
1633                         ret = -EINVAL;
1634                         goto out;
1635                 }
1636                 ret = get_user(ch, ubuf++);
1637                 if (ret)
1638                         goto out;
1639                 read++;
1640                 cnt--;
1641         }
1642
1643         /* We either got finished input or we have to wait for another call. */
1644         if (isspace(ch) || !ch) {
1645                 parser->buffer[parser->idx] = 0;
1646                 parser->cont = false;
1647         } else if (parser->idx < parser->size - 1) {
1648                 parser->cont = true;
1649                 parser->buffer[parser->idx++] = ch;
1650                 /* Make sure the parsed string always terminates with '\0'. */
1651                 parser->buffer[parser->idx] = 0;
1652         } else {
1653                 ret = -EINVAL;
1654                 goto out;
1655         }
1656
1657         *ppos += read;
1658         ret = read;
1659
1660 out:
1661         return ret;
1662 }
1663
1664 /* TODO add a seq_buf_to_buffer() */
1665 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1666 {
1667         int len;
1668
1669         if (trace_seq_used(s) <= s->seq.readpos)
1670                 return -EBUSY;
1671
1672         len = trace_seq_used(s) - s->seq.readpos;
1673         if (cnt > len)
1674                 cnt = len;
1675         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1676
1677         s->seq.readpos += cnt;
1678         return cnt;
1679 }
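
/*
 * An illustrative helper (not taken from this file) showing the intended use
 * of trace_seq_to_buffer() above: repeatedly copy chunks until the helper
 * reports -EBUSY, which means readpos has caught up with the used length.
 */
static void example_drain_trace_seq(struct trace_seq *s)
{
	char chunk[64];
	ssize_t n;

	while ((n = trace_seq_to_buffer(s, chunk, sizeof(chunk))) > 0)
		pr_debug("copied %zd bytes\n", n);
}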
1680
1681 unsigned long __read_mostly     tracing_thresh;
1682 static const struct file_operations tracing_max_lat_fops;
1683
1684 #ifdef LATENCY_FS_NOTIFY
1685
1686 static struct workqueue_struct *fsnotify_wq;
1687
1688 static void latency_fsnotify_workfn(struct work_struct *work)
1689 {
1690         struct trace_array *tr = container_of(work, struct trace_array,
1691                                               fsnotify_work);
1692         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1693 }
1694
1695 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1696 {
1697         struct trace_array *tr = container_of(iwork, struct trace_array,
1698                                               fsnotify_irqwork);
1699         queue_work(fsnotify_wq, &tr->fsnotify_work);
1700 }
1701
1702 static void trace_create_maxlat_file(struct trace_array *tr,
1703                                      struct dentry *d_tracer)
1704 {
1705         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1706         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1707         tr->d_max_latency = trace_create_file("tracing_max_latency",
1708                                               TRACE_MODE_WRITE,
1709                                               d_tracer, &tr->max_latency,
1710                                               &tracing_max_lat_fops);
1711 }
1712
1713 __init static int latency_fsnotify_init(void)
1714 {
1715         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1716                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1717         if (!fsnotify_wq) {
1718                 pr_err("Unable to allocate tr_max_lat_wq\n");
1719                 return -ENOMEM;
1720         }
1721         return 0;
1722 }
1723
1724 late_initcall_sync(latency_fsnotify_init);
1725
1726 void latency_fsnotify(struct trace_array *tr)
1727 {
1728         if (!fsnotify_wq)
1729                 return;
1730         /*
1731          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1732          * possible that we are called from __schedule() or do_idle(), which
1733          * could cause a deadlock.
1734          */
1735         irq_work_queue(&tr->fsnotify_irqwork);
1736 }
1737
1738 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1739         || defined(CONFIG_OSNOISE_TRACER)
1740
1741 #define trace_create_maxlat_file(tr, d_tracer)                          \
1742         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1743                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1744
1745 #else
1746 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1747 #endif
1748
1749 #ifdef CONFIG_TRACER_MAX_TRACE
1750 /*
1751  * Copy the new maximum trace into the separate maximum-trace
1752  * structure. (This way the maximum trace is permanently saved
1753  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1754  */
1755 static void
1756 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1757 {
1758         struct array_buffer *trace_buf = &tr->array_buffer;
1759         struct array_buffer *max_buf = &tr->max_buffer;
1760         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1761         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1762
1763         max_buf->cpu = cpu;
1764         max_buf->time_start = data->preempt_timestamp;
1765
1766         max_data->saved_latency = tr->max_latency;
1767         max_data->critical_start = data->critical_start;
1768         max_data->critical_end = data->critical_end;
1769
1770         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1771         max_data->pid = tsk->pid;
1772         /*
1773          * If tsk == current, then use current_uid(), as that does not use
1774          * RCU. The irq tracer can be called out of RCU scope.
1775          */
1776         if (tsk == current)
1777                 max_data->uid = current_uid();
1778         else
1779                 max_data->uid = task_uid(tsk);
1780
1781         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1782         max_data->policy = tsk->policy;
1783         max_data->rt_priority = tsk->rt_priority;
1784
1785         /* record this task's comm */
1786         tracing_record_cmdline(tsk);
1787         latency_fsnotify(tr);
1788 }
1789
1790 /**
1791  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1792  * @tr: tracer
1793  * @tsk: the task with the latency
1794  * @cpu: The cpu that initiated the trace.
1795  * @cond_data: User data associated with a conditional snapshot
1796  *
1797  * Flip the buffers between the @tr and the max_tr and record information
1798  * about which task was the cause of this latency.
1799  */
1800 void
1801 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1802               void *cond_data)
1803 {
1804         if (tr->stop_count)
1805                 return;
1806
1807         WARN_ON_ONCE(!irqs_disabled());
1808
1809         if (!tr->allocated_snapshot) {
1810                 /* Only the nop tracer should hit this when disabling */
1811                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1812                 return;
1813         }
1814
1815         arch_spin_lock(&tr->max_lock);
1816
1817         /* Inherit the recordable setting from array_buffer */
1818         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1819                 ring_buffer_record_on(tr->max_buffer.buffer);
1820         else
1821                 ring_buffer_record_off(tr->max_buffer.buffer);
1822
1823 #ifdef CONFIG_TRACER_SNAPSHOT
1824         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1825                 goto out_unlock;
1826 #endif
1827         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1828
1829         __update_max_tr(tr, tsk, cpu);
1830
1831  out_unlock:
1832         arch_spin_unlock(&tr->max_lock);
1833 }
1834
1835 /**
1836  * update_max_tr_single - only copy one trace over, and reset the rest
1837  * @tr: tracer
1838  * @tsk: task with the latency
1839  * @cpu: the cpu of the buffer to copy.
1840  *
1841  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1842  */
1843 void
1844 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1845 {
1846         int ret;
1847
1848         if (tr->stop_count)
1849                 return;
1850
1851         WARN_ON_ONCE(!irqs_disabled());
1852         if (!tr->allocated_snapshot) {
1853                 /* Only the nop tracer should hit this when disabling */
1854                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1855                 return;
1856         }
1857
1858         arch_spin_lock(&tr->max_lock);
1859
1860         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1861
1862         if (ret == -EBUSY) {
1863                 /*
1864                  * We failed to swap the buffer due to a commit taking
1865                  * place on this CPU. We fail to record, but we reset
1866                  * the max trace buffer (no one writes directly to it)
1867                  * and flag that it failed.
1868                  */
1869                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1870                         "Failed to swap buffers due to commit in progress\n");
1871         }
1872
1873         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1874
1875         __update_max_tr(tr, tsk, cpu);
1876         arch_spin_unlock(&tr->max_lock);
1877 }
1878 #endif /* CONFIG_TRACER_MAX_TRACE */
1879
1880 static int wait_on_pipe(struct trace_iterator *iter, int full)
1881 {
1882         /* Iterators are static, they should be filled or empty */
1883         if (trace_buffer_iter(iter, iter->cpu_file))
1884                 return 0;
1885
1886         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1887                                 full);
1888 }
1889
1890 #ifdef CONFIG_FTRACE_STARTUP_TEST
1891 static bool selftests_can_run;
1892
1893 struct trace_selftests {
1894         struct list_head                list;
1895         struct tracer                   *type;
1896 };
1897
1898 static LIST_HEAD(postponed_selftests);
1899
1900 static int save_selftest(struct tracer *type)
1901 {
1902         struct trace_selftests *selftest;
1903
1904         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1905         if (!selftest)
1906                 return -ENOMEM;
1907
1908         selftest->type = type;
1909         list_add(&selftest->list, &postponed_selftests);
1910         return 0;
1911 }
1912
1913 static int run_tracer_selftest(struct tracer *type)
1914 {
1915         struct trace_array *tr = &global_trace;
1916         struct tracer *saved_tracer = tr->current_trace;
1917         int ret;
1918
1919         if (!type->selftest || tracing_selftest_disabled)
1920                 return 0;
1921
1922         /*
1923          * If a tracer registers early in boot up (before scheduling is
1924          * initialized and such), then do not run its selftests yet.
1925          * Instead, run it a little later in the boot process.
1926          */
1927         if (!selftests_can_run)
1928                 return save_selftest(type);
1929
1930         if (!tracing_is_on()) {
1931                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1932                         type->name);
1933                 return 0;
1934         }
1935
1936         /*
1937          * Run a selftest on this tracer.
1938          * Here we reset the trace buffer, and set the current
1939          * tracer to be this tracer. The tracer can then run some
1940          * internal tracing to verify that everything is in order.
1941          * If we fail, we do not register this tracer.
1942          */
1943         tracing_reset_online_cpus(&tr->array_buffer);
1944
1945         tr->current_trace = type;
1946
1947 #ifdef CONFIG_TRACER_MAX_TRACE
1948         if (type->use_max_tr) {
1949                 /* If we expanded the buffers, make sure the max is expanded too */
1950                 if (ring_buffer_expanded)
1951                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1952                                            RING_BUFFER_ALL_CPUS);
1953                 tr->allocated_snapshot = true;
1954         }
1955 #endif
1956
1957         /* the test is responsible for initializing and enabling */
1958         pr_info("Testing tracer %s: ", type->name);
1959         ret = type->selftest(type, tr);
1960         /* the test is responsible for resetting too */
1961         tr->current_trace = saved_tracer;
1962         if (ret) {
1963                 printk(KERN_CONT "FAILED!\n");
1964                 /* Add the warning after printing 'FAILED' */
1965                 WARN_ON(1);
1966                 return -1;
1967         }
1968         /* Only reset on passing, to avoid touching corrupted buffers */
1969         tracing_reset_online_cpus(&tr->array_buffer);
1970
1971 #ifdef CONFIG_TRACER_MAX_TRACE
1972         if (type->use_max_tr) {
1973                 tr->allocated_snapshot = false;
1974
1975                 /* Shrink the max buffer again */
1976                 if (ring_buffer_expanded)
1977                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1978                                            RING_BUFFER_ALL_CPUS);
1979         }
1980 #endif
1981
1982         printk(KERN_CONT "PASSED\n");
1983         return 0;
1984 }
1985
1986 static __init int init_trace_selftests(void)
1987 {
1988         struct trace_selftests *p, *n;
1989         struct tracer *t, **last;
1990         int ret;
1991
1992         selftests_can_run = true;
1993
1994         mutex_lock(&trace_types_lock);
1995
1996         if (list_empty(&postponed_selftests))
1997                 goto out;
1998
1999         pr_info("Running postponed tracer tests:\n");
2000
2001         tracing_selftest_running = true;
2002         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2003                 /* This loop can take minutes when sanitizers are enabled, so
2004                  * let's make sure we allow RCU processing.
2005                  */
2006                 cond_resched();
2007                 ret = run_tracer_selftest(p->type);
2008                 /* If the test fails, then warn and remove from available_tracers */
2009                 if (ret < 0) {
2010                         WARN(1, "tracer: %s failed selftest, disabling\n",
2011                              p->type->name);
2012                         last = &trace_types;
2013                         for (t = trace_types; t; t = t->next) {
2014                                 if (t == p->type) {
2015                                         *last = t->next;
2016                                         break;
2017                                 }
2018                                 last = &t->next;
2019                         }
2020                 }
2021                 list_del(&p->list);
2022                 kfree(p);
2023         }
2024         tracing_selftest_running = false;
2025
2026  out:
2027         mutex_unlock(&trace_types_lock);
2028
2029         return 0;
2030 }
2031 core_initcall(init_trace_selftests);
2032 #else
2033 static inline int run_tracer_selftest(struct tracer *type)
2034 {
2035         return 0;
2036 }
2037 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2038
2039 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2040
2041 static void __init apply_trace_boot_options(void);
2042
2043 /**
2044  * register_tracer - register a tracer with the ftrace system.
2045  * @type: the plugin for the tracer
2046  *
2047  * Register a new plugin tracer.
2048  */
2049 int __init register_tracer(struct tracer *type)
2050 {
2051         struct tracer *t;
2052         int ret = 0;
2053
2054         if (!type->name) {
2055                 pr_info("Tracer must have a name\n");
2056                 return -1;
2057         }
2058
2059         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2060                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2061                 return -1;
2062         }
2063
2064         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2065                 pr_warn("Can not register tracer %s due to lockdown\n",
2066                            type->name);
2067                 return -EPERM;
2068         }
2069
2070         mutex_lock(&trace_types_lock);
2071
2072         tracing_selftest_running = true;
2073
2074         for (t = trace_types; t; t = t->next) {
2075                 if (strcmp(type->name, t->name) == 0) {
2076                         /* already found */
2077                         pr_info("Tracer %s already registered\n",
2078                                 type->name);
2079                         ret = -1;
2080                         goto out;
2081                 }
2082         }
2083
2084         if (!type->set_flag)
2085                 type->set_flag = &dummy_set_flag;
2086         if (!type->flags) {
2087                 /* allocate a dummy tracer_flags */
2088                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2089                 if (!type->flags) {
2090                         ret = -ENOMEM;
2091                         goto out;
2092                 }
2093                 type->flags->val = 0;
2094                 type->flags->opts = dummy_tracer_opt;
2095         } else
2096                 if (!type->flags->opts)
2097                         type->flags->opts = dummy_tracer_opt;
2098
2099         /* store the tracer for __set_tracer_option */
2100         type->flags->trace = type;
2101
2102         ret = run_tracer_selftest(type);
2103         if (ret < 0)
2104                 goto out;
2105
2106         type->next = trace_types;
2107         trace_types = type;
2108         add_tracer_options(&global_trace, type);
2109
2110  out:
2111         tracing_selftest_running = false;
2112         mutex_unlock(&trace_types_lock);
2113
2114         if (ret || !default_bootup_tracer)
2115                 goto out_unlock;
2116
2117         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2118                 goto out_unlock;
2119
2120         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2121         /* Do we want this tracer to start on bootup? */
2122         tracing_set_tracer(&global_trace, type->name);
2123         default_bootup_tracer = NULL;
2124
2125         apply_trace_boot_options();
2126
2127         /* Disable other selftests, since this tracer will break them. */
2128         disable_tracing_selftest("running a tracer");
2129
2130  out_unlock:
2131         return ret;
2132 }
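
/*
 * A minimal registration sketch (the "example" tracer below is hypothetical
 * and not part of this file): a plugin only needs a name and an init
 * callback before handing itself to register_tracer() from an initcall.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* start whatever tracing hooks the plugin needs */
	return 0;
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);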
2133
2134 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2135 {
2136         struct trace_buffer *buffer = buf->buffer;
2137
2138         if (!buffer)
2139                 return;
2140
2141         ring_buffer_record_disable(buffer);
2142
2143         /* Make sure all commits have finished */
2144         synchronize_rcu();
2145         ring_buffer_reset_cpu(buffer, cpu);
2146
2147         ring_buffer_record_enable(buffer);
2148 }
2149
2150 void tracing_reset_online_cpus(struct array_buffer *buf)
2151 {
2152         struct trace_buffer *buffer = buf->buffer;
2153
2154         if (!buffer)
2155                 return;
2156
2157         ring_buffer_record_disable(buffer);
2158
2159         /* Make sure all commits have finished */
2160         synchronize_rcu();
2161
2162         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2163
2164         ring_buffer_reset_online_cpus(buffer);
2165
2166         ring_buffer_record_enable(buffer);
2167 }
2168
2169 /* Must have trace_types_lock held */
2170 void tracing_reset_all_online_cpus(void)
2171 {
2172         struct trace_array *tr;
2173
2174         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2175                 if (!tr->clear_trace)
2176                         continue;
2177                 tr->clear_trace = false;
2178                 tracing_reset_online_cpus(&tr->array_buffer);
2179 #ifdef CONFIG_TRACER_MAX_TRACE
2180                 tracing_reset_online_cpus(&tr->max_buffer);
2181 #endif
2182         }
2183 }
2184
2185 /*
2186  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2187  * is the tgid last observed corresponding to pid=i.
2188  */
2189 static int *tgid_map;
2190
2191 /* The maximum valid index into tgid_map. */
2192 static size_t tgid_map_max;
2193
2194 #define SAVED_CMDLINES_DEFAULT 128
2195 #define NO_CMDLINE_MAP UINT_MAX
2196 /*
2197  * Preemption must be disabled before acquiring trace_cmdline_lock.
2198  * The various trace_arrays' max_lock must be acquired in a context
2199  * where interrupts are disabled.
2200  */
2201 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2202 struct saved_cmdlines_buffer {
2203         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2204         unsigned *map_cmdline_to_pid;
2205         unsigned cmdline_num;
2206         int cmdline_idx;
2207         char *saved_cmdlines;
2208 };
2209 static struct saved_cmdlines_buffer *savedcmd;
2210
2211 static inline char *get_saved_cmdlines(int idx)
2212 {
2213         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2214 }
2215
2216 static inline void set_cmdline(int idx, const char *cmdline)
2217 {
2218         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2219 }
2220
2221 static int allocate_cmdlines_buffer(unsigned int val,
2222                                     struct saved_cmdlines_buffer *s)
2223 {
2224         s->map_cmdline_to_pid = kmalloc_array(val,
2225                                               sizeof(*s->map_cmdline_to_pid),
2226                                               GFP_KERNEL);
2227         if (!s->map_cmdline_to_pid)
2228                 return -ENOMEM;
2229
2230         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2231         if (!s->saved_cmdlines) {
2232                 kfree(s->map_cmdline_to_pid);
2233                 return -ENOMEM;
2234         }
2235
2236         s->cmdline_idx = 0;
2237         s->cmdline_num = val;
2238         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2239                sizeof(s->map_pid_to_cmdline));
2240         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2241                val * sizeof(*s->map_cmdline_to_pid));
2242
2243         return 0;
2244 }
2245
2246 static int trace_create_savedcmd(void)
2247 {
2248         int ret;
2249
2250         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2251         if (!savedcmd)
2252                 return -ENOMEM;
2253
2254         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2255         if (ret < 0) {
2256                 kfree(savedcmd);
2257                 savedcmd = NULL;
2258                 return -ENOMEM;
2259         }
2260
2261         return 0;
2262 }
2263
2264 int is_tracing_stopped(void)
2265 {
2266         return global_trace.stop_count;
2267 }
2268
2269 /**
2270  * tracing_start - quick start of the tracer
2271  *
2272  * If tracing is enabled but was stopped by tracing_stop,
2273  * this will start the tracer back up.
2274  */
2275 void tracing_start(void)
2276 {
2277         struct trace_buffer *buffer;
2278         unsigned long flags;
2279
2280         if (tracing_disabled)
2281                 return;
2282
2283         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2284         if (--global_trace.stop_count) {
2285                 if (global_trace.stop_count < 0) {
2286                         /* Someone screwed up their debugging */
2287                         WARN_ON_ONCE(1);
2288                         global_trace.stop_count = 0;
2289                 }
2290                 goto out;
2291         }
2292
2293         /* Prevent the buffers from switching */
2294         arch_spin_lock(&global_trace.max_lock);
2295
2296         buffer = global_trace.array_buffer.buffer;
2297         if (buffer)
2298                 ring_buffer_record_enable(buffer);
2299
2300 #ifdef CONFIG_TRACER_MAX_TRACE
2301         buffer = global_trace.max_buffer.buffer;
2302         if (buffer)
2303                 ring_buffer_record_enable(buffer);
2304 #endif
2305
2306         arch_spin_unlock(&global_trace.max_lock);
2307
2308  out:
2309         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2310 }
2311
2312 static void tracing_start_tr(struct trace_array *tr)
2313 {
2314         struct trace_buffer *buffer;
2315         unsigned long flags;
2316
2317         if (tracing_disabled)
2318                 return;
2319
2320         /* If global, we need to also start the max tracer */
2321         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2322                 return tracing_start();
2323
2324         raw_spin_lock_irqsave(&tr->start_lock, flags);
2325
2326         if (--tr->stop_count) {
2327                 if (tr->stop_count < 0) {
2328                         /* Someone screwed up their debugging */
2329                         WARN_ON_ONCE(1);
2330                         tr->stop_count = 0;
2331                 }
2332                 goto out;
2333         }
2334
2335         buffer = tr->array_buffer.buffer;
2336         if (buffer)
2337                 ring_buffer_record_enable(buffer);
2338
2339  out:
2340         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2341 }
2342
2343 /**
2344  * tracing_stop - quick stop of the tracer
2345  *
2346  * Lightweight way to stop tracing. Use in conjunction with
2347  * tracing_start.
2348  */
2349 void tracing_stop(void)
2350 {
2351         struct trace_buffer *buffer;
2352         unsigned long flags;
2353
2354         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2355         if (global_trace.stop_count++)
2356                 goto out;
2357
2358         /* Prevent the buffers from switching */
2359         arch_spin_lock(&global_trace.max_lock);
2360
2361         buffer = global_trace.array_buffer.buffer;
2362         if (buffer)
2363                 ring_buffer_record_disable(buffer);
2364
2365 #ifdef CONFIG_TRACER_MAX_TRACE
2366         buffer = global_trace.max_buffer.buffer;
2367         if (buffer)
2368                 ring_buffer_record_disable(buffer);
2369 #endif
2370
2371         arch_spin_unlock(&global_trace.max_lock);
2372
2373  out:
2374         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2375 }
2376
2377 static void tracing_stop_tr(struct trace_array *tr)
2378 {
2379         struct trace_buffer *buffer;
2380         unsigned long flags;
2381
2382         /* If global, we need to also stop the max tracer */
2383         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2384                 return tracing_stop();
2385
2386         raw_spin_lock_irqsave(&tr->start_lock, flags);
2387         if (tr->stop_count++)
2388                 goto out;
2389
2390         buffer = tr->array_buffer.buffer;
2391         if (buffer)
2392                 ring_buffer_record_disable(buffer);
2393
2394  out:
2395         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2396 }
2397
2398 static int trace_save_cmdline(struct task_struct *tsk)
2399 {
2400         unsigned tpid, idx;
2401
2402         /* treat recording of idle task as a success */
2403         if (!tsk->pid)
2404                 return 1;
2405
2406         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2407
2408         /*
2409          * It's not the end of the world if we don't get
2410          * the lock, but we also don't want to spin
2411          * nor do we want to disable interrupts,
2412          * so if we miss here, then better luck next time.
2413          *
2414          * This is called from within the scheduler and wakeup paths, so
2415          * interrupts had better be disabled and the run queue lock held.
2416          */
2417         lockdep_assert_preemption_disabled();
2418         if (!arch_spin_trylock(&trace_cmdline_lock))
2419                 return 0;
2420
2421         idx = savedcmd->map_pid_to_cmdline[tpid];
2422         if (idx == NO_CMDLINE_MAP) {
2423                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2424
2425                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2426                 savedcmd->cmdline_idx = idx;
2427         }
2428
2429         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2430         set_cmdline(idx, tsk->comm);
2431
2432         arch_spin_unlock(&trace_cmdline_lock);
2433
2434         return 1;
2435 }
2436
2437 static void __trace_find_cmdline(int pid, char comm[])
2438 {
2439         unsigned map;
2440         int tpid;
2441
2442         if (!pid) {
2443                 strcpy(comm, "<idle>");
2444                 return;
2445         }
2446
2447         if (WARN_ON_ONCE(pid < 0)) {
2448                 strcpy(comm, "<XXX>");
2449                 return;
2450         }
2451
2452         tpid = pid & (PID_MAX_DEFAULT - 1);
2453         map = savedcmd->map_pid_to_cmdline[tpid];
2454         if (map != NO_CMDLINE_MAP) {
2455                 tpid = savedcmd->map_cmdline_to_pid[map];
2456                 if (tpid == pid) {
2457                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2458                         return;
2459                 }
2460         }
2461         strcpy(comm, "<...>");
2462 }
2463
2464 void trace_find_cmdline(int pid, char comm[])
2465 {
2466         preempt_disable();
2467         arch_spin_lock(&trace_cmdline_lock);
2468
2469         __trace_find_cmdline(pid, comm);
2470
2471         arch_spin_unlock(&trace_cmdline_lock);
2472         preempt_enable();
2473 }
2474
2475 static int *trace_find_tgid_ptr(int pid)
2476 {
2477         /*
2478          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2479          * if we observe a non-NULL tgid_map then we also observe the correct
2480          * tgid_map_max.
2481          */
2482         int *map = smp_load_acquire(&tgid_map);
2483
2484         if (unlikely(!map || pid > tgid_map_max))
2485                 return NULL;
2486
2487         return &map[pid];
2488 }
2489
2490 int trace_find_tgid(int pid)
2491 {
2492         int *ptr = trace_find_tgid_ptr(pid);
2493
2494         return ptr ? *ptr : 0;
2495 }
2496
2497 static int trace_save_tgid(struct task_struct *tsk)
2498 {
2499         int *ptr;
2500
2501         /* treat recording of idle task as a success */
2502         if (!tsk->pid)
2503                 return 1;
2504
2505         ptr = trace_find_tgid_ptr(tsk->pid);
2506         if (!ptr)
2507                 return 0;
2508
2509         *ptr = tsk->tgid;
2510         return 1;
2511 }
2512
2513 static bool tracing_record_taskinfo_skip(int flags)
2514 {
2515         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2516                 return true;
2517         if (!__this_cpu_read(trace_taskinfo_save))
2518                 return true;
2519         return false;
2520 }
2521
2522 /**
2523  * tracing_record_taskinfo - record the task info of a task
2524  *
2525  * @task:  task to record
2526  * @flags: TRACE_RECORD_CMDLINE for recording comm
2527  *         TRACE_RECORD_TGID for recording tgid
2528  */
2529 void tracing_record_taskinfo(struct task_struct *task, int flags)
2530 {
2531         bool done;
2532
2533         if (tracing_record_taskinfo_skip(flags))
2534                 return;
2535
2536         /*
2537          * Record as much task information as possible. If some fail, continue
2538          * to try to record the others.
2539          */
2540         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2541         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2542
2543         /* If recording any information failed, retry again soon. */
2544         if (!done)
2545                 return;
2546
2547         __this_cpu_write(trace_taskinfo_save, false);
2548 }
2549
2550 /**
2551  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2552  *
2553  * @prev: previous task during sched_switch
2554  * @next: next task during sched_switch
2555  * @flags: TRACE_RECORD_CMDLINE for recording comm
2556  *         TRACE_RECORD_TGID for recording tgid
2557  */
2558 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2559                                           struct task_struct *next, int flags)
2560 {
2561         bool done;
2562
2563         if (tracing_record_taskinfo_skip(flags))
2564                 return;
2565
2566         /*
2567          * Record as much task information as possible. If some fail, continue
2568          * to try to record the others.
2569          */
2570         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2571         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2572         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2574
2575         /* If recording any information failed, retry again soon. */
2576         if (!done)
2577                 return;
2578
2579         __this_cpu_write(trace_taskinfo_save, false);
2580 }
2581
2582 /* Helpers to record a specific task information */
2583 void tracing_record_cmdline(struct task_struct *task)
2584 {
2585         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2586 }
2587
2588 void tracing_record_tgid(struct task_struct *task)
2589 {
2590         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2591 }
2592
2593 /*
2594  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2595  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2596  * simplifies those functions and keeps them in sync.
2597  */
2598 enum print_line_t trace_handle_return(struct trace_seq *s)
2599 {
2600         return trace_seq_has_overflowed(s) ?
2601                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2602 }
2603 EXPORT_SYMBOL_GPL(trace_handle_return);
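
/*
 * An illustrative event-printing callback (hypothetical, simplified from the
 * pattern used throughout trace_output.c): emit into iter->seq and let
 * trace_handle_return() fold the overflow check into the return value.
 */
static enum print_line_t example_trace_print_line(struct trace_iterator *iter)
{
	trace_seq_printf(&iter->seq, "example event on cpu %d\n", iter->cpu);

	return trace_handle_return(&iter->seq);
}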
2604
2605 static unsigned short migration_disable_value(void)
2606 {
2607 #if defined(CONFIG_SMP)
2608         return current->migration_disabled;
2609 #else
2610         return 0;
2611 #endif
2612 }
2613
2614 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2615 {
2616         unsigned int trace_flags = irqs_status;
2617         unsigned int pc;
2618
2619         pc = preempt_count();
2620
2621         if (pc & NMI_MASK)
2622                 trace_flags |= TRACE_FLAG_NMI;
2623         if (pc & HARDIRQ_MASK)
2624                 trace_flags |= TRACE_FLAG_HARDIRQ;
2625         if (in_serving_softirq())
2626                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2627
2628         if (tif_need_resched())
2629                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630         if (test_preempt_need_resched())
2631                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
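
/*
 * Illustration of the packing done by tracing_gen_ctx_irq_test() above (the
 * decoder below is hypothetical; consumers normally see these values after
 * they have been unpacked into struct trace_entry):
 *
 *   bits  0- 3: preempt_count(), clamped to 15
 *   bits  4- 7: migration-disable depth, clamped to 15
 *   bits 16-..: TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 */
static inline void example_decode_trace_ctx(unsigned int trace_ctx,
					    unsigned int *preempt_cnt,
					    unsigned int *migrate_disable,
					    unsigned int *flags)
{
	*preempt_cnt	 = trace_ctx & 0xf;
	*migrate_disable = (trace_ctx >> 4) & 0xf;
	*flags		 = trace_ctx >> 16;
}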
2635
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638                           int type,
2639                           unsigned long len,
2640                           unsigned int trace_ctx)
2641 {
2642         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to write the event
2653  * data into a temporary buffer if there is a likely chance that it
2654  * will not be committed. Discarding an event from the ring buffer
2655  * is not as fast as committing one, and is much slower than copying
2656  * a buffered event on a match.
2657  *
2658  * When an event is to be filtered, allocate per-CPU buffers to
2659  * write the event data into; if the event is filtered and discarded,
2660  * it is simply dropped, otherwise the entire payload is committed
2661  * in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665         struct ring_buffer_event *event;
2666         struct page *page;
2667         int cpu;
2668
2669         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671         if (trace_buffered_event_ref++)
2672                 return;
2673
2674         for_each_tracing_cpu(cpu) {
2675                 page = alloc_pages_node(cpu_to_node(cpu),
2676                                         GFP_KERNEL | __GFP_NORETRY, 0);
2677                 if (!page)
2678                         goto failed;
2679
2680                 event = page_address(page);
2681                 memset(event, 0, sizeof(*event));
2682
2683                 per_cpu(trace_buffered_event, cpu) = event;
2684
2685                 preempt_disable();
2686                 if (cpu == smp_processor_id() &&
2687                     __this_cpu_read(trace_buffered_event) !=
2688                     per_cpu(trace_buffered_event, cpu))
2689                         WARN_ON_ONCE(1);
2690                 preempt_enable();
2691         }
2692
2693         return;
2694  failed:
2695         trace_buffered_event_disable();
2696 }
2697
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700         /* Probably not needed, but do it anyway */
2701         smp_rmb();
2702         this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707         this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720         int cpu;
2721
2722         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725                 return;
2726
2727         if (--trace_buffered_event_ref)
2728                 return;
2729
2730         preempt_disable();
2731         /* For each CPU, set the buffer as used. */
2732         smp_call_function_many(tracing_buffer_mask,
2733                                disable_trace_buffered_event, NULL, 1);
2734         preempt_enable();
2735
2736         /* Wait for all current users to finish */
2737         synchronize_rcu();
2738
2739         for_each_tracing_cpu(cpu) {
2740                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741                 per_cpu(trace_buffered_event, cpu) = NULL;
2742         }
2743         /*
2744          * Make sure trace_buffered_event is NULL before clearing
2745          * trace_buffered_event_cnt.
2746          */
2747         smp_wmb();
2748
2749         preempt_disable();
2750         /* Do the work on each cpu */
2751         smp_call_function_many(tracing_buffer_mask,
2752                                enable_trace_buffered_event, NULL, 1);
2753         preempt_enable();
2754 }
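
/*
 * An illustrative pairing (the example_filter_added()/removed() wrappers are
 * hypothetical): both helpers above must run under event_mutex, matching
 * their WARN_ON_ONCE() checks, and every enable needs a matching disable so
 * the reference count can eventually free the per-CPU pages again.
 */
static void example_filter_added(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* 0 -> 1 allocates the per-CPU pages */
	mutex_unlock(&event_mutex);
}

static void example_filter_removed(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();	/* 1 -> 0 frees them again */
	mutex_unlock(&event_mutex);
}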
2755
2756 static struct trace_buffer *temp_buffer;
2757
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760                           struct trace_event_file *trace_file,
2761                           int type, unsigned long len,
2762                           unsigned int trace_ctx)
2763 {
2764         struct ring_buffer_event *entry;
2765         struct trace_array *tr = trace_file->tr;
2766         int val;
2767
2768         *current_rb = tr->array_buffer.buffer;
2769
2770         if (!tr->no_filter_buffering_ref &&
2771             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2772             (entry = this_cpu_read(trace_buffered_event))) {
2773                 /*
2774                  * Filtering is on, so try to use the per cpu buffer first.
2775                  * This buffer will simulate a ring_buffer_event,
2776                  * where the type_len is zero and the array[0] will
2777                  * hold the full length.
2778                  * (see include/linux/ring_buffer.h for details on
2779                  *  how the ring_buffer_event is structured).
2780                  *
2781                  * Using a temp buffer during filtering and copying it
2782                  * on a matched filter is quicker than writing directly
2783                  * into the ring buffer and then discarding it when
2784                  * it doesn't match. That is because the discard
2785                  * requires several atomic operations to get right.
2786                  * Copying on a match and doing nothing on a failed match
2787                  * is still quicker than skipping the copy but having to
2788                  * discard out of the ring buffer on a failed match.
2789                  */
2790                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2791
2792                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2793
2794                 /*
2795                  * Preemption is disabled, but interrupts and NMIs
2796                  * can still come in now. If that happens after
2797                  * the above increment, then it will have to go
2798                  * back to the old method of allocating the event
2799                  * on the ring buffer, and if the filter fails, it
2800                  * will have to call ring_buffer_discard_commit()
2801                  * to remove it.
2802                  *
2803                  * Need to also check the unlikely case that the
2804                  * length is bigger than the temp buffer size.
2805                  * If that happens, then the reserve is pretty much
2806                  * guaranteed to fail, as the ring buffer currently
2807                  * only allows events less than a page. But that may
2808                  * change in the future, so let the ring buffer reserve
2809                  * handle the failure in that case.
2810                  */
2811                 if (val == 1 && likely(len <= max_len)) {
2812                         trace_event_setup(entry, type, trace_ctx);
2813                         entry->array[0] = len;
2814                         return entry;
2815                 }
2816                 this_cpu_dec(trace_buffered_event_cnt);
2817         }
2818
2819         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820                                             trace_ctx);
2821         /*
2822          * If tracing is off, but we have triggers enabled,
2823          * we still need to look at the event data. Use the temp_buffer
2824          * to store the trace event for the trigger to use. It's recursion
2825          * safe and will not be recorded anywhere.
2826          */
2827         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2828                 *current_rb = temp_buffer;
2829                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2830                                                     trace_ctx);
2831         }
2832         return entry;
2833 }
2834 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
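
/*
 * A simplified, hypothetical probe showing how the reserve/commit pair is
 * meant to be used. "struct example_entry", example_probe() and the reuse of
 * the file's event type id are illustrative; real probes are generated by
 * the TRACE_EVENT() macros and go through trace_event_buffer_reserve().
 */
struct example_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void example_probe(struct trace_event_file *trace_file,
			  unsigned long value)
{
	struct trace_buffer *buffer;
	struct ring_buffer_event *event;
	struct example_entry *entry;
	unsigned int trace_ctx = tracing_gen_ctx();

	/* Like a real probe, this is assumed to run with preemption disabled */
	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
						trace_file->event_call->event.type,
						sizeof(*entry), trace_ctx);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	trace_buffer_unlock_commit_nostack(buffer, event);
}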
2835
2836 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2837 static DEFINE_MUTEX(tracepoint_printk_mutex);
2838
2839 static void output_printk(struct trace_event_buffer *fbuffer)
2840 {
2841         struct trace_event_call *event_call;
2842         struct trace_event_file *file;
2843         struct trace_event *event;
2844         unsigned long flags;
2845         struct trace_iterator *iter = tracepoint_print_iter;
2846
2847         /* We should never get here if iter is NULL */
2848         if (WARN_ON_ONCE(!iter))
2849                 return;
2850
2851         event_call = fbuffer->trace_file->event_call;
2852         if (!event_call || !event_call->event.funcs ||
2853             !event_call->event.funcs->trace)
2854                 return;
2855
2856         file = fbuffer->trace_file;
2857         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2858             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2859              !filter_match_preds(file->filter, fbuffer->entry)))
2860                 return;
2861
2862         event = &fbuffer->trace_file->event_call->event;
2863
2864         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2865         trace_seq_init(&iter->seq);
2866         iter->ent = fbuffer->entry;
2867         event_call->event.funcs->trace(iter, 0, event);
2868         trace_seq_putc(&iter->seq, 0);
2869         printk("%s", iter->seq.buffer);
2870
2871         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2872 }
2873
2874 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2875                              void *buffer, size_t *lenp,
2876                              loff_t *ppos)
2877 {
2878         int save_tracepoint_printk;
2879         int ret;
2880
2881         mutex_lock(&tracepoint_printk_mutex);
2882         save_tracepoint_printk = tracepoint_printk;
2883
2884         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2885
2886         /*
2887          * This will force exiting early, as tracepoint_printk
2888          * is always zero when tracepoint_print_iter is not allocated.
2889          */
2890         if (!tracepoint_print_iter)
2891                 tracepoint_printk = 0;
2892
2893         if (save_tracepoint_printk == tracepoint_printk)
2894                 goto out;
2895
2896         if (tracepoint_printk)
2897                 static_key_enable(&tracepoint_printk_key.key);
2898         else
2899                 static_key_disable(&tracepoint_printk_key.key);
2900
2901  out:
2902         mutex_unlock(&tracepoint_printk_mutex);
2903
2904         return ret;
2905 }
2906
2907 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2908 {
2909         enum event_trigger_type tt = ETT_NONE;
2910         struct trace_event_file *file = fbuffer->trace_file;
2911
2912         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2913                         fbuffer->entry, &tt))
2914                 goto discard;
2915
2916         if (static_key_false(&tracepoint_printk_key.key))
2917                 output_printk(fbuffer);
2918
2919         if (static_branch_unlikely(&trace_event_exports_enabled))
2920                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2921
2922         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2923                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2924
2925 discard:
2926         if (tt)
2927                 event_triggers_post_call(file, tt);
2928
2929 }
2930 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2931
2932 /*
2933  * Skip 3:
2934  *
2935  *   trace_buffer_unlock_commit_regs()
2936  *   trace_event_buffer_commit()
2937  *   trace_event_raw_event_xxx()
2938  */
2939 # define STACK_SKIP 3
2940
2941 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2942                                      struct trace_buffer *buffer,
2943                                      struct ring_buffer_event *event,
2944                                      unsigned int trace_ctx,
2945                                      struct pt_regs *regs)
2946 {
2947         __buffer_unlock_commit(buffer, event);
2948
2949         /*
2950          * If regs is not set, then skip the necessary functions.
2951          * Note, we can still get here via blktrace, wakeup tracer
2952          * and mmiotrace, but that's ok if they lose a function or
2953          * two. They are not that meaningful.
2954          */
2955         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2956         ftrace_trace_userstack(tr, buffer, trace_ctx);
2957 }
2958
2959 /*
2960  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2961  */
2962 void
2963 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2964                                    struct ring_buffer_event *event)
2965 {
2966         __buffer_unlock_commit(buffer, event);
2967 }
2968
2969 void
2970 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2971                parent_ip, unsigned int trace_ctx)
2972 {
2973         struct trace_event_call *call = &event_function;
2974         struct trace_buffer *buffer = tr->array_buffer.buffer;
2975         struct ring_buffer_event *event;
2976         struct ftrace_entry *entry;
2977
2978         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2979                                             trace_ctx);
2980         if (!event)
2981                 return;
2982         entry   = ring_buffer_event_data(event);
2983         entry->ip                       = ip;
2984         entry->parent_ip                = parent_ip;
2985
2986         if (!call_filter_check_discard(call, entry, buffer, event)) {
2987                 if (static_branch_unlikely(&trace_function_exports_enabled))
2988                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2989                 __buffer_unlock_commit(buffer, event);
2990         }
2991 }
2992
2993 #ifdef CONFIG_STACKTRACE
2994
2995 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2996 #define FTRACE_KSTACK_NESTING   4
2997
2998 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2999
3000 struct ftrace_stack {
3001         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3002 };
3003
3004
3005 struct ftrace_stacks {
3006         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3007 };
3008
3009 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3010 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3011
3012 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3013                                  unsigned int trace_ctx,
3014                                  int skip, struct pt_regs *regs)
3015 {
3016         struct trace_event_call *call = &event_kernel_stack;
3017         struct ring_buffer_event *event;
3018         unsigned int size, nr_entries;
3019         struct ftrace_stack *fstack;
3020         struct stack_entry *entry;
3021         int stackidx;
3022
3023         /*
3024          * Add one, for this function and the call to stack_trace_save().
3025          * If regs is set, then these functions will not be in the way.
3026          */
3027 #ifndef CONFIG_UNWINDER_ORC
3028         if (!regs)
3029                 skip++;
3030 #endif
3031
3032         preempt_disable_notrace();
3033
3034         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3035
3036         /* This should never happen. If it does, yell once and skip */
3037         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3038                 goto out;
3039
3040         /*
3041          * The above __this_cpu_inc_return() is 'atomic' CPU local. An
3042          * interrupt will either see the value pre-increment or
3043          * post-increment. If the interrupt happens pre-increment, it will
3044          * have restored the counter when it returns. We just need a barrier
3045          * to keep gcc from moving things around.
3046          */
3047         barrier();
3048
3049         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3050         size = ARRAY_SIZE(fstack->calls);
3051
3052         if (regs) {
3053                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3054                                                    size, skip);
3055         } else {
3056                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3057         }
3058
3059         size = nr_entries * sizeof(unsigned long);
3060         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3061                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3062                                     trace_ctx);
3063         if (!event)
3064                 goto out;
3065         entry = ring_buffer_event_data(event);
3066
3067         memcpy(&entry->caller, fstack->calls, size);
3068         entry->size = nr_entries;
3069
3070         if (!call_filter_check_discard(call, entry, buffer, event))
3071                 __buffer_unlock_commit(buffer, event);
3072
3073  out:
3074         /* Again, don't let gcc optimize things here */
3075         barrier();
3076         __this_cpu_dec(ftrace_stack_reserve);
3077         preempt_enable_notrace();
3078
3079 }
3080
3081 static inline void ftrace_trace_stack(struct trace_array *tr,
3082                                       struct trace_buffer *buffer,
3083                                       unsigned int trace_ctx,
3084                                       int skip, struct pt_regs *regs)
3085 {
3086         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3087                 return;
3088
3089         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3090 }
3091
3092 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3093                    int skip)
3094 {
3095         struct trace_buffer *buffer = tr->array_buffer.buffer;
3096
3097         if (rcu_is_watching()) {
3098                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3099                 return;
3100         }
3101
3102         /*
3103          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3104          * but if the above rcu_is_watching() failed, then the NMI
3105          * triggered someplace critical, and rcu_irq_enter() should
3106          * not be called from NMI.
3107          */
3108         if (unlikely(in_nmi()))
3109                 return;
3110
3111         rcu_irq_enter_irqson();
3112         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3113         rcu_irq_exit_irqson();
3114 }
3115
3116 /**
3117  * trace_dump_stack - record a stack back trace in the trace buffer
3118  * @skip: Number of functions to skip (helper handlers)
3119  */
3120 void trace_dump_stack(int skip)
3121 {
3122         if (tracing_disabled || tracing_selftest_running)
3123                 return;
3124
3125 #ifndef CONFIG_UNWINDER_ORC
3126         /* Skip 1 to skip this function. */
3127         skip++;
3128 #endif
3129         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3130                              tracing_gen_ctx(), skip, NULL);
3131 }
3132 EXPORT_SYMBOL_GPL(trace_dump_stack);
3133
3134 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3135 static DEFINE_PER_CPU(int, user_stack_count);
3136
3137 static void
3138 ftrace_trace_userstack(struct trace_array *tr,
3139                        struct trace_buffer *buffer, unsigned int trace_ctx)
3140 {
3141         struct trace_event_call *call = &event_user_stack;
3142         struct ring_buffer_event *event;
3143         struct userstack_entry *entry;
3144
3145         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3146                 return;
3147
3148         /*
3149          * NMIs cannot handle page faults, even with fixups.
3150          * Saving the user stack can (and often does) fault.
3151          */
3152         if (unlikely(in_nmi()))
3153                 return;
3154
3155         /*
3156          * prevent recursion, since the user stack tracing may
3157          * trigger other kernel events.
3158          */
3159         preempt_disable();
3160         if (__this_cpu_read(user_stack_count))
3161                 goto out;
3162
3163         __this_cpu_inc(user_stack_count);
3164
3165         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3166                                             sizeof(*entry), trace_ctx);
3167         if (!event)
3168                 goto out_drop_count;
3169         entry   = ring_buffer_event_data(event);
3170
3171         entry->tgid             = current->tgid;
3172         memset(&entry->caller, 0, sizeof(entry->caller));
3173
3174         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3175         if (!call_filter_check_discard(call, entry, buffer, event))
3176                 __buffer_unlock_commit(buffer, event);
3177
3178  out_drop_count:
3179         __this_cpu_dec(user_stack_count);
3180  out:
3181         preempt_enable();
3182 }
3183 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3184 static void ftrace_trace_userstack(struct trace_array *tr,
3185                                    struct trace_buffer *buffer,
3186                                    unsigned int trace_ctx)
3187 {
3188 }
3189 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3190
3191 #endif /* CONFIG_STACKTRACE */
3192
3193 static inline void
3194 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3195                           unsigned long long delta)
3196 {
3197         entry->bottom_delta_ts = delta & U32_MAX;
3198         entry->top_delta_ts = (delta >> 32);
3199 }
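
/*
 * The split above exists because func_repeats_entry stores the timestamp
 * delta in two narrower fields. The read side reverses it; a hypothetical
 * sketch of that inverse:
 */
static inline u64
example_func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}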
3200
3201 void trace_last_func_repeats(struct trace_array *tr,
3202                              struct trace_func_repeats *last_info,
3203                              unsigned int trace_ctx)
3204 {
3205         struct trace_buffer *buffer = tr->array_buffer.buffer;
3206         struct func_repeats_entry *entry;
3207         struct ring_buffer_event *event;
3208         u64 delta;
3209
3210         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3211                                             sizeof(*entry), trace_ctx);
3212         if (!event)
3213                 return;
3214
3215         delta = ring_buffer_event_time_stamp(buffer, event) -
3216                 last_info->ts_last_call;
3217
3218         entry = ring_buffer_event_data(event);
3219         entry->ip = last_info->ip;
3220         entry->parent_ip = last_info->parent_ip;
3221         entry->count = last_info->count;
3222         func_repeats_set_delta_ts(entry, delta);
3223
3224         __buffer_unlock_commit(buffer, event);
3225 }
3226
3227 /* created for use with alloc_percpu */
3228 struct trace_buffer_struct {
3229         int nesting;
3230         char buffer[4][TRACE_BUF_SIZE];
3231 };
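/*
 * The nesting counter selects one of the four per-CPU buffers, so that
 * trace_printk() can be used from nested contexts (e.g. an interrupt
 * arriving while a buffer is in use) without clobbering the buffer
 * already in use.
 */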
3232
3233 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3234
3235 /*
3236  * This allows for lockless recording.  If we're nested too deeply, then
3237  * this returns NULL.
3238  */
3239 static char *get_trace_buf(void)
3240 {
3241         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3242
3243         if (!trace_percpu_buffer || buffer->nesting >= 4)
3244                 return NULL;
3245
3246         buffer->nesting++;
3247
3248         /* Interrupts must see nesting incremented before we use the buffer */
3249         barrier();
3250         return &buffer->buffer[buffer->nesting - 1][0];
3251 }
3252
3253 static void put_trace_buf(void)
3254 {
3255         /* Don't let the decrement of nesting be reordered before the last use of the buffer */
3256         barrier();
3257         this_cpu_dec(trace_percpu_buffer->nesting);
3258 }
3259
3260 static int alloc_percpu_trace_buffer(void)
3261 {
3262         struct trace_buffer_struct __percpu *buffers;
3263
3264         if (trace_percpu_buffer)
3265                 return 0;
3266
3267         buffers = alloc_percpu(struct trace_buffer_struct);
3268         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3269                 return -ENOMEM;
3270
3271         trace_percpu_buffer = buffers;
3272         return 0;
3273 }
3274
3275 static int buffers_allocated;
3276
3277 void trace_printk_init_buffers(void)
3278 {
3279         if (buffers_allocated)
3280                 return;
3281
3282         if (alloc_percpu_trace_buffer())
3283                 return;
3284
3285         /* trace_printk() is for debug use only. Don't use it in production. */
3286
3287         pr_warn("\n");
3288         pr_warn("**********************************************************\n");
3289         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290         pr_warn("**                                                      **\n");
3291         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3292         pr_warn("**                                                      **\n");
3293         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3294         pr_warn("** unsafe for production use.                           **\n");
3295         pr_warn("**                                                      **\n");
3296         pr_warn("** If you see this message and you are not debugging    **\n");
3297         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3298         pr_warn("**                                                      **\n");
3299         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3300         pr_warn("**********************************************************\n");
3301
3302         /* Expand the buffers to their configured size */
3303         tracing_update_buffers();
3304
3305         buffers_allocated = 1;
3306
3307         /*
3308          * trace_printk_init_buffers() can be called by modules.
3309          * If that happens, then we need to start cmdline recording
3310          * directly here. If global_trace.array_buffer.buffer is already
3311          * allocated at this point, then this was called by module code.
3312          */
3313         if (global_trace.array_buffer.buffer)
3314                 tracing_start_cmdline_record();
3315 }
3316 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3317
3318 void trace_printk_start_comm(void)
3319 {
3320         /* Start tracing comms if trace printk is set */
3321         if (!buffers_allocated)
3322                 return;
3323         tracing_start_cmdline_record();
3324 }
3325
3326 static void trace_printk_start_stop_comm(int enabled)
3327 {
3328         if (!buffers_allocated)
3329                 return;
3330
3331         if (enabled)
3332                 tracing_start_cmdline_record();
3333         else
3334                 tracing_stop_cmdline_record();
3335 }
3336
3337 /**
3338  * trace_vbprintk - write binary msg to tracing buffer
3339  * @ip:    The address of the caller
3340  * @fmt:   The string format to write to the buffer
3341  * @args:  Arguments for @fmt
3342  */
3343 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3344 {
3345         struct trace_event_call *call = &event_bprint;
3346         struct ring_buffer_event *event;
3347         struct trace_buffer *buffer;
3348         struct trace_array *tr = &global_trace;
3349         struct bprint_entry *entry;
3350         unsigned int trace_ctx;
3351         char *tbuffer;
3352         int len = 0, size;
3353
3354         if (unlikely(tracing_selftest_running || tracing_disabled))
3355                 return 0;
3356
3357         /* Don't pollute graph traces with trace_vprintk internals */
3358         pause_graph_tracing();
3359
3360         trace_ctx = tracing_gen_ctx();
3361         preempt_disable_notrace();
3362
3363         tbuffer = get_trace_buf();
3364         if (!tbuffer) {
3365                 len = 0;
3366                 goto out_nobuffer;
3367         }
3368
3369         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3370
3371         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3372                 goto out_put;
3373
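        /* vbin_printf() returned the number of u32 words written, not bytes */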
3374         size = sizeof(*entry) + sizeof(u32) * len;
3375         buffer = tr->array_buffer.buffer;
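        /*
         * This write may nest inside another ring buffer write on the same
         * CPU (e.g. a trace_printk() from an event handler), so tell the
         * ring buffer to allow it.
         */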
3376         ring_buffer_nest_start(buffer);
3377         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3378                                             trace_ctx);
3379         if (!event)
3380                 goto out;
3381         entry = ring_buffer_event_data(event);
3382         entry->ip                       = ip;
3383         entry->fmt                      = fmt;
3384
3385         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3386         if (!call_filter_check_discard(call, entry, buffer, event)) {
3387                 __buffer_unlock_commit(buffer, event);
3388                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3389         }
3390
3391 out:
3392         ring_buffer_nest_end(buffer);
3393 out_put:
3394         put_trace_buf();
3395
3396 out_nobuffer:
3397         preempt_enable_notrace();
3398         unpause_graph_tracing();
3399
3400         return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
3403
3404 __printf(3, 0)
3405 static int
3406 __trace_array_vprintk(struct trace_buffer *buffer,
3407                       unsigned long ip, const char *fmt, va_list args)
3408 {
3409         struct trace_event_call *call = &event_print;
3410         struct ring_buffer_event *event;
3411         int len = 0, size;
3412         struct print_entry *entry;
3413         unsigned int trace_ctx;
3414         char *tbuffer;
3415
3416         if (tracing_disabled || tracing_selftest_running)
3417                 return 0;
3418
3419         /* Don't pollute graph traces with trace_vprintk internals */
3420         pause_graph_tracing();
3421
3422         trace_ctx = tracing_gen_ctx();
3423         preempt_disable_notrace();
3424
3425
3426         tbuffer = get_trace_buf();
3427         if (!tbuffer) {
3428                 len = 0;
3429                 goto out_nobuffer;
3430         }
3431
3432         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3433
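        /* +1 accounts for the terminating '\0' that vscnprintf() wrote */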
3434         size = sizeof(*entry) + len + 1;
3435         ring_buffer_nest_start(buffer);
3436         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3437                                             trace_ctx);
3438         if (!event)
3439                 goto out;
3440         entry = ring_buffer_event_data(event);
3441         entry->ip = ip;
3442
3443         memcpy(&entry->buf, tbuffer, len + 1);
3444         if (!call_filter_check_discard(call, entry, buffer, event)) {
3445                 __buffer_unlock_commit(buffer, event);
3446                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3447         }
3448
3449 out:
3450         ring_buffer_nest_end(buffer);
3451         put_trace_buf();
3452
3453 out_nobuffer:
3454         preempt_enable_notrace();
3455         unpause_graph_tracing();
3456
3457         return len;
3458 }
3459
3460 __printf(3, 0)
3461 int trace_array_vprintk(struct trace_array *tr,
3462                         unsigned long ip, const char *fmt, va_list args)
3463 {
3464         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3465 }
3466
3467 /**
3468  * trace_array_printk - Print a message to a specific instance
3469  * @tr: The instance trace_array descriptor
3470  * @ip: The instruction pointer that this is called from.
3471  * @fmt: The format to print (printf format)
3472  *
3473  * If a subsystem sets up its own instance, they have the right to
3474  * printk strings into their tracing instance buffer using this
3475  * function. Note, this function will not write into the top level
3476  * buffer (use trace_printk() for that), as writing into the top level
3477  * buffer should only have events that can be individually disabled.
3478  * trace_printk() is only used for debugging a kernel, and should not
3479  * ever be incorporated in normal use.
3480  *
3481  * trace_array_printk() can be used, as it will not add noise to the
3482  * top level tracing buffer.
3483  *
3484  * Note, trace_array_init_printk() must be called on @tr before this
3485  * can be used.
3486  */
3487 __printf(3, 0)
3488 int trace_array_printk(struct trace_array *tr,
3489                        unsigned long ip, const char *fmt, ...)
3490 {
3491         int ret;
3492         va_list ap;
3493
3494         if (!tr)
3495                 return -ENOENT;
3496
3497         /* This is only allowed for created instances */
3498         if (tr == &global_trace)
3499                 return 0;
3500
3501         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3502                 return 0;
3503
3504         va_start(ap, fmt);
3505         ret = trace_array_vprintk(tr, ip, fmt, ap);
3506         va_end(ap);
3507         return ret;
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_printk);
3510
3511 /**
3512  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3513  * @tr: The trace array to initialize the buffers for
3514  *
3515  * As trace_array_printk() only writes into instances, they are OK to
3516  * have in the kernel (unlike trace_printk()). This needs to be called
3517  * before trace_array_printk() can be used on a trace_array.
3518  */
3519 int trace_array_init_printk(struct trace_array *tr)
3520 {
3521         if (!tr)
3522                 return -ENOENT;
3523
3524         /* This is only allowed for created instances */
3525         if (tr == &global_trace)
3526                 return -EINVAL;
3527
3528         return alloc_percpu_trace_buffer();
3529 }
3530 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3531
3532 __printf(3, 4)
3533 int trace_array_printk_buf(struct trace_buffer *buffer,
3534                            unsigned long ip, const char *fmt, ...)
3535 {
3536         int ret;
3537         va_list ap;
3538
3539         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3540                 return 0;
3541
3542         va_start(ap, fmt);
3543         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544         va_end(ap);
3545         return ret;
3546 }
3547
3548 __printf(2, 0)
3549 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3550 {
3551         return trace_array_vprintk(&global_trace, ip, fmt, args);
3552 }
3553 EXPORT_SYMBOL_GPL(trace_vprintk);
3554
3555 static void trace_iterator_increment(struct trace_iterator *iter)
3556 {
3557         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3558
3559         iter->idx++;
3560         if (buf_iter)
3561                 ring_buffer_iter_advance(buf_iter);
3562 }
3563
3564 static struct trace_entry *
3565 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3566                 unsigned long *lost_events)
3567 {
3568         struct ring_buffer_event *event;
3569         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3570
3571         if (buf_iter) {
3572                 event = ring_buffer_iter_peek(buf_iter, ts);
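                /* -1 means events were dropped but the count is unknown (see print_trace_line()) */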
3573                 if (lost_events)
3574                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3575                                 (unsigned long)-1 : 0;
3576         } else {
3577                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3578                                          lost_events);
3579         }
3580
3581         if (event) {
3582                 iter->ent_size = ring_buffer_event_length(event);
3583                 return ring_buffer_event_data(event);
3584         }
3585         iter->ent_size = 0;
3586         return NULL;
3587 }
3588
3589 static struct trace_entry *
3590 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3591                   unsigned long *missing_events, u64 *ent_ts)
3592 {
3593         struct trace_buffer *buffer = iter->array_buffer->buffer;
3594         struct trace_entry *ent, *next = NULL;
3595         unsigned long lost_events = 0, next_lost = 0;
3596         int cpu_file = iter->cpu_file;
3597         u64 next_ts = 0, ts;
3598         int next_cpu = -1;
3599         int next_size = 0;
3600         int cpu;
3601
3602         /*
3603          * If we are in a per_cpu trace file, don't bother iterating over
3604          * all CPUs; peek at that CPU directly.
3605          */
3606         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3607                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3608                         return NULL;
3609                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3610                 if (ent_cpu)
3611                         *ent_cpu = cpu_file;
3612
3613                 return ent;
3614         }
3615
3616         for_each_tracing_cpu(cpu) {
3617
3618                 if (ring_buffer_empty_cpu(buffer, cpu))
3619                         continue;
3620
3621                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3622
3623                 /*
3624                  * Pick the entry with the smallest timestamp:
3625                  */
3626                 if (ent && (!next || ts < next_ts)) {
3627                         next = ent;
3628                         next_cpu = cpu;
3629                         next_ts = ts;
3630                         next_lost = lost_events;
3631                         next_size = iter->ent_size;
3632                 }
3633         }
3634
3635         iter->ent_size = next_size;
3636
3637         if (ent_cpu)
3638                 *ent_cpu = next_cpu;
3639
3640         if (ent_ts)
3641                 *ent_ts = next_ts;
3642
3643         if (missing_events)
3644                 *missing_events = next_lost;
3645
3646         return next;
3647 }
3648
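/*
 * Fallback format buffer used when iter->fmt cannot be (re)allocated,
 * e.g. from ftrace_dump(), where krealloc() is not safe to call.
 */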
3649 #define STATIC_FMT_BUF_SIZE     128
3650 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3651
3652 static char *trace_iter_expand_format(struct trace_iterator *iter)
3653 {
3654         char *tmp;
3655
3656         /*
3657          * iter->tr is NULL when used with tp_printk, in which case this
3658          * can be called from a context where it is not safe to call krealloc().
3659          */
3660         if (!iter->tr || iter->fmt == static_fmt_buf)
3661                 return NULL;
3662
3663         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3664                        GFP_KERNEL);
3665         if (tmp) {
3666                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3667                 iter->fmt = tmp;
3668         }
3669
3670         return tmp;
3671 }
3672
3673 /* Returns true if the string is safe to dereference from an event */
3674 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3675                            bool star, int len)
3676 {
3677         unsigned long addr = (unsigned long)str;
3678         struct trace_event *trace_event;
3679         struct trace_event_call *event;
3680
3681         /* Ignore strings with no length */
3682         if (star && !len)
3683                 return true;
3684
3685         /* OK if part of the event data */
3686         if ((addr >= (unsigned long)iter->ent) &&
3687             (addr < (unsigned long)iter->ent + iter->ent_size))
3688                 return true;
3689
3690         /* OK if part of the temp seq buffer */
3691         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3692             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3693                 return true;
3694
3695         /* Core rodata cannot be freed */
3696         if (is_kernel_rodata(addr))
3697                 return true;
3698
3699         if (trace_is_tracepoint_string(str))
3700                 return true;
3701
3702         /*
3703          * Now this could be a module event, referencing core module
3704          * data, which is OK.
3705          */
3706         if (!iter->ent)
3707                 return false;
3708
3709         trace_event = ftrace_find_event(iter->ent->type);
3710         if (!trace_event)
3711                 return false;
3712
3713         event = container_of(trace_event, struct trace_event_call, event);
3714         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3715                 return false;
3716
3717         /* Would rather have rodata, but this will suffice */
3718         if (within_module_core(addr, event->module))
3719                 return true;
3720
3721         return false;
3722 }
3723
3724 static const char *show_buffer(struct trace_seq *s)
3725 {
3726         struct seq_buf *seq = &s->seq;
3727
3728         seq_buf_terminate(seq);
3729
3730         return seq->buffer;
3731 }
3732
3733 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3734
3735 static int test_can_verify_check(const char *fmt, ...)
3736 {
3737         char buf[16];
3738         va_list ap;
3739         int ret;
3740
3741         /*
3742          * The verifier depends on vsnprintf() modifying the va_list passed
3743          * to it, i.e. on the va_list being passed by reference. Some
3744          * architectures (like x86_32) pass it by value, which means that
3745          * vsnprintf() does not modify the caller's va_list, and the verifier
3746          * would then need to understand every value that vsnprintf() can
3747          * consume. If the va_list is passed by value, the verifier is
3748          * disabled.
3749          */
3750         va_start(ap, fmt);
3751         vsnprintf(buf, 16, "%d", ap);
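        /*
         * If the va_list was passed by reference, the "%d" above consumed
         * the first argument (0) and va_arg() now returns the second (1).
         * If it was passed by value, va_arg() returns 0 again and the
         * caller disables the verifier.
         */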
3752         ret = va_arg(ap, int);
3753         va_end(ap);
3754
3755         return ret;
3756 }
3757
3758 static void test_can_verify(void)
3759 {
3760         if (!test_can_verify_check("%d %d", 0, 1)) {
3761                 pr_info("trace event string verifier disabled\n");
3762                 static_branch_inc(&trace_no_verify);
3763         }
3764 }
3765
3766 /**
3767  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3768  * @iter: The iterator that holds the seq buffer and the event being printed
3769  * @fmt: The format used to print the event
3770  * @ap: The va_list holding the data to print from @fmt.
3771  *
3772  * This writes the data into the @iter->seq buffer using the data from
3773  * @fmt and @ap. If the format has a %s, then the source of the string
3774  * is examined to make sure it is safe to print, otherwise it will
3775  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3776  * pointer.
3777  */
3778 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3779                          va_list ap)
3780 {
3781         const char *p = fmt;
3782         const char *str;
3783         int i, j;
3784
3785         if (WARN_ON_ONCE(!fmt))
3786                 return;
3787
3788         if (static_branch_unlikely(&trace_no_verify))
3789                 goto print;
3790
3791         /* Don't bother checking when doing a ftrace_dump() */
3792         if (iter->fmt == static_fmt_buf)
3793                 goto print;
3794
3795         while (*p) {
3796                 bool star = false;
3797                 int len = 0;
3798
3799                 j = 0;
3800
3801                 /* We only care about %s and variants */
3802                 for (i = 0; p[i]; i++) {
3803                         if (i + 1 >= iter->fmt_size) {
3804                                 /*
3805                                  * If we can't expand the copy buffer,
3806                                  * just print it.
3807                                  */
3808                                 if (!trace_iter_expand_format(iter))
3809                                         goto print;
3810                         }
3811
3812                         if (p[i] == '\\' && p[i+1]) {
3813                                 i++;
3814                                 continue;
3815                         }
3816                         if (p[i] == '%') {
3817                                 /* Need to test cases like %08.*s */
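                                /*
                                 * Skip width/precision digits and '.', note a '*'
                                 * (which consumes an int argument), then check
                                 * whether the conversion is 's'.
                                 */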
3818                                 for (j = 1; p[i+j]; j++) {
3819                                         if (isdigit(p[i+j]) ||
3820                                             p[i+j] == '.')
3821                                                 continue;
3822                                         if (p[i+j] == '*') {
3823                                                 star = true;
3824                                                 continue;
3825                                         }
3826                                         break;
3827                                 }
3828                                 if (p[i+j] == 's')
3829                                         break;
3830                                 star = false;
3831                         }
3832                         j = 0;
3833                 }
3834                 /* If no %s found then just print normally */
3835                 if (!p[i])
3836                         break;
3837
3838                 /* Copy up to the %s, and print that */
3839                 strncpy(iter->fmt, p, i);
3840                 iter->fmt[i] = '\0';
3841                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3842
3843                 /*
3844                  * If iter->seq is full, the above call no longer guarantees
3845                  * that ap is in sync with fmt processing, and further calls
3846                  * to va_arg() can return wrong positional arguments.
3847                  *
3848                  * Ensure that ap is no longer used in this case.
3849                  */
3850                 if (iter->seq.full) {
3851                         p = "";
3852                         break;
3853                 }
3854
3855                 if (star)
3856                         len = va_arg(ap, int);
3857
3858                 /* The ap now points to the string data of the %s */
3859                 str = va_arg(ap, const char *);
3860
3861                 /*
3862                  * If you hit this warning, it is likely that the
3863                  * trace event in question used %s on a string that
3864                  * was saved at the time of the event, but may not be
3865                  * around when the trace is read. Use __string(),
3866                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3867                  * instead. See samples/trace_events/trace-events-sample.h
3868                  * for reference.
3869                  */
3870                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3871                               "fmt: '%s' current_buffer: '%s'",
3872                               fmt, show_buffer(&iter->seq))) {
3873                         int ret;
3874
3875                         /* Try to safely read the string */
3876                         if (star) {
3877                                 if (len + 1 > iter->fmt_size)
3878                                         len = iter->fmt_size - 1;
3879                                 if (len < 0)
3880                                         len = 0;
3881                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3882                                 iter->fmt[len] = 0;
3883                                 star = false;
3884                         } else {
3885                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3886                                                                   iter->fmt_size);
3887                         }
3888                         if (ret < 0)
3889                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3890                         else
3891                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3892                                                  str, iter->fmt);
3893                         str = "[UNSAFE-MEMORY]";
3894                         strcpy(iter->fmt, "%s");
3895                 } else {
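                        /* Copy just the "%...s" specifier so it can be printed with the (safe) string */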
3896                         strncpy(iter->fmt, p + i, j + 1);
3897                         iter->fmt[j+1] = '\0';
3898                 }
3899                 if (star)
3900                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3901                 else
3902                         trace_seq_printf(&iter->seq, iter->fmt, str);
3903
3904                 p += i + j + 1;
3905         }
3906  print:
3907         if (*p)
3908                 trace_seq_vprintf(&iter->seq, p, ap);
3909 }
3910
3911 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3912 {
3913         const char *p, *new_fmt;
3914         char *q;
3915
3916         if (WARN_ON_ONCE(!fmt))
3917                 return fmt;
3918
3919         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3920                 return fmt;
3921
3922         p = fmt;
3923         new_fmt = q = iter->fmt;
3924         while (*p) {
3925                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3926                         if (!trace_iter_expand_format(iter))
3927                                 return fmt;
3928
3929                         q += iter->fmt - new_fmt;
3930                         new_fmt = iter->fmt;
3931                 }
3932
3933                 *q++ = *p++;
3934
3935                 /* Replace %p with %px */
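                /* "%%" is copied through as-is, and extended forms like "%pS" are left alone */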
3936                 if (p[-1] == '%') {
3937                         if (p[0] == '%') {
3938                                 *q++ = *p++;
3939                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3940                                 *q++ = *p++;
3941                                 *q++ = 'x';
3942                         }
3943                 }
3944         }
3945         *q = '\0';
3946
3947         return new_fmt;
3948 }
3949
3950 #define STATIC_TEMP_BUF_SIZE    128
3951 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3952
3953 /* Find the next real entry, without updating the iterator itself */
3954 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3955                                           int *ent_cpu, u64 *ent_ts)
3956 {
3957         /* __find_next_entry will reset ent_size */
3958         int ent_size = iter->ent_size;
3959         struct trace_entry *entry;
3960
3961         /*
3962          * If called from ftrace_dump(), then the iter->temp buffer
3963          * will be the static_temp_buf and not created from kmalloc.
3964          * If the entry size is greater than the buffer, we cannot
3965          * save it. Just return NULL in that case. This is only
3966          * used to add markers when two consecutive events' time
3967          * stamps have a large delta. See trace_print_lat_context().
3968          */
3969         if (iter->temp == static_temp_buf &&
3970             STATIC_TEMP_BUF_SIZE < ent_size)
3971                 return NULL;
3972
3973         /*
3974          * __find_next_entry() may call peek_next_entry(), which may call
3975          * ring_buffer_peek() and make the contents of iter->ent
3976          * undefined. Need to copy iter->ent now.
3977          */
3978         if (iter->ent && iter->ent != iter->temp) {
3979                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3980                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3981                         void *temp;
3982                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3983                         if (!temp)
3984                                 return NULL;
3985                         kfree(iter->temp);
3986                         iter->temp = temp;
3987                         iter->temp_size = iter->ent_size;
3988                 }
3989                 memcpy(iter->temp, iter->ent, iter->ent_size);
3990                 iter->ent = iter->temp;
3991         }
3992         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3993         /* Put back the original ent_size */
3994         iter->ent_size = ent_size;
3995
3996         return entry;
3997 }
3998
3999 /* Find the next real entry, and increment the iterator to the next entry */
4000 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4001 {
4002         iter->ent = __find_next_entry(iter, &iter->cpu,
4003                                       &iter->lost_events, &iter->ts);
4004
4005         if (iter->ent)
4006                 trace_iterator_increment(iter);
4007
4008         return iter->ent ? iter : NULL;
4009 }
4010
4011 static void trace_consume(struct trace_iterator *iter)
4012 {
4013         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4014                             &iter->lost_events);
4015 }
4016
4017 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4018 {
4019         struct trace_iterator *iter = m->private;
4020         int i = (int)*pos;
4021         void *ent;
4022
4023         WARN_ON_ONCE(iter->leftover);
4024
4025         (*pos)++;
4026
4027         /* can't go backwards */
4028         if (iter->idx > i)
4029                 return NULL;
4030
4031         if (iter->idx < 0)
4032                 ent = trace_find_next_entry_inc(iter);
4033         else
4034                 ent = iter;
4035
4036         while (ent && iter->idx < i)
4037                 ent = trace_find_next_entry_inc(iter);
4038
4039         iter->pos = *pos;
4040
4041         return ent;
4042 }
4043
4044 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4045 {
4046         struct ring_buffer_iter *buf_iter;
4047         unsigned long entries = 0;
4048         u64 ts;
4049
4050         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4051
4052         buf_iter = trace_buffer_iter(iter, cpu);
4053         if (!buf_iter)
4054                 return;
4055
4056         ring_buffer_iter_reset(buf_iter);
4057
4058         /*
4059          * With the max latency tracers, a reset may never have taken
4060          * place on a CPU. This is evident when the timestamp is before
4061          * the start of the buffer.
4062          */
4063         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4064                 if (ts >= iter->array_buffer->time_start)
4065                         break;
4066                 entries++;
4067                 ring_buffer_iter_advance(buf_iter);
4068         }
4069
4070         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4071 }
4072
4073 /*
4074  * The current tracer is copied to avoid a global locking
4075  * all around.
4076  */
4077 static void *s_start(struct seq_file *m, loff_t *pos)
4078 {
4079         struct trace_iterator *iter = m->private;
4080         struct trace_array *tr = iter->tr;
4081         int cpu_file = iter->cpu_file;
4082         void *p = NULL;
4083         loff_t l = 0;
4084         int cpu;
4085
4086         /*
4087          * Copy the tracer to avoid using a global lock all around.
4088          * iter->trace is a copy of current_trace; the pointer to the
4089          * name may be used instead of a strcmp(), as iter->trace->name
4090          * will point to the same string as current_trace->name.
4091          */
4092         mutex_lock(&trace_types_lock);
4093         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4094                 *iter->trace = *tr->current_trace;
4095         mutex_unlock(&trace_types_lock);
4096
4097 #ifdef CONFIG_TRACER_MAX_TRACE
4098         if (iter->snapshot && iter->trace->use_max_tr)
4099                 return ERR_PTR(-EBUSY);
4100 #endif
4101
4102         if (*pos != iter->pos) {
4103                 iter->ent = NULL;
4104                 iter->cpu = 0;
4105                 iter->idx = -1;
4106
4107                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4108                         for_each_tracing_cpu(cpu)
4109                                 tracing_iter_reset(iter, cpu);
4110                 } else
4111                         tracing_iter_reset(iter, cpu_file);
4112
4113                 iter->leftover = 0;
4114                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4115                         ;
4116
4117         } else {
4118                 /*
4119                  * If we overflowed the seq_file before, then we want
4120                  * to just reuse the trace_seq buffer again.
4121                  */
4122                 if (iter->leftover)
4123                         p = iter;
4124                 else {
4125                         l = *pos - 1;
4126                         p = s_next(m, p, &l);
4127                 }
4128         }
4129
4130         trace_event_read_lock();
4131         trace_access_lock(cpu_file);
4132         return p;
4133 }
4134
4135 static void s_stop(struct seq_file *m, void *p)
4136 {
4137         struct trace_iterator *iter = m->private;
4138
4139 #ifdef CONFIG_TRACER_MAX_TRACE
4140         if (iter->snapshot && iter->trace->use_max_tr)
4141                 return;
4142 #endif
4143
4144         trace_access_unlock(iter->cpu_file);
4145         trace_event_read_unlock();
4146 }
4147
4148 static void
4149 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4150                       unsigned long *entries, int cpu)
4151 {
4152         unsigned long count;
4153
4154         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4155         /*
4156          * If this buffer has skipped entries, then we hold all
4157          * entries for the trace and we need to ignore the
4158          * ones before the time stamp.
4159          */
4160         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4161                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4162                 /* total is the same as the entries */
4163                 *total = count;
4164         } else
4165                 *total = count +
4166                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4167         *entries = count;
4168 }
4169
4170 static void
4171 get_total_entries(struct array_buffer *buf,
4172                   unsigned long *total, unsigned long *entries)
4173 {
4174         unsigned long t, e;
4175         int cpu;
4176
4177         *total = 0;
4178         *entries = 0;
4179
4180         for_each_tracing_cpu(cpu) {
4181                 get_total_entries_cpu(buf, &t, &e, cpu);
4182                 *total += t;
4183                 *entries += e;
4184         }
4185 }
4186
4187 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4188 {
4189         unsigned long total, entries;
4190
4191         if (!tr)
4192                 tr = &global_trace;
4193
4194         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4195
4196         return entries;
4197 }
4198
4199 unsigned long trace_total_entries(struct trace_array *tr)
4200 {
4201         unsigned long total, entries;
4202
4203         if (!tr)
4204                 tr = &global_trace;
4205
4206         get_total_entries(&tr->array_buffer, &total, &entries);
4207
4208         return entries;
4209 }
4210
4211 static void print_lat_help_header(struct seq_file *m)
4212 {
4213         seq_puts(m, "#                    _------=> CPU#            \n"
4214                     "#                   / _-----=> irqs-off        \n"
4215                     "#                  | / _----=> need-resched    \n"
4216                     "#                  || / _---=> hardirq/softirq \n"
4217                     "#                  ||| / _--=> preempt-depth   \n"
4218                     "#                  |||| / _-=> migrate-disable \n"
4219                     "#                  ||||| /     delay           \n"
4220                     "#  cmd     pid     |||||| time  |   caller     \n"
4221                     "#     \\   /        ||||||  \\    |    /       \n");
4222 }
4223
4224 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4225 {
4226         unsigned long total;
4227         unsigned long entries;
4228
4229         get_total_entries(buf, &total, &entries);
4230         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4231                    entries, total, num_online_cpus());
4232         seq_puts(m, "#\n");
4233 }
4234
4235 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4236                                    unsigned int flags)
4237 {
4238         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4239
4240         print_event_info(buf, m);
4241
4242         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4243         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4244 }
4245
4246 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4247                                        unsigned int flags)
4248 {
4249         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4250         const char *space = "            ";
4251         int prec = tgid ? 12 : 2;
4252
4253         print_event_info(buf, m);
4254
4255         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4256         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4257         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4258         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4259         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4260         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4261         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4262         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4263 }
4264
4265 void
4266 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4267 {
4268         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4269         struct array_buffer *buf = iter->array_buffer;
4270         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4271         struct tracer *type = iter->trace;
4272         unsigned long entries;
4273         unsigned long total;
4274         const char *name = "preemption";
4275
4276         name = type->name;
4277
4278         get_total_entries(buf, &total, &entries);
4279
4280         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4281                    name, UTS_RELEASE);
4282         seq_puts(m, "# -----------------------------------"
4283                  "---------------------------------\n");
4284         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4285                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4286                    nsecs_to_usecs(data->saved_latency),
4287                    entries,
4288                    total,
4289                    buf->cpu,
4290 #if defined(CONFIG_PREEMPT_NONE)
4291                    "server",
4292 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4293                    "desktop",
4294 #elif defined(CONFIG_PREEMPT)
4295                    "preempt",
4296 #elif defined(CONFIG_PREEMPT_RT)
4297                    "preempt_rt",
4298 #else
4299                    "unknown",
4300 #endif
4301                    /* These are reserved for later use */
4302                    0, 0, 0, 0);
4303 #ifdef CONFIG_SMP
4304         seq_printf(m, " #P:%d)\n", num_online_cpus());
4305 #else
4306         seq_puts(m, ")\n");
4307 #endif
4308         seq_puts(m, "#    -----------------\n");
4309         seq_printf(m, "#    | task: %.16s-%d "
4310                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4311                    data->comm, data->pid,
4312                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4313                    data->policy, data->rt_priority);
4314         seq_puts(m, "#    -----------------\n");
4315
4316         if (data->critical_start) {
4317                 seq_puts(m, "#  => started at: ");
4318                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4319                 trace_print_seq(m, &iter->seq);
4320                 seq_puts(m, "\n#  => ended at:   ");
4321                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4322                 trace_print_seq(m, &iter->seq);
4323                 seq_puts(m, "\n#\n");
4324         }
4325
4326         seq_puts(m, "#\n");
4327 }
4328
4329 static void test_cpu_buff_start(struct trace_iterator *iter)
4330 {
4331         struct trace_seq *s = &iter->seq;
4332         struct trace_array *tr = iter->tr;
4333
4334         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4335                 return;
4336
4337         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4338                 return;
4339
4340         if (cpumask_available(iter->started) &&
4341             cpumask_test_cpu(iter->cpu, iter->started))
4342                 return;
4343
4344         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4345                 return;
4346
4347         if (cpumask_available(iter->started))
4348                 cpumask_set_cpu(iter->cpu, iter->started);
4349
4350         /* Don't print the "CPU buffer started" message for the first entry of the trace */
4351         if (iter->idx > 1)
4352                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4353                                 iter->cpu);
4354 }
4355
4356 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4357 {
4358         struct trace_array *tr = iter->tr;
4359         struct trace_seq *s = &iter->seq;
4360         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4361         struct trace_entry *entry;
4362         struct trace_event *event;
4363
4364         entry = iter->ent;
4365
4366         test_cpu_buff_start(iter);
4367
4368         event = ftrace_find_event(entry->type);
4369
4370         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4371                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4372                         trace_print_lat_context(iter);
4373                 else
4374                         trace_print_context(iter);
4375         }
4376
4377         if (trace_seq_has_overflowed(s))
4378                 return TRACE_TYPE_PARTIAL_LINE;
4379
4380         if (event)
4381                 return event->funcs->trace(iter, sym_flags, event);
4382
4383         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4384
4385         return trace_handle_return(s);
4386 }
4387
4388 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4389 {
4390         struct trace_array *tr = iter->tr;
4391         struct trace_seq *s = &iter->seq;
4392         struct trace_entry *entry;
4393         struct trace_event *event;
4394
4395         entry = iter->ent;
4396
4397         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4398                 trace_seq_printf(s, "%d %d %llu ",
4399                                  entry->pid, iter->cpu, iter->ts);
4400
4401         if (trace_seq_has_overflowed(s))
4402                 return TRACE_TYPE_PARTIAL_LINE;
4403
4404         event = ftrace_find_event(entry->type);
4405         if (event)
4406                 return event->funcs->raw(iter, 0, event);
4407
4408         trace_seq_printf(s, "%d ?\n", entry->type);
4409
4410         return trace_handle_return(s);
4411 }
4412
4413 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4414 {
4415         struct trace_array *tr = iter->tr;
4416         struct trace_seq *s = &iter->seq;
4417         unsigned char newline = '\n';
4418         struct trace_entry *entry;
4419         struct trace_event *event;
4420
4421         entry = iter->ent;
4422
4423         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4424                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4425                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4426                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4427                 if (trace_seq_has_overflowed(s))
4428                         return TRACE_TYPE_PARTIAL_LINE;
4429         }
4430
4431         event = ftrace_find_event(entry->type);
4432         if (event) {
4433                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4434                 if (ret != TRACE_TYPE_HANDLED)
4435                         return ret;
4436         }
4437
4438         SEQ_PUT_FIELD(s, newline);
4439
4440         return trace_handle_return(s);
4441 }
4442
4443 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4444 {
4445         struct trace_array *tr = iter->tr;
4446         struct trace_seq *s = &iter->seq;
4447         struct trace_entry *entry;
4448         struct trace_event *event;
4449
4450         entry = iter->ent;
4451
4452         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4453                 SEQ_PUT_FIELD(s, entry->pid);
4454                 SEQ_PUT_FIELD(s, iter->cpu);
4455                 SEQ_PUT_FIELD(s, iter->ts);
4456                 if (trace_seq_has_overflowed(s))
4457                         return TRACE_TYPE_PARTIAL_LINE;
4458         }
4459
4460         event = ftrace_find_event(entry->type);
4461         return event ? event->funcs->binary(iter, 0, event) :
4462                 TRACE_TYPE_HANDLED;
4463 }
4464
4465 int trace_empty(struct trace_iterator *iter)
4466 {
4467         struct ring_buffer_iter *buf_iter;
4468         int cpu;
4469
4470         /* If we are looking at one CPU buffer, only check that one */
4471         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4472                 cpu = iter->cpu_file;
4473                 buf_iter = trace_buffer_iter(iter, cpu);
4474                 if (buf_iter) {
4475                         if (!ring_buffer_iter_empty(buf_iter))
4476                                 return 0;
4477                 } else {
4478                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4479                                 return 0;
4480                 }
4481                 return 1;
4482         }
4483
4484         for_each_tracing_cpu(cpu) {
4485                 buf_iter = trace_buffer_iter(iter, cpu);
4486                 if (buf_iter) {
4487                         if (!ring_buffer_iter_empty(buf_iter))
4488                                 return 0;
4489                 } else {
4490                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491                                 return 0;
4492                 }
4493         }
4494
4495         return 1;
4496 }
4497
4498 /*  Called with trace_event_read_lock() held. */
4499 enum print_line_t print_trace_line(struct trace_iterator *iter)
4500 {
4501         struct trace_array *tr = iter->tr;
4502         unsigned long trace_flags = tr->trace_flags;
4503         enum print_line_t ret;
4504
4505         if (iter->lost_events) {
4506                 if (iter->lost_events == (unsigned long)-1)
4507                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4508                                          iter->cpu);
4509                 else
4510                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4511                                          iter->cpu, iter->lost_events);
4512                 if (trace_seq_has_overflowed(&iter->seq))
4513                         return TRACE_TYPE_PARTIAL_LINE;
4514         }
4515
4516         if (iter->trace && iter->trace->print_line) {
4517                 ret = iter->trace->print_line(iter);
4518                 if (ret != TRACE_TYPE_UNHANDLED)
4519                         return ret;
4520         }
4521
4522         if (iter->ent->type == TRACE_BPUTS &&
4523                         trace_flags & TRACE_ITER_PRINTK &&
4524                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4525                 return trace_print_bputs_msg_only(iter);
4526
4527         if (iter->ent->type == TRACE_BPRINT &&
4528                         trace_flags & TRACE_ITER_PRINTK &&
4529                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4530                 return trace_print_bprintk_msg_only(iter);
4531
4532         if (iter->ent->type == TRACE_PRINT &&
4533                         trace_flags & TRACE_ITER_PRINTK &&
4534                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4535                 return trace_print_printk_msg_only(iter);
4536
4537         if (trace_flags & TRACE_ITER_BIN)
4538                 return print_bin_fmt(iter);
4539
4540         if (trace_flags & TRACE_ITER_HEX)
4541                 return print_hex_fmt(iter);
4542
4543         if (trace_flags & TRACE_ITER_RAW)
4544                 return print_raw_fmt(iter);
4545
4546         return print_trace_fmt(iter);
4547 }
4548
4549 void trace_latency_header(struct seq_file *m)
4550 {
4551         struct trace_iterator *iter = m->private;
4552         struct trace_array *tr = iter->tr;
4553
4554         /* print nothing if the buffers are empty */
4555         if (trace_empty(iter))
4556                 return;
4557
4558         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4559                 print_trace_header(m, iter);
4560
4561         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4562                 print_lat_help_header(m);
4563 }
4564
4565 void trace_default_header(struct seq_file *m)
4566 {
4567         struct trace_iterator *iter = m->private;
4568         struct trace_array *tr = iter->tr;
4569         unsigned long trace_flags = tr->trace_flags;
4570
4571         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4572                 return;
4573
4574         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4575                 /* print nothing if the buffers are empty */
4576                 if (trace_empty(iter))
4577                         return;
4578                 print_trace_header(m, iter);
4579                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4580                         print_lat_help_header(m);
4581         } else {
4582                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4583                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4584                                 print_func_help_header_irq(iter->array_buffer,
4585                                                            m, trace_flags);
4586                         else
4587                                 print_func_help_header(iter->array_buffer, m,
4588                                                        trace_flags);
4589                 }
4590         }
4591 }
4592
4593 static void test_ftrace_alive(struct seq_file *m)
4594 {
4595         if (!ftrace_is_dead())
4596                 return;
4597         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4598                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4599 }
4600
4601 #ifdef CONFIG_TRACER_MAX_TRACE
4602 static void show_snapshot_main_help(struct seq_file *m)
4603 {
4604         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4605                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4606                     "#                      Takes a snapshot of the main buffer.\n"
4607                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4608                     "#                      (Doesn't have to be '2'; works with any number that\n"
4609                     "#                       is not a '0' or '1')\n");
4610 }
4611
4612 static void show_snapshot_percpu_help(struct seq_file *m)
4613 {
4614         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4615 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4616         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4617                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4618 #else
4619         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4620                     "#                     Must use main snapshot file to allocate.\n");
4621 #endif
4622         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4623                     "#                      (Doesn't have to be '2'; works with any number that\n"
4624                     "#                       is not a '0' or '1')\n");
4625 }
4626
4627 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4628 {
4629         if (iter->tr->allocated_snapshot)
4630                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4631         else
4632                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4633
4634         seq_puts(m, "# Snapshot commands:\n");
4635         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4636                 show_snapshot_main_help(m);
4637         else
4638                 show_snapshot_percpu_help(m);
4639 }
4640 #else
4641 /* Should never be called */
4642 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4643 #endif
4644
4645 static int s_show(struct seq_file *m, void *v)
4646 {
4647         struct trace_iterator *iter = v;
4648         int ret;
4649
4650         if (iter->ent == NULL) {
4651                 if (iter->tr) {
4652                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4653                         seq_puts(m, "#\n");
4654                         test_ftrace_alive(m);
4655                 }
4656                 if (iter->snapshot && trace_empty(iter))
4657                         print_snapshot_help(m, iter);
4658                 else if (iter->trace && iter->trace->print_header)
4659                         iter->trace->print_header(m);
4660                 else
4661                         trace_default_header(m);
4662
4663         } else if (iter->leftover) {
4664                 /*
4665                  * If we filled the seq_file buffer earlier, we
4666                  * want to just show it now.
4667                  */
4668                 ret = trace_print_seq(m, &iter->seq);
4669
4670                 /* ret should this time be zero, but you never know */
4671                 iter->leftover = ret;
4672
4673         } else {
4674                 print_trace_line(iter);
4675                 ret = trace_print_seq(m, &iter->seq);
4676                 /*
4677                  * If we overflow the seq_file buffer, then it will
4678                  * ask us for this data again at start up.
4679                  * Use that instead.
4680                  *  ret is 0 if seq_file write succeeded.
4681                  *        -1 otherwise.
4682                  */
4683                 iter->leftover = ret;
4684         }
4685
4686         return 0;
4687 }
4688
4689 /*
4690  * Should be used after trace_array_get(), trace_types_lock
4691  * ensures that i_cdev was already initialized.
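 * The stored value is cpu + 1, so a NULL i_cdev maps to RING_BUFFER_ALL_CPUS.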
4692  */
4693 static inline int tracing_get_cpu(struct inode *inode)
4694 {
4695         if (inode->i_cdev) /* See trace_create_cpu_file() */
4696                 return (long)inode->i_cdev - 1;
4697         return RING_BUFFER_ALL_CPUS;
4698 }
4699
4700 static const struct seq_operations tracer_seq_ops = {
4701         .start          = s_start,
4702         .next           = s_next,
4703         .stop           = s_stop,
4704         .show           = s_show,
4705 };
4706
4707 static struct trace_iterator *
4708 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4709 {
4710         struct trace_array *tr = inode->i_private;
4711         struct trace_iterator *iter;
4712         int cpu;
4713
4714         if (tracing_disabled)
4715                 return ERR_PTR(-ENODEV);
4716
4717         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4718         if (!iter)
4719                 return ERR_PTR(-ENOMEM);
4720
4721         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4722                                     GFP_KERNEL);
4723         if (!iter->buffer_iter)
4724                 goto release;
4725
4726         /*
4727          * trace_find_next_entry() may need to save off iter->ent.
4728          * It will place it into the iter->temp buffer. As most
4729          * events are smaller than 128 bytes, allocate a buffer of that size.
4730          * If one is larger, then trace_find_next_entry() will
4731          * allocate a new buffer to adjust for the bigger iter->ent.
4732          * It's not critical if it fails to get allocated here.
4733          */
4734         iter->temp = kmalloc(128, GFP_KERNEL);
4735         if (iter->temp)
4736                 iter->temp_size = 128;
4737
4738         /*
4739          * trace_event_printf() may need to modify given format
4740          * string to replace %p with %px so that it shows real address
4741          * instead of a hashed value. However, that is only needed for
4742          * event tracing; other tracers may not need it. Defer the
4743          * allocation until it is needed.
4744          */
4745         iter->fmt = NULL;
4746         iter->fmt_size = 0;
4747
4748         /*
4749          * We make a copy of the current tracer to avoid concurrent
4750          * changes on it while we are reading.
4751          */
4752         mutex_lock(&trace_types_lock);
4753         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4754         if (!iter->trace)
4755                 goto fail;
4756
4757         *iter->trace = *tr->current_trace;
4758
4759         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4760                 goto fail;
4761
4762         iter->tr = tr;
4763
4764 #ifdef CONFIG_TRACER_MAX_TRACE
4765         /* Currently only the top directory has a snapshot */
4766         if (tr->current_trace->print_max || snapshot)
4767                 iter->array_buffer = &tr->max_buffer;
4768         else
4769 #endif
4770                 iter->array_buffer = &tr->array_buffer;
4771         iter->snapshot = snapshot;
4772         iter->pos = -1;
4773         iter->cpu_file = tracing_get_cpu(inode);
4774         mutex_init(&iter->mutex);
4775
4776         /* Notify the tracer early, before we stop tracing. */
4777         if (iter->trace->open)
4778                 iter->trace->open(iter);
4779
4780         /* Annotate start of buffers if we had overruns */
4781         if (ring_buffer_overruns(iter->array_buffer->buffer))
4782                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4783
4784         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4785         if (trace_clocks[tr->clock_id].in_ns)
4786                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4787
4788         /*
4789          * If pause-on-trace is enabled, then stop the trace while
4790          * dumping, unless this is the "snapshot" file
4791          */
4792         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4793                 tracing_stop_tr(tr);
4794
4795         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4796                 for_each_tracing_cpu(cpu) {
4797                         iter->buffer_iter[cpu] =
4798                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4799                                                          cpu, GFP_KERNEL);
4800                 }
4801                 ring_buffer_read_prepare_sync();
4802                 for_each_tracing_cpu(cpu) {
4803                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4804                         tracing_iter_reset(iter, cpu);
4805                 }
4806         } else {
4807                 cpu = iter->cpu_file;
4808                 iter->buffer_iter[cpu] =
4809                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4810                                                  cpu, GFP_KERNEL);
4811                 ring_buffer_read_prepare_sync();
4812                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4813                 tracing_iter_reset(iter, cpu);
4814         }
4815
4816         mutex_unlock(&trace_types_lock);
4817
4818         return iter;
4819
4820  fail:
4821         mutex_unlock(&trace_types_lock);
4822         kfree(iter->trace);
4823         kfree(iter->temp);
4824         kfree(iter->buffer_iter);
4825 release:
4826         seq_release_private(inode, file);
4827         return ERR_PTR(-ENOMEM);
4828 }
4829
4830 int tracing_open_generic(struct inode *inode, struct file *filp)
4831 {
4832         int ret;
4833
4834         ret = tracing_check_open_get_tr(NULL);
4835         if (ret)
4836                 return ret;
4837
4838         filp->private_data = inode->i_private;
4839         return 0;
4840 }
4841
4842 bool tracing_is_disabled(void)
4843 {
4844         return tracing_disabled ? true : false;
4845 }
4846
4847 /*
4848  * Open and update trace_array ref count.
4849  * Must have the current trace_array passed to it.
4850  */
4851 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4852 {
4853         struct trace_array *tr = inode->i_private;
4854         int ret;
4855
4856         ret = tracing_check_open_get_tr(tr);
4857         if (ret)
4858                 return ret;
4859
4860         filp->private_data = inode->i_private;
4861
4862         return 0;
4863 }
4864
4865 static int tracing_release(struct inode *inode, struct file *file)
4866 {
4867         struct trace_array *tr = inode->i_private;
4868         struct seq_file *m = file->private_data;
4869         struct trace_iterator *iter;
4870         int cpu;
4871
4872         if (!(file->f_mode & FMODE_READ)) {
4873                 trace_array_put(tr);
4874                 return 0;
4875         }
4876
4877         /* Writes do not use seq_file */
4878         iter = m->private;
4879         mutex_lock(&trace_types_lock);
4880
4881         for_each_tracing_cpu(cpu) {
4882                 if (iter->buffer_iter[cpu])
4883                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4884         }
4885
4886         if (iter->trace && iter->trace->close)
4887                 iter->trace->close(iter);
4888
4889         if (!iter->snapshot && tr->stop_count)
4890                 /* reenable tracing if it was previously enabled */
4891                 tracing_start_tr(tr);
4892
4893         __trace_array_put(tr);
4894
4895         mutex_unlock(&trace_types_lock);
4896
4897         mutex_destroy(&iter->mutex);
4898         free_cpumask_var(iter->started);
4899         kfree(iter->fmt);
4900         kfree(iter->temp);
4901         kfree(iter->trace);
4902         kfree(iter->buffer_iter);
4903         seq_release_private(inode, file);
4904
4905         return 0;
4906 }
4907
4908 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4909 {
4910         struct trace_array *tr = inode->i_private;
4911
4912         trace_array_put(tr);
4913         return 0;
4914 }
4915
4916 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4917 {
4918         struct trace_array *tr = inode->i_private;
4919
4920         trace_array_put(tr);
4921
4922         return single_release(inode, file);
4923 }
4924
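/*
 * Open handler for the "trace" file. Opening it with O_TRUNC (e.g.
 * "echo > trace", as the mini-HOWTO below notes) clears the buffer of
 * the selected CPU(s) before any reading is set up.
 */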
4925 static int tracing_open(struct inode *inode, struct file *file)
4926 {
4927         struct trace_array *tr = inode->i_private;
4928         struct trace_iterator *iter;
4929         int ret;
4930
4931         ret = tracing_check_open_get_tr(tr);
4932         if (ret)
4933                 return ret;
4934
4935         /* If this file was open for write, then erase contents */
4936         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4937                 int cpu = tracing_get_cpu(inode);
4938                 struct array_buffer *trace_buf = &tr->array_buffer;
4939
4940 #ifdef CONFIG_TRACER_MAX_TRACE
4941                 if (tr->current_trace->print_max)
4942                         trace_buf = &tr->max_buffer;
4943 #endif
4944
4945                 if (cpu == RING_BUFFER_ALL_CPUS)
4946                         tracing_reset_online_cpus(trace_buf);
4947                 else
4948                         tracing_reset_cpu(trace_buf, cpu);
4949         }
4950
4951         if (file->f_mode & FMODE_READ) {
4952                 iter = __tracing_open(inode, file, false);
4953                 if (IS_ERR(iter))
4954                         ret = PTR_ERR(iter);
4955                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4956                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4957         }
4958
4959         if (ret < 0)
4960                 trace_array_put(tr);
4961
4962         return ret;
4963 }
4964
4965 /*
4966  * Some tracers are not suitable for instance buffers.
4967  * A tracer is always available for the global array (toplevel)
4968  * or if it explicitly states that it is.
4969  */
4970 static bool
4971 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4972 {
4973         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4974 }
4975
4976 /* Find the next tracer that this trace array may use */
4977 static struct tracer *
4978 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4979 {
4980         while (t && !trace_ok_for_array(t, tr))
4981                 t = t->next;
4982
4983         return t;
4984 }
4985
4986 static void *
4987 t_next(struct seq_file *m, void *v, loff_t *pos)
4988 {
4989         struct trace_array *tr = m->private;
4990         struct tracer *t = v;
4991
4992         (*pos)++;
4993
4994         if (t)
4995                 t = get_tracer_for_array(tr, t->next);
4996
4997         return t;
4998 }
4999
5000 static void *t_start(struct seq_file *m, loff_t *pos)
5001 {
5002         struct trace_array *tr = m->private;
5003         struct tracer *t;
5004         loff_t l = 0;
5005
5006         mutex_lock(&trace_types_lock);
5007
5008         t = get_tracer_for_array(tr, trace_types);
5009         for (; t && l < *pos; t = t_next(m, t, &l))
5010                 ;
5011
5012         return t;
5013 }
5014
5015 static void t_stop(struct seq_file *m, void *p)
5016 {
5017         mutex_unlock(&trace_types_lock);
5018 }
5019
5020 static int t_show(struct seq_file *m, void *v)
5021 {
5022         struct tracer *t = v;
5023
5024         if (!t)
5025                 return 0;
5026
5027         seq_puts(m, t->name);
5028         if (t->next)
5029                 seq_putc(m, ' ');
5030         else
5031                 seq_putc(m, '\n');
5032
5033         return 0;
5034 }
5035
5036 static const struct seq_operations show_traces_seq_ops = {
5037         .start          = t_start,
5038         .next           = t_next,
5039         .stop           = t_stop,
5040         .show           = t_show,
5041 };
5042
5043 static int show_traces_open(struct inode *inode, struct file *file)
5044 {
5045         struct trace_array *tr = inode->i_private;
5046         struct seq_file *m;
5047         int ret;
5048
5049         ret = tracing_check_open_get_tr(tr);
5050         if (ret)
5051                 return ret;
5052
5053         ret = seq_open(file, &show_traces_seq_ops);
5054         if (ret) {
5055                 trace_array_put(tr);
5056                 return ret;
5057         }
5058
5059         m = file->private_data;
5060         m->private = tr;
5061
5062         return 0;
5063 }
5064
5065 static int show_traces_release(struct inode *inode, struct file *file)
5066 {
5067         struct trace_array *tr = inode->i_private;
5068
5069         trace_array_put(tr);
5070         return seq_release(inode, file);
5071 }
5072
5073 static ssize_t
5074 tracing_write_stub(struct file *filp, const char __user *ubuf,
5075                    size_t count, loff_t *ppos)
5076 {
5077         return count;
5078 }
5079
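/*
 * llseek handler shared by several tracing files: readers go through
 * seq_lseek(), while write-only opens simply have their file position
 * reset to zero.
 */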
5080 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5081 {
5082         int ret;
5083
5084         if (file->f_mode & FMODE_READ)
5085                 ret = seq_lseek(file, offset, whence);
5086         else
5087                 file->f_pos = ret = 0;
5088
5089         return ret;
5090 }
5091
5092 static const struct file_operations tracing_fops = {
5093         .open           = tracing_open,
5094         .read           = seq_read,
5095         .write          = tracing_write_stub,
5096         .llseek         = tracing_lseek,
5097         .release        = tracing_release,
5098 };
5099
5100 static const struct file_operations show_traces_fops = {
5101         .open           = show_traces_open,
5102         .read           = seq_read,
5103         .llseek         = seq_lseek,
5104         .release        = show_traces_release,
5105 };
5106
5107 static ssize_t
5108 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5109                      size_t count, loff_t *ppos)
5110 {
5111         struct trace_array *tr = file_inode(filp)->i_private;
5112         char *mask_str;
5113         int len;
5114
5115         len = snprintf(NULL, 0, "%*pb\n",
5116                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5117         mask_str = kmalloc(len, GFP_KERNEL);
5118         if (!mask_str)
5119                 return -ENOMEM;
5120
5121         len = snprintf(mask_str, len, "%*pb\n",
5122                        cpumask_pr_args(tr->tracing_cpumask));
5123         if (len >= count) {
5124                 count = -EINVAL;
5125                 goto out_err;
5126         }
5127         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5128
5129 out_err:
5130         kfree(mask_str);
5131
5132         return count;
5133 }
5134
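/*
 * Apply a new tracing cpumask to @tr. CPUs leaving the mask get their
 * per-cpu buffer recording disabled (and the "disabled" counter
 * bumped) before the mask is copied; CPUs entering it are re-enabled.
 * The mask is written as a hex bitmask, e.g. (illustrative, assuming a
 * four-CPU system):
 *
 *	# echo 3 > tracing_cpumask		# trace only CPUs 0 and 1
 */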
5135 int tracing_set_cpumask(struct trace_array *tr,
5136                         cpumask_var_t tracing_cpumask_new)
5137 {
5138         int cpu;
5139
5140         if (!tr)
5141                 return -EINVAL;
5142
5143         local_irq_disable();
5144         arch_spin_lock(&tr->max_lock);
5145         for_each_tracing_cpu(cpu) {
5146                 /*
5147                  * Increase/decrease the disabled counter if we are
5148                  * about to flip a bit in the cpumask:
5149                  */
5150                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5151                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5152                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5153                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5154                 }
5155                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5156                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5157                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5158                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5159                 }
5160         }
5161         arch_spin_unlock(&tr->max_lock);
5162         local_irq_enable();
5163
5164         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5165
5166         return 0;
5167 }
5168
5169 static ssize_t
5170 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5171                       size_t count, loff_t *ppos)
5172 {
5173         struct trace_array *tr = file_inode(filp)->i_private;
5174         cpumask_var_t tracing_cpumask_new;
5175         int err;
5176
5177         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5178                 return -ENOMEM;
5179
5180         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5181         if (err)
5182                 goto err_free;
5183
5184         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5185         if (err)
5186                 goto err_free;
5187
5188         free_cpumask_var(tracing_cpumask_new);
5189
5190         return count;
5191
5192 err_free:
5193         free_cpumask_var(tracing_cpumask_new);
5194
5195         return err;
5196 }
5197
5198 static const struct file_operations tracing_cpumask_fops = {
5199         .open           = tracing_open_generic_tr,
5200         .read           = tracing_cpumask_read,
5201         .write          = tracing_cpumask_write,
5202         .release        = tracing_release_generic_tr,
5203         .llseek         = generic_file_llseek,
5204 };
5205
5206 static int tracing_trace_options_show(struct seq_file *m, void *v)
5207 {
5208         struct tracer_opt *trace_opts;
5209         struct trace_array *tr = m->private;
5210         u32 tracer_flags;
5211         int i;
5212
5213         mutex_lock(&trace_types_lock);
5214         tracer_flags = tr->current_trace->flags->val;
5215         trace_opts = tr->current_trace->flags->opts;
5216
5217         for (i = 0; trace_options[i]; i++) {
5218                 if (tr->trace_flags & (1 << i))
5219                         seq_printf(m, "%s\n", trace_options[i]);
5220                 else
5221                         seq_printf(m, "no%s\n", trace_options[i]);
5222         }
5223
5224         for (i = 0; trace_opts[i].name; i++) {
5225                 if (tracer_flags & trace_opts[i].bit)
5226                         seq_printf(m, "%s\n", trace_opts[i].name);
5227                 else
5228                         seq_printf(m, "no%s\n", trace_opts[i].name);
5229         }
5230         mutex_unlock(&trace_types_lock);
5231
5232         return 0;
5233 }
5234
5235 static int __set_tracer_option(struct trace_array *tr,
5236                                struct tracer_flags *tracer_flags,
5237                                struct tracer_opt *opts, int neg)
5238 {
5239         struct tracer *trace = tracer_flags->trace;
5240         int ret;
5241
5242         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5243         if (ret)
5244                 return ret;
5245
5246         if (neg)
5247                 tracer_flags->val &= ~opts->bit;
5248         else
5249                 tracer_flags->val |= opts->bit;
5250         return 0;
5251 }
5252
5253 /* Try to assign a tracer specific option */
5254 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5255 {
5256         struct tracer *trace = tr->current_trace;
5257         struct tracer_flags *tracer_flags = trace->flags;
5258         struct tracer_opt *opts = NULL;
5259         int i;
5260
5261         for (i = 0; tracer_flags->opts[i].name; i++) {
5262                 opts = &tracer_flags->opts[i];
5263
5264                 if (strcmp(cmp, opts->name) == 0)
5265                         return __set_tracer_option(tr, trace->flags, opts, neg);
5266         }
5267
5268         return -EINVAL;
5269 }
5270
5271 /* Some tracers require overwrite to stay enabled */
5272 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5273 {
5274         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5275                 return -1;
5276
5277         return 0;
5278 }
5279
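/*
 * Set or clear one core trace flag on @tr. The current tracer may veto
 * the change via ->flag_changed(), and a few flags need extra work
 * here: cmdline/TGID recording, fork following, ring buffer overwrite
 * mode and trace_printk control.
 */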
5280 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5281 {
5282         int *map;
5283
5284         if ((mask == TRACE_ITER_RECORD_TGID) ||
5285             (mask == TRACE_ITER_RECORD_CMD))
5286                 lockdep_assert_held(&event_mutex);
5287
5288         /* do nothing if flag is already set */
5289         if (!!(tr->trace_flags & mask) == !!enabled)
5290                 return 0;
5291
5292         /* Give the tracer a chance to approve the change */
5293         if (tr->current_trace->flag_changed)
5294                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5295                         return -EINVAL;
5296
5297         if (enabled)
5298                 tr->trace_flags |= mask;
5299         else
5300                 tr->trace_flags &= ~mask;
5301
5302         if (mask == TRACE_ITER_RECORD_CMD)
5303                 trace_event_enable_cmd_record(enabled);
5304
5305         if (mask == TRACE_ITER_RECORD_TGID) {
5306                 if (!tgid_map) {
5307                         tgid_map_max = pid_max;
5308                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5309                                        GFP_KERNEL);
5310
5311                         /*
5312                          * Pairs with smp_load_acquire() in
5313                          * trace_find_tgid_ptr() to ensure that if it observes
5314                          * the tgid_map we just allocated then it also observes
5315                          * the corresponding tgid_map_max value.
5316                          */
5317                         smp_store_release(&tgid_map, map);
5318                 }
5319                 if (!tgid_map) {
5320                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5321                         return -ENOMEM;
5322                 }
5323
5324                 trace_event_enable_tgid_record(enabled);
5325         }
5326
5327         if (mask == TRACE_ITER_EVENT_FORK)
5328                 trace_event_follow_fork(tr, enabled);
5329
5330         if (mask == TRACE_ITER_FUNC_FORK)
5331                 ftrace_pid_follow_fork(tr, enabled);
5332
5333         if (mask == TRACE_ITER_OVERWRITE) {
5334                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5335 #ifdef CONFIG_TRACER_MAX_TRACE
5336                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5337 #endif
5338         }
5339
5340         if (mask == TRACE_ITER_PRINTK) {
5341                 trace_printk_start_stop_comm(enabled);
5342                 trace_printk_control(enabled);
5343         }
5344
5345         return 0;
5346 }
5347
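/*
 * Parse a single option name, where a "no" prefix means "clear it".
 * Core flags are tried first, then the tracer-specific options. For
 * example (illustrative):
 *
 *	# echo noprint-parent > trace_options
 *	# echo print-parent > trace_options
 */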
5348 int trace_set_options(struct trace_array *tr, char *option)
5349 {
5350         char *cmp;
5351         int neg = 0;
5352         int ret;
5353         size_t orig_len = strlen(option);
5354         int len;
5355
5356         cmp = strstrip(option);
5357
5358         len = str_has_prefix(cmp, "no");
5359         if (len)
5360                 neg = 1;
5361
5362         cmp += len;
5363
5364         mutex_lock(&event_mutex);
5365         mutex_lock(&trace_types_lock);
5366
5367         ret = match_string(trace_options, -1, cmp);
5368         /* If it is not a core flag, test the tracer-specific options */
5369         if (ret < 0)
5370                 ret = set_tracer_option(tr, cmp, neg);
5371         else
5372                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5373
5374         mutex_unlock(&trace_types_lock);
5375         mutex_unlock(&event_mutex);
5376
5377         /*
5378          * If the first trailing whitespace is replaced with '\0' by strstrip,
5379          * turn it back into a space.
5380          */
5381         if (orig_len > strlen(option))
5382                 option[strlen(option)] = ' ';
5383
5384         return ret;
5385 }
5386
5387 static void __init apply_trace_boot_options(void)
5388 {
5389         char *buf = trace_boot_options_buf;
5390         char *option;
5391
5392         while (true) {
5393                 option = strsep(&buf, ",");
5394
5395                 if (!option)
5396                         break;
5397
5398                 if (*option)
5399                         trace_set_options(&global_trace, option);
5400
5401                 /* Put back the comma to allow this to be called again */
5402                 if (buf)
5403                         *(buf - 1) = ',';
5404         }
5405 }
5406
5407 static ssize_t
5408 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5409                         size_t cnt, loff_t *ppos)
5410 {
5411         struct seq_file *m = filp->private_data;
5412         struct trace_array *tr = m->private;
5413         char buf[64];
5414         int ret;
5415
5416         if (cnt >= sizeof(buf))
5417                 return -EINVAL;
5418
5419         if (copy_from_user(buf, ubuf, cnt))
5420                 return -EFAULT;
5421
5422         buf[cnt] = 0;
5423
5424         ret = trace_set_options(tr, buf);
5425         if (ret < 0)
5426                 return ret;
5427
5428         *ppos += cnt;
5429
5430         return cnt;
5431 }
5432
5433 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5434 {
5435         struct trace_array *tr = inode->i_private;
5436         int ret;
5437
5438         ret = tracing_check_open_get_tr(tr);
5439         if (ret)
5440                 return ret;
5441
5442         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5443         if (ret < 0)
5444                 trace_array_put(tr);
5445
5446         return ret;
5447 }
5448
5449 static const struct file_operations tracing_iter_fops = {
5450         .open           = tracing_trace_options_open,
5451         .read           = seq_read,
5452         .llseek         = seq_lseek,
5453         .release        = tracing_single_release_tr,
5454         .write          = tracing_trace_options_write,
5455 };
5456
5457 static const char readme_msg[] =
5458         "tracing mini-HOWTO:\n\n"
5459         "# echo 0 > tracing_on : quick way to disable tracing\n"
5460         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5461         " Important files:\n"
5462         "  trace\t\t\t- The static contents of the buffer\n"
5463         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5464         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5465         "  current_tracer\t- The tracer in use (write a name from available_tracers to change it)\n"
5466         "  available_tracers\t- list of configured tracers for current_tracer\n"
5467         "  error_log\t- error log for failed commands (that support it)\n"
5468         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5469         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5470         "  trace_clock\t\t- change the clock used to order events\n"
5471         "       local:   Per cpu clock but may not be synced across CPUs\n"
5472         "      global:   Synced across CPUs but slows tracing down.\n"
5473         "     counter:   Not a clock, but just an increment\n"
5474         "      uptime:   Jiffy counter from time of boot\n"
5475         "        perf:   Same clock that perf events use\n"
5476 #ifdef CONFIG_X86_64
5477         "     x86-tsc:   TSC cycle counter\n"
5478 #endif
5479         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5480         "       delta:   Delta difference against a buffer-wide timestamp\n"
5481         "    absolute:   Absolute (standalone) timestamp\n"
5482         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5483         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5484         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5485         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5486         "\t\t\t  Remove sub-buffer with rmdir\n"
5487         "  trace_options\t\t- Set format or modify how tracing happens\n"
5488         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5489         "\t\t\t  option name\n"
5490         "  saved_cmdlines_size\t- echo the number of comm-pid mappings to store\n"
5491 #ifdef CONFIG_DYNAMIC_FTRACE
5492         "\n  available_filter_functions - list of functions that can be filtered on\n"
5493         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5494         "\t\t\t  functions\n"
5495         "\t     accepts: func_full_name or glob-matching-pattern\n"
5496         "\t     modules: Can select a group via module\n"
5497         "\t      Format: :mod:<module-name>\n"
5498         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5499         "\t    triggers: a command to perform when function is hit\n"
5500         "\t      Format: <function>:<trigger>[:count]\n"
5501         "\t     trigger: traceon, traceoff\n"
5502         "\t\t      enable_event:<system>:<event>\n"
5503         "\t\t      disable_event:<system>:<event>\n"
5504 #ifdef CONFIG_STACKTRACE
5505         "\t\t      stacktrace\n"
5506 #endif
5507 #ifdef CONFIG_TRACER_SNAPSHOT
5508         "\t\t      snapshot\n"
5509 #endif
5510         "\t\t      dump\n"
5511         "\t\t      cpudump\n"
5512         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5513         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5514         "\t     The first one will disable tracing every time do_fault is hit\n"
5515         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5516         "\t       The first time do_trap is hit and it disables tracing, the\n"
5517         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5518         "\t       the counter will not decrement. It only decrements when the\n"
5519         "\t       trigger did work\n"
5520         "\t     To remove trigger without count:\n"
5521         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5522         "\t     To remove trigger with a count:\n"
5523         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5524         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5525         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5526         "\t    modules: Can select a group via module command :mod:\n"
5527         "\t    Does not accept triggers\n"
5528 #endif /* CONFIG_DYNAMIC_FTRACE */
5529 #ifdef CONFIG_FUNCTION_TRACER
5530         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5531         "\t\t    (function)\n"
5532         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5533         "\t\t    (function)\n"
5534 #endif
5535 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5536         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5537         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5538         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5539 #endif
5540 #ifdef CONFIG_TRACER_SNAPSHOT
5541         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5542         "\t\t\t  snapshot buffer. Read the contents for more\n"
5543         "\t\t\t  information\n"
5544 #endif
5545 #ifdef CONFIG_STACK_TRACER
5546         "  stack_trace\t\t- Shows the max stack trace when active\n"
5547         "  stack_max_size\t- Shows current max stack size that was traced\n"
5548         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5549         "\t\t\t  new trace)\n"
5550 #ifdef CONFIG_DYNAMIC_FTRACE
5551         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5552         "\t\t\t  traces\n"
5553 #endif
5554 #endif /* CONFIG_STACK_TRACER */
5555 #ifdef CONFIG_DYNAMIC_EVENTS
5556         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5557         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5558 #endif
5559 #ifdef CONFIG_KPROBE_EVENTS
5560         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5561         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5562 #endif
5563 #ifdef CONFIG_UPROBE_EVENTS
5564         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5565         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5566 #endif
5567 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5568         "\t  accepts: event-definitions (one definition per line)\n"
5569         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5570         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5571 #ifdef CONFIG_HIST_TRIGGERS
5572         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5573 #endif
5574         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5575         "\t           -:[<group>/]<event>\n"
5576 #ifdef CONFIG_KPROBE_EVENTS
5577         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5578   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5579 #endif
5580 #ifdef CONFIG_UPROBE_EVENTS
5581   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5582 #endif
5583         "\t     args: <name>=fetcharg[:type]\n"
5584         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5585 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5586         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5587 #else
5588         "\t           $stack<index>, $stack, $retval, $comm,\n"
5589 #endif
5590         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5591         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5592         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5593         "\t           <type>\\[<array-size>\\]\n"
5594 #ifdef CONFIG_HIST_TRIGGERS
5595         "\t    field: <stype> <name>;\n"
5596         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5597         "\t           [unsigned] char/int/long\n"
5598 #endif
5599         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5600         "\t            of the <attached-group>/<attached-event>.\n"
5601 #endif
5602         "  events/\t\t- Directory containing all trace event subsystems:\n"
5603         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5604         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5605         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5606         "\t\t\t  events\n"
5607         "      filter\t\t- If set, only events passing filter are traced\n"
5608         "  events/<system>/<event>/\t- Directory containing control files for\n"
5609         "\t\t\t  <event>:\n"
5610         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5611         "      filter\t\t- If set, only events passing filter are traced\n"
5612         "      trigger\t\t- If set, a command to perform when event is hit\n"
5613         "\t    Format: <trigger>[:count][if <filter>]\n"
5614         "\t   trigger: traceon, traceoff\n"
5615         "\t            enable_event:<system>:<event>\n"
5616         "\t            disable_event:<system>:<event>\n"
5617 #ifdef CONFIG_HIST_TRIGGERS
5618         "\t            enable_hist:<system>:<event>\n"
5619         "\t            disable_hist:<system>:<event>\n"
5620 #endif
5621 #ifdef CONFIG_STACKTRACE
5622         "\t\t    stacktrace\n"
5623 #endif
5624 #ifdef CONFIG_TRACER_SNAPSHOT
5625         "\t\t    snapshot\n"
5626 #endif
5627 #ifdef CONFIG_HIST_TRIGGERS
5628         "\t\t    hist (see below)\n"
5629 #endif
5630         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5631         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5632         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5633         "\t                  events/block/block_unplug/trigger\n"
5634         "\t   The first disables tracing every time block_unplug is hit.\n"
5635         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5636         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5637         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5638         "\t   Like function triggers, the counter is only decremented if it\n"
5639         "\t    enabled or disabled tracing.\n"
5640         "\t   To remove a trigger without a count:\n"
5641         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5642         "\t   To remove a trigger with a count:\n"
5643         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5644         "\t   Filters can be ignored when removing a trigger.\n"
5645 #ifdef CONFIG_HIST_TRIGGERS
5646         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5647         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5648         "\t            [:values=<field1[,field2,...]>]\n"
5649         "\t            [:sort=<field1[,field2,...]>]\n"
5650         "\t            [:size=#entries]\n"
5651         "\t            [:pause][:continue][:clear]\n"
5652         "\t            [:name=histname1]\n"
5653         "\t            [:<handler>.<action>]\n"
5654         "\t            [if <filter>]\n\n"
5655         "\t    Note, special fields can be used as well:\n"
5656         "\t            common_timestamp - to record current timestamp\n"
5657         "\t            common_cpu - to record the CPU the event happened on\n"
5658         "\n"
5659         "\t    When a matching event is hit, an entry is added to a hash\n"
5660         "\t    table using the key(s) and value(s) named, and the value of a\n"
5661         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5662         "\t    correspond to fields in the event's format description.  Keys\n"
5663         "\t    can be any field, or the special string 'stacktrace'.\n"
5664         "\t    Compound keys consisting of up to two fields can be specified\n"
5665         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5666         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5667         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5668         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5669         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5670         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5671         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5672         "\t    its histogram data will be shared with other triggers of the\n"
5673         "\t    same name, and trigger hits will update this common data.\n\n"
5674         "\t    Reading the 'hist' file for the event will dump the hash\n"
5675         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5676         "\t    triggers attached to an event, there will be a table for each\n"
5677         "\t    trigger in the output.  The table displayed for a named\n"
5678         "\t    trigger will be the same as any other instance having the\n"
5679         "\t    same name.  The default format used to display a given field\n"
5680         "\t    can be modified by appending any of the following modifiers\n"
5681         "\t    to the field name, as applicable:\n\n"
5682         "\t            .hex        display a number as a hex value\n"
5683         "\t            .sym        display an address as a symbol\n"
5684         "\t            .sym-offset display an address as a symbol and offset\n"
5685         "\t            .execname   display a common_pid as a program name\n"
5686         "\t            .syscall    display a syscall id as a syscall name\n"
5687         "\t            .log2       display log2 value rather than raw number\n"
5688         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5689         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5690         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5691         "\t    trigger or to start a hist trigger but not log any events\n"
5692         "\t    until told to do so.  'continue' can be used to start or\n"
5693         "\t    restart a paused hist trigger.\n\n"
5694         "\t    The 'clear' parameter will clear the contents of a running\n"
5695         "\t    hist trigger and leave its current paused/active state\n"
5696         "\t    unchanged.\n\n"
5697         "\t    The enable_hist and disable_hist triggers can be used to\n"
5698         "\t    have one event conditionally start and stop another event's\n"
5699         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5700         "\t    the enable_event and disable_event triggers.\n\n"
5701         "\t    Hist trigger handlers and actions are executed whenever\n"
5702         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5703         "\t        <handler>.<action>\n\n"
5704         "\t    The available handlers are:\n\n"
5705         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5706         "\t        onmax(var)               - invoke if var exceeds current max\n"
5707         "\t        onchange(var)            - invoke action if var changes\n\n"
5708         "\t    The available actions are:\n\n"
5709         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5710         "\t        save(field,...)                      - save current event fields\n"
5711 #ifdef CONFIG_TRACER_SNAPSHOT
5712         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5713 #endif
5714 #ifdef CONFIG_SYNTH_EVENTS
5715         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5716         "\t  Write into this file to define/undefine new synthetic events.\n"
5717         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5718 #endif
5719 #endif
5720 ;
5721
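/*
 * The text above is served read-only; a plain read returns the
 * mini-HOWTO verbatim (e.g. "cat README" from the tracefs mount,
 * typically /sys/kernel/tracing).
 */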
5722 static ssize_t
5723 tracing_readme_read(struct file *filp, char __user *ubuf,
5724                        size_t cnt, loff_t *ppos)
5725 {
5726         return simple_read_from_buffer(ubuf, cnt, ppos,
5727                                         readme_msg, strlen(readme_msg));
5728 }
5729
5730 static const struct file_operations tracing_readme_fops = {
5731         .open           = tracing_open_generic,
5732         .read           = tracing_readme_read,
5733         .llseek         = generic_file_llseek,
5734 };
5735
5736 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5737 {
5738         int pid = ++(*pos);
5739
5740         return trace_find_tgid_ptr(pid);
5741 }
5742
5743 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5744 {
5745         int pid = *pos;
5746
5747         return trace_find_tgid_ptr(pid);
5748 }
5749
5750 static void saved_tgids_stop(struct seq_file *m, void *v)
5751 {
5752 }
5753
5754 static int saved_tgids_show(struct seq_file *m, void *v)
5755 {
5756         int *entry = (int *)v;
5757         int pid = entry - tgid_map;
5758         int tgid = *entry;
5759
5760         if (tgid == 0)
5761                 return SEQ_SKIP;
5762
5763         seq_printf(m, "%d %d\n", pid, tgid);
5764         return 0;
5765 }
5766
5767 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5768         .start          = saved_tgids_start,
5769         .stop           = saved_tgids_stop,
5770         .next           = saved_tgids_next,
5771         .show           = saved_tgids_show,
5772 };
5773
5774 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5775 {
5776         int ret;
5777
5778         ret = tracing_check_open_get_tr(NULL);
5779         if (ret)
5780                 return ret;
5781
5782         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5783 }
5784
5785
5786 static const struct file_operations tracing_saved_tgids_fops = {
5787         .open           = tracing_saved_tgids_open,
5788         .read           = seq_read,
5789         .llseek         = seq_lseek,
5790         .release        = seq_release,
5791 };
5792
5793 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5794 {
5795         unsigned int *ptr = v;
5796
5797         if (*pos || m->count)
5798                 ptr++;
5799
5800         (*pos)++;
5801
5802         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5803              ptr++) {
5804                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5805                         continue;
5806
5807                 return ptr;
5808         }
5809
5810         return NULL;
5811 }
5812
5813 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5814 {
5815         void *v;
5816         loff_t l = 0;
5817
5818         preempt_disable();
5819         arch_spin_lock(&trace_cmdline_lock);
5820
5821         v = &savedcmd->map_cmdline_to_pid[0];
5822         while (l <= *pos) {
5823                 v = saved_cmdlines_next(m, v, &l);
5824                 if (!v)
5825                         return NULL;
5826         }
5827
5828         return v;
5829 }
5830
5831 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5832 {
5833         arch_spin_unlock(&trace_cmdline_lock);
5834         preempt_enable();
5835 }
5836
5837 static int saved_cmdlines_show(struct seq_file *m, void *v)
5838 {
5839         char buf[TASK_COMM_LEN];
5840         unsigned int *pid = v;
5841
5842         __trace_find_cmdline(*pid, buf);
5843         seq_printf(m, "%d %s\n", *pid, buf);
5844         return 0;
5845 }
5846
5847 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5848         .start          = saved_cmdlines_start,
5849         .next           = saved_cmdlines_next,
5850         .stop           = saved_cmdlines_stop,
5851         .show           = saved_cmdlines_show,
5852 };
5853
5854 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5855 {
5856         int ret;
5857
5858         ret = tracing_check_open_get_tr(NULL);
5859         if (ret)
5860                 return ret;
5861
5862         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5863 }
5864
5865 static const struct file_operations tracing_saved_cmdlines_fops = {
5866         .open           = tracing_saved_cmdlines_open,
5867         .read           = seq_read,
5868         .llseek         = seq_lseek,
5869         .release        = seq_release,
5870 };
5871
5872 static ssize_t
5873 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5874                                  size_t cnt, loff_t *ppos)
5875 {
5876         char buf[64];
5877         int r;
5878
5879         preempt_disable();
5880         arch_spin_lock(&trace_cmdline_lock);
5881         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5882         arch_spin_unlock(&trace_cmdline_lock);
5883         preempt_enable();
5884
5885         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5886 }
5887
5888 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5889 {
5890         kfree(s->saved_cmdlines);
5891         kfree(s->map_cmdline_to_pid);
5892         kfree(s);
5893 }
5894
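/*
 * Replace the saved-cmdlines buffer with one that holds @val entries.
 * The new buffer is allocated first, swapped in under
 * trace_cmdline_lock, and only then is the old buffer freed, so
 * readers never see a half-initialized buffer.
 */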
5895 static int tracing_resize_saved_cmdlines(unsigned int val)
5896 {
5897         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5898
5899         s = kmalloc(sizeof(*s), GFP_KERNEL);
5900         if (!s)
5901                 return -ENOMEM;
5902
5903         if (allocate_cmdlines_buffer(val, s) < 0) {
5904                 kfree(s);
5905                 return -ENOMEM;
5906         }
5907
5908         preempt_disable();
5909         arch_spin_lock(&trace_cmdline_lock);
5910         savedcmd_temp = savedcmd;
5911         savedcmd = s;
5912         arch_spin_unlock(&trace_cmdline_lock);
5913         preempt_enable();
5914         free_saved_cmdlines_buffer(savedcmd_temp);
5915
5916         return 0;
5917 }
5918
5919 static ssize_t
5920 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5921                                   size_t cnt, loff_t *ppos)
5922 {
5923         unsigned long val;
5924         int ret;
5925
5926         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5927         if (ret)
5928                 return ret;
5929
5930         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5931         if (!val || val > PID_MAX_DEFAULT)
5932                 return -EINVAL;
5933
5934         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5935         if (ret < 0)
5936                 return ret;
5937
5938         *ppos += cnt;
5939
5940         return cnt;
5941 }
5942
5943 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5944         .open           = tracing_open_generic,
5945         .read           = tracing_saved_cmdlines_size_read,
5946         .write          = tracing_saved_cmdlines_size_write,
5947 };
5948
5949 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5950 static union trace_eval_map_item *
5951 update_eval_map(union trace_eval_map_item *ptr)
5952 {
5953         if (!ptr->map.eval_string) {
5954                 if (ptr->tail.next) {
5955                         ptr = ptr->tail.next;
5956                         /* Set ptr to the next real item (skip head) */
5957                         ptr++;
5958                 } else
5959                         return NULL;
5960         }
5961         return ptr;
5962 }
5963
5964 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5965 {
5966         union trace_eval_map_item *ptr = v;
5967
5968         /*
5969          * Paranoid! If ptr points to end, we don't want to increment past it.
5970          * This really should never happen.
5971          */
5972         (*pos)++;
5973         ptr = update_eval_map(ptr);
5974         if (WARN_ON_ONCE(!ptr))
5975                 return NULL;
5976
5977         ptr++;
5978         ptr = update_eval_map(ptr);
5979
5980         return ptr;
5981 }
5982
5983 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5984 {
5985         union trace_eval_map_item *v;
5986         loff_t l = 0;
5987
5988         mutex_lock(&trace_eval_mutex);
5989
5990         v = trace_eval_maps;
5991         if (v)
5992                 v++;
5993
5994         while (v && l < *pos) {
5995                 v = eval_map_next(m, v, &l);
5996         }
5997
5998         return v;
5999 }
6000
6001 static void eval_map_stop(struct seq_file *m, void *v)
6002 {
6003         mutex_unlock(&trace_eval_mutex);
6004 }
6005
6006 static int eval_map_show(struct seq_file *m, void *v)
6007 {
6008         union trace_eval_map_item *ptr = v;
6009
6010         seq_printf(m, "%s %ld (%s)\n",
6011                    ptr->map.eval_string, ptr->map.eval_value,
6012                    ptr->map.system);
6013
6014         return 0;
6015 }
6016
6017 static const struct seq_operations tracing_eval_map_seq_ops = {
6018         .start          = eval_map_start,
6019         .next           = eval_map_next,
6020         .stop           = eval_map_stop,
6021         .show           = eval_map_show,
6022 };
6023
6024 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6025 {
6026         int ret;
6027
6028         ret = tracing_check_open_get_tr(NULL);
6029         if (ret)
6030                 return ret;
6031
6032         return seq_open(filp, &tracing_eval_map_seq_ops);
6033 }
6034
6035 static const struct file_operations tracing_eval_map_fops = {
6036         .open           = tracing_eval_map_open,
6037         .read           = seq_read,
6038         .llseek         = seq_lseek,
6039         .release        = seq_release,
6040 };
6041
6042 static inline union trace_eval_map_item *
6043 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6044 {
6045         /* Return tail of array given the head */
6046         return ptr + ptr->head.length + 1;
6047 }
6048
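/*
 * Layout of one chunk appended to trace_eval_maps (illustrative):
 *
 *	[ head(mod, length) | map[0] | ... | map[len - 1] | tail(next) ]
 *
 * trace_eval_jmp_to_tail() above skips from the head straight to the
 * tail so that chunks can be chained together.
 */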
6049 static void
6050 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6051                            int len)
6052 {
6053         struct trace_eval_map **stop;
6054         struct trace_eval_map **map;
6055         union trace_eval_map_item *map_array;
6056         union trace_eval_map_item *ptr;
6057
6058         stop = start + len;
6059
6060         /*
6061          * The trace_eval_maps contains the maps plus a head and tail item,
6062          * where the head holds the module and the length of the array, and
6063          * the tail holds a pointer to the next list.
6064          */
6065         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6066         if (!map_array) {
6067                 pr_warn("Unable to allocate trace eval mapping\n");
6068                 return;
6069         }
6070
6071         mutex_lock(&trace_eval_mutex);
6072
6073         if (!trace_eval_maps)
6074                 trace_eval_maps = map_array;
6075         else {
6076                 ptr = trace_eval_maps;
6077                 for (;;) {
6078                         ptr = trace_eval_jmp_to_tail(ptr);
6079                         if (!ptr->tail.next)
6080                                 break;
6081                         ptr = ptr->tail.next;
6082
6083                 }
6084                 ptr->tail.next = map_array;
6085         }
6086         map_array->head.mod = mod;
6087         map_array->head.length = len;
6088         map_array++;
6089
6090         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6091                 map_array->map = **map;
6092                 map_array++;
6093         }
6094         memset(map_array, 0, sizeof(*map_array));
6095
6096         mutex_unlock(&trace_eval_mutex);
6097 }
6098
6099 static void trace_create_eval_file(struct dentry *d_tracer)
6100 {
6101         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6102                           NULL, &tracing_eval_map_fops);
6103 }
6104
6105 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6106 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6107 static inline void trace_insert_eval_map_file(struct module *mod,
6108                               struct trace_eval_map **start, int len) { }
6109 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6110
6111 static void trace_insert_eval_map(struct module *mod,
6112                                   struct trace_eval_map **start, int len)
6113 {
6114         struct trace_eval_map **map;
6115
6116         if (len <= 0)
6117                 return;
6118
6119         map = start;
6120
6121         trace_event_eval_update(map, len);
6122
6123         trace_insert_eval_map_file(mod, start, len);
6124 }
6125
6126 static ssize_t
6127 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6128                        size_t cnt, loff_t *ppos)
6129 {
6130         struct trace_array *tr = filp->private_data;
6131         char buf[MAX_TRACER_SIZE+2];
6132         int r;
6133
6134         mutex_lock(&trace_types_lock);
6135         r = sprintf(buf, "%s\n", tr->current_trace->name);
6136         mutex_unlock(&trace_types_lock);
6137
6138         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6139 }
6140
6141 int tracer_init(struct tracer *t, struct trace_array *tr)
6142 {
6143         tracing_reset_online_cpus(&tr->array_buffer);
6144         return t->init(tr);
6145 }
6146
6147 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6148 {
6149         int cpu;
6150
6151         for_each_tracing_cpu(cpu)
6152                 per_cpu_ptr(buf->data, cpu)->entries = val;
6153 }
6154
6155 #ifdef CONFIG_TRACER_MAX_TRACE
6156 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6157 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6158                                         struct array_buffer *size_buf, int cpu_id)
6159 {
6160         int cpu, ret = 0;
6161
6162         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6163                 for_each_tracing_cpu(cpu) {
6164                         ret = ring_buffer_resize(trace_buf->buffer,
6165                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6166                         if (ret < 0)
6167                                 break;
6168                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6169                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6170                 }
6171         } else {
6172                 ret = ring_buffer_resize(trace_buf->buffer,
6173                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6174                 if (ret == 0)
6175                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6176                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6177         }
6178
6179         return ret;
6180 }
6181 #endif /* CONFIG_TRACER_MAX_TRACE */
6182
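/*
 * Resize the main ring buffer, and for the top-level array with a
 * tracer that uses the max buffer, the max/snapshot buffer as well,
 * for one CPU or for all CPUs. If resizing the max buffer fails, the
 * main buffer is put back to its previous size so the two never
 * diverge.
 */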
6183 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6184                                         unsigned long size, int cpu)
6185 {
6186         int ret;
6187
6188         /*
6189          * If kernel or user changes the size of the ring buffer
6190          * we use the size that was given, and we can forget about
6191          * expanding it later.
6192          */
6193         ring_buffer_expanded = true;
6194
6195         /* May be called before buffers are initialized */
6196         if (!tr->array_buffer.buffer)
6197                 return 0;
6198
6199         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6200         if (ret < 0)
6201                 return ret;
6202
6203 #ifdef CONFIG_TRACER_MAX_TRACE
6204         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6205             !tr->current_trace->use_max_tr)
6206                 goto out;
6207
6208         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6209         if (ret < 0) {
6210                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6211                                                      &tr->array_buffer, cpu);
6212                 if (r < 0) {
6213                         /*
6214                          * AARGH! We are left with a max buffer of a
6215                          * different size!
6216                          * The max buffer is our "snapshot" buffer.
6217                          * When a tracer needs a snapshot (one of the
6218                          * latency tracers), it swaps the max buffer
6219                          * with the saved snapshot. We succeeded in
6220                          * updating the size of the main buffer, but failed
6221                          * to update the size of the max buffer. Then, when
6222                          * we tried to reset the main buffer to its original
6223                          * size, we failed there too. This is very unlikely
6224                          * to happen, but if it does, warn and kill all
6225                          * tracing.
6226                          */
6227                         WARN_ON(1);
6228                         tracing_disabled = 1;
6229                 }
6230                 return ret;
6231         }
6232
6233         if (cpu == RING_BUFFER_ALL_CPUS)
6234                 set_buffer_entries(&tr->max_buffer, size);
6235         else
6236                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6237
6238  out:
6239 #endif /* CONFIG_TRACER_MAX_TRACE */
6240
6241         if (cpu == RING_BUFFER_ALL_CPUS)
6242                 set_buffer_entries(&tr->array_buffer, size);
6243         else
6244                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6245
6246         return ret;
6247 }
6248
6249 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6250                                   unsigned long size, int cpu_id)
6251 {
6252         int ret;
6253
6254         mutex_lock(&trace_types_lock);
6255
6256         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6257                 /* make sure this CPU is enabled in the mask */
6258                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6259                         ret = -EINVAL;
6260                         goto out;
6261                 }
6262         }
6263
6264         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6265         if (ret < 0)
6266                 ret = -ENOMEM;
6267
6268 out:
6269         mutex_unlock(&trace_types_lock);
6270
6271         return ret;
6272 }
6273
6274
6275 /**
6276  * tracing_update_buffers - used by tracing facility to expand ring buffers
6277  *
6278  * To save memory when tracing is never used on a system that has it
6279  * configured in, the ring buffers are set to a minimum size. Once a
6280  * user starts to use the tracing facility, they need to grow to their
6281  * default size.
6282  *
6283  * This function is to be called when a tracer is about to be used.
6284  */
6285 int tracing_update_buffers(void)
6286 {
6287         int ret = 0;
6288
6289         mutex_lock(&trace_types_lock);
6290         if (!ring_buffer_expanded)
6291                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6292                                                 RING_BUFFER_ALL_CPUS);
6293         mutex_unlock(&trace_types_lock);
6294
6295         return ret;
6296 }
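
/*
 * Example (sketch, not part of the original source): an in-kernel caller
 * typically expands the buffers before enabling anything that writes to
 * them, and treats failure as "cannot start tracing":
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;	/* buffers are still at their minimal boot size */
 *
 * From user space the same expansion happens implicitly the first time a
 * tracer or event is enabled, e.g.:
 *
 *	echo 1 > /sys/kernel/tracing/events/sched/sched_switch/enable
 */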
6297
6298 struct trace_option_dentry;
6299
6300 static void
6301 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6302
6303 /*
6304  * Used to clear out the tracer before deletion of an instance.
6305  * Must have trace_types_lock held.
6306  */
6307 static void tracing_set_nop(struct trace_array *tr)
6308 {
6309         if (tr->current_trace == &nop_trace)
6310                 return;
6311
6312         tr->current_trace->enabled--;
6313
6314         if (tr->current_trace->reset)
6315                 tr->current_trace->reset(tr);
6316
6317         tr->current_trace = &nop_trace;
6318 }
6319
6320 static bool tracer_options_updated;
6321
6322 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6323 {
6324         /* Only enable if the directory has been created already. */
6325         if (!tr->dir)
6326                 return;
6327
6328         /* Only create trace option files after update_tracer_options() finishes */
6329         if (!tracer_options_updated)
6330                 return;
6331
6332         create_trace_option_files(tr, t);
6333 }
6334
6335 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6336 {
6337         struct tracer *t;
6338 #ifdef CONFIG_TRACER_MAX_TRACE
6339         bool had_max_tr;
6340 #endif
6341         int ret = 0;
6342
6343         mutex_lock(&trace_types_lock);
6344
6345         if (!ring_buffer_expanded) {
6346                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6347                                                 RING_BUFFER_ALL_CPUS);
6348                 if (ret < 0)
6349                         goto out;
6350                 ret = 0;
6351         }
6352
6353         for (t = trace_types; t; t = t->next) {
6354                 if (strcmp(t->name, buf) == 0)
6355                         break;
6356         }
6357         if (!t) {
6358                 ret = -EINVAL;
6359                 goto out;
6360         }
6361         if (t == tr->current_trace)
6362                 goto out;
6363
6364 #ifdef CONFIG_TRACER_SNAPSHOT
6365         if (t->use_max_tr) {
6366                 local_irq_disable();
6367                 arch_spin_lock(&tr->max_lock);
6368                 if (tr->cond_snapshot)
6369                         ret = -EBUSY;
6370                 arch_spin_unlock(&tr->max_lock);
6371                 local_irq_enable();
6372                 if (ret)
6373                         goto out;
6374         }
6375 #endif
6376         /* Some tracers are not allowed to be enabled from the kernel command line */
6377         if (system_state < SYSTEM_RUNNING && t->noboot) {
6378                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6379                         t->name);
6380                 goto out;
6381         }
6382
6383         /* Some tracers are only allowed for the top level buffer */
6384         if (!trace_ok_for_array(t, tr)) {
6385                 ret = -EINVAL;
6386                 goto out;
6387         }
6388
6389         /* If trace pipe files are being read, we can't change the tracer */
6390         if (tr->trace_ref) {
6391                 ret = -EBUSY;
6392                 goto out;
6393         }
6394
6395         trace_branch_disable();
6396
6397         tr->current_trace->enabled--;
6398
6399         if (tr->current_trace->reset)
6400                 tr->current_trace->reset(tr);
6401
6402 #ifdef CONFIG_TRACER_MAX_TRACE
6403         had_max_tr = tr->current_trace->use_max_tr;
6404
6405         /* Current trace needs to be nop_trace before synchronize_rcu */
6406         tr->current_trace = &nop_trace;
6407
6408         if (had_max_tr && !t->use_max_tr) {
6409                 /*
6410                  * We need to make sure that the update_max_tr sees that
6411                  * current_trace changed to nop_trace to keep it from
6412                  * swapping the buffers after we resize it.
6413                  * update_max_tr() is called with interrupts disabled,
6414                  * so a synchronize_rcu() is sufficient.
6415                  */
6416                 synchronize_rcu();
6417                 free_snapshot(tr);
6418         }
6419
6420         if (t->use_max_tr && !tr->allocated_snapshot) {
6421                 ret = tracing_alloc_snapshot_instance(tr);
6422                 if (ret < 0)
6423                         goto out;
6424         }
6425 #else
6426         tr->current_trace = &nop_trace;
6427 #endif
6428
6429         if (t->init) {
6430                 ret = tracer_init(t, tr);
6431                 if (ret)
6432                         goto out;
6433         }
6434
6435         tr->current_trace = t;
6436         tr->current_trace->enabled++;
6437         trace_branch_enable(tr);
6438  out:
6439         mutex_unlock(&trace_types_lock);
6440
6441         return ret;
6442 }
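
/*
 * Example (illustrative, not from the original file): tracing_set_tracer()
 * is normally exercised through the "current_tracer" file, handled by
 * tracing_set_trace_write() below:
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 *	echo nop      > /sys/kernel/tracing/current_tracer
 *
 * An in-kernel caller holding a trace_array reference can do the same
 * directly; the name must match a registered tracer or -EINVAL is returned:
 *
 *	ret = tracing_set_tracer(tr, "nop");
 */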
6443
6444 static ssize_t
6445 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6446                         size_t cnt, loff_t *ppos)
6447 {
6448         struct trace_array *tr = filp->private_data;
6449         char buf[MAX_TRACER_SIZE+1];
6450         int i;
6451         size_t ret;
6452         int err;
6453
6454         ret = cnt;
6455
6456         if (cnt > MAX_TRACER_SIZE)
6457                 cnt = MAX_TRACER_SIZE;
6458
6459         if (copy_from_user(buf, ubuf, cnt))
6460                 return -EFAULT;
6461
6462         buf[cnt] = 0;
6463
6464         /* strip trailing whitespace. */
6465         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6466                 buf[i] = 0;
6467
6468         err = tracing_set_tracer(tr, buf);
6469         if (err)
6470                 return err;
6471
6472         *ppos += ret;
6473
6474         return ret;
6475 }
6476
6477 static ssize_t
6478 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6479                    size_t cnt, loff_t *ppos)
6480 {
6481         char buf[64];
6482         int r;
6483
6484         r = snprintf(buf, sizeof(buf), "%ld\n",
6485                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6486         if (r > sizeof(buf))
6487                 r = sizeof(buf);
6488         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6489 }
6490
6491 static ssize_t
6492 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6493                     size_t cnt, loff_t *ppos)
6494 {
6495         unsigned long val;
6496         int ret;
6497
6498         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6499         if (ret)
6500                 return ret;
6501
6502         *ptr = val * 1000;
6503
6504         return cnt;
6505 }
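
/*
 * Example (sketch): the *_nsecs helpers expose values in microseconds but
 * store them in nanoseconds, so writing "100" to tracing_thresh (handled
 * by tracing_thresh_write() below) stores 100 * 1000 = 100000 ns and
 * reads back as "100":
 *
 *	echo 100 > /sys/kernel/tracing/tracing_thresh
 *	cat /sys/kernel/tracing/tracing_thresh		# prints 100
 */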
6506
6507 static ssize_t
6508 tracing_thresh_read(struct file *filp, char __user *ubuf,
6509                     size_t cnt, loff_t *ppos)
6510 {
6511         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6512 }
6513
6514 static ssize_t
6515 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6516                      size_t cnt, loff_t *ppos)
6517 {
6518         struct trace_array *tr = filp->private_data;
6519         int ret;
6520
6521         mutex_lock(&trace_types_lock);
6522         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6523         if (ret < 0)
6524                 goto out;
6525
6526         if (tr->current_trace->update_thresh) {
6527                 ret = tr->current_trace->update_thresh(tr);
6528                 if (ret < 0)
6529                         goto out;
6530         }
6531
6532         ret = cnt;
6533 out:
6534         mutex_unlock(&trace_types_lock);
6535
6536         return ret;
6537 }
6538
6539 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6540
6541 static ssize_t
6542 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6543                      size_t cnt, loff_t *ppos)
6544 {
6545         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6546 }
6547
6548 static ssize_t
6549 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6550                       size_t cnt, loff_t *ppos)
6551 {
6552         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6553 }
6554
6555 #endif
6556
6557 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6558 {
6559         struct trace_array *tr = inode->i_private;
6560         struct trace_iterator *iter;
6561         int ret;
6562
6563         ret = tracing_check_open_get_tr(tr);
6564         if (ret)
6565                 return ret;
6566
6567         mutex_lock(&trace_types_lock);
6568
6569         /* create a buffer to store the information to pass to userspace */
6570         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6571         if (!iter) {
6572                 ret = -ENOMEM;
6573                 __trace_array_put(tr);
6574                 goto out;
6575         }
6576
6577         trace_seq_init(&iter->seq);
6578         iter->trace = tr->current_trace;
6579
6580         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6581                 ret = -ENOMEM;
6582                 goto fail;
6583         }
6584
6585         /* trace pipe does not show start of buffer */
6586         cpumask_setall(iter->started);
6587
6588         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6589                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6590
6591         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6592         if (trace_clocks[tr->clock_id].in_ns)
6593                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6594
6595         iter->tr = tr;
6596         iter->array_buffer = &tr->array_buffer;
6597         iter->cpu_file = tracing_get_cpu(inode);
6598         mutex_init(&iter->mutex);
6599         filp->private_data = iter;
6600
6601         if (iter->trace->pipe_open)
6602                 iter->trace->pipe_open(iter);
6603
6604         nonseekable_open(inode, filp);
6605
6606         tr->trace_ref++;
6607 out:
6608         mutex_unlock(&trace_types_lock);
6609         return ret;
6610
6611 fail:
6612         kfree(iter);
6613         __trace_array_put(tr);
6614         mutex_unlock(&trace_types_lock);
6615         return ret;
6616 }
6617
6618 static int tracing_release_pipe(struct inode *inode, struct file *file)
6619 {
6620         struct trace_iterator *iter = file->private_data;
6621         struct trace_array *tr = inode->i_private;
6622
6623         mutex_lock(&trace_types_lock);
6624
6625         tr->trace_ref--;
6626
6627         if (iter->trace->pipe_close)
6628                 iter->trace->pipe_close(iter);
6629
6630         mutex_unlock(&trace_types_lock);
6631
6632         free_cpumask_var(iter->started);
6633         mutex_destroy(&iter->mutex);
6634         kfree(iter);
6635
6636         trace_array_put(tr);
6637
6638         return 0;
6639 }
6640
6641 static __poll_t
6642 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6643 {
6644         struct trace_array *tr = iter->tr;
6645
6646         /* Iterators are static; they should be either filled or empty */
6647         if (trace_buffer_iter(iter, iter->cpu_file))
6648                 return EPOLLIN | EPOLLRDNORM;
6649
6650         if (tr->trace_flags & TRACE_ITER_BLOCK)
6651                 /*
6652                  * Always select as readable when in blocking mode
6653                  */
6654                 return EPOLLIN | EPOLLRDNORM;
6655         else
6656                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6657                                              filp, poll_table);
6658 }
6659
6660 static __poll_t
6661 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6662 {
6663         struct trace_iterator *iter = filp->private_data;
6664
6665         return trace_poll(iter, filp, poll_table);
6666 }
6667
6668 /* Must be called with iter->mutex held. */
6669 static int tracing_wait_pipe(struct file *filp)
6670 {
6671         struct trace_iterator *iter = filp->private_data;
6672         int ret;
6673
6674         while (trace_empty(iter)) {
6675
6676                 if ((filp->f_flags & O_NONBLOCK)) {
6677                         return -EAGAIN;
6678                 }
6679
6680                 /*
6681                  * We block until we have read something and tracing is
6682                  * disabled. We keep blocking if tracing is disabled but
6683                  * nothing has been read yet; this allows a user to cat
6684                  * this file and then enable tracing. Once something has
6685                  * been read, we give an EOF when tracing is disabled again.
6686                  *
6687                  * iter->pos will be 0 if we haven't read anything.
6688                  */
6689                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6690                         break;
6691
6692                 mutex_unlock(&iter->mutex);
6693
6694                 ret = wait_on_pipe(iter, 0);
6695
6696                 mutex_lock(&iter->mutex);
6697
6698                 if (ret)
6699                         return ret;
6700         }
6701
6702         return 1;
6703 }
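
/*
 * Example (illustrative): the blocking rules above mean a reader can be
 * attached before tracing is enabled without getting an immediate EOF:
 *
 *	cat /sys/kernel/tracing/trace_pipe &	# blocks, nothing read yet
 *	echo 1 > /sys/kernel/tracing/tracing_on	# cat starts streaming entries
 *	echo 0 > /sys/kernel/tracing/tracing_on	# cat sees EOF once it has
 *						# read something and drained
 *
 * A reader that opened the pipe with O_NONBLOCK gets -EAGAIN instead of
 * blocking while the buffer is empty.
 */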
6704
6705 /*
6706  * Consumer reader.
6707  */
6708 static ssize_t
6709 tracing_read_pipe(struct file *filp, char __user *ubuf,
6710                   size_t cnt, loff_t *ppos)
6711 {
6712         struct trace_iterator *iter = filp->private_data;
6713         ssize_t sret;
6714
6715         /*
6716          * Avoid more than one consumer on a single file descriptor.
6717          * This is just a matter of trace coherency; the ring buffer itself
6718          * is protected.
6719          */
6720         mutex_lock(&iter->mutex);
6721
6722         /* return any leftover data */
6723         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6724         if (sret != -EBUSY)
6725                 goto out;
6726
6727         trace_seq_init(&iter->seq);
6728
6729         if (iter->trace->read) {
6730                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6731                 if (sret)
6732                         goto out;
6733         }
6734
6735 waitagain:
6736         sret = tracing_wait_pipe(filp);
6737         if (sret <= 0)
6738                 goto out;
6739
6740         /* stop when tracing is finished */
6741         if (trace_empty(iter)) {
6742                 sret = 0;
6743                 goto out;
6744         }
6745
6746         if (cnt >= PAGE_SIZE)
6747                 cnt = PAGE_SIZE - 1;
6748
6749         /* reset all but tr, trace, and overruns */
6750         memset(&iter->seq, 0,
6751                sizeof(struct trace_iterator) -
6752                offsetof(struct trace_iterator, seq));
6753         cpumask_clear(iter->started);
6754         trace_seq_init(&iter->seq);
6755         iter->pos = -1;
6756
6757         trace_event_read_lock();
6758         trace_access_lock(iter->cpu_file);
6759         while (trace_find_next_entry_inc(iter) != NULL) {
6760                 enum print_line_t ret;
6761                 int save_len = iter->seq.seq.len;
6762
6763                 ret = print_trace_line(iter);
6764                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6765                         /* don't print partial lines */
6766                         iter->seq.seq.len = save_len;
6767                         break;
6768                 }
6769                 if (ret != TRACE_TYPE_NO_CONSUME)
6770                         trace_consume(iter);
6771
6772                 if (trace_seq_used(&iter->seq) >= cnt)
6773                         break;
6774
6775                 /*
6776                  * Setting the full flag means we reached the trace_seq buffer
6777                  * size and should have left via the partial-line check above;
6778                  * one of the trace_seq_* functions is not being used properly.
6779                  */
6780                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6781                           iter->ent->type);
6782         }
6783         trace_access_unlock(iter->cpu_file);
6784         trace_event_read_unlock();
6785
6786         /* Now copy what we have to the user */
6787         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6788         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6789                 trace_seq_init(&iter->seq);
6790
6791         /*
6792          * If there was nothing to send to user, in spite of consuming trace
6793          * entries, go back to wait for more entries.
6794          */
6795         if (sret == -EBUSY)
6796                 goto waitagain;
6797
6798 out:
6799         mutex_unlock(&iter->mutex);
6800
6801         return sret;
6802 }
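
/*
 * Example (sketch, user space, error handling omitted): trace_pipe is a
 * consuming reader, so a plain read loop drains entries as they arrive:
 *
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *	char buf[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 *
 * Each entry is reported at most once, and concurrent readers of the same
 * file descriptor are serialized by iter->mutex as noted above.
 */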
6803
6804 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6805                                      unsigned int idx)
6806 {
6807         __free_page(spd->pages[idx]);
6808 }
6809
6810 static size_t
6811 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6812 {
6813         size_t count;
6814         int save_len;
6815         int ret;
6816
6817         /* Seq buffer is page-sized, exactly what we need. */
6818         for (;;) {
6819                 save_len = iter->seq.seq.len;
6820                 ret = print_trace_line(iter);
6821
6822                 if (trace_seq_has_overflowed(&iter->seq)) {
6823                         iter->seq.seq.len = save_len;
6824                         break;
6825                 }
6826
6827                 /*
6828                  * This should not be hit, because it should only
6829                  * be set if the iter->seq overflowed. But check it
6830                  * anyway to be safe.
6831                  */
6832                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6833                         iter->seq.seq.len = save_len;
6834                         break;
6835                 }
6836
6837                 count = trace_seq_used(&iter->seq) - save_len;
6838                 if (rem < count) {
6839                         rem = 0;
6840                         iter->seq.seq.len = save_len;
6841                         break;
6842                 }
6843
6844                 if (ret != TRACE_TYPE_NO_CONSUME)
6845                         trace_consume(iter);
6846                 rem -= count;
6847                 if (!trace_find_next_entry_inc(iter))   {
6848                         rem = 0;
6849                         iter->ent = NULL;
6850                         break;
6851                 }
6852         }
6853
6854         return rem;
6855 }
6856
6857 static ssize_t tracing_splice_read_pipe(struct file *filp,
6858                                         loff_t *ppos,
6859                                         struct pipe_inode_info *pipe,
6860                                         size_t len,
6861                                         unsigned int flags)
6862 {
6863         struct page *pages_def[PIPE_DEF_BUFFERS];
6864         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6865         struct trace_iterator *iter = filp->private_data;
6866         struct splice_pipe_desc spd = {
6867                 .pages          = pages_def,
6868                 .partial        = partial_def,
6869                 .nr_pages       = 0, /* This gets updated below. */
6870                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6871                 .ops            = &default_pipe_buf_ops,
6872                 .spd_release    = tracing_spd_release_pipe,
6873         };
6874         ssize_t ret;
6875         size_t rem;
6876         unsigned int i;
6877
6878         if (splice_grow_spd(pipe, &spd))
6879                 return -ENOMEM;
6880
6881         mutex_lock(&iter->mutex);
6882
6883         if (iter->trace->splice_read) {
6884                 ret = iter->trace->splice_read(iter, filp,
6885                                                ppos, pipe, len, flags);
6886                 if (ret)
6887                         goto out_err;
6888         }
6889
6890         ret = tracing_wait_pipe(filp);
6891         if (ret <= 0)
6892                 goto out_err;
6893
6894         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6895                 ret = -EFAULT;
6896                 goto out_err;
6897         }
6898
6899         trace_event_read_lock();
6900         trace_access_lock(iter->cpu_file);
6901
6902         /* Fill as many pages as possible. */
6903         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6904                 spd.pages[i] = alloc_page(GFP_KERNEL);
6905                 if (!spd.pages[i])
6906                         break;
6907
6908                 rem = tracing_fill_pipe_page(rem, iter);
6909
6910                 /* Copy the data into the page, so we can start over. */
6911                 ret = trace_seq_to_buffer(&iter->seq,
6912                                           page_address(spd.pages[i]),
6913                                           trace_seq_used(&iter->seq));
6914                 if (ret < 0) {
6915                         __free_page(spd.pages[i]);
6916                         break;
6917                 }
6918                 spd.partial[i].offset = 0;
6919                 spd.partial[i].len = trace_seq_used(&iter->seq);
6920
6921                 trace_seq_init(&iter->seq);
6922         }
6923
6924         trace_access_unlock(iter->cpu_file);
6925         trace_event_read_unlock();
6926         mutex_unlock(&iter->mutex);
6927
6928         spd.nr_pages = i;
6929
6930         if (i)
6931                 ret = splice_to_pipe(pipe, &spd);
6932         else
6933                 ret = 0;
6934 out:
6935         splice_shrink_spd(&spd);
6936         return ret;
6937
6938 out_err:
6939         mutex_unlock(&iter->mutex);
6940         goto out;
6941 }
6942
6943 static ssize_t
6944 tracing_entries_read(struct file *filp, char __user *ubuf,
6945                      size_t cnt, loff_t *ppos)
6946 {
6947         struct inode *inode = file_inode(filp);
6948         struct trace_array *tr = inode->i_private;
6949         int cpu = tracing_get_cpu(inode);
6950         char buf[64];
6951         int r = 0;
6952         ssize_t ret;
6953
6954         mutex_lock(&trace_types_lock);
6955
6956         if (cpu == RING_BUFFER_ALL_CPUS) {
6957                 int cpu, buf_size_same;
6958                 unsigned long size;
6959
6960                 size = 0;
6961                 buf_size_same = 1;
6962                 /* check if all per-CPU buffer sizes are the same */
6963                 for_each_tracing_cpu(cpu) {
6964                         /* fill in the size from the first enabled CPU */
6965                         if (size == 0)
6966                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6967                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6968                                 buf_size_same = 0;
6969                                 break;
6970                         }
6971                 }
6972
6973                 if (buf_size_same) {
6974                         if (!ring_buffer_expanded)
6975                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6976                                             size >> 10,
6977                                             trace_buf_size >> 10);
6978                         else
6979                                 r = sprintf(buf, "%lu\n", size >> 10);
6980                 } else
6981                         r = sprintf(buf, "X\n");
6982         } else
6983                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6984
6985         mutex_unlock(&trace_types_lock);
6986
6987         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6988         return ret;
6989 }
6990
6991 static ssize_t
6992 tracing_entries_write(struct file *filp, const char __user *ubuf,
6993                       size_t cnt, loff_t *ppos)
6994 {
6995         struct inode *inode = file_inode(filp);
6996         struct trace_array *tr = inode->i_private;
6997         unsigned long val;
6998         int ret;
6999
7000         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7001         if (ret)
7002                 return ret;
7003
7004         /* must have at least 1 entry */
7005         if (!val)
7006                 return -EINVAL;
7007
7008         /* value is in KB */
7009         val <<= 10;
7010         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7011         if (ret < 0)
7012                 return ret;
7013
7014         *ppos += cnt;
7015
7016         return cnt;
7017 }
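
/*
 * Example (illustrative): buffer_size_kb takes the new size in KB and
 * applies it per CPU; the per_cpu variant resizes only that CPU:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb		# 4 MB per CPU
 *	echo 1408 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * When the per-CPU sizes differ, reading the top-level file prints "X",
 * as implemented in tracing_entries_read() above.
 */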
7018
7019 static ssize_t
7020 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7021                                 size_t cnt, loff_t *ppos)
7022 {
7023         struct trace_array *tr = filp->private_data;
7024         char buf[64];
7025         int r, cpu;
7026         unsigned long size = 0, expanded_size = 0;
7027
7028         mutex_lock(&trace_types_lock);
7029         for_each_tracing_cpu(cpu) {
7030                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7031                 if (!ring_buffer_expanded)
7032                         expanded_size += trace_buf_size >> 10;
7033         }
7034         if (ring_buffer_expanded)
7035                 r = sprintf(buf, "%lu\n", size);
7036         else
7037                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7038         mutex_unlock(&trace_types_lock);
7039
7040         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7041 }
7042
7043 static ssize_t
7044 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7045                           size_t cnt, loff_t *ppos)
7046 {
7047         /*
7048          * There is no need to read what the user has written; this function
7049          * exists just so that "echo" into the file does not return an error.
7050          */
7051
7052         *ppos += cnt;
7053
7054         return cnt;
7055 }
7056
7057 static int
7058 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7059 {
7060         struct trace_array *tr = inode->i_private;
7061
7062         /* disable tracing? */
7063         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7064                 tracer_tracing_off(tr);
7065         /* resize the ring buffer to 0 */
7066         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7067
7068         trace_array_put(tr);
7069
7070         return 0;
7071 }
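
/*
 * Example (sketch): writes to free_buffer are accepted but ignored; the
 * buffers are freed (resized to zero) when the file is closed, e.g.:
 *
 *	echo > /sys/kernel/tracing/free_buffer
 *
 * If TRACE_ITER_STOP_ON_FREE is set in the instance's trace options, the
 * close also turns tracing off via tracer_tracing_off() before resizing.
 */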
7072
7073 static ssize_t
7074 tracing_mark_write(struct file *filp, const char __user *ubuf,
7075                                         size_t cnt, loff_t *fpos)
7076 {
7077         struct trace_array *tr = filp->private_data;
7078         struct ring_buffer_event *event;
7079         enum event_trigger_type tt = ETT_NONE;
7080         struct trace_buffer *buffer;
7081         struct print_entry *entry;
7082         ssize_t written;
7083         int size;
7084         int len;
7085
7086 /* Used in tracing_mark_raw_write() as well */
7087 #define FAULTED_STR "<faulted>"
7088 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7089
7090         if (tracing_disabled)
7091                 return -EINVAL;
7092
7093         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7094                 return -EINVAL;
7095
7096         if (cnt > TRACE_BUF_SIZE)
7097                 cnt = TRACE_BUF_SIZE;
7098
7099         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7100
7101         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7102
7103         /* If less than "<faulted>", then make sure we can still add that */
7104         if (cnt < FAULTED_SIZE)
7105                 size += FAULTED_SIZE - cnt;
7106
7107         buffer = tr->array_buffer.buffer;
7108         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7109                                             tracing_gen_ctx());
7110         if (unlikely(!event))
7111                 /* Ring buffer disabled, return as if not open for write */
7112                 return -EBADF;
7113
7114         entry = ring_buffer_event_data(event);
7115         entry->ip = _THIS_IP_;
7116
7117         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7118         if (len) {
7119                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7120                 cnt = FAULTED_SIZE;
7121                 written = -EFAULT;
7122         } else
7123                 written = cnt;
7124
7125         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7126                 /* do not add \n before testing triggers, but add \0 */
7127                 entry->buf[cnt] = '\0';
7128                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7129         }
7130
7131         if (entry->buf[cnt - 1] != '\n') {
7132                 entry->buf[cnt] = '\n';
7133                 entry->buf[cnt + 1] = '\0';
7134         } else
7135                 entry->buf[cnt] = '\0';
7136
7137         if (static_branch_unlikely(&trace_marker_exports_enabled))
7138                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7139         __buffer_unlock_commit(buffer, event);
7140
7141         if (tt)
7142                 event_triggers_post_call(tr->trace_marker_file, tt);
7143
7144         if (written > 0)
7145                 *fpos += written;
7146
7147         return written;
7148 }
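
/*
 * Example (illustrative): user space annotates the trace by writing plain
 * text to trace_marker; a trailing newline is added if missing, and
 * "<faulted>" is substituted if the copy from user space fails:
 *
 *	echo "frame start" > /sys/kernel/tracing/trace_marker
 *
 * The resulting print event shows up in the trace output along the lines
 * of "tracing_mark_write: frame start".
 */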
7149
7150 /* Limit it for now to 3K (including tag) */
7151 #define RAW_DATA_MAX_SIZE (1024*3)
7152
7153 static ssize_t
7154 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7155                                         size_t cnt, loff_t *fpos)
7156 {
7157         struct trace_array *tr = filp->private_data;
7158         struct ring_buffer_event *event;
7159         struct trace_buffer *buffer;
7160         struct raw_data_entry *entry;
7161         ssize_t written;
7162         int size;
7163         int len;
7164
7165 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7166
7167         if (tracing_disabled)
7168                 return -EINVAL;
7169
7170         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7171                 return -EINVAL;
7172
7173         /* The marker must at least have a tag id */
7174         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7175                 return -EINVAL;
7176
7177         if (cnt > TRACE_BUF_SIZE)
7178                 cnt = TRACE_BUF_SIZE;
7179
7180         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7181
7182         size = sizeof(*entry) + cnt;
7183         if (cnt < FAULT_SIZE_ID)
7184                 size += FAULT_SIZE_ID - cnt;
7185
7186         buffer = tr->array_buffer.buffer;
7187         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7188                                             tracing_gen_ctx());
7189         if (!event)
7190                 /* Ring buffer disabled, return as if not open for write */
7191                 return -EBADF;
7192
7193         entry = ring_buffer_event_data(event);
7194
7195         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7196         if (len) {
7197                 entry->id = -1;
7198                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7199                 written = -EFAULT;
7200         } else
7201                 written = cnt;
7202
7203         __buffer_unlock_commit(buffer, event);
7204
7205         if (written > 0)
7206                 *fpos += written;
7207
7208         return written;
7209 }
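
/*
 * Example (sketch, user space, error handling omitted): trace_marker_raw
 * expects binary data whose first 4 bytes are the tag id checked above,
 * followed by an arbitrary payload:
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes shorter than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL.
 */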
7210
7211 static int tracing_clock_show(struct seq_file *m, void *v)
7212 {
7213         struct trace_array *tr = m->private;
7214         int i;
7215
7216         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7217                 seq_printf(m,
7218                         "%s%s%s%s", i ? " " : "",
7219                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7220                         i == tr->clock_id ? "]" : "");
7221         seq_putc(m, '\n');
7222
7223         return 0;
7224 }
7225
7226 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7227 {
7228         int i;
7229
7230         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7231                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7232                         break;
7233         }
7234         if (i == ARRAY_SIZE(trace_clocks))
7235                 return -EINVAL;
7236
7237         mutex_lock(&trace_types_lock);
7238
7239         tr->clock_id = i;
7240
7241         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7242
7243         /*
7244          * New clock may not be consistent with the previous clock.
7245          * Reset the buffer so that it doesn't have incomparable timestamps.
7246          */
7247         tracing_reset_online_cpus(&tr->array_buffer);
7248
7249 #ifdef CONFIG_TRACER_MAX_TRACE
7250         if (tr->max_buffer.buffer)
7251                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7252         tracing_reset_online_cpus(&tr->max_buffer);
7253 #endif
7254
7255         mutex_unlock(&trace_types_lock);
7256
7257         return 0;
7258 }
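
/*
 * Example (illustrative): the clock is selected by name through the
 * trace_clock file; the bracketed entry in the read-out is the current one:
 *
 *	cat /sys/kernel/tracing/trace_clock	# [local] global counter ...
 *	echo mono > /sys/kernel/tracing/trace_clock
 *
 * Note that switching clocks resets the ring buffer (and the max/snapshot
 * buffer when configured), since old and new timestamps are not comparable.
 */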
7259
7260 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7261                                    size_t cnt, loff_t *fpos)
7262 {
7263         struct seq_file *m = filp->private_data;
7264         struct trace_array *tr = m->private;
7265         char buf[64];
7266         const char *clockstr;
7267         int ret;
7268
7269         if (cnt >= sizeof(buf))
7270                 return -EINVAL;
7271
7272         if (copy_from_user(buf, ubuf, cnt))
7273                 return -EFAULT;
7274
7275         buf[cnt] = 0;
7276
7277         clockstr = strstrip(buf);
7278
7279         ret = tracing_set_clock(tr, clockstr);
7280         if (ret)
7281                 return ret;
7282
7283         *fpos += cnt;
7284
7285         return cnt;
7286 }
7287
7288 static int tracing_clock_open(struct inode *inode, struct file *file)
7289 {
7290         struct trace_array *tr = inode->i_private;
7291         int ret;
7292
7293         ret = tracing_check_open_get_tr(tr);
7294         if (ret)
7295                 return ret;
7296
7297         ret = single_open(file, tracing_clock_show, inode->i_private);
7298         if (ret < 0)
7299                 trace_array_put(tr);
7300
7301         return ret;
7302 }
7303
7304 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7305 {
7306         struct trace_array *tr = m->private;
7307
7308         mutex_lock(&trace_types_lock);
7309
7310         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7311                 seq_puts(m, "delta [absolute]\n");
7312         else
7313                 seq_puts(m, "[delta] absolute\n");
7314
7315         mutex_unlock(&trace_types_lock);
7316
7317         return 0;
7318 }
7319
7320 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7321 {
7322         struct trace_array *tr = inode->i_private;
7323         int ret;
7324
7325         ret = tracing_check_open_get_tr(tr);
7326         if (ret)
7327                 return ret;
7328
7329         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7330         if (ret < 0)
7331                 trace_array_put(tr);
7332
7333         return ret;
7334 }
7335
7336 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7337 {
7338         if (rbe == this_cpu_read(trace_buffered_event))
7339                 return ring_buffer_time_stamp(buffer);
7340
7341         return ring_buffer_event_time_stamp(buffer, rbe);
7342 }
7343
7344 /*
7345  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7346  */
7347 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7348 {
7349         int ret = 0;
7350
7351         mutex_lock(&trace_types_lock);
7352
7353         if (set && tr->no_filter_buffering_ref++)
7354                 goto out;
7355
7356         if (!set) {
7357                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7358                         ret = -EINVAL;
7359                         goto out;
7360                 }
7361
7362                 --tr->no_filter_buffering_ref;
7363         }
7364  out:
7365         mutex_unlock(&trace_types_lock);
7366
7367         return ret;
7368 }
7369
7370 struct ftrace_buffer_info {
7371         struct trace_iterator   iter;
7372         void                    *spare;
7373         unsigned int            spare_cpu;
7374         unsigned int            read;
7375 };
7376
7377 #ifdef CONFIG_TRACER_SNAPSHOT
7378 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7379 {
7380         struct trace_array *tr = inode->i_private;
7381         struct trace_iterator *iter;
7382         struct seq_file *m;
7383         int ret;
7384
7385         ret = tracing_check_open_get_tr(tr);
7386         if (ret)
7387                 return ret;
7388
7389         if (file->f_mode & FMODE_READ) {
7390                 iter = __tracing_open(inode, file, true);
7391                 if (IS_ERR(iter))
7392                         ret = PTR_ERR(iter);
7393         } else {
7394                 /* Writes still need the seq_file to hold the private data */
7395                 ret = -ENOMEM;
7396                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7397                 if (!m)
7398                         goto out;
7399                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7400                 if (!iter) {
7401                         kfree(m);
7402                         goto out;
7403                 }
7404                 ret = 0;
7405
7406                 iter->tr = tr;
7407                 iter->array_buffer = &tr->max_buffer;
7408                 iter->cpu_file = tracing_get_cpu(inode);
7409                 m->private = iter;
7410                 file->private_data = m;
7411         }
7412 out:
7413         if (ret < 0)
7414                 trace_array_put(tr);
7415
7416         return ret;
7417 }
7418
7419 static ssize_t
7420 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7421                        loff_t *ppos)
7422 {
7423         struct seq_file *m = filp->private_data;
7424         struct trace_iterator *iter = m->private;
7425         struct trace_array *tr = iter->tr;
7426         unsigned long val;
7427         int ret;
7428
7429         ret = tracing_update_buffers();
7430         if (ret < 0)
7431                 return ret;
7432
7433         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7434         if (ret)
7435                 return ret;
7436
7437         mutex_lock(&trace_types_lock);
7438
7439         if (tr->current_trace->use_max_tr) {
7440                 ret = -EBUSY;
7441                 goto out;
7442         }
7443
7444         local_irq_disable();
7445         arch_spin_lock(&tr->max_lock);
7446         if (tr->cond_snapshot)
7447                 ret = -EBUSY;
7448         arch_spin_unlock(&tr->max_lock);
7449         local_irq_enable();
7450         if (ret)
7451                 goto out;
7452
7453         switch (val) {
7454         case 0:
7455                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7456                         ret = -EINVAL;
7457                         break;
7458                 }
7459                 if (tr->allocated_snapshot)
7460                         free_snapshot(tr);
7461                 break;
7462         case 1:
7463 /* Only allow per-cpu swap if the ring buffer supports it */
7464 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7465                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7466                         ret = -EINVAL;
7467                         break;
7468                 }
7469 #endif
7470                 if (tr->allocated_snapshot)
7471                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7472                                         &tr->array_buffer, iter->cpu_file);
7473                 else
7474                         ret = tracing_alloc_snapshot_instance(tr);
7475                 if (ret < 0)
7476                         break;
7477                 local_irq_disable();
7478                 /* Now, we're going to swap */
7479                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7480                         update_max_tr(tr, current, smp_processor_id(), NULL);
7481                 else
7482                         update_max_tr_single(tr, current, iter->cpu_file);
7483                 local_irq_enable();
7484                 break;
7485         default:
7486                 if (tr->allocated_snapshot) {
7487                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7488                                 tracing_reset_online_cpus(&tr->max_buffer);
7489                         else
7490                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7491                 }
7492                 break;
7493         }
7494
7495         if (ret >= 0) {
7496                 *ppos += cnt;
7497                 ret = cnt;
7498         }
7499 out:
7500         mutex_unlock(&trace_types_lock);
7501         return ret;
7502 }
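
/*
 * Example (illustrative): the value written to the snapshot file selects
 * the action taken by the switch statement above:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# allocate if needed, then swap
 *	cat /sys/kernel/tracing/snapshot	# read the snapshotted data
 *	echo 2 > /sys/kernel/tracing/snapshot	# clear the snapshot contents
 *	echo 0 > /sys/kernel/tracing/snapshot	# free the snapshot buffer
 */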
7503
7504 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7505 {
7506         struct seq_file *m = file->private_data;
7507         int ret;
7508
7509         ret = tracing_release(inode, file);
7510
7511         if (file->f_mode & FMODE_READ)
7512                 return ret;
7513
7514         /* If write only, the seq_file is just a stub */
7515         if (m)
7516                 kfree(m->private);
7517         kfree(m);
7518
7519         return 0;
7520 }
7521
7522 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7523 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7524                                     size_t count, loff_t *ppos);
7525 static int tracing_buffers_release(struct inode *inode, struct file *file);
7526 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7527                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7528
7529 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7530 {
7531         struct ftrace_buffer_info *info;
7532         int ret;
7533
7534         /* The following checks for tracefs lockdown */
7535         ret = tracing_buffers_open(inode, filp);
7536         if (ret < 0)
7537                 return ret;
7538
7539         info = filp->private_data;
7540
7541         if (info->iter.trace->use_max_tr) {
7542                 tracing_buffers_release(inode, filp);
7543                 return -EBUSY;
7544         }
7545
7546         info->iter.snapshot = true;
7547         info->iter.array_buffer = &info->iter.tr->max_buffer;
7548
7549         return ret;
7550 }
7551
7552 #endif /* CONFIG_TRACER_SNAPSHOT */
7553
7554
7555 static const struct file_operations tracing_thresh_fops = {
7556         .open           = tracing_open_generic,
7557         .read           = tracing_thresh_read,
7558         .write          = tracing_thresh_write,
7559         .llseek         = generic_file_llseek,
7560 };
7561
7562 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7563 static const struct file_operations tracing_max_lat_fops = {
7564         .open           = tracing_open_generic,
7565         .read           = tracing_max_lat_read,
7566         .write          = tracing_max_lat_write,
7567         .llseek         = generic_file_llseek,
7568 };
7569 #endif
7570
7571 static const struct file_operations set_tracer_fops = {
7572         .open           = tracing_open_generic,
7573         .read           = tracing_set_trace_read,
7574         .write          = tracing_set_trace_write,
7575         .llseek         = generic_file_llseek,
7576 };
7577
7578 static const struct file_operations tracing_pipe_fops = {
7579         .open           = tracing_open_pipe,
7580         .poll           = tracing_poll_pipe,
7581         .read           = tracing_read_pipe,
7582         .splice_read    = tracing_splice_read_pipe,
7583         .release        = tracing_release_pipe,
7584         .llseek         = no_llseek,
7585 };
7586
7587 static const struct file_operations tracing_entries_fops = {
7588         .open           = tracing_open_generic_tr,
7589         .read           = tracing_entries_read,
7590         .write          = tracing_entries_write,
7591         .llseek         = generic_file_llseek,
7592         .release        = tracing_release_generic_tr,
7593 };
7594
7595 static const struct file_operations tracing_total_entries_fops = {
7596         .open           = tracing_open_generic_tr,
7597         .read           = tracing_total_entries_read,
7598         .llseek         = generic_file_llseek,
7599         .release        = tracing_release_generic_tr,
7600 };
7601
7602 static const struct file_operations tracing_free_buffer_fops = {
7603         .open           = tracing_open_generic_tr,
7604         .write          = tracing_free_buffer_write,
7605         .release        = tracing_free_buffer_release,
7606 };
7607
7608 static const struct file_operations tracing_mark_fops = {
7609         .open           = tracing_open_generic_tr,
7610         .write          = tracing_mark_write,
7611         .llseek         = generic_file_llseek,
7612         .release        = tracing_release_generic_tr,
7613 };
7614
7615 static const struct file_operations tracing_mark_raw_fops = {
7616         .open           = tracing_open_generic_tr,
7617         .write          = tracing_mark_raw_write,
7618         .llseek         = generic_file_llseek,
7619         .release        = tracing_release_generic_tr,
7620 };
7621
7622 static const struct file_operations trace_clock_fops = {
7623         .open           = tracing_clock_open,
7624         .read           = seq_read,
7625         .llseek         = seq_lseek,
7626         .release        = tracing_single_release_tr,
7627         .write          = tracing_clock_write,
7628 };
7629
7630 static const struct file_operations trace_time_stamp_mode_fops = {
7631         .open           = tracing_time_stamp_mode_open,
7632         .read           = seq_read,
7633         .llseek         = seq_lseek,
7634         .release        = tracing_single_release_tr,
7635 };
7636
7637 #ifdef CONFIG_TRACER_SNAPSHOT
7638 static const struct file_operations snapshot_fops = {
7639         .open           = tracing_snapshot_open,
7640         .read           = seq_read,
7641         .write          = tracing_snapshot_write,
7642         .llseek         = tracing_lseek,
7643         .release        = tracing_snapshot_release,
7644 };
7645
7646 static const struct file_operations snapshot_raw_fops = {
7647         .open           = snapshot_raw_open,
7648         .read           = tracing_buffers_read,
7649         .release        = tracing_buffers_release,
7650         .splice_read    = tracing_buffers_splice_read,
7651         .llseek         = no_llseek,
7652 };
7653
7654 #endif /* CONFIG_TRACER_SNAPSHOT */
7655
7656 /*
7657  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7658  * @filp: The active open file structure
7659  * @ubuf: The userspace provided buffer holding the value to write
7660  * @cnt: The maximum number of bytes to read from @ubuf
7661  * @ppos: The current "file" position
7662  *
7663  * This function implements the write interface for a struct trace_min_max_param.
7664  * The filp->private_data must point to a trace_min_max_param structure that
7665  * defines where to write the value, the min and the max acceptable values,
7666  * and a lock to protect the write.
7667  */
7668 static ssize_t
7669 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7670 {
7671         struct trace_min_max_param *param = filp->private_data;
7672         u64 val;
7673         int err;
7674
7675         if (!param)
7676                 return -EFAULT;
7677
7678         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7679         if (err)
7680                 return err;
7681
7682         if (param->lock)
7683                 mutex_lock(param->lock);
7684
7685         if (param->min && val < *param->min)
7686                 err = -EINVAL;
7687
7688         if (param->max && val > *param->max)
7689                 err = -EINVAL;
7690
7691         if (!err)
7692                 *param->val = val;
7693
7694         if (param->lock)
7695                 mutex_unlock(param->lock);
7696
7697         if (err)
7698                 return err;
7699
7700         return cnt;
7701 }
7702
7703 /*
7704  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7705  * @filp: The active open file structure
7706  * @ubuf: The userspace provided buffer to read value into
7707  * @cnt: The maximum number of bytes to read
7708  * @ppos: The current "file" position
7709  *
7710  * This function implements the read interface for a struct trace_min_max_param.
7711  * The filp->private_data must point to a trace_min_max_param struct with valid
7712  * data.
7713  */
7714 static ssize_t
7715 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7716 {
7717         struct trace_min_max_param *param = filp->private_data;
7718         char buf[U64_STR_SIZE];
7719         int len;
7720         u64 val;
7721
7722         if (!param)
7723                 return -EFAULT;
7724
7725         val = *param->val;
7726
7727         if (cnt > sizeof(buf))
7728                 cnt = sizeof(buf);
7729
7730         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7731
7732         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7733 }
7734
7735 const struct file_operations trace_min_max_fops = {
7736         .open           = tracing_open_generic,
7737         .read           = trace_min_max_read,
7738         .write          = trace_min_max_write,
7739 };
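
/*
 * Example (sketch, hypothetical names): a user of trace_min_max_fops wires
 * a u64 value, optional bounds and an optional lock into a
 * trace_min_max_param and passes it as the file's private data:
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &trace_types_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_value", TRACE_MODE_WRITE, d_tracer,
 *			  &example_param, &trace_min_max_fops);
 */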
7740
7741 #define TRACING_LOG_ERRS_MAX    8
7742 #define TRACING_LOG_LOC_MAX     128
7743
7744 #define CMD_PREFIX "  Command: "
7745
7746 struct err_info {
7747         const char      **errs; /* ptr to loc-specific array of err strings */
7748         u8              type;   /* index into errs -> specific err string */
7749         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7750         u64             ts;
7751 };
7752
7753 struct tracing_log_err {
7754         struct list_head        list;
7755         struct err_info         info;
7756         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7757         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7758 };
7759
7760 static DEFINE_MUTEX(tracing_err_log_lock);
7761
7762 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7763 {
7764         struct tracing_log_err *err;
7765
7766         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7767                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7768                 if (!err)
7769                         err = ERR_PTR(-ENOMEM);
7770                 else
7771                         tr->n_err_log_entries++;
7772
7773                 return err;
7774         }
7775
7776         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7777         list_del(&err->list);
7778
7779         return err;
7780 }
7781
7782 /**
7783  * err_pos - find the position of a string within a command for error careting
7784  * @cmd: The tracing command that caused the error
7785  * @str: The string to position the caret at within @cmd
7786  *
7787  * Finds the position of the first occurrence of @str within @cmd.  The
7788  * return value can be passed to tracing_log_err() for caret placement
7789  * within @cmd.
7790  *
7791  * Returns the index within @cmd of the first occurrence of @str or 0
7792  * if @str was not found.
7793  */
7794 unsigned int err_pos(char *cmd, const char *str)
7795 {
7796         char *found;
7797
7798         if (WARN_ON(!strlen(cmd)))
7799                 return 0;
7800
7801         found = strstr(cmd, str);
7802         if (found)
7803                 return found - cmd;
7804
7805         return 0;
7806 }
7807
7808 /**
7809  * tracing_log_err - write an error to the tracing error log
7810  * @tr: The associated trace array for the error (NULL for top level array)
7811  * @loc: A string describing where the error occurred
7812  * @cmd: The tracing command that caused the error
7813  * @errs: The array of loc-specific static error strings
7814  * @type: The index into errs[], which produces the specific static err string
7815  * @pos: The position the caret should be placed in the cmd
7816  *
7817  * Writes an error into tracing/error_log of the form:
7818  *
7819  * <loc>: error: <text>
7820  *   Command: <cmd>
7821  *              ^
7822  *
7823  * tracing/error_log is a small log file containing the last
7824  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7825  * unless there has been a tracing error, and the error log can be
7826  * cleared and have its memory freed by writing the empty string to
7827  * it in truncation mode, i.e. echo > tracing/error_log.
7828  *
7829  * NOTE: the @errs array along with the @type param are used to
7830  * produce a static error string - this string is not copied and saved
7831  * when the error is logged - only a pointer to it is saved.  See
7832  * existing callers for examples of how static strings are typically
7833  * defined for use with tracing_log_err().
7834  */
7835 void tracing_log_err(struct trace_array *tr,
7836                      const char *loc, const char *cmd,
7837                      const char **errs, u8 type, u8 pos)
7838 {
7839         struct tracing_log_err *err;
7840
7841         if (!tr)
7842                 tr = &global_trace;
7843
7844         mutex_lock(&tracing_err_log_lock);
7845         err = get_tracing_log_err(tr);
7846         if (PTR_ERR(err) == -ENOMEM) {
7847                 mutex_unlock(&tracing_err_log_lock);
7848                 return;
7849         }
7850
7851         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7852         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7853
7854         err->info.errs = errs;
7855         err->info.type = type;
7856         err->info.pos = pos;
7857         err->info.ts = local_clock();
7858
7859         list_add_tail(&err->list, &tr->err_log);
7860         mutex_unlock(&tracing_err_log_lock);
7861 }
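/*
 * Illustrative caller sketch (hypothetical names): each subsystem keeps a
 * static array of error strings indexed by a local enum, and typically
 * derives the caret position with err_pos():
 *
 *	static const char *my_cmd_errs[] = {
 *		"Unknown keyword",
 *		"Duplicate field",
 *	};
 *	enum { MY_ERR_UNKNOWN, MY_ERR_DUP };
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
 *			MY_ERR_UNKNOWN, err_pos(cmd, bad_token));
 *
 * Only the my_cmd_errs pointer is stored with the log entry, which is why
 * the array must be static (see the NOTE above).
 */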
7862
7863 static void clear_tracing_err_log(struct trace_array *tr)
7864 {
7865         struct tracing_log_err *err, *next;
7866
7867         mutex_lock(&tracing_err_log_lock);
7868         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7869                 list_del(&err->list);
7870                 kfree(err);
7871         }
7872
7873         tr->n_err_log_entries = 0;
7874         mutex_unlock(&tracing_err_log_lock);
7875 }
7876
7877 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7878 {
7879         struct trace_array *tr = m->private;
7880
7881         mutex_lock(&tracing_err_log_lock);
7882
7883         return seq_list_start(&tr->err_log, *pos);
7884 }
7885
7886 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7887 {
7888         struct trace_array *tr = m->private;
7889
7890         return seq_list_next(v, &tr->err_log, pos);
7891 }
7892
7893 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7894 {
7895         mutex_unlock(&tracing_err_log_lock);
7896 }
7897
7898 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7899 {
7900         u8 i;
7901
7902         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7903                 seq_putc(m, ' ');
7904         for (i = 0; i < pos; i++)
7905                 seq_putc(m, ' ');
7906         seq_puts(m, "^\n");
7907 }
7908
7909 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7910 {
7911         struct tracing_log_err *err = v;
7912
7913         if (err) {
7914                 const char *err_text = err->info.errs[err->info.type];
7915                 u64 sec = err->info.ts;
7916                 u32 nsec;
7917
7918                 nsec = do_div(sec, NSEC_PER_SEC);
7919                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7920                            err->loc, err_text);
7921                 seq_printf(m, "%s", err->cmd);
7922                 tracing_err_log_show_pos(m, err->info.pos);
7923         }
7924
7925         return 0;
7926 }
7927
7928 static const struct seq_operations tracing_err_log_seq_ops = {
7929         .start  = tracing_err_log_seq_start,
7930         .next   = tracing_err_log_seq_next,
7931         .stop   = tracing_err_log_seq_stop,
7932         .show   = tracing_err_log_seq_show
7933 };
7934
7935 static int tracing_err_log_open(struct inode *inode, struct file *file)
7936 {
7937         struct trace_array *tr = inode->i_private;
7938         int ret = 0;
7939
7940         ret = tracing_check_open_get_tr(tr);
7941         if (ret)
7942                 return ret;
7943
7944         /* If this file was opened for write, then erase contents */
7945         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7946                 clear_tracing_err_log(tr);
7947
7948         if (file->f_mode & FMODE_READ) {
7949                 ret = seq_open(file, &tracing_err_log_seq_ops);
7950                 if (!ret) {
7951                         struct seq_file *m = file->private_data;
7952                         m->private = tr;
7953                 } else {
7954                         trace_array_put(tr);
7955                 }
7956         }
7957         return ret;
7958 }
7959
7960 static ssize_t tracing_err_log_write(struct file *file,
7961                                      const char __user *buffer,
7962                                      size_t count, loff_t *ppos)
7963 {
7964         return count;
7965 }
7966
7967 static int tracing_err_log_release(struct inode *inode, struct file *file)
7968 {
7969         struct trace_array *tr = inode->i_private;
7970
7971         trace_array_put(tr);
7972
7973         if (file->f_mode & FMODE_READ)
7974                 seq_release(inode, file);
7975
7976         return 0;
7977 }
7978
7979 static const struct file_operations tracing_err_log_fops = {
7980         .open           = tracing_err_log_open,
7981         .write          = tracing_err_log_write,
7982         .read           = seq_read,
7983         .llseek         = seq_lseek,
7984         .release        = tracing_err_log_release,
7985 };
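/*
 * From user space (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/error_log     # dump the logged errors
 *	echo > /sys/kernel/tracing/error_log  # O_TRUNC open clears the log
 */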
7986
7987 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7988 {
7989         struct trace_array *tr = inode->i_private;
7990         struct ftrace_buffer_info *info;
7991         int ret;
7992
7993         ret = tracing_check_open_get_tr(tr);
7994         if (ret)
7995                 return ret;
7996
7997         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7998         if (!info) {
7999                 trace_array_put(tr);
8000                 return -ENOMEM;
8001         }
8002
8003         mutex_lock(&trace_types_lock);
8004
8005         info->iter.tr           = tr;
8006         info->iter.cpu_file     = tracing_get_cpu(inode);
8007         info->iter.trace        = tr->current_trace;
8008         info->iter.array_buffer = &tr->array_buffer;
8009         info->spare             = NULL;
8010         /* Force reading ring buffer for first read */
8011         info->read              = (unsigned int)-1;
8012
8013         filp->private_data = info;
8014
8015         tr->trace_ref++;
8016
8017         mutex_unlock(&trace_types_lock);
8018
8019         ret = nonseekable_open(inode, filp);
8020         if (ret < 0)
8021                 trace_array_put(tr);
8022
8023         return ret;
8024 }
8025
8026 static __poll_t
8027 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8028 {
8029         struct ftrace_buffer_info *info = filp->private_data;
8030         struct trace_iterator *iter = &info->iter;
8031
8032         return trace_poll(iter, filp, poll_table);
8033 }
8034
8035 static ssize_t
8036 tracing_buffers_read(struct file *filp, char __user *ubuf,
8037                      size_t count, loff_t *ppos)
8038 {
8039         struct ftrace_buffer_info *info = filp->private_data;
8040         struct trace_iterator *iter = &info->iter;
8041         ssize_t ret = 0;
8042         ssize_t size;
8043
8044         if (!count)
8045                 return 0;
8046
8047 #ifdef CONFIG_TRACER_MAX_TRACE
8048         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8049                 return -EBUSY;
8050 #endif
8051
8052         if (!info->spare) {
8053                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8054                                                           iter->cpu_file);
8055                 if (IS_ERR(info->spare)) {
8056                         ret = PTR_ERR(info->spare);
8057                         info->spare = NULL;
8058                 } else {
8059                         info->spare_cpu = iter->cpu_file;
8060                 }
8061         }
8062         if (!info->spare)
8063                 return ret;
8064
8065         /* Do we have previous read data to read? */
8066         if (info->read < PAGE_SIZE)
8067                 goto read;
8068
8069  again:
8070         trace_access_lock(iter->cpu_file);
8071         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8072                                     &info->spare,
8073                                     count,
8074                                     iter->cpu_file, 0);
8075         trace_access_unlock(iter->cpu_file);
8076
8077         if (ret < 0) {
8078                 if (trace_empty(iter)) {
8079                         if ((filp->f_flags & O_NONBLOCK))
8080                                 return -EAGAIN;
8081
8082                         ret = wait_on_pipe(iter, 0);
8083                         if (ret)
8084                                 return ret;
8085
8086                         goto again;
8087                 }
8088                 return 0;
8089         }
8090
8091         info->read = 0;
8092  read:
8093         size = PAGE_SIZE - info->read;
8094         if (size > count)
8095                 size = count;
8096
8097         ret = copy_to_user(ubuf, info->spare + info->read, size);
8098         if (ret == size)
8099                 return -EFAULT;
8100
8101         size -= ret;
8102
8103         *ppos += size;
8104         info->read += size;
8105
8106         return size;
8107 }
8108
8109 static int tracing_buffers_release(struct inode *inode, struct file *file)
8110 {
8111         struct ftrace_buffer_info *info = file->private_data;
8112         struct trace_iterator *iter = &info->iter;
8113
8114         mutex_lock(&trace_types_lock);
8115
8116         iter->tr->trace_ref--;
8117
8118         __trace_array_put(iter->tr);
8119
8120         iter->wait_index++;
8121         /* Make sure the waiters see the new wait_index */
8122         smp_wmb();
8123
8124         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8125
8126         if (info->spare)
8127                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8128                                            info->spare_cpu, info->spare);
8129         kvfree(info);
8130
8131         mutex_unlock(&trace_types_lock);
8132
8133         return 0;
8134 }
8135
8136 struct buffer_ref {
8137         struct trace_buffer     *buffer;
8138         void                    *page;
8139         int                     cpu;
8140         refcount_t              refcount;
8141 };
8142
8143 static void buffer_ref_release(struct buffer_ref *ref)
8144 {
8145         if (!refcount_dec_and_test(&ref->refcount))
8146                 return;
8147         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8148         kfree(ref);
8149 }
8150
8151 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8152                                     struct pipe_buffer *buf)
8153 {
8154         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8155
8156         buffer_ref_release(ref);
8157         buf->private = 0;
8158 }
8159
8160 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8161                                 struct pipe_buffer *buf)
8162 {
8163         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8164
8165         if (refcount_read(&ref->refcount) > INT_MAX/2)
8166                 return false;
8167
8168         refcount_inc(&ref->refcount);
8169         return true;
8170 }
8171
8172 /* Pipe buffer operations for a buffer. */
8173 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8174         .release                = buffer_pipe_buf_release,
8175         .get                    = buffer_pipe_buf_get,
8176 };
8177
8178 /*
8179  * Callback from splice_to_pipe(), if we need to release some pages
8180  * at the end of the spd in case we errored out in filling the pipe.
8181  */
8182 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8183 {
8184         struct buffer_ref *ref =
8185                 (struct buffer_ref *)spd->partial[i].private;
8186
8187         buffer_ref_release(ref);
8188         spd->partial[i].private = 0;
8189 }
8190
8191 static ssize_t
8192 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8193                             struct pipe_inode_info *pipe, size_t len,
8194                             unsigned int flags)
8195 {
8196         struct ftrace_buffer_info *info = file->private_data;
8197         struct trace_iterator *iter = &info->iter;
8198         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8199         struct page *pages_def[PIPE_DEF_BUFFERS];
8200         struct splice_pipe_desc spd = {
8201                 .pages          = pages_def,
8202                 .partial        = partial_def,
8203                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8204                 .ops            = &buffer_pipe_buf_ops,
8205                 .spd_release    = buffer_spd_release,
8206         };
8207         struct buffer_ref *ref;
8208         int entries, i;
8209         ssize_t ret = 0;
8210
8211 #ifdef CONFIG_TRACER_MAX_TRACE
8212         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8213                 return -EBUSY;
8214 #endif
8215
8216         if (*ppos & (PAGE_SIZE - 1))
8217                 return -EINVAL;
8218
8219         if (len & (PAGE_SIZE - 1)) {
8220                 if (len < PAGE_SIZE)
8221                         return -EINVAL;
8222                 len &= PAGE_MASK;
8223         }
8224
8225         if (splice_grow_spd(pipe, &spd))
8226                 return -ENOMEM;
8227
8228  again:
8229         trace_access_lock(iter->cpu_file);
8230         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8231
8232         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8233                 struct page *page;
8234                 int r;
8235
8236                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8237                 if (!ref) {
8238                         ret = -ENOMEM;
8239                         break;
8240                 }
8241
8242                 refcount_set(&ref->refcount, 1);
8243                 ref->buffer = iter->array_buffer->buffer;
8244                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8245                 if (IS_ERR(ref->page)) {
8246                         ret = PTR_ERR(ref->page);
8247                         ref->page = NULL;
8248                         kfree(ref);
8249                         break;
8250                 }
8251                 ref->cpu = iter->cpu_file;
8252
8253                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8254                                           len, iter->cpu_file, 1);
8255                 if (r < 0) {
8256                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8257                                                    ref->page);
8258                         kfree(ref);
8259                         break;
8260                 }
8261
8262                 page = virt_to_page(ref->page);
8263
8264                 spd.pages[i] = page;
8265                 spd.partial[i].len = PAGE_SIZE;
8266                 spd.partial[i].offset = 0;
8267                 spd.partial[i].private = (unsigned long)ref;
8268                 spd.nr_pages++;
8269                 *ppos += PAGE_SIZE;
8270
8271                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8272         }
8273
8274         trace_access_unlock(iter->cpu_file);
8275         spd.nr_pages = i;
8276
8277         /* did we read anything? */
8278         if (!spd.nr_pages) {
8279                 long wait_index;
8280
8281                 if (ret)
8282                         goto out;
8283
8284                 ret = -EAGAIN;
8285                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8286                         goto out;
8287
8288                 wait_index = READ_ONCE(iter->wait_index);
8289
8290                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8291                 if (ret)
8292                         goto out;
8293
8294                 /* No need to wait after waking up when tracing is off */
8295                 if (!tracer_tracing_is_on(iter->tr))
8296                         goto out;
8297
8298                 /* Make sure we see the new wait_index */
8299                 smp_rmb();
8300                 if (wait_index != iter->wait_index)
8301                         goto out;
8302
8303                 goto again;
8304         }
8305
8306         ret = splice_to_pipe(pipe, &spd);
8307 out:
8308         splice_shrink_spd(&spd);
8309
8310         return ret;
8311 }
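/*
 * Illustrative user-space sketch (hypothetical; paths assume tracefs at
 * /sys/kernel/tracing and a 4K page size): trace_pipe_raw is meant to be
 * spliced out in whole pages, e.g. through an intermediate pipe.  Lengths
 * and offsets that are not page aligned are rejected with -EINVAL above.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, 0);
 *	splice(p[0], NULL, out_fd, NULL, 4096, 0);
 */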
8312
8313 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8314 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8315 {
8316         struct ftrace_buffer_info *info = file->private_data;
8317         struct trace_iterator *iter = &info->iter;
8318
8319         if (cmd)
8320                 return -ENOIOCTLCMD;
8321
8322         mutex_lock(&trace_types_lock);
8323
8324         iter->wait_index++;
8325         /* Make sure the waiters see the new wait_index */
8326         smp_wmb();
8327
8328         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8329
8330         mutex_unlock(&trace_types_lock);
8331         return 0;
8332 }
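/*
 * Illustrative sketch: a reader blocked in the read or splice path above
 * can be released from user space with a zero-command ioctl on the same
 * file descriptor, e.g. ioctl(fd, 0, 0); any non-zero command is rejected.
 */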
8333
8334 static const struct file_operations tracing_buffers_fops = {
8335         .open           = tracing_buffers_open,
8336         .read           = tracing_buffers_read,
8337         .poll           = tracing_buffers_poll,
8338         .release        = tracing_buffers_release,
8339         .splice_read    = tracing_buffers_splice_read,
8340         .unlocked_ioctl = tracing_buffers_ioctl,
8341         .llseek         = no_llseek,
8342 };
8343
8344 static ssize_t
8345 tracing_stats_read(struct file *filp, char __user *ubuf,
8346                    size_t count, loff_t *ppos)
8347 {
8348         struct inode *inode = file_inode(filp);
8349         struct trace_array *tr = inode->i_private;
8350         struct array_buffer *trace_buf = &tr->array_buffer;
8351         int cpu = tracing_get_cpu(inode);
8352         struct trace_seq *s;
8353         unsigned long cnt;
8354         unsigned long long t;
8355         unsigned long usec_rem;
8356
8357         s = kmalloc(sizeof(*s), GFP_KERNEL);
8358         if (!s)
8359                 return -ENOMEM;
8360
8361         trace_seq_init(s);
8362
8363         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8364         trace_seq_printf(s, "entries: %ld\n", cnt);
8365
8366         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8367         trace_seq_printf(s, "overrun: %ld\n", cnt);
8368
8369         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8370         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8371
8372         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8373         trace_seq_printf(s, "bytes: %ld\n", cnt);
8374
8375         if (trace_clocks[tr->clock_id].in_ns) {
8376                 /* local or global for trace_clock */
8377                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8378                 usec_rem = do_div(t, USEC_PER_SEC);
8379                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8380                                                                 t, usec_rem);
8381
8382                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8383                 usec_rem = do_div(t, USEC_PER_SEC);
8384                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8385         } else {
8386                 /* counter or tsc mode for trace_clock */
8387                 trace_seq_printf(s, "oldest event ts: %llu\n",
8388                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8389
8390                 trace_seq_printf(s, "now ts: %llu\n",
8391                                 ring_buffer_time_stamp(trace_buf->buffer));
8392         }
8393
8394         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8395         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8396
8397         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8398         trace_seq_printf(s, "read events: %ld\n", cnt);
8399
8400         count = simple_read_from_buffer(ubuf, count, ppos,
8401                                         s->buffer, trace_seq_used(s));
8402
8403         kfree(s);
8404
8405         return count;
8406 }
8407
8408 static const struct file_operations tracing_stats_fops = {
8409         .open           = tracing_open_generic_tr,
8410         .read           = tracing_stats_read,
8411         .llseek         = generic_file_llseek,
8412         .release        = tracing_release_generic_tr,
8413 };
8414
8415 #ifdef CONFIG_DYNAMIC_FTRACE
8416
8417 static ssize_t
8418 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8419                   size_t cnt, loff_t *ppos)
8420 {
8421         ssize_t ret;
8422         char *buf;
8423         int r;
8424
8425         /* 256 should be plenty to hold the amount needed */
8426         buf = kmalloc(256, GFP_KERNEL);
8427         if (!buf)
8428                 return -ENOMEM;
8429
8430         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8431                       ftrace_update_tot_cnt,
8432                       ftrace_number_of_pages,
8433                       ftrace_number_of_groups);
8434
8435         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8436         kfree(buf);
8437         return ret;
8438 }
8439
8440 static const struct file_operations tracing_dyn_info_fops = {
8441         .open           = tracing_open_generic,
8442         .read           = tracing_read_dyn_info,
8443         .llseek         = generic_file_llseek,
8444 };
8445 #endif /* CONFIG_DYNAMIC_FTRACE */
8446
8447 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8448 static void
8449 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8450                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8451                 void *data)
8452 {
8453         tracing_snapshot_instance(tr);
8454 }
8455
8456 static void
8457 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8458                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8459                       void *data)
8460 {
8461         struct ftrace_func_mapper *mapper = data;
8462         long *count = NULL;
8463
8464         if (mapper)
8465                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8466
8467         if (count) {
8468
8469                 if (*count <= 0)
8470                         return;
8471
8472                 (*count)--;
8473         }
8474
8475         tracing_snapshot_instance(tr);
8476 }
8477
8478 static int
8479 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8480                       struct ftrace_probe_ops *ops, void *data)
8481 {
8482         struct ftrace_func_mapper *mapper = data;
8483         long *count = NULL;
8484
8485         seq_printf(m, "%ps:", (void *)ip);
8486
8487         seq_puts(m, "snapshot");
8488
8489         if (mapper)
8490                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8491
8492         if (count)
8493                 seq_printf(m, ":count=%ld\n", *count);
8494         else
8495                 seq_puts(m, ":unlimited\n");
8496
8497         return 0;
8498 }
8499
8500 static int
8501 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8502                      unsigned long ip, void *init_data, void **data)
8503 {
8504         struct ftrace_func_mapper *mapper = *data;
8505
8506         if (!mapper) {
8507                 mapper = allocate_ftrace_func_mapper();
8508                 if (!mapper)
8509                         return -ENOMEM;
8510                 *data = mapper;
8511         }
8512
8513         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8514 }
8515
8516 static void
8517 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8518                      unsigned long ip, void *data)
8519 {
8520         struct ftrace_func_mapper *mapper = data;
8521
8522         if (!ip) {
8523                 if (!mapper)
8524                         return;
8525                 free_ftrace_func_mapper(mapper, NULL);
8526                 return;
8527         }
8528
8529         ftrace_func_mapper_remove_ip(mapper, ip);
8530 }
8531
8532 static struct ftrace_probe_ops snapshot_probe_ops = {
8533         .func                   = ftrace_snapshot,
8534         .print                  = ftrace_snapshot_print,
8535 };
8536
8537 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8538         .func                   = ftrace_count_snapshot,
8539         .print                  = ftrace_snapshot_print,
8540         .init                   = ftrace_snapshot_init,
8541         .free                   = ftrace_snapshot_free,
8542 };
8543
8544 static int
8545 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8546                                char *glob, char *cmd, char *param, int enable)
8547 {
8548         struct ftrace_probe_ops *ops;
8549         void *count = (void *)-1;
8550         char *number;
8551         int ret;
8552
8553         if (!tr)
8554                 return -ENODEV;
8555
8556         /* hash funcs only work with set_ftrace_filter */
8557         if (!enable)
8558                 return -EINVAL;
8559
8560         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8561
8562         if (glob[0] == '!')
8563                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8564
8565         if (!param)
8566                 goto out_reg;
8567
8568         number = strsep(&param, ":");
8569
8570         if (!strlen(number))
8571                 goto out_reg;
8572
8573         /*
8574          * We use the callback data field (which is a pointer)
8575          * as our counter.
8576          */
8577         ret = kstrtoul(number, 0, (unsigned long *)&count);
8578         if (ret)
8579                 return ret;
8580
8581  out_reg:
8582         ret = tracing_alloc_snapshot_instance(tr);
8583         if (ret < 0)
8584                 goto out;
8585
8586         ret = register_ftrace_function_probe(glob, tr, ops, count);
8587
8588  out:
8589         return ret < 0 ? ret : 0;
8590 }
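/*
 * Illustrative usage of the "snapshot" command registered below, driven
 * through set_ftrace_filter (hypothetical function name):
 *
 *	echo 'do_sys_open:snapshot'   > set_ftrace_filter   # every hit
 *	echo 'do_sys_open:snapshot:3' > set_ftrace_filter   # at most 3 times
 *	echo '!do_sys_open:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * The optional ":<count>" is what selects snapshot_count_probe_ops above.
 */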
8591
8592 static struct ftrace_func_command ftrace_snapshot_cmd = {
8593         .name                   = "snapshot",
8594         .func                   = ftrace_trace_snapshot_callback,
8595 };
8596
8597 static __init int register_snapshot_cmd(void)
8598 {
8599         return register_ftrace_command(&ftrace_snapshot_cmd);
8600 }
8601 #else
8602 static inline __init int register_snapshot_cmd(void) { return 0; }
8603 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8604
8605 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8606 {
8607         if (WARN_ON(!tr->dir))
8608                 return ERR_PTR(-ENODEV);
8609
8610         /* Top directory uses NULL as the parent */
8611         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8612                 return NULL;
8613
8614         /* All sub buffers have a descriptor */
8615         return tr->dir;
8616 }
8617
8618 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8619 {
8620         struct dentry *d_tracer;
8621
8622         if (tr->percpu_dir)
8623                 return tr->percpu_dir;
8624
8625         d_tracer = tracing_get_dentry(tr);
8626         if (IS_ERR(d_tracer))
8627                 return NULL;
8628
8629         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8630
8631         MEM_FAIL(!tr->percpu_dir,
8632                   "Could not create tracefs directory 'per_cpu'\n");
8633
8634         return tr->percpu_dir;
8635 }
8636
8637 static struct dentry *
8638 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8639                       void *data, long cpu, const struct file_operations *fops)
8640 {
8641         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8642
8643         if (ret) /* See tracing_get_cpu() */
8644                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8645         return ret;
8646 }
8647
8648 static void
8649 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8650 {
8651         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8652         struct dentry *d_cpu;
8653         char cpu_dir[30]; /* 30 characters should be more than enough */
8654
8655         if (!d_percpu)
8656                 return;
8657
8658         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8659         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8660         if (!d_cpu) {
8661                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8662                 return;
8663         }
8664
8665         /* per cpu trace_pipe */
8666         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8667                                 tr, cpu, &tracing_pipe_fops);
8668
8669         /* per cpu trace */
8670         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8671                                 tr, cpu, &tracing_fops);
8672
8673         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8674                                 tr, cpu, &tracing_buffers_fops);
8675
8676         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8677                                 tr, cpu, &tracing_stats_fops);
8678
8679         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8680                                 tr, cpu, &tracing_entries_fops);
8681
8682 #ifdef CONFIG_TRACER_SNAPSHOT
8683         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8684                                 tr, cpu, &snapshot_fops);
8685
8686         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8687                                 tr, cpu, &snapshot_raw_fops);
8688 #endif
8689 }
8690
8691 #ifdef CONFIG_FTRACE_SELFTEST
8692 /* Let selftest have access to static functions in this file */
8693 #include "trace_selftest.c"
8694 #endif
8695
8696 static ssize_t
8697 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8698                         loff_t *ppos)
8699 {
8700         struct trace_option_dentry *topt = filp->private_data;
8701         char *buf;
8702
8703         if (topt->flags->val & topt->opt->bit)
8704                 buf = "1\n";
8705         else
8706                 buf = "0\n";
8707
8708         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8709 }
8710
8711 static ssize_t
8712 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8713                          loff_t *ppos)
8714 {
8715         struct trace_option_dentry *topt = filp->private_data;
8716         unsigned long val;
8717         int ret;
8718
8719         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8720         if (ret)
8721                 return ret;
8722
8723         if (val != 0 && val != 1)
8724                 return -EINVAL;
8725
8726         if (!!(topt->flags->val & topt->opt->bit) != val) {
8727                 mutex_lock(&trace_types_lock);
8728                 ret = __set_tracer_option(topt->tr, topt->flags,
8729                                           topt->opt, !val);
8730                 mutex_unlock(&trace_types_lock);
8731                 if (ret)
8732                         return ret;
8733         }
8734
8735         *ppos += cnt;
8736
8737         return cnt;
8738 }
8739
8740
8741 static const struct file_operations trace_options_fops = {
8742         .open = tracing_open_generic,
8743         .read = trace_options_read,
8744         .write = trace_options_write,
8745         .llseek = generic_file_llseek,
8746 };
8747
8748 /*
8749  * In order to pass in both the trace_array descriptor as well as the index
8750  * to the flag that the trace option file represents, the trace_array
8751  * has a character array of trace_flags_index[], which holds the index
8752  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8753  * The address of this character array is passed to the flag option file
8754  * read/write callbacks.
8755  *
8756  * In order to extract both the index and the trace_array descriptor,
8757  * get_tr_index() uses the following algorithm.
8758  *
8759  *   idx = *ptr;
8760  *
8761  * As the pointer itself points into the index array (remember
8762  * index[1] == 1), dereferencing it yields the index value.
8763  *
8764  * Then, to get the trace_array descriptor, subtracting that index
8765  * from the pointer gives the start of the index array itself.
8766  *
8767  *   ptr - idx == &index[0]
8768  *
8769  * Then a simple container_of() from that pointer gets us to the
8770  * trace_array descriptor.
8771  */
8772 static void get_tr_index(void *data, struct trace_array **ptr,
8773                          unsigned int *pindex)
8774 {
8775         *pindex = *(unsigned char *)data;
8776
8777         *ptr = container_of(data - *pindex, struct trace_array,
8778                             trace_flags_index);
8779 }
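/*
 * Worked example of the lookup above (hypothetical values): if data points
 * at tr->trace_flags_index[3], then *data == 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address recovers
 * tr itself.
 */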
8780
8781 static ssize_t
8782 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8783                         loff_t *ppos)
8784 {
8785         void *tr_index = filp->private_data;
8786         struct trace_array *tr;
8787         unsigned int index;
8788         char *buf;
8789
8790         get_tr_index(tr_index, &tr, &index);
8791
8792         if (tr->trace_flags & (1 << index))
8793                 buf = "1\n";
8794         else
8795                 buf = "0\n";
8796
8797         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8798 }
8799
8800 static ssize_t
8801 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8802                          loff_t *ppos)
8803 {
8804         void *tr_index = filp->private_data;
8805         struct trace_array *tr;
8806         unsigned int index;
8807         unsigned long val;
8808         int ret;
8809
8810         get_tr_index(tr_index, &tr, &index);
8811
8812         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8813         if (ret)
8814                 return ret;
8815
8816         if (val != 0 && val != 1)
8817                 return -EINVAL;
8818
8819         mutex_lock(&event_mutex);
8820         mutex_lock(&trace_types_lock);
8821         ret = set_tracer_flag(tr, 1 << index, val);
8822         mutex_unlock(&trace_types_lock);
8823         mutex_unlock(&event_mutex);
8824
8825         if (ret < 0)
8826                 return ret;
8827
8828         *ppos += cnt;
8829
8830         return cnt;
8831 }
8832
8833 static const struct file_operations trace_options_core_fops = {
8834         .open = tracing_open_generic,
8835         .read = trace_options_core_read,
8836         .write = trace_options_core_write,
8837         .llseek = generic_file_llseek,
8838 };
8839
8840 struct dentry *trace_create_file(const char *name,
8841                                  umode_t mode,
8842                                  struct dentry *parent,
8843                                  void *data,
8844                                  const struct file_operations *fops)
8845 {
8846         struct dentry *ret;
8847
8848         ret = tracefs_create_file(name, mode, parent, data, fops);
8849         if (!ret)
8850                 pr_warn("Could not create tracefs '%s' entry\n", name);
8851
8852         return ret;
8853 }
8854
8855
8856 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8857 {
8858         struct dentry *d_tracer;
8859
8860         if (tr->options)
8861                 return tr->options;
8862
8863         d_tracer = tracing_get_dentry(tr);
8864         if (IS_ERR(d_tracer))
8865                 return NULL;
8866
8867         tr->options = tracefs_create_dir("options", d_tracer);
8868         if (!tr->options) {
8869                 pr_warn("Could not create tracefs directory 'options'\n");
8870                 return NULL;
8871         }
8872
8873         return tr->options;
8874 }
8875
8876 static void
8877 create_trace_option_file(struct trace_array *tr,
8878                          struct trace_option_dentry *topt,
8879                          struct tracer_flags *flags,
8880                          struct tracer_opt *opt)
8881 {
8882         struct dentry *t_options;
8883
8884         t_options = trace_options_init_dentry(tr);
8885         if (!t_options)
8886                 return;
8887
8888         topt->flags = flags;
8889         topt->opt = opt;
8890         topt->tr = tr;
8891
8892         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8893                                         t_options, topt, &trace_options_fops);
8894
8895 }
8896
8897 static void
8898 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8899 {
8900         struct trace_option_dentry *topts;
8901         struct trace_options *tr_topts;
8902         struct tracer_flags *flags;
8903         struct tracer_opt *opts;
8904         int cnt;
8905         int i;
8906
8907         if (!tracer)
8908                 return;
8909
8910         flags = tracer->flags;
8911
8912         if (!flags || !flags->opts)
8913                 return;
8914
8915         /*
8916          * If this is an instance, only create flags for tracers
8917          * the instance may have.
8918          */
8919         if (!trace_ok_for_array(tracer, tr))
8920                 return;
8921
8922         for (i = 0; i < tr->nr_topts; i++) {
8923                 /* Make sure there are no duplicate flags. */
8924                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8925                         return;
8926         }
8927
8928         opts = flags->opts;
8929
8930         for (cnt = 0; opts[cnt].name; cnt++)
8931                 ;
8932
8933         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8934         if (!topts)
8935                 return;
8936
8937         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8938                             GFP_KERNEL);
8939         if (!tr_topts) {
8940                 kfree(topts);
8941                 return;
8942         }
8943
8944         tr->topts = tr_topts;
8945         tr->topts[tr->nr_topts].tracer = tracer;
8946         tr->topts[tr->nr_topts].topts = topts;
8947         tr->nr_topts++;
8948
8949         for (cnt = 0; opts[cnt].name; cnt++) {
8950                 create_trace_option_file(tr, &topts[cnt], flags,
8951                                          &opts[cnt]);
8952                 MEM_FAIL(topts[cnt].entry == NULL,
8953                           "Failed to create trace option: %s",
8954                           opts[cnt].name);
8955         }
8956 }
8957
8958 static struct dentry *
8959 create_trace_option_core_file(struct trace_array *tr,
8960                               const char *option, long index)
8961 {
8962         struct dentry *t_options;
8963
8964         t_options = trace_options_init_dentry(tr);
8965         if (!t_options)
8966                 return NULL;
8967
8968         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8969                                  (void *)&tr->trace_flags_index[index],
8970                                  &trace_options_core_fops);
8971 }
8972
8973 static void create_trace_options_dir(struct trace_array *tr)
8974 {
8975         struct dentry *t_options;
8976         bool top_level = tr == &global_trace;
8977         int i;
8978
8979         t_options = trace_options_init_dentry(tr);
8980         if (!t_options)
8981                 return;
8982
8983         for (i = 0; trace_options[i]; i++) {
8984                 if (top_level ||
8985                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8986                         create_trace_option_core_file(tr, trace_options[i], i);
8987         }
8988 }
8989
8990 static ssize_t
8991 rb_simple_read(struct file *filp, char __user *ubuf,
8992                size_t cnt, loff_t *ppos)
8993 {
8994         struct trace_array *tr = filp->private_data;
8995         char buf[64];
8996         int r;
8997
8998         r = tracer_tracing_is_on(tr);
8999         r = sprintf(buf, "%d\n", r);
9000
9001         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9002 }
9003
9004 static ssize_t
9005 rb_simple_write(struct file *filp, const char __user *ubuf,
9006                 size_t cnt, loff_t *ppos)
9007 {
9008         struct trace_array *tr = filp->private_data;
9009         struct trace_buffer *buffer = tr->array_buffer.buffer;
9010         unsigned long val;
9011         int ret;
9012
9013         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9014         if (ret)
9015                 return ret;
9016
9017         if (buffer) {
9018                 mutex_lock(&trace_types_lock);
9019                 if (!!val == tracer_tracing_is_on(tr)) {
9020                         val = 0; /* do nothing */
9021                 } else if (val) {
9022                         tracer_tracing_on(tr);
9023                         if (tr->current_trace->start)
9024                                 tr->current_trace->start(tr);
9025                 } else {
9026                         tracer_tracing_off(tr);
9027                         if (tr->current_trace->stop)
9028                                 tr->current_trace->stop(tr);
9029                         /* Wake up any waiters */
9030                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9031                 }
9032                 mutex_unlock(&trace_types_lock);
9033         }
9034
9035         (*ppos)++;
9036
9037         return cnt;
9038 }
9039
9040 static const struct file_operations rb_simple_fops = {
9041         .open           = tracing_open_generic_tr,
9042         .read           = rb_simple_read,
9043         .write          = rb_simple_write,
9044         .release        = tracing_release_generic_tr,
9045         .llseek         = default_llseek,
9046 };
9047
9048 static ssize_t
9049 buffer_percent_read(struct file *filp, char __user *ubuf,
9050                     size_t cnt, loff_t *ppos)
9051 {
9052         struct trace_array *tr = filp->private_data;
9053         char buf[64];
9054         int r;
9055
9056         r = tr->buffer_percent;
9057         r = sprintf(buf, "%d\n", r);
9058
9059         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9060 }
9061
9062 static ssize_t
9063 buffer_percent_write(struct file *filp, const char __user *ubuf,
9064                      size_t cnt, loff_t *ppos)
9065 {
9066         struct trace_array *tr = filp->private_data;
9067         unsigned long val;
9068         int ret;
9069
9070         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9071         if (ret)
9072                 return ret;
9073
9074         if (val > 100)
9075                 return -EINVAL;
9076
9077         if (!val)
9078                 val = 1;
9079
9080         tr->buffer_percent = val;
9081
9082         (*ppos)++;
9083
9084         return cnt;
9085 }
9086
9087 static const struct file_operations buffer_percent_fops = {
9088         .open           = tracing_open_generic_tr,
9089         .read           = buffer_percent_read,
9090         .write          = buffer_percent_write,
9091         .release        = tracing_release_generic_tr,
9092         .llseek         = default_llseek,
9093 };
9094
9095 static struct dentry *trace_instance_dir;
9096
9097 static void
9098 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9099
9100 static int
9101 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9102 {
9103         enum ring_buffer_flags rb_flags;
9104
9105         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9106
9107         buf->tr = tr;
9108
9109         buf->buffer = ring_buffer_alloc(size, rb_flags);
9110         if (!buf->buffer)
9111                 return -ENOMEM;
9112
9113         buf->data = alloc_percpu(struct trace_array_cpu);
9114         if (!buf->data) {
9115                 ring_buffer_free(buf->buffer);
9116                 buf->buffer = NULL;
9117                 return -ENOMEM;
9118         }
9119
9120         /* Allocate the first page for all buffers */
9121         set_buffer_entries(&tr->array_buffer,
9122                            ring_buffer_size(tr->array_buffer.buffer, 0));
9123
9124         return 0;
9125 }
9126
9127 static int allocate_trace_buffers(struct trace_array *tr, int size)
9128 {
9129         int ret;
9130
9131         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9132         if (ret)
9133                 return ret;
9134
9135 #ifdef CONFIG_TRACER_MAX_TRACE
9136         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9137                                     allocate_snapshot ? size : 1);
9138         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9139                 ring_buffer_free(tr->array_buffer.buffer);
9140                 tr->array_buffer.buffer = NULL;
9141                 free_percpu(tr->array_buffer.data);
9142                 tr->array_buffer.data = NULL;
9143                 return -ENOMEM;
9144         }
9145         tr->allocated_snapshot = allocate_snapshot;
9146
9147         /*
9148          * Only the top level trace array gets its snapshot allocated
9149          * from the kernel command line.
9150          */
9151         allocate_snapshot = false;
9152 #endif
9153
9154         return 0;
9155 }
9156
9157 static void free_trace_buffer(struct array_buffer *buf)
9158 {
9159         if (buf->buffer) {
9160                 ring_buffer_free(buf->buffer);
9161                 buf->buffer = NULL;
9162                 free_percpu(buf->data);
9163                 buf->data = NULL;
9164         }
9165 }
9166
9167 static void free_trace_buffers(struct trace_array *tr)
9168 {
9169         if (!tr)
9170                 return;
9171
9172         free_trace_buffer(&tr->array_buffer);
9173
9174 #ifdef CONFIG_TRACER_MAX_TRACE
9175         free_trace_buffer(&tr->max_buffer);
9176 #endif
9177 }
9178
9179 static void init_trace_flags_index(struct trace_array *tr)
9180 {
9181         int i;
9182
9183         /* Used by the trace options files */
9184         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9185                 tr->trace_flags_index[i] = i;
9186 }
9187
9188 static void __update_tracer_options(struct trace_array *tr)
9189 {
9190         struct tracer *t;
9191
9192         for (t = trace_types; t; t = t->next)
9193                 add_tracer_options(tr, t);
9194 }
9195
9196 static void update_tracer_options(struct trace_array *tr)
9197 {
9198         mutex_lock(&trace_types_lock);
9199         tracer_options_updated = true;
9200         __update_tracer_options(tr);
9201         mutex_unlock(&trace_types_lock);
9202 }
9203
9204 /* Must have trace_types_lock held */
9205 struct trace_array *trace_array_find(const char *instance)
9206 {
9207         struct trace_array *tr, *found = NULL;
9208
9209         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9210                 if (tr->name && strcmp(tr->name, instance) == 0) {
9211                         found = tr;
9212                         break;
9213                 }
9214         }
9215
9216         return found;
9217 }
9218
9219 struct trace_array *trace_array_find_get(const char *instance)
9220 {
9221         struct trace_array *tr;
9222
9223         mutex_lock(&trace_types_lock);
9224         tr = trace_array_find(instance);
9225         if (tr)
9226                 tr->ref++;
9227         mutex_unlock(&trace_types_lock);
9228
9229         return tr;
9230 }
9231
9232 static int trace_array_create_dir(struct trace_array *tr)
9233 {
9234         int ret;
9235
9236         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9237         if (!tr->dir)
9238                 return -EINVAL;
9239
9240         ret = event_trace_add_tracer(tr->dir, tr);
9241         if (ret) {
9242                 tracefs_remove(tr->dir);
9243                 return ret;
9244         }
9245
9246         init_tracer_tracefs(tr, tr->dir);
9247         __update_tracer_options(tr);
9248
9249         return ret;
9250 }
9251
9252 static struct trace_array *trace_array_create(const char *name)
9253 {
9254         struct trace_array *tr;
9255         int ret;
9256
9257         ret = -ENOMEM;
9258         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9259         if (!tr)
9260                 return ERR_PTR(ret);
9261
9262         tr->name = kstrdup(name, GFP_KERNEL);
9263         if (!tr->name)
9264                 goto out_free_tr;
9265
9266         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9267                 goto out_free_tr;
9268
9269         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9270
9271         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9272
9273         raw_spin_lock_init(&tr->start_lock);
9274
9275         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9276
9277         tr->current_trace = &nop_trace;
9278
9279         INIT_LIST_HEAD(&tr->systems);
9280         INIT_LIST_HEAD(&tr->events);
9281         INIT_LIST_HEAD(&tr->hist_vars);
9282         INIT_LIST_HEAD(&tr->err_log);
9283
9284         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9285                 goto out_free_tr;
9286
9287         if (ftrace_allocate_ftrace_ops(tr) < 0)
9288                 goto out_free_tr;
9289
9290         ftrace_init_trace_array(tr);
9291
9292         init_trace_flags_index(tr);
9293
9294         if (trace_instance_dir) {
9295                 ret = trace_array_create_dir(tr);
9296                 if (ret)
9297                         goto out_free_tr;
9298         } else
9299                 __trace_early_add_events(tr);
9300
9301         list_add(&tr->list, &ftrace_trace_arrays);
9302
9303         tr->ref++;
9304
9305         return tr;
9306
9307  out_free_tr:
9308         ftrace_free_ftrace_ops(tr);
9309         free_trace_buffers(tr);
9310         free_cpumask_var(tr->tracing_cpumask);
9311         kfree(tr->name);
9312         kfree(tr);
9313
9314         return ERR_PTR(ret);
9315 }
9316
9317 static int instance_mkdir(const char *name)
9318 {
9319         struct trace_array *tr;
9320         int ret;
9321
9322         mutex_lock(&event_mutex);
9323         mutex_lock(&trace_types_lock);
9324
9325         ret = -EEXIST;
9326         if (trace_array_find(name))
9327                 goto out_unlock;
9328
9329         tr = trace_array_create(name);
9330
9331         ret = PTR_ERR_OR_ZERO(tr);
9332
9333 out_unlock:
9334         mutex_unlock(&trace_types_lock);
9335         mutex_unlock(&event_mutex);
9336         return ret;
9337 }
9338
9339 /**
9340  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9341  * @name: The name of the trace array to be looked up/created.
9342  *
9343  * Returns a pointer to the trace array with the given name, or
9344  * NULL if it cannot be created.
9345  *
9346  * NOTE: This function increments the reference counter associated with the
9347  * trace array returned. This makes sure it cannot be freed while in use.
9348  * Use trace_array_put() once the trace array is no longer needed.
9349  * If the trace_array is to be freed, trace_array_destroy() needs to
9350  * be called after the trace_array_put(), or simply let user space delete
9351  * it from the tracefs instances directory. But until the
9352  * trace_array_put() is called, user space cannot delete it.
9353  *
9354  */
9355 struct trace_array *trace_array_get_by_name(const char *name)
9356 {
9357         struct trace_array *tr;
9358
9359         mutex_lock(&event_mutex);
9360         mutex_lock(&trace_types_lock);
9361
9362         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9363                 if (tr->name && strcmp(tr->name, name) == 0)
9364                         goto out_unlock;
9365         }
9366
9367         tr = trace_array_create(name);
9368
9369         if (IS_ERR(tr))
9370                 tr = NULL;
9371 out_unlock:
9372         if (tr)
9373                 tr->ref++;
9374
9375         mutex_unlock(&trace_types_lock);
9376         mutex_unlock(&event_mutex);
9377         return tr;
9378 }
9379 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
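/*
 * Illustrative in-kernel usage (hypothetical module code): create or look
 * up a named instance, drop the reference when done, and destroy it only
 * if it should also vanish from tracefs:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */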
9380
9381 static int __remove_instance(struct trace_array *tr)
9382 {
9383         int i;
9384
9385         /* Reference counter for a newly created trace array = 1. */
9386         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9387                 return -EBUSY;
9388
9389         list_del(&tr->list);
9390
9391         /* Disable all the flags that were enabled coming in */
9392         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9393                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9394                         set_tracer_flag(tr, 1 << i, 0);
9395         }
9396
9397         tracing_set_nop(tr);
9398         clear_ftrace_function_probes(tr);
9399         event_trace_del_tracer(tr);
9400         ftrace_clear_pids(tr);
9401         ftrace_destroy_function_files(tr);
9402         tracefs_remove(tr->dir);
9403         free_percpu(tr->last_func_repeats);
9404         free_trace_buffers(tr);
9405
9406         for (i = 0; i < tr->nr_topts; i++) {
9407                 kfree(tr->topts[i].topts);
9408         }
9409         kfree(tr->topts);
9410
9411         free_cpumask_var(tr->tracing_cpumask);
9412         kfree(tr->name);
9413         kfree(tr);
9414
9415         return 0;
9416 }
9417
9418 int trace_array_destroy(struct trace_array *this_tr)
9419 {
9420         struct trace_array *tr;
9421         int ret;
9422
9423         if (!this_tr)
9424                 return -EINVAL;
9425
9426         mutex_lock(&event_mutex);
9427         mutex_lock(&trace_types_lock);
9428
9429         ret = -ENODEV;
9430
9431         /* Make sure the trace array exists before destroying it. */
9432         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9433                 if (tr == this_tr) {
9434                         ret = __remove_instance(tr);
9435                         break;
9436                 }
9437         }
9438
9439         mutex_unlock(&trace_types_lock);
9440         mutex_unlock(&event_mutex);
9441
9442         return ret;
9443 }
9444 EXPORT_SYMBOL_GPL(trace_array_destroy);
9445
9446 static int instance_rmdir(const char *name)
9447 {
9448         struct trace_array *tr;
9449         int ret;
9450
9451         mutex_lock(&event_mutex);
9452         mutex_lock(&trace_types_lock);
9453
9454         ret = -ENODEV;
9455         tr = trace_array_find(name);
9456         if (tr)
9457                 ret = __remove_instance(tr);
9458
9459         mutex_unlock(&trace_types_lock);
9460         mutex_unlock(&event_mutex);
9461
9462         return ret;
9463 }
9464
9465 static __init void create_trace_instances(struct dentry *d_tracer)
9466 {
9467         struct trace_array *tr;
9468
9469         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9470                                                          instance_mkdir,
9471                                                          instance_rmdir);
9472         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9473                 return;
9474
9475         mutex_lock(&event_mutex);
9476         mutex_lock(&trace_types_lock);
9477
9478         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9479                 if (!tr->name)
9480                         continue;
9481                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9482                              "Failed to create instance directory\n"))
9483                         break;
9484         }
9485
9486         mutex_unlock(&trace_types_lock);
9487         mutex_unlock(&event_mutex);
9488 }
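/*
 * The instances directory registered above is driven from user space
 * (paths assume tracefs mounted at /sys/kernel/tracing):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # calls instance_mkdir()
 *	rmdir /sys/kernel/tracing/instances/foo    # calls instance_rmdir()
 */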
9489
9490 static void
9491 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9492 {
9493         struct trace_event_file *file;
9494         int cpu;
9495
9496         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9497                         tr, &show_traces_fops);
9498
9499         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9500                         tr, &set_tracer_fops);
9501
9502         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9503                           tr, &tracing_cpumask_fops);
9504
9505         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9506                           tr, &tracing_iter_fops);
9507
9508         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9509                           tr, &tracing_fops);
9510
9511         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9512                           tr, &tracing_pipe_fops);
9513
9514         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9515                           tr, &tracing_entries_fops);
9516
9517         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9518                           tr, &tracing_total_entries_fops);
9519
9520         trace_create_file("free_buffer", 0200, d_tracer,
9521                           tr, &tracing_free_buffer_fops);
9522
9523         trace_create_file("trace_marker", 0220, d_tracer,
9524                           tr, &tracing_mark_fops);
9525
9526         file = __find_event_file(tr, "ftrace", "print");
9527         if (file && file->dir)
9528                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9529                                   file, &event_trigger_fops);
9530         tr->trace_marker_file = file;
9531
9532         trace_create_file("trace_marker_raw", 0220, d_tracer,
9533                           tr, &tracing_mark_raw_fops);
9534
9535         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9536                           &trace_clock_fops);
9537
9538         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9539                           tr, &rb_simple_fops);
9540
9541         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9542                           &trace_time_stamp_mode_fops);
9543
9544         tr->buffer_percent = 50;
9545
9546         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9547                         tr, &buffer_percent_fops);
9548
9549         create_trace_options_dir(tr);
9550
9551         trace_create_maxlat_file(tr, d_tracer);
9552
9553         if (ftrace_create_function_files(tr, d_tracer))
9554                 MEM_FAIL(1, "Could not allocate function filter files");
9555
9556 #ifdef CONFIG_TRACER_SNAPSHOT
9557         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9558                           tr, &snapshot_fops);
9559 #endif
9560
9561         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9562                           tr, &tracing_err_log_fops);
9563
9564         for_each_tracing_cpu(cpu)
9565                 tracing_init_tracefs_percpu(tr, cpu);
9566
9567         ftrace_init_tracefs(tr, d_tracer);
9568 }
9569
9570 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9571 {
9572         struct vfsmount *mnt;
9573         struct file_system_type *type;
9574
9575         /*
9576          * To maintain backward compatibility for tools that mount
9577          * debugfs to get to the tracing facility, tracefs is automatically
9578          * mounted to the debugfs/tracing directory.
9579          */
9580         type = get_fs_type("tracefs");
9581         if (!type)
9582                 return NULL;
9583         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9584         put_filesystem(type);
9585         if (IS_ERR(mnt))
9586                 return NULL;
9587         mntget(mnt);
9588
9589         return mnt;
9590 }
9591
9592 /**
9593  * tracing_init_dentry - initialize top level trace array
9594  *
9595  * This is called when creating files or directories in the tracing
9596  * directory. It is called via fs_initcall() by the boot up code and
9597  * returns 0 on success, or a negative error code on failure.
9598  */
9599 int tracing_init_dentry(void)
9600 {
9601         struct trace_array *tr = &global_trace;
9602
9603         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9604                 pr_warn("Tracing disabled due to lockdown\n");
9605                 return -EPERM;
9606         }
9607
9608         /* The top level trace array uses NULL as parent */
9609         if (tr->dir)
9610                 return 0;
9611
9612         if (WARN_ON(!tracefs_initialized()))
9613                 return -ENODEV;
9614
9615         /*
9616          * As there may still be users that expect the tracing
9617          * files to exist in debugfs/tracing, we must automount
9618          * the tracefs file system there, so older tools still
9619          * work with the newer kernel.
9620          */
9621         tr->dir = debugfs_create_automount("tracing", NULL,
9622                                            trace_automount, NULL);
9623
9624         return 0;
9625 }
9626
9627 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9628 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9629
9630 static struct workqueue_struct *eval_map_wq __initdata;
9631 static struct work_struct eval_map_work __initdata;
9632
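/*
 * The built-in eval maps are inserted from a workqueue at boot so that
 * the work does not have to run inline in the initcall.  If the
 * workqueue cannot be allocated, trace_eval_init() falls back to doing
 * the work synchronously.
 */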
9633 static void __init eval_map_work_func(struct work_struct *work)
9634 {
9635         int len;
9636
9637         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9638         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9639 }
9640
9641 static int __init trace_eval_init(void)
9642 {
9643         INIT_WORK(&eval_map_work, eval_map_work_func);
9644
9645         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9646         if (!eval_map_wq) {
9647                 pr_err("Unable to allocate eval_map_wq\n");
9648                 /* Fall back to doing the work synchronously */
9649                 eval_map_work_func(&eval_map_work);
9650                 return -ENOMEM;
9651         }
9652
9653         queue_work(eval_map_wq, &eval_map_work);
9654         return 0;
9655 }
9656
9657 static int __init trace_eval_sync(void)
9658 {
9659         /* Make sure the eval map updates are finished */
9660         if (eval_map_wq)
9661                 destroy_workqueue(eval_map_wq);
9662         return 0;
9663 }
9664
9665 late_initcall_sync(trace_eval_sync);
9666
9667
9668 #ifdef CONFIG_MODULES
9669 static void trace_module_add_evals(struct module *mod)
9670 {
9671         if (!mod->num_trace_evals)
9672                 return;
9673
9674         /*
9675          * Modules with bad taint do not have events created, so do
9676          * not bother with their eval maps (enums) either.
9677          */
9678         if (trace_module_has_bad_taint(mod))
9679                 return;
9680
9681         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9682 }
9683
9684 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
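/*
 * Walk the trace_eval_maps list under trace_eval_mutex, unlink the entry
 * owned by the module being removed and free it.
 */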
9685 static void trace_module_remove_evals(struct module *mod)
9686 {
9687         union trace_eval_map_item *map;
9688         union trace_eval_map_item **last = &trace_eval_maps;
9689
9690         if (!mod->num_trace_evals)
9691                 return;
9692
9693         mutex_lock(&trace_eval_mutex);
9694
9695         map = trace_eval_maps;
9696
9697         while (map) {
9698                 if (map->head.mod == mod)
9699                         break;
9700                 map = trace_eval_jmp_to_tail(map);
9701                 last = &map->tail.next;
9702                 map = map->tail.next;
9703         }
9704         if (!map)
9705                 goto out;
9706
9707         *last = trace_eval_jmp_to_tail(map)->tail.next;
9708         kfree(map);
9709  out:
9710         mutex_unlock(&trace_eval_mutex);
9711 }
9712 #else
9713 static inline void trace_module_remove_evals(struct module *mod) { }
9714 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9715
9716 static int trace_module_notify(struct notifier_block *self,
9717                                unsigned long val, void *data)
9718 {
9719         struct module *mod = data;
9720
9721         switch (val) {
9722         case MODULE_STATE_COMING:
9723                 trace_module_add_evals(mod);
9724                 break;
9725         case MODULE_STATE_GOING:
9726                 trace_module_remove_evals(mod);
9727                 break;
9728         }
9729
9730         return NOTIFY_OK;
9731 }
9732
9733 static struct notifier_block trace_module_nb = {
9734         .notifier_call = trace_module_notify,
9735         .priority = 0,
9736 };
9737 #endif /* CONFIG_MODULES */
9738
9739 static __init int tracer_init_tracefs(void)
9740 {
9741         int ret;
9742
9743         trace_access_lock_init();
9744
9745         ret = tracing_init_dentry();
9746         if (ret)
9747                 return 0;
9748
9749         event_trace_init();
9750
9751         init_tracer_tracefs(&global_trace, NULL);
9752         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9753
9754         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9755                         &global_trace, &tracing_thresh_fops);
9756
9757         trace_create_file("README", TRACE_MODE_READ, NULL,
9758                         NULL, &tracing_readme_fops);
9759
9760         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9761                         NULL, &tracing_saved_cmdlines_fops);
9762
9763         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9764                           NULL, &tracing_saved_cmdlines_size_fops);
9765
9766         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9767                         NULL, &tracing_saved_tgids_fops);
9768
9769         trace_eval_init();
9770
9771         trace_create_eval_file(NULL);
9772
9773 #ifdef CONFIG_MODULES
9774         register_module_notifier(&trace_module_nb);
9775 #endif
9776
9777 #ifdef CONFIG_DYNAMIC_FTRACE
9778         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9779                         NULL, &tracing_dyn_info_fops);
9780 #endif
9781
9782         create_trace_instances(NULL);
9783
9784         update_tracer_options(&global_trace);
9785
9786         return 0;
9787 }
9788
9789 fs_initcall(tracer_init_tracefs);
9790
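/*
 * Panic and die notifiers: if ftrace_dump_on_oops is set, dump the ring
 * buffers to the console before the system goes down.
 */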
9791 static int trace_panic_handler(struct notifier_block *this,
9792                                unsigned long event, void *unused)
9793 {
9794         if (ftrace_dump_on_oops)
9795                 ftrace_dump(ftrace_dump_on_oops);
9796         return NOTIFY_OK;
9797 }
9798
9799 static struct notifier_block trace_panic_notifier = {
9800         .notifier_call  = trace_panic_handler,
9801         .next           = NULL,
9802         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9803 };
9804
9805 static int trace_die_handler(struct notifier_block *self,
9806                              unsigned long val,
9807                              void *data)
9808 {
9809         switch (val) {
9810         case DIE_OOPS:
9811                 if (ftrace_dump_on_oops)
9812                         ftrace_dump(ftrace_dump_on_oops);
9813                 break;
9814         default:
9815                 break;
9816         }
9817         return NOTIFY_OK;
9818 }
9819
9820 static struct notifier_block trace_die_notifier = {
9821         .notifier_call = trace_die_handler,
9822         .priority = 200
9823 };
9824
9825 /*
9826  * printk is limited to a maximum of 1024 bytes; we really don't need
9827  * it that big, as nothing should be printing 1000 characters anyway.
9828  */
9829 #define TRACE_MAX_PRINT         1000
9830
9831 /*
9832  * Define here KERN_TRACE so that we have one place to modify
9833  * it if we decide to change what log level the ftrace dump
9834  * should be at.
9835  */
9836 #define KERN_TRACE              KERN_EMERG
9837
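/*
 * Print the contents of a trace_seq to the console at KERN_TRACE level,
 * clamping the length to TRACE_MAX_PRINT, then reinitialize the sequence
 * for the next line of output.
 */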
9838 void
9839 trace_printk_seq(struct trace_seq *s)
9840 {
9841         /* Probably should print a warning here. */
9842         if (s->seq.len >= TRACE_MAX_PRINT)
9843                 s->seq.len = TRACE_MAX_PRINT;
9844
9845         /*
9846          * More paranoid code. Although the buffer size is set to
9847          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9848          * an extra layer of protection.
9849          */
9850         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9851                 s->seq.len = s->seq.size - 1;
9852
9853         /* Should already be nul-terminated, but we are paranoid. */
9854         s->buffer[s->seq.len] = 0;
9855
9856         printk(KERN_TRACE "%s", s->buffer);
9857
9858         trace_seq_init(s);
9859 }
9860
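/*
 * Set up an iterator over the global trace buffer for ftrace_dump().
 * Static buffers are used for iter->temp and iter->fmt because this can
 * run from panic or oops context where allocation is not safe.
 */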
9861 void trace_init_global_iter(struct trace_iterator *iter)
9862 {
9863         iter->tr = &global_trace;
9864         iter->trace = iter->tr->current_trace;
9865         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9866         iter->array_buffer = &global_trace.array_buffer;
9867
9868         if (iter->trace && iter->trace->open)
9869                 iter->trace->open(iter);
9870
9871         /* Annotate start of buffers if we had overruns */
9872         if (ring_buffer_overruns(iter->array_buffer->buffer))
9873                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9874
9875         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9876         if (trace_clocks[iter->tr->clock_id].in_ns)
9877                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9878
9879         /* Cannot use kmalloc for iter.temp and iter.fmt */
9880         iter->temp = static_temp_buf;
9881         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9882         iter->fmt = static_fmt_buf;
9883         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9884 }
9885
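/**
 * ftrace_dump - dump the ring buffer contents to the console
 * @oops_dump_mode: DUMP_ALL to dump every CPU, DUMP_ORIG to dump only the
 *                  current CPU, or DUMP_NONE to skip the dump entirely.
 *
 * Called from the panic and die notifiers (and sysrq-z) to print whatever
 * is left in the ring buffers before the system goes down.  Tracing is
 * turned off and only one dumper may run at a time.
 */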
9886 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9887 {
9888         /* use static because iter can be a bit big for the stack */
9889         static struct trace_iterator iter;
9890         static atomic_t dump_running;
9891         struct trace_array *tr = &global_trace;
9892         unsigned int old_userobj;
9893         unsigned long flags;
9894         int cnt = 0, cpu;
9895
9896         /* Only allow one dump user at a time. */
9897         if (atomic_inc_return(&dump_running) != 1) {
9898                 atomic_dec(&dump_running);
9899                 return;
9900         }
9901
9902         /*
9903          * Always turn off tracing when we dump.
9904          * We don't need to show trace output of what happens
9905          * between multiple crashes.
9906          *
9907          * If the user does a sysrq-z, then they can re-enable
9908          * tracing with echo 1 > tracing_on.
9909          */
9910         tracing_off();
9911
9912         local_irq_save(flags);
9913
9914         /* Simulate the iterator */
9915         trace_init_global_iter(&iter);
9916
9917         for_each_tracing_cpu(cpu) {
9918                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9919         }
9920
9921         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9922
9923         /* don't look at user memory in panic mode */
9924         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9925
9926         switch (oops_dump_mode) {
9927         case DUMP_ALL:
9928                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9929                 break;
9930         case DUMP_ORIG:
9931                 iter.cpu_file = raw_smp_processor_id();
9932                 break;
9933         case DUMP_NONE:
9934                 goto out_enable;
9935         default:
9936                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9937                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9938         }
9939
9940         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9941
9942         /* Did function tracer already get disabled? */
9943         if (ftrace_is_dead()) {
9944                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9945                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9946         }
9947
9948         /*
9949          * We need to stop all tracing on all CPUs to read
9950          * the next buffer. This is a bit expensive, but is
9951          * not done often. We print all that we can read,
9952          * and then release the locks again.
9953          */
9954
9955         while (!trace_empty(&iter)) {
9956
9957                 if (!cnt)
9958                         printk(KERN_TRACE "---------------------------------\n");
9959
9960                 cnt++;
9961
9962                 trace_iterator_reset(&iter);
9963                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9964
9965                 if (trace_find_next_entry_inc(&iter) != NULL) {
9966                         int ret;
9967
9968                         ret = print_trace_line(&iter);
9969                         if (ret != TRACE_TYPE_NO_CONSUME)
9970                                 trace_consume(&iter);
9971                 }
9972                 touch_nmi_watchdog();
9973
9974                 trace_printk_seq(&iter.seq);
9975         }
9976
9977         if (!cnt)
9978                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9979         else
9980                 printk(KERN_TRACE "---------------------------------\n");
9981
9982  out_enable:
9983         tr->trace_flags |= old_userobj;
9984
9985         for_each_tracing_cpu(cpu) {
9986                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9987         }
9988         atomic_dec(&dump_running);
9989         local_irq_restore(flags);
9990 }
9991 EXPORT_SYMBOL_GPL(ftrace_dump);
9992
9993 #define WRITE_BUFSIZE  4096
9994
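/*
 * Copy a command string from user space in WRITE_BUFSIZE chunks, split it
 * into newline-terminated commands, strip '#' comments and hand each
 * command to @createfn.  Returns the number of bytes consumed or a
 * negative error code.
 */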
9995 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9996                                 size_t count, loff_t *ppos,
9997                                 int (*createfn)(const char *))
9998 {
9999         char *kbuf, *buf, *tmp;
10000         int ret = 0;
10001         size_t done = 0;
10002         size_t size;
10003
10004         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10005         if (!kbuf)
10006                 return -ENOMEM;
10007
10008         while (done < count) {
10009                 size = count - done;
10010
10011                 if (size >= WRITE_BUFSIZE)
10012                         size = WRITE_BUFSIZE - 1;
10013
10014                 if (copy_from_user(kbuf, buffer + done, size)) {
10015                         ret = -EFAULT;
10016                         goto out;
10017                 }
10018                 kbuf[size] = '\0';
10019                 buf = kbuf;
10020                 do {
10021                         tmp = strchr(buf, '\n');
10022                         if (tmp) {
10023                                 *tmp = '\0';
10024                                 size = tmp - buf + 1;
10025                         } else {
10026                                 size = strlen(buf);
10027                                 if (done + size < count) {
10028                                         if (buf != kbuf)
10029                                                 break;
10030                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10031                                         pr_warn("Line length is too long: Should be less than %d\n",
10032                                                 WRITE_BUFSIZE - 2);
10033                                         ret = -EINVAL;
10034                                         goto out;
10035                                 }
10036                         }
10037                         done += size;
10038
10039                         /* Remove comments */
10040                         tmp = strchr(buf, '#');
10041
10042                         if (tmp)
10043                                 *tmp = '\0';
10044
10045                         ret = createfn(buf);
10046                         if (ret)
10047                                 goto out;
10048                         buf += size;
10049
10050                 } while (done < count);
10051         }
10052         ret = done;
10053
10054 out:
10055         kfree(kbuf);
10056
10057         return ret;
10058 }
10059
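/*
 * Allocate the global trace buffers and the supporting state: the tracing
 * cpumasks, the temp buffer used by event triggers, the saved cmdlines
 * buffer and the ring buffer CPU hotplug callback.  Called from
 * early_trace_init() before tracefs is available.
 */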
10060 __init static int tracer_alloc_buffers(void)
10061 {
10062         int ring_buf_size;
10063         int ret = -ENOMEM;
10064
10065
10066         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10067                 pr_warn("Tracing disabled due to lockdown\n");
10068                 return -EPERM;
10069         }
10070
10071         /*
10072          * Make sure we don't accidentally add more trace options
10073          * than we have bits for.
10074          */
10075         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10076
10077         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10078                 goto out;
10079
10080         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10081                 goto out_free_buffer_mask;
10082
10083         /* Only allocate trace_printk buffers if a trace_printk exists */
10084         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10085                 /* Must be called before global_trace.buffer is allocated */
10086                 trace_printk_init_buffers();
10087
10088         /* To save memory, keep the ring buffer size to its minimum */
10089         /* To save memory, keep the ring buffer size at its minimum */
10090                 ring_buf_size = trace_buf_size;
10091         else
10092                 ring_buf_size = 1;
10093
10094         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10095         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10096
10097         raw_spin_lock_init(&global_trace.start_lock);
10098
10099         /*
10100          * The prepare callback allocates some memory for the ring buffer. We
10101          * don't free the buffer if the CPU goes down. If we were to free
10102          * the buffer, then the user would lose any trace that was in the
10103          * buffer. The memory will be removed once the "instance" is removed.
10104          */
10105         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10106                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10107                                       NULL);
10108         if (ret < 0)
10109                 goto out_free_cpumask;
10110         /* Used for event triggers */
10111         ret = -ENOMEM;
10112         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10113         if (!temp_buffer)
10114                 goto out_rm_hp_state;
10115
10116         if (trace_create_savedcmd() < 0)
10117                 goto out_free_temp_buffer;
10118
10119         /* TODO: make the number of buffers hot pluggable with CPUs */
10120         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10121                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10122                 goto out_free_savedcmd;
10123         }
10124
10125         if (global_trace.buffer_disabled)
10126                 tracing_off();
10127
10128         if (trace_boot_clock) {
10129                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10130                 if (ret < 0)
10131                         pr_warn("Trace clock %s not defined, going back to default\n",
10132                                 trace_boot_clock);
10133         }
10134
10135         /*
10136          * register_tracer() might reference current_trace, so it
10137          * needs to be set before we register anything. This is
10138          * just a bootstrap of current_trace anyway.
10139          */
10140         global_trace.current_trace = &nop_trace;
10141
10142         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10143
10144         ftrace_init_global_array_ops(&global_trace);
10145
10146         init_trace_flags_index(&global_trace);
10147
10148         register_tracer(&nop_trace);
10149
10150         /* Function tracing may start here (via kernel command line) */
10151         init_function_trace();
10152
10153         /* All seems OK, enable tracing */
10154         tracing_disabled = 0;
10155
10156         atomic_notifier_chain_register(&panic_notifier_list,
10157                                        &trace_panic_notifier);
10158
10159         register_die_notifier(&trace_die_notifier);
10160
10161         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10162
10163         INIT_LIST_HEAD(&global_trace.systems);
10164         INIT_LIST_HEAD(&global_trace.events);
10165         INIT_LIST_HEAD(&global_trace.hist_vars);
10166         INIT_LIST_HEAD(&global_trace.err_log);
10167         list_add(&global_trace.list, &ftrace_trace_arrays);
10168
10169         apply_trace_boot_options();
10170
10171         register_snapshot_cmd();
10172
10173         test_can_verify();
10174
10175         return 0;
10176
10177 out_free_savedcmd:
10178         free_saved_cmdlines_buffer(savedcmd);
10179 out_free_temp_buffer:
10180         ring_buffer_free(temp_buffer);
10181 out_rm_hp_state:
10182         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10183 out_free_cpumask:
10184         free_cpumask_var(global_trace.tracing_cpumask);
10185 out_free_buffer_mask:
10186         free_cpumask_var(tracing_buffer_mask);
10187 out:
10188         return ret;
10189 }
10190
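/*
 * Called early during boot: set up the tracepoint_printk iterator if it
 * was requested on the command line, then allocate the trace buffers.
 */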
10191 void __init early_trace_init(void)
10192 {
10193         if (tracepoint_printk) {
10194                 tracepoint_print_iter =
10195                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10196                 if (MEM_FAIL(!tracepoint_print_iter,
10197                              "Failed to allocate trace iterator\n"))
10198                         tracepoint_printk = 0;
10199                 else
10200                         static_key_enable(&tracepoint_printk_key.key);
10201         }
10202         tracer_alloc_buffers();
10203 }
10204
10205 void __init trace_init(void)
10206 {
10207         trace_event_init();
10208 }
10209
10210 __init static void clear_boot_tracer(void)
10211 {
10212         /*
10213          * The default bootup tracer name points into an init section.
10214          * This function is called at late_initcall time. If the boot
10215          * tracer was never registered, clear the pointer, to prevent
10216          * later registration from accessing the buffer that is
10217          * about to be freed.
10218          */
10219         if (!default_bootup_tracer)
10220                 return;
10221
10222         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10223                default_bootup_tracer);
10224         default_bootup_tracer = NULL;
10225 }
10226
10227 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10228 __init static void tracing_set_default_clock(void)
10229 {
10230         /* sched_clock_stable() is determined in late_initcall */
10231         if (!trace_boot_clock && !sched_clock_stable()) {
10232                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10233                         pr_warn("Can not set tracing clock due to lockdown\n");
10234                         return;
10235                 }
10236
10237                 printk(KERN_WARNING
10238                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10239                        "If you want to keep using the local clock, then add:\n"
10240                        "  \"trace_clock=local\"\n"
10241                        "on the kernel command line\n");
10242                 tracing_set_clock(&global_trace, "global");
10243         }
10244 }
10245 #else
10246 static inline void tracing_set_default_clock(void) { }
10247 #endif
10248
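/*
 * Late boot fixups: honor a "stop tracepoint_printk after boot" request,
 * pick the default trace clock and clear the stale bootup tracer name.
 */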
10249 __init static int late_trace_init(void)
10250 {
10251         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10252                 static_key_disable(&tracepoint_printk_key.key);
10253                 tracepoint_printk = 0;
10254         }
10255
10256         tracing_set_default_clock();
10257         clear_boot_tracer();
10258         return 0;
10259 }
10260
10261 late_initcall_sync(late_trace_init);