1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although some concurrent
65  * insertions into the ring buffer, such as trace_printk(), could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will turn to zero if the initialization
101  * of the tracer is successful. But that is the only place that sets
102  * this back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS                                             \
256         (FUNCTION_DEFAULT_FLAGS |                                       \
257          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
258          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
259          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
260          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
261
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
264                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
265
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
269
270 /*
271  * The global_trace is the descriptor that holds the top-level tracing
272  * buffers for the live tracing.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 /**
305  * trace_array_put - Decrement the reference counter for this trace array.
306  *
307  * NOTE: Use this when we no longer need the trace array returned by
308  * trace_array_get_by_name(). This ensures the trace array can be later
309  * destroyed.
310  *
311  */
312 void trace_array_put(struct trace_array *this_tr)
313 {
314         if (!this_tr)
315                 return;
316
317         mutex_lock(&trace_types_lock);
318         __trace_array_put(this_tr);
319         mutex_unlock(&trace_types_lock);
320 }
321 EXPORT_SYMBOL_GPL(trace_array_put);
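
/*
 * A minimal sketch of the intended get/put pairing (the instance name
 * below is purely illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 *
 * trace_array_get_by_name() takes the reference that trace_array_put()
 * releases, which is what allows the instance to be destroyed later.
 */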
322
323 int tracing_check_open_get_tr(struct trace_array *tr)
324 {
325         int ret;
326
327         ret = security_locked_down(LOCKDOWN_TRACEFS);
328         if (ret)
329                 return ret;
330
331         if (tracing_disabled)
332                 return -ENODEV;
333
334         if (tr && trace_array_get(tr) < 0)
335                 return -ENODEV;
336
337         return 0;
338 }
339
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341                               struct trace_buffer *buffer,
342                               struct ring_buffer_event *event)
343 {
344         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345             !filter_match_preds(call->filter, rec)) {
346                 __trace_event_discard_commit(buffer, event);
347                 return 1;
348         }
349
350         return 0;
351 }
352
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
354 {
355         vfree(pid_list->pids);
356         kfree(pid_list);
357 }
358
359 /**
360  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361  * @filtered_pids: The list of pids to check
362  * @search_pid: The PID to find in @filtered_pids
363  *
364  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
365  */
366 bool
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
368 {
369         /*
370          * If pid_max changed after filtered_pids was created, we
371          * by default ignore all pids greater than the previous pid_max.
372          */
373         if (search_pid >= filtered_pids->pid_max)
374                 return false;
375
376         return test_bit(search_pid, filtered_pids->pids);
377 }
378
379 /**
380  * trace_ignore_this_task - should a task be ignored for tracing
381  * @filtered_pids: The list of pids to check
382  * @task: The task that should be ignored if not filtered
383  *
384  * Checks if @task should be traced or not from @filtered_pids.
385  * Returns true if @task should *NOT* be traced.
386  * Returns false if @task should be traced.
387  */
388 bool
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390                        struct trace_pid_list *filtered_no_pids,
391                        struct task_struct *task)
392 {
393         /*
394          * If filtered_no_pids is not empty, and the task's pid is listed
395          * in filtered_no_pids, then return true.
396          * Otherwise, if filtered_pids is empty, that means we can
397          * trace all tasks. If it has content, then only trace pids
398          * within filtered_pids.
399          */
400
401         return (filtered_pids &&
402                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
403                 (filtered_no_pids &&
404                  trace_find_filtered_pid(filtered_no_pids, task->pid));
405 }
406
407 /**
408  * trace_filter_add_remove_task - Add or remove a task from a pid_list
409  * @pid_list: The list to modify
410  * @self: The current task for fork or NULL for exit
411  * @task: The task to add or remove
412  *
413  * If adding a task, if @self is defined, the task is only added if @self
414  * is also included in @pid_list. This happens on fork and tasks should
415  * only be added when the parent is listed. If @self is NULL, then the
416  * @task pid will be removed from the list, which would happen on exit
417  * of a task.
418  */
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420                                   struct task_struct *self,
421                                   struct task_struct *task)
422 {
423         if (!pid_list)
424                 return;
425
426         /* For forks, we only add if the forking task is listed */
427         if (self) {
428                 if (!trace_find_filtered_pid(pid_list, self->pid))
429                         return;
430         }
431
432         /* Sorry, but we don't support pid_max changing after setting */
433         if (task->pid >= pid_list->pid_max)
434                 return;
435
436         /* "self" is set for forks, and NULL for exits */
437         if (self)
438                 set_bit(task->pid, pid_list->pids);
439         else
440                 clear_bit(task->pid, pid_list->pids);
441 }
442
443 /**
444  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445  * @pid_list: The pid list to show
446  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447  * @pos: The position of the file
448  *
449  * This is used by the seq_file "next" operation to iterate the pids
450  * listed in a trace_pid_list structure.
451  *
452  * Returns the pid+1 as we want to display pid of zero, but NULL would
453  * stop the iteration.
454  */
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
456 {
457         unsigned long pid = (unsigned long)v;
458
459         (*pos)++;
460
461         /* pid already is +1 of the actual previous bit */
462         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
463
464         /* Return pid + 1 to allow zero to be represented */
465         if (pid < pid_list->pid_max)
466                 return (void *)(pid + 1);
467
468         return NULL;
469 }
470
471 /**
472  * trace_pid_start - Used for seq_file to start reading pid lists
473  * @pid_list: The pid list to show
474  * @pos: The position of the file
475  *
476  * This is used by seq_file "start" operation to start the iteration
477  * of listing pids.
478  *
479  * Returns the pid+1 as we want to display pid of zero, but NULL would
480  * stop the iteration.
481  */
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
483 {
484         unsigned long pid;
485         loff_t l = 0;
486
487         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488         if (pid >= pid_list->pid_max)
489                 return NULL;
490
491         /* Return pid + 1 so that zero can be the exit value */
492         for (pid++; pid && l < *pos;
493              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
494                 ;
495         return (void *)pid;
496 }
497
498 /**
499  * trace_pid_show - show the current pid in seq_file processing
500  * @m: The seq_file structure to write into
501  * @v: A void pointer of the pid (+1) value to display
502  *
503  * Can be directly used by seq_file operations to display the current
504  * pid value.
505  */
506 int trace_pid_show(struct seq_file *m, void *v)
507 {
508         unsigned long pid = (unsigned long)v - 1;
509
510         seq_printf(m, "%lu\n", pid);
511         return 0;
512 }
513
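/*
 * Sketch of how a pid_list file can wire these helpers into seq_file
 * operations; the p_start/p_next/p_stop wrapper names are illustrative
 * (the real users live in the ftrace and trace_events pid filter code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(filtered_pids, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(filtered_pids, v, pos);
 *	}
 *
 *	static const struct seq_operations pid_sops = {
 *		.start = p_start,
 *		.next  = p_next,
 *		.stop  = p_stop,
 *		.show  = trace_pid_show,
 *	};
 *
 * where filtered_pids is whatever pid list the file exposes (normally
 * fetched from the trace_array under RCU).
 */
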
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE            127
516
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518                     struct trace_pid_list **new_pid_list,
519                     const char __user *ubuf, size_t cnt)
520 {
521         struct trace_pid_list *pid_list;
522         struct trace_parser parser;
523         unsigned long val;
524         int nr_pids = 0;
525         ssize_t read = 0;
526         ssize_t ret = 0;
527         loff_t pos;
528         pid_t pid;
529
530         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
531                 return -ENOMEM;
532
533         /*
534          * Always recreate a new array. The write is an all or nothing
535          * operation. Always create a new array when adding new pids by
536          * the user. If the operation fails, then the current list is
537          * not modified.
538          */
539         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
540         if (!pid_list) {
541                 trace_parser_put(&parser);
542                 return -ENOMEM;
543         }
544
545         pid_list->pid_max = READ_ONCE(pid_max);
546
547         /* Only truncating will shrink pid_max */
548         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549                 pid_list->pid_max = filtered_pids->pid_max;
550
551         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552         if (!pid_list->pids) {
553                 trace_parser_put(&parser);
554                 kfree(pid_list);
555                 return -ENOMEM;
556         }
557
558         if (filtered_pids) {
559                 /* copy the current bits to the new max */
560                 for_each_set_bit(pid, filtered_pids->pids,
561                                  filtered_pids->pid_max) {
562                         set_bit(pid, pid_list->pids);
563                         nr_pids++;
564                 }
565         }
566
567         while (cnt > 0) {
568
569                 pos = 0;
570
571                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572                 if (ret < 0 || !trace_parser_loaded(&parser))
573                         break;
574
575                 read += ret;
576                 ubuf += ret;
577                 cnt -= ret;
578
579                 ret = -EINVAL;
580                 if (kstrtoul(parser.buffer, 0, &val))
581                         break;
582                 if (val >= pid_list->pid_max)
583                         break;
584
585                 pid = (pid_t)val;
586
587                 set_bit(pid, pid_list->pids);
588                 nr_pids++;
589
590                 trace_parser_clear(&parser);
591                 ret = 0;
592         }
593         trace_parser_put(&parser);
594
595         if (ret < 0) {
596                 trace_free_pid_list(pid_list);
597                 return ret;
598         }
599
600         if (!nr_pids) {
601                 /* Cleared the list of pids */
602                 trace_free_pid_list(pid_list);
603                 read = ret;
604                 pid_list = NULL;
605         }
606
607         *new_pid_list = pid_list;
608
609         return read;
610 }
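
/*
 * User space reaches this through the pid filter files, for example:
 *
 *	# echo 123 456 > /sys/kernel/tracing/set_event_pid
 *	# echo > /sys/kernel/tracing/set_event_pid
 *
 * The first write installs a list containing pids 123 and 456, the second
 * clears it. The update is all or nothing: if parsing fails, the
 * previously installed list is left untouched.
 */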
611
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
613 {
614         u64 ts;
615
616         /* Early boot up does not have a buffer yet */
617         if (!buf->buffer)
618                 return trace_clock_local();
619
620         ts = ring_buffer_time_stamp(buf->buffer, cpu);
621         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
622
623         return ts;
624 }
625
626 u64 ftrace_now(int cpu)
627 {
628         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
629 }
630
631 /**
632  * tracing_is_enabled - Show if global_trace has been disabled
633  *
634  * Shows if the global trace has been enabled or not. It uses the
635  * mirror flag "buffer_disabled" so it can be used in fast paths such as
636  * the irqsoff tracer. But it may be inaccurate due to races. If you
637  * need to know the accurate state, use tracing_is_on() which is a little
638  * slower, but accurate.
639  */
640 int tracing_is_enabled(void)
641 {
642         /*
643          * For quick access (irqsoff uses this in fast path), just
644          * return the mirror variable of the state of the ring buffer.
645          * It's a little racy, but we don't really care.
646          */
647         smp_rmb();
648         return !global_trace.buffer_disabled;
649 }
650
651 /*
652  * trace_buf_size is the size in bytes that is allocated
653  * for a buffer. Note, the number of bytes is always rounded
654  * to page size.
655  *
656  * This number is purposely set to a low number of 16384.
657  * If the dump on oops happens, it will be much appreciated
658  * to not have to wait for all that output. Anyway, this is
659  * configurable at both boot time and run time.
660  */
661 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
662
663 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
664
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer            *trace_types __read_mostly;
667
668 /*
669  * trace_types_lock is used to protect the trace_types list.
670  */
671 DEFINE_MUTEX(trace_types_lock);
672
673 /*
674  * serialize the access of the ring buffer
675  *
676  * The ring buffer serializes readers, but that is only low level protection.
677  * The validity of the events (which are returned by ring_buffer_peek() etc.)
678  * is not protected by the ring buffer.
679  *
680  * The content of events may become garbage if we allow other processes to
681  * consume these events concurrently:
682  *   A) the page of the consumed events may become a normal page
683  *      (not a reader page) in the ring buffer, and this page will be
684  *      rewritten by the event producer.
685  *   B) The page of the consumed events may become a page for splice_read,
686  *      and this page will be returned to the system.
687  *
688  * These primitives allow multiple processes to access different cpu ring
689  * buffers concurrently.
690  *
691  * These primitives don't distinguish read-only and read-consume access.
692  * Multiple read-only accesses are also serialized.
693  */
694
695 #ifdef CONFIG_SMP
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         if (cpu == RING_BUFFER_ALL_CPUS) {
702                 /* gain it for accessing the whole ring buffer. */
703                 down_write(&all_cpu_access_lock);
704         } else {
705                 /* gain it for accessing a cpu ring buffer. */
706
707                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708                 down_read(&all_cpu_access_lock);
709
710                 /* Secondly block other access to this @cpu ring buffer. */
711                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
712         }
713 }
714
715 static inline void trace_access_unlock(int cpu)
716 {
717         if (cpu == RING_BUFFER_ALL_CPUS) {
718                 up_write(&all_cpu_access_lock);
719         } else {
720                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721                 up_read(&all_cpu_access_lock);
722         }
723 }
724
725 static inline void trace_access_lock_init(void)
726 {
727         int cpu;
728
729         for_each_possible_cpu(cpu)
730                 mutex_init(&per_cpu(cpu_access_lock, cpu));
731 }
732
733 #else
734
735 static DEFINE_MUTEX(access_lock);
736
737 static inline void trace_access_lock(int cpu)
738 {
739         (void)cpu;
740         mutex_lock(&access_lock);
741 }
742
743 static inline void trace_access_unlock(int cpu)
744 {
745         (void)cpu;
746         mutex_unlock(&access_lock);
747 }
748
749 static inline void trace_access_lock_init(void)
750 {
751 }
752
753 #endif
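
/*
 * Typical usage of the helpers above (sketch): a reader brackets its
 * consumption of a per-cpu buffer with the lock for that cpu, or uses
 * RING_BUFFER_ALL_CPUS when it needs every buffer quiesced:
 *
 *	trace_access_lock(cpu_file);
 *	...read or consume events for cpu_file...
 *	trace_access_unlock(cpu_file);
 */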
754
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
757                                  unsigned long flags,
758                                  int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760                                       struct trace_buffer *buffer,
761                                       unsigned long flags,
762                                       int skip, int pc, struct pt_regs *regs);
763
764 #else
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
766                                         unsigned long flags,
767                                         int skip, int pc, struct pt_regs *regs)
768 {
769 }
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771                                       struct trace_buffer *buffer,
772                                       unsigned long flags,
773                                       int skip, int pc, struct pt_regs *regs)
774 {
775 }
776
777 #endif
778
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781                   int type, unsigned long flags, int pc)
782 {
783         struct trace_entry *ent = ring_buffer_event_data(event);
784
785         tracing_generic_entry_update(ent, type, flags, pc);
786 }
787
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
790                           int type,
791                           unsigned long len,
792                           unsigned long flags, int pc)
793 {
794         struct ring_buffer_event *event;
795
796         event = ring_buffer_lock_reserve(buffer, len);
797         if (event != NULL)
798                 trace_event_setup(event, type, flags, pc);
799
800         return event;
801 }
802
803 void tracer_tracing_on(struct trace_array *tr)
804 {
805         if (tr->array_buffer.buffer)
806                 ring_buffer_record_on(tr->array_buffer.buffer);
807         /*
808          * This flag is looked at when buffers haven't been allocated
809          * yet, or by some tracers (like irqsoff), that just want to
810          * know if the ring buffer has been disabled, but it can handle
811          * races of where it gets disabled but we still do a record.
812          * As the check is in the fast path of the tracers, it is more
813          * important to be fast than accurate.
814          */
815         tr->buffer_disabled = 0;
816         /* Make the flag seen by readers */
817         smp_wmb();
818 }
819
820 /**
821  * tracing_on - enable tracing buffers
822  *
823  * This function enables tracing buffers that may have been
824  * disabled with tracing_off.
825  */
826 void tracing_on(void)
827 {
828         tracer_tracing_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_on);
831
832
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
835 {
836         __this_cpu_write(trace_taskinfo_save, true);
837
838         /* If this is the temp buffer, we need to commit fully */
839         if (this_cpu_read(trace_buffered_event) == event) {
840                 /* Length is in event->array[0] */
841                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842                 /* Release the temp buffer */
843                 this_cpu_dec(trace_buffered_event_cnt);
844         } else
845                 ring_buffer_unlock_commit(buffer, event);
846 }
847
848 /**
849  * __trace_puts - write a constant string into the trace buffer.
850  * @ip:    The address of the caller
851  * @str:   The constant string to write
852  * @size:  The size of the string.
853  */
854 int __trace_puts(unsigned long ip, const char *str, int size)
855 {
856         struct ring_buffer_event *event;
857         struct trace_buffer *buffer;
858         struct print_entry *entry;
859         unsigned long irq_flags;
860         int alloc;
861         int pc;
862
863         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
864                 return 0;
865
866         pc = preempt_count();
867
868         if (unlikely(tracing_selftest_running || tracing_disabled))
869                 return 0;
870
871         alloc = sizeof(*entry) + size + 2; /* possible \n added */
872
873         local_save_flags(irq_flags);
874         buffer = global_trace.array_buffer.buffer;
875         ring_buffer_nest_start(buffer);
876         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
877                                             irq_flags, pc);
878         if (!event) {
879                 size = 0;
880                 goto out;
881         }
882
883         entry = ring_buffer_event_data(event);
884         entry->ip = ip;
885
886         memcpy(&entry->buf, str, size);
887
888         /* Add a newline if necessary */
889         if (entry->buf[size - 1] != '\n') {
890                 entry->buf[size] = '\n';
891                 entry->buf[size + 1] = '\0';
892         } else
893                 entry->buf[size] = '\0';
894
895         __buffer_unlock_commit(buffer, event);
896         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
897  out:
898         ring_buffer_nest_end(buffer);
899         return size;
900 }
901 EXPORT_SYMBOL_GPL(__trace_puts);
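
/*
 * Callers normally use the trace_puts() macro rather than calling this
 * directly; the macro supplies _THIS_IP_ and, for compile-time constant
 * strings, routes to __trace_bputs() instead. A sketch:
 *
 *	trace_puts("reached the slow path\n");
 */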
902
903 /**
904  * __trace_bputs - write the pointer to a constant string into trace buffer
905  * @ip:    The address of the caller
906  * @str:   The constant string to write to the buffer to
907  */
908 int __trace_bputs(unsigned long ip, const char *str)
909 {
910         struct ring_buffer_event *event;
911         struct trace_buffer *buffer;
912         struct bputs_entry *entry;
913         unsigned long irq_flags;
914         int size = sizeof(struct bputs_entry);
915         int ret = 0;
916         int pc;
917
918         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
919                 return 0;
920
921         pc = preempt_count();
922
923         if (unlikely(tracing_selftest_running || tracing_disabled))
924                 return 0;
925
926         local_save_flags(irq_flags);
927         buffer = global_trace.array_buffer.buffer;
928
929         ring_buffer_nest_start(buffer);
930         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
931                                             irq_flags, pc);
932         if (!event)
933                 goto out;
934
935         entry = ring_buffer_event_data(event);
936         entry->ip                       = ip;
937         entry->str                      = str;
938
939         __buffer_unlock_commit(buffer, event);
940         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
941
942         ret = 1;
943  out:
944         ring_buffer_nest_end(buffer);
945         return ret;
946 }
947 EXPORT_SYMBOL_GPL(__trace_bputs);
948
949 #ifdef CONFIG_TRACER_SNAPSHOT
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
951                                            void *cond_data)
952 {
953         struct tracer *tracer = tr->current_trace;
954         unsigned long flags;
955
956         if (in_nmi()) {
957                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958                 internal_trace_puts("*** snapshot is being ignored        ***\n");
959                 return;
960         }
961
962         if (!tr->allocated_snapshot) {
963                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964                 internal_trace_puts("*** stopping trace here!   ***\n");
965                 tracing_off();
966                 return;
967         }
968
969         /* Note, snapshot can not be used when the tracer uses it */
970         if (tracer->use_max_tr) {
971                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
973                 return;
974         }
975
976         local_irq_save(flags);
977         update_max_tr(tr, current, smp_processor_id(), cond_data);
978         local_irq_restore(flags);
979 }
980
981 void tracing_snapshot_instance(struct trace_array *tr)
982 {
983         tracing_snapshot_instance_cond(tr, NULL);
984 }
985
986 /**
987  * tracing_snapshot - take a snapshot of the current buffer.
988  *
989  * This causes a swap between the snapshot buffer and the current live
990  * tracing buffer. You can use this to take snapshots of the live
991  * trace when some condition is triggered, but continue to trace.
992  *
993  * Note, make sure to allocate the snapshot with either
994  * a tracing_snapshot_alloc(), or by doing it manually
995  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
996  *
997  * If the snapshot buffer is not allocated, it will stop tracing.
998  * Basically making a permanent snapshot.
999  */
1000 void tracing_snapshot(void)
1001 {
1002         struct trace_array *tr = &global_trace;
1003
1004         tracing_snapshot_instance(tr);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
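
/*
 * Example (sketch): kernel code can preserve the trace around a rare
 * failure without stopping the live trace, provided the snapshot buffer
 * was allocated beforehand (e.g. with tracing_snapshot_alloc() from a
 * context that may sleep). Here hit_rare_bug is just a placeholder
 * condition:
 *
 *	if (unlikely(hit_rare_bug))
 *		tracing_snapshot();
 */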
1007
1008 /**
1009  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010  * @tr:         The tracing instance to snapshot
1011  * @cond_data:  The data to be tested conditionally, and possibly saved
1012  *
1013  * This is the same as tracing_snapshot() except that the snapshot is
1014  * conditional - the snapshot will only happen if the
1015  * cond_snapshot.update() implementation receiving the cond_data
1016  * returns true, which means that the trace array's cond_snapshot
1017  * update() operation used the cond_data to determine whether the
1018  * snapshot should be taken, and if it was, presumably saved it along
1019  * with the snapshot.
1020  */
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1022 {
1023         tracing_snapshot_instance_cond(tr, cond_data);
1024 }
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1026
1027 /**
1028  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1029  * @tr:         The tracing instance
1030  *
1031  * When the user enables a conditional snapshot using
1032  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033  * with the snapshot.  This accessor is used to retrieve it.
1034  *
1035  * Should not be called from cond_snapshot.update(), since it takes
1036  * the tr->max_lock lock, which the code calling
1037  * cond_snapshot.update() has already done.
1038  *
1039  * Returns the cond_data associated with the trace array's snapshot.
1040  */
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1042 {
1043         void *cond_data = NULL;
1044
1045         arch_spin_lock(&tr->max_lock);
1046
1047         if (tr->cond_snapshot)
1048                 cond_data = tr->cond_snapshot->cond_data;
1049
1050         arch_spin_unlock(&tr->max_lock);
1051
1052         return cond_data;
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1055
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057                                         struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1059
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1061 {
1062         int ret;
1063
1064         if (!tr->allocated_snapshot) {
1065
1066                 /* allocate spare buffer */
1067                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1069                 if (ret < 0)
1070                         return ret;
1071
1072                 tr->allocated_snapshot = true;
1073         }
1074
1075         return 0;
1076 }
1077
1078 static void free_snapshot(struct trace_array *tr)
1079 {
1080         /*
1081          * We don't free the ring buffer; instead, we resize it because
1082          * the max_tr ring buffer has some state (e.g. ring->clock) and
1083          * we want to preserve it.
1084          */
1085         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086         set_buffer_entries(&tr->max_buffer, 1);
1087         tracing_reset_online_cpus(&tr->max_buffer);
1088         tr->allocated_snapshot = false;
1089 }
1090
1091 /**
1092  * tracing_alloc_snapshot - allocate snapshot buffer.
1093  *
1094  * This only allocates the snapshot buffer if it isn't already
1095  * allocated - it doesn't also take a snapshot.
1096  *
1097  * This is meant to be used in cases where the snapshot buffer needs
1098  * to be set up for events that can't sleep but need to be able to
1099  * trigger a snapshot.
1100  */
1101 int tracing_alloc_snapshot(void)
1102 {
1103         struct trace_array *tr = &global_trace;
1104         int ret;
1105
1106         ret = tracing_alloc_snapshot_instance(tr);
1107         WARN_ON(ret < 0);
1108
1109         return ret;
1110 }
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1112
1113 /**
1114  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1115  *
1116  * This is similar to tracing_snapshot(), but it will allocate the
1117  * snapshot buffer if it isn't already allocated. Use this only
1118  * where it is safe to sleep, as the allocation may sleep.
1119  *
1120  * This causes a swap between the snapshot buffer and the current live
1121  * tracing buffer. You can use this to take snapshots of the live
1122  * trace when some condition is triggered, but continue to trace.
1123  */
1124 void tracing_snapshot_alloc(void)
1125 {
1126         int ret;
1127
1128         ret = tracing_alloc_snapshot();
1129         if (ret < 0)
1130                 return;
1131
1132         tracing_snapshot();
1133 }
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1135
1136 /**
1137  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138  * @tr:         The tracing instance
1139  * @cond_data:  User data to associate with the snapshot
1140  * @update:     Implementation of the cond_snapshot update function
1141  *
1142  * Check whether the conditional snapshot for the given instance has
1143  * already been enabled, or if the current tracer is already using a
1144  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145  * save the cond_data and update function inside.
1146  *
1147  * Returns 0 if successful, error otherwise.
1148  */
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150                                  cond_update_fn_t update)
1151 {
1152         struct cond_snapshot *cond_snapshot;
1153         int ret = 0;
1154
1155         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1156         if (!cond_snapshot)
1157                 return -ENOMEM;
1158
1159         cond_snapshot->cond_data = cond_data;
1160         cond_snapshot->update = update;
1161
1162         mutex_lock(&trace_types_lock);
1163
1164         ret = tracing_alloc_snapshot_instance(tr);
1165         if (ret)
1166                 goto fail_unlock;
1167
1168         if (tr->current_trace->use_max_tr) {
1169                 ret = -EBUSY;
1170                 goto fail_unlock;
1171         }
1172
1173         /*
1174          * The cond_snapshot can only change to NULL without the
1175          * trace_types_lock. We don't care if we race with it going
1176          * to NULL, but we want to make sure that it's not set to
1177          * something other than NULL when we get here, which we can
1178          * do safely with only holding the trace_types_lock and not
1179          * having to take the max_lock.
1180          */
1181         if (tr->cond_snapshot) {
1182                 ret = -EBUSY;
1183                 goto fail_unlock;
1184         }
1185
1186         arch_spin_lock(&tr->max_lock);
1187         tr->cond_snapshot = cond_snapshot;
1188         arch_spin_unlock(&tr->max_lock);
1189
1190         mutex_unlock(&trace_types_lock);
1191
1192         return ret;
1193
1194  fail_unlock:
1195         mutex_unlock(&trace_types_lock);
1196         kfree(cond_snapshot);
1197         return ret;
1198 }
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
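
/*
 * Sketch of a conditional snapshot user; my_update and my_state are
 * illustrative names, not kernel API:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->over_threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 *
 * The swap in tracing_snapshot_cond() only happens when my_update()
 * returns true for the cond_data passed in.
 */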
1200
1201 /**
1202  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203  * @tr:         The tracing instance
1204  *
1205  * Check whether the conditional snapshot for the given instance is
1206  * enabled; if so, free the cond_snapshot associated with it,
1207  * otherwise return -EINVAL.
1208  *
1209  * Returns 0 if successful, error otherwise.
1210  */
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1212 {
1213         int ret = 0;
1214
1215         arch_spin_lock(&tr->max_lock);
1216
1217         if (!tr->cond_snapshot)
1218                 ret = -EINVAL;
1219         else {
1220                 kfree(tr->cond_snapshot);
1221                 tr->cond_snapshot = NULL;
1222         }
1223
1224         arch_spin_unlock(&tr->max_lock);
1225
1226         return ret;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1229 #else
1230 void tracing_snapshot(void)
1231 {
1232         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1236 {
1237         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1238 }
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1241 {
1242         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1243         return -ENODEV;
1244 }
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1247 {
1248         /* Give warning */
1249         tracing_snapshot();
1250 }
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1253 {
1254         return NULL;
1255 }
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1258 {
1259         return -ENODEV;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1263 {
1264         return false;
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
1268
1269 void tracer_tracing_off(struct trace_array *tr)
1270 {
1271         if (tr->array_buffer.buffer)
1272                 ring_buffer_record_off(tr->array_buffer.buffer);
1273         /*
1274          * This flag is looked at when buffers haven't been allocated
1275          * yet, or by some tracers (like irqsoff), that just want to
1276          * know if the ring buffer has been disabled, but it can handle
1277          * races of where it gets disabled but we still do a record.
1278          * As the check is in the fast path of the tracers, it is more
1279          * important to be fast than accurate.
1280          */
1281         tr->buffer_disabled = 1;
1282         /* Make the flag seen by readers */
1283         smp_wmb();
1284 }
1285
1286 /**
1287  * tracing_off - turn off tracing buffers
1288  *
1289  * This function stops the tracing buffers from recording data.
1290  * It does not disable any overhead the tracers themselves may
1291  * be causing. This function simply causes all recording to
1292  * the ring buffers to fail.
1293  */
1294 void tracing_off(void)
1295 {
1296         tracer_tracing_off(&global_trace);
1297 }
1298 EXPORT_SYMBOL_GPL(tracing_off);
1299
1300 void disable_trace_on_warning(void)
1301 {
1302         if (__disable_trace_on_warning) {
1303                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304                         "Disabling tracing due to warning\n");
1305                 tracing_off();
1306         }
1307 }
1308
1309 /**
1310  * tracer_tracing_is_on - show real state of ring buffer enabled
1311  * @tr : the trace array to know if ring buffer is enabled
1312  *
1313  * Shows real state of the ring buffer if it is enabled or not.
1314  */
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1316 {
1317         if (tr->array_buffer.buffer)
1318                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319         return !tr->buffer_disabled;
1320 }
1321
1322 /**
1323  * tracing_is_on - show state of ring buffers enabled
1324  */
1325 int tracing_is_on(void)
1326 {
1327         return tracer_tracing_is_on(&global_trace);
1328 }
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
1330
1331 static int __init set_buf_size(char *str)
1332 {
1333         unsigned long buf_size;
1334
1335         if (!str)
1336                 return 0;
1337         buf_size = memparse(str, &str);
1338         /* nr_entries can not be zero */
1339         if (buf_size == 0)
1340                 return 0;
1341         trace_buf_size = buf_size;
1342         return 1;
1343 }
1344 __setup("trace_buf_size=", set_buf_size);
1345
1346 static int __init set_tracing_thresh(char *str)
1347 {
1348         unsigned long threshold;
1349         int ret;
1350
1351         if (!str)
1352                 return 0;
1353         ret = kstrtoul(str, 0, &threshold);
1354         if (ret < 0)
1355                 return 0;
1356         tracing_thresh = threshold * 1000;
1357         return 1;
1358 }
1359 __setup("tracing_thresh=", set_tracing_thresh);
1360
1361 unsigned long nsecs_to_usecs(unsigned long nsecs)
1362 {
1363         return nsecs / 1000;
1364 }
1365
1366 /*
1367  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370  * of strings in the order that the evals (enum) were defined.
1371  */
1372 #undef C
1373 #define C(a, b) b
1374
1375 /* These must match the bit positions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1377         TRACE_FLAGS
1378         NULL
1379 };
1380
1381 static struct {
1382         u64 (*func)(void);
1383         const char *name;
1384         int in_ns;              /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386         { trace_clock_local,            "local",        1 },
1387         { trace_clock_global,           "global",       1 },
1388         { trace_clock_counter,          "counter",      0 },
1389         { trace_clock_jiffies,          "uptime",       0 },
1390         { trace_clock,                  "perf",         1 },
1391         { ktime_get_mono_fast_ns,       "mono",         1 },
1392         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1393         { ktime_get_boot_fast_ns,       "boot",         1 },
1394         ARCH_TRACE_CLOCKS
1395 };
1396
1397 bool trace_clock_in_ns(struct trace_array *tr)
1398 {
1399         if (trace_clocks[tr->clock_id].in_ns)
1400                 return true;
1401
1402         return false;
1403 }
1404
1405 /*
1406  * trace_parser_get_init - gets the buffer for trace parser
1407  */
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1409 {
1410         memset(parser, 0, sizeof(*parser));
1411
1412         parser->buffer = kmalloc(size, GFP_KERNEL);
1413         if (!parser->buffer)
1414                 return 1;
1415
1416         parser->size = size;
1417         return 0;
1418 }
1419
1420 /*
1421  * trace_parser_put - frees the buffer for trace parser
1422  */
1423 void trace_parser_put(struct trace_parser *parser)
1424 {
1425         kfree(parser->buffer);
1426         parser->buffer = NULL;
1427 }
1428
1429 /*
1430  * trace_get_user - reads the user input string separated by space
1431  * (matched by isspace(ch))
1432  *
1433  * For each string found the 'struct trace_parser' is updated,
1434  * and the function returns.
1435  *
1436  * Returns number of bytes read.
1437  *
1438  * See kernel/trace/trace.h for 'struct trace_parser' details.
1439  */
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441         size_t cnt, loff_t *ppos)
1442 {
1443         char ch;
1444         size_t read = 0;
1445         ssize_t ret;
1446
1447         if (!*ppos)
1448                 trace_parser_clear(parser);
1449
1450         ret = get_user(ch, ubuf++);
1451         if (ret)
1452                 goto out;
1453
1454         read++;
1455         cnt--;
1456
1457         /*
1458          * If the parser is not finished with the last write,
1459          * continue reading the user input without skipping spaces.
1460          */
1461         if (!parser->cont) {
1462                 /* skip white space */
1463                 while (cnt && isspace(ch)) {
1464                         ret = get_user(ch, ubuf++);
1465                         if (ret)
1466                                 goto out;
1467                         read++;
1468                         cnt--;
1469                 }
1470
1471                 parser->idx = 0;
1472
1473                 /* only spaces were written */
1474                 if (isspace(ch) || !ch) {
1475                         *ppos += read;
1476                         ret = read;
1477                         goto out;
1478                 }
1479         }
1480
1481         /* read the non-space input */
1482         while (cnt && !isspace(ch) && ch) {
1483                 if (parser->idx < parser->size - 1)
1484                         parser->buffer[parser->idx++] = ch;
1485                 else {
1486                         ret = -EINVAL;
1487                         goto out;
1488                 }
1489                 ret = get_user(ch, ubuf++);
1490                 if (ret)
1491                         goto out;
1492                 read++;
1493                 cnt--;
1494         }
1495
1496         /* We either got finished input or we have to wait for another call. */
1497         if (isspace(ch) || !ch) {
1498                 parser->buffer[parser->idx] = 0;
1499                 parser->cont = false;
1500         } else if (parser->idx < parser->size - 1) {
1501                 parser->cont = true;
1502                 parser->buffer[parser->idx++] = ch;
1503                 /* Make sure the parsed string always terminates with '\0'. */
1504                 parser->buffer[parser->idx] = 0;
1505         } else {
1506                 ret = -EINVAL;
1507                 goto out;
1508         }
1509
1510         *ppos += read;
1511         ret = read;
1512
1513 out:
1514         return ret;
1515 }
1516
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1519 {
1520         int len;
1521
1522         if (trace_seq_used(s) <= s->seq.readpos)
1523                 return -EBUSY;
1524
1525         len = trace_seq_used(s) - s->seq.readpos;
1526         if (cnt > len)
1527                 cnt = len;
1528         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1529
1530         s->seq.readpos += cnt;
1531         return cnt;
1532 }
1533
1534 unsigned long __read_mostly     tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1536
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538         defined(CONFIG_FSNOTIFY)
1539
1540 static struct workqueue_struct *fsnotify_wq;
1541
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1543 {
1544         struct trace_array *tr = container_of(work, struct trace_array,
1545                                               fsnotify_work);
1546         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1547 }
1548
1549 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1550 {
1551         struct trace_array *tr = container_of(iwork, struct trace_array,
1552                                               fsnotify_irqwork);
1553         queue_work(fsnotify_wq, &tr->fsnotify_work);
1554 }
1555
1556 static void trace_create_maxlat_file(struct trace_array *tr,
1557                                      struct dentry *d_tracer)
1558 {
1559         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1560         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1561         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1562                                               d_tracer, &tr->max_latency,
1563                                               &tracing_max_lat_fops);
1564 }
1565
1566 __init static int latency_fsnotify_init(void)
1567 {
1568         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1569                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1570         if (!fsnotify_wq) {
1571                 pr_err("Unable to allocate tr_max_lat_wq\n");
1572                 return -ENOMEM;
1573         }
1574         return 0;
1575 }
1576
1577 late_initcall_sync(latency_fsnotify_init);
1578
1579 void latency_fsnotify(struct trace_array *tr)
1580 {
1581         if (!fsnotify_wq)
1582                 return;
1583         /*
1584          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1585          * possible that we are called from __schedule() or do_idle(), which
1586          * could cause a deadlock.
1587          */
1588         irq_work_queue(&tr->fsnotify_irqwork);
1589 }
1590
1591 /*
1592  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1593  *  defined(CONFIG_FSNOTIFY)
1594  */
1595 #else
1596
1597 #define trace_create_maxlat_file(tr, d_tracer)                          \
1598         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1599                           &tr->max_latency, &tracing_max_lat_fops)
1600
1601 #endif
1602
1603 #ifdef CONFIG_TRACER_MAX_TRACE
1604 /*
1605  * Copy the new maximum trace into the separate maximum-trace
1606  * structure. (this way the maximum trace is permanently saved,
1607  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1608  */
1609 static void
1610 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1611 {
1612         struct array_buffer *trace_buf = &tr->array_buffer;
1613         struct array_buffer *max_buf = &tr->max_buffer;
1614         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1615         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1616
1617         max_buf->cpu = cpu;
1618         max_buf->time_start = data->preempt_timestamp;
1619
1620         max_data->saved_latency = tr->max_latency;
1621         max_data->critical_start = data->critical_start;
1622         max_data->critical_end = data->critical_end;
1623
1624         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1625         max_data->pid = tsk->pid;
1626         /*
1627          * If tsk == current, then use current_uid(), as that does not use
1628          * RCU. The irq tracer can be called out of RCU scope.
1629          */
1630         if (tsk == current)
1631                 max_data->uid = current_uid();
1632         else
1633                 max_data->uid = task_uid(tsk);
1634
1635         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1636         max_data->policy = tsk->policy;
1637         max_data->rt_priority = tsk->rt_priority;
1638
1639         /* record this tasks comm */
1640         tracing_record_cmdline(tsk);
1641         latency_fsnotify(tr);
1642 }
1643
1644 /**
1645  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1646  * @tr: tracer
1647  * @tsk: the task with the latency
1648  * @cpu: The cpu that initiated the trace.
1649  * @cond_data: User data associated with a conditional snapshot
1650  *
1651  * Flip the buffers between the @tr and the max_tr and record information
1652  * about which task was the cause of this latency.
1653  */
1654 void
1655 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1656               void *cond_data)
1657 {
1658         if (tr->stop_count)
1659                 return;
1660
1661         WARN_ON_ONCE(!irqs_disabled());
1662
1663         if (!tr->allocated_snapshot) {
1664                 /* Only the nop tracer should hit this when disabling */
1665                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1666                 return;
1667         }
1668
1669         arch_spin_lock(&tr->max_lock);
1670
1671         /* Inherit the recordable setting from array_buffer */
1672         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1673                 ring_buffer_record_on(tr->max_buffer.buffer);
1674         else
1675                 ring_buffer_record_off(tr->max_buffer.buffer);
1676
1677 #ifdef CONFIG_TRACER_SNAPSHOT
1678         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1679                 goto out_unlock;
1680 #endif
1681         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1682
1683         __update_max_tr(tr, tsk, cpu);
1684
1685  out_unlock:
1686         arch_spin_unlock(&tr->max_lock);
1687 }
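/*
 * Illustrative sketch (hypothetical names): a latency tracer calls this
 * with interrupts disabled once it decides the buffers hold a new worst
 * case, so the snapshot is preserved for tracing_max_latency readers:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */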
1688
1689 /**
1690  * update_max_tr_single - only copy one trace over, and reset the rest
1691  * @tr: tracer
1692  * @tsk: task with the latency
1693  * @cpu: the cpu of the buffer to copy.
1694  *
1695  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1696  */
1697 void
1698 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1699 {
1700         int ret;
1701
1702         if (tr->stop_count)
1703                 return;
1704
1705         WARN_ON_ONCE(!irqs_disabled());
1706         if (!tr->allocated_snapshot) {
1707                 /* Only the nop tracer should hit this when disabling */
1708                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1709                 return;
1710         }
1711
1712         arch_spin_lock(&tr->max_lock);
1713
1714         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1715
1716         if (ret == -EBUSY) {
1717                 /*
1718                  * We failed to swap the buffer due to a commit taking
1719                  * place on this CPU. We fail to record, but we reset
1720                  * the max trace buffer (no one writes directly to it)
1721                  * and flag that it failed.
1722                  */
1723                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1724                         "Failed to swap buffers due to commit in progress\n");
1725         }
1726
1727         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1728
1729         __update_max_tr(tr, tsk, cpu);
1730         arch_spin_unlock(&tr->max_lock);
1731 }
1732 #endif /* CONFIG_TRACER_MAX_TRACE */
1733
1734 static int wait_on_pipe(struct trace_iterator *iter, int full)
1735 {
1736         /* Iterators are static; they should be either filled or empty */
1737         if (trace_buffer_iter(iter, iter->cpu_file))
1738                 return 0;
1739
1740         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1741                                 full);
1742 }
1743
1744 #ifdef CONFIG_FTRACE_STARTUP_TEST
1745 static bool selftests_can_run;
1746
1747 struct trace_selftests {
1748         struct list_head                list;
1749         struct tracer                   *type;
1750 };
1751
1752 static LIST_HEAD(postponed_selftests);
1753
1754 static int save_selftest(struct tracer *type)
1755 {
1756         struct trace_selftests *selftest;
1757
1758         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1759         if (!selftest)
1760                 return -ENOMEM;
1761
1762         selftest->type = type;
1763         list_add(&selftest->list, &postponed_selftests);
1764         return 0;
1765 }
1766
1767 static int run_tracer_selftest(struct tracer *type)
1768 {
1769         struct trace_array *tr = &global_trace;
1770         struct tracer *saved_tracer = tr->current_trace;
1771         int ret;
1772
1773         if (!type->selftest || tracing_selftest_disabled)
1774                 return 0;
1775
1776         /*
1777          * If a tracer registers early in boot up (before scheduling is
1778          * initialized and such), then do not run its selftests yet.
1779          * Instead, run it a little later in the boot process.
1780          */
1781         if (!selftests_can_run)
1782                 return save_selftest(type);
1783
1784         /*
1785          * Run a selftest on this tracer.
1786          * Here we reset the trace buffer, and set the current
1787          * tracer to be this tracer. The tracer can then run some
1788          * internal tracing to verify that everything is in order.
1789          * If we fail, we do not register this tracer.
1790          */
1791         tracing_reset_online_cpus(&tr->array_buffer);
1792
1793         tr->current_trace = type;
1794
1795 #ifdef CONFIG_TRACER_MAX_TRACE
1796         if (type->use_max_tr) {
1797                 /* If we expanded the buffers, make sure the max is expanded too */
1798                 if (ring_buffer_expanded)
1799                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1800                                            RING_BUFFER_ALL_CPUS);
1801                 tr->allocated_snapshot = true;
1802         }
1803 #endif
1804
1805         /* the test is responsible for initializing and enabling */
1806         pr_info("Testing tracer %s: ", type->name);
1807         ret = type->selftest(type, tr);
1808         /* the test is responsible for resetting too */
1809         tr->current_trace = saved_tracer;
1810         if (ret) {
1811                 printk(KERN_CONT "FAILED!\n");
1812                 /* Add the warning after printing 'FAILED' */
1813                 WARN_ON(1);
1814                 return -1;
1815         }
1816         /* Only reset on passing, to avoid touching corrupted buffers */
1817         tracing_reset_online_cpus(&tr->array_buffer);
1818
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820         if (type->use_max_tr) {
1821                 tr->allocated_snapshot = false;
1822
1823                 /* Shrink the max buffer again */
1824                 if (ring_buffer_expanded)
1825                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1826                                            RING_BUFFER_ALL_CPUS);
1827         }
1828 #endif
1829
1830         printk(KERN_CONT "PASSED\n");
1831         return 0;
1832 }
1833
1834 static __init int init_trace_selftests(void)
1835 {
1836         struct trace_selftests *p, *n;
1837         struct tracer *t, **last;
1838         int ret;
1839
1840         selftests_can_run = true;
1841
1842         mutex_lock(&trace_types_lock);
1843
1844         if (list_empty(&postponed_selftests))
1845                 goto out;
1846
1847         pr_info("Running postponed tracer tests:\n");
1848
1849         tracing_selftest_running = true;
1850         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1851                 /* This loop can take minutes when sanitizers are enabled, so
1852                  * let's make sure we allow RCU processing.
1853                  */
1854                 cond_resched();
1855                 ret = run_tracer_selftest(p->type);
1856                 /* If the test fails, then warn and remove from available_tracers */
1857                 if (ret < 0) {
1858                         WARN(1, "tracer: %s failed selftest, disabling\n",
1859                              p->type->name);
1860                         last = &trace_types;
1861                         for (t = trace_types; t; t = t->next) {
1862                                 if (t == p->type) {
1863                                         *last = t->next;
1864                                         break;
1865                                 }
1866                                 last = &t->next;
1867                         }
1868                 }
1869                 list_del(&p->list);
1870                 kfree(p);
1871         }
1872         tracing_selftest_running = false;
1873
1874  out:
1875         mutex_unlock(&trace_types_lock);
1876
1877         return 0;
1878 }
1879 core_initcall(init_trace_selftests);
1880 #else
1881 static inline int run_tracer_selftest(struct tracer *type)
1882 {
1883         return 0;
1884 }
1885 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1886
1887 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1888
1889 static void __init apply_trace_boot_options(void);
1890
1891 /**
1892  * register_tracer - register a tracer with the ftrace system.
1893  * @type: the plugin for the tracer
1894  *
1895  * Register a new plugin tracer.
1896  */
1897 int __init register_tracer(struct tracer *type)
1898 {
1899         struct tracer *t;
1900         int ret = 0;
1901
1902         if (!type->name) {
1903                 pr_info("Tracer must have a name\n");
1904                 return -1;
1905         }
1906
1907         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1908                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1909                 return -1;
1910         }
1911
1912         if (security_locked_down(LOCKDOWN_TRACEFS)) {
1913                 pr_warn("Can not register tracer %s due to lockdown\n",
1914                            type->name);
1915                 return -EPERM;
1916         }
1917
1918         mutex_lock(&trace_types_lock);
1919
1920         tracing_selftest_running = true;
1921
1922         for (t = trace_types; t; t = t->next) {
1923                 if (strcmp(type->name, t->name) == 0) {
1924                         /* already found */
1925                         pr_info("Tracer %s already registered\n",
1926                                 type->name);
1927                         ret = -1;
1928                         goto out;
1929                 }
1930         }
1931
1932         if (!type->set_flag)
1933                 type->set_flag = &dummy_set_flag;
1934         if (!type->flags) {
1935                 /*allocate a dummy tracer_flags*/
1936                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1937                 if (!type->flags) {
1938                         ret = -ENOMEM;
1939                         goto out;
1940                 }
1941                 type->flags->val = 0;
1942                 type->flags->opts = dummy_tracer_opt;
1943         } else
1944                 if (!type->flags->opts)
1945                         type->flags->opts = dummy_tracer_opt;
1946
1947         /* store the tracer for __set_tracer_option */
1948         type->flags->trace = type;
1949
1950         ret = run_tracer_selftest(type);
1951         if (ret < 0)
1952                 goto out;
1953
1954         type->next = trace_types;
1955         trace_types = type;
1956         add_tracer_options(&global_trace, type);
1957
1958  out:
1959         tracing_selftest_running = false;
1960         mutex_unlock(&trace_types_lock);
1961
1962         if (ret || !default_bootup_tracer)
1963                 goto out_unlock;
1964
1965         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1966                 goto out_unlock;
1967
1968         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1969         /* Do we want this tracer to start on bootup? */
1970         tracing_set_tracer(&global_trace, type->name);
1971         default_bootup_tracer = NULL;
1972
1973         apply_trace_boot_options();
1974
1975         /* disable other selftests, since this will break them. */
1976         tracing_selftest_disabled = true;
1977 #ifdef CONFIG_FTRACE_STARTUP_TEST
1978         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1979                type->name);
1980 #endif
1981
1982  out_unlock:
1983         return ret;
1984 }
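/*
 * Illustrative sketch of a minimal registration, loosely modelled on the
 * simplest in-tree tracers.  All "my_*" names are hypothetical and only a
 * subset of the struct tracer callbacks is shown:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */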
1985
1986 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1987 {
1988         struct trace_buffer *buffer = buf->buffer;
1989
1990         if (!buffer)
1991                 return;
1992
1993         ring_buffer_record_disable(buffer);
1994
1995         /* Make sure all commits have finished */
1996         synchronize_rcu();
1997         ring_buffer_reset_cpu(buffer, cpu);
1998
1999         ring_buffer_record_enable(buffer);
2000 }
2001
2002 void tracing_reset_online_cpus(struct array_buffer *buf)
2003 {
2004         struct trace_buffer *buffer = buf->buffer;
2005
2006         if (!buffer)
2007                 return;
2008
2009         ring_buffer_record_disable(buffer);
2010
2011         /* Make sure all commits have finished */
2012         synchronize_rcu();
2013
2014         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2015
2016         ring_buffer_reset_online_cpus(buffer);
2017
2018         ring_buffer_record_enable(buffer);
2019 }
2020
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2023 {
2024         struct trace_array *tr;
2025
2026         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027                 if (!tr->clear_trace)
2028                         continue;
2029                 tr->clear_trace = false;
2030                 tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032                 tracing_reset_online_cpus(&tr->max_buffer);
2033 #endif
2034         }
2035 }
2036
2037 static int *tgid_map;
2038
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044         unsigned *map_cmdline_to_pid;
2045         unsigned cmdline_num;
2046         int cmdline_idx;
2047         char *saved_cmdlines;
2048 };
2049 static struct saved_cmdlines_buffer *savedcmd;
2050
2051 /* temporary disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2053
2054 static inline char *get_saved_cmdlines(int idx)
2055 {
2056         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2057 }
2058
2059 static inline void set_cmdline(int idx, const char *cmdline)
2060 {
2061         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2062 }
2063
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065                                     struct saved_cmdlines_buffer *s)
2066 {
2067         s->map_cmdline_to_pid = kmalloc_array(val,
2068                                               sizeof(*s->map_cmdline_to_pid),
2069                                               GFP_KERNEL);
2070         if (!s->map_cmdline_to_pid)
2071                 return -ENOMEM;
2072
2073         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074         if (!s->saved_cmdlines) {
2075                 kfree(s->map_cmdline_to_pid);
2076                 return -ENOMEM;
2077         }
2078
2079         s->cmdline_idx = 0;
2080         s->cmdline_num = val;
2081         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082                sizeof(s->map_pid_to_cmdline));
2083         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084                val * sizeof(*s->map_cmdline_to_pid));
2085
2086         return 0;
2087 }
2088
2089 static int trace_create_savedcmd(void)
2090 {
2091         int ret;
2092
2093         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2094         if (!savedcmd)
2095                 return -ENOMEM;
2096
2097         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2098         if (ret < 0) {
2099                 kfree(savedcmd);
2100                 savedcmd = NULL;
2101                 return -ENOMEM;
2102         }
2103
2104         return 0;
2105 }
2106
2107 int is_tracing_stopped(void)
2108 {
2109         return global_trace.stop_count;
2110 }
2111
2112 /**
2113  * tracing_start - quick start of the tracer
2114  *
2115  * If tracing is enabled but was stopped by tracing_stop,
2116  * this will start the tracer back up.
2117  */
2118 void tracing_start(void)
2119 {
2120         struct trace_buffer *buffer;
2121         unsigned long flags;
2122
2123         if (tracing_disabled)
2124                 return;
2125
2126         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127         if (--global_trace.stop_count) {
2128                 if (global_trace.stop_count < 0) {
2129                         /* Someone screwed up their debugging */
2130                         WARN_ON_ONCE(1);
2131                         global_trace.stop_count = 0;
2132                 }
2133                 goto out;
2134         }
2135
2136         /* Prevent the buffers from switching */
2137         arch_spin_lock(&global_trace.max_lock);
2138
2139         buffer = global_trace.array_buffer.buffer;
2140         if (buffer)
2141                 ring_buffer_record_enable(buffer);
2142
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144         buffer = global_trace.max_buffer.buffer;
2145         if (buffer)
2146                 ring_buffer_record_enable(buffer);
2147 #endif
2148
2149         arch_spin_unlock(&global_trace.max_lock);
2150
2151  out:
2152         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2153 }
2154
2155 static void tracing_start_tr(struct trace_array *tr)
2156 {
2157         struct trace_buffer *buffer;
2158         unsigned long flags;
2159
2160         if (tracing_disabled)
2161                 return;
2162
2163         /* If global, we need to also start the max tracer */
2164         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165                 return tracing_start();
2166
2167         raw_spin_lock_irqsave(&tr->start_lock, flags);
2168
2169         if (--tr->stop_count) {
2170                 if (tr->stop_count < 0) {
2171                         /* Someone screwed up their debugging */
2172                         WARN_ON_ONCE(1);
2173                         tr->stop_count = 0;
2174                 }
2175                 goto out;
2176         }
2177
2178         buffer = tr->array_buffer.buffer;
2179         if (buffer)
2180                 ring_buffer_record_enable(buffer);
2181
2182  out:
2183         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2184 }
2185
2186 /**
2187  * tracing_stop - quick stop of the tracer
2188  *
2189  * Light weight way to stop tracing. Use in conjunction with
2190  * tracing_start.
2191  */
2192 void tracing_stop(void)
2193 {
2194         struct trace_buffer *buffer;
2195         unsigned long flags;
2196
2197         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198         if (global_trace.stop_count++)
2199                 goto out;
2200
2201         /* Prevent the buffers from switching */
2202         arch_spin_lock(&global_trace.max_lock);
2203
2204         buffer = global_trace.array_buffer.buffer;
2205         if (buffer)
2206                 ring_buffer_record_disable(buffer);
2207
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209         buffer = global_trace.max_buffer.buffer;
2210         if (buffer)
2211                 ring_buffer_record_disable(buffer);
2212 #endif
2213
2214         arch_spin_unlock(&global_trace.max_lock);
2215
2216  out:
2217         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2218 }
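/*
 * Illustrative sketch: tracing_stop()/tracing_start() are meant for quick
 * debugging hacks, freezing the ring buffer right after an interesting
 * condition so the relevant events are not overwritten.  "saw_the_bug" is
 * a hypothetical condition:
 *
 *	if (saw_the_bug)
 *		tracing_stop();
 *	...
 *	tracing_start();
 */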
2219
2220 static void tracing_stop_tr(struct trace_array *tr)
2221 {
2222         struct trace_buffer *buffer;
2223         unsigned long flags;
2224
2225         /* If global, we need to also stop the max tracer */
2226         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227                 return tracing_stop();
2228
2229         raw_spin_lock_irqsave(&tr->start_lock, flags);
2230         if (tr->stop_count++)
2231                 goto out;
2232
2233         buffer = tr->array_buffer.buffer;
2234         if (buffer)
2235                 ring_buffer_record_disable(buffer);
2236
2237  out:
2238         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2239 }
2240
2241 static int trace_save_cmdline(struct task_struct *tsk)
2242 {
2243         unsigned pid, idx;
2244
2245         /* treat recording of idle task as a success */
2246         if (!tsk->pid)
2247                 return 1;
2248
2249         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2250                 return 0;
2251
2252         /*
2253          * It's not the end of the world if we don't get
2254          * the lock, but we also don't want to spin
2255          * nor do we want to disable interrupts,
2256          * so if we miss here, then better luck next time.
2257          */
2258         if (!arch_spin_trylock(&trace_cmdline_lock))
2259                 return 0;
2260
2261         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262         if (idx == NO_CMDLINE_MAP) {
2263                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2264
2265                 /*
2266                  * Check whether the cmdline buffer at idx has a pid
2267                  * mapped. We are going to overwrite that entry so we
2268                  * need to clear the map_pid_to_cmdline. Otherwise we
2269                  * would read the new comm for the old pid.
2270                  */
2271                 pid = savedcmd->map_cmdline_to_pid[idx];
2272                 if (pid != NO_CMDLINE_MAP)
2273                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2274
2275                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2277
2278                 savedcmd->cmdline_idx = idx;
2279         }
2280
2281         set_cmdline(idx, tsk->comm);
2282
2283         arch_spin_unlock(&trace_cmdline_lock);
2284
2285         return 1;
2286 }
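/*
 * Worked example of the two-way mapping above (all values hypothetical):
 * recording pid 1234 may land in slot 7, which results in
 *
 *	savedcmd->map_pid_to_cmdline[1234] = 7;
 *	savedcmd->map_cmdline_to_pid[7]    = 1234;
 *	set_cmdline(7, "bash");
 *
 * If slot 7 previously belonged to pid 999, map_pid_to_cmdline[999] is
 * reset to NO_CMDLINE_MAP first, so a later lookup of pid 999 falls back
 * to "<...>" instead of reporting pid 1234's comm.
 */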
2287
2288 static void __trace_find_cmdline(int pid, char comm[])
2289 {
2290         unsigned map;
2291
2292         if (!pid) {
2293                 strcpy(comm, "<idle>");
2294                 return;
2295         }
2296
2297         if (WARN_ON_ONCE(pid < 0)) {
2298                 strcpy(comm, "<XXX>");
2299                 return;
2300         }
2301
2302         if (pid > PID_MAX_DEFAULT) {
2303                 strcpy(comm, "<...>");
2304                 return;
2305         }
2306
2307         map = savedcmd->map_pid_to_cmdline[pid];
2308         if (map != NO_CMDLINE_MAP)
2309                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2310         else
2311                 strcpy(comm, "<...>");
2312 }
2313
2314 void trace_find_cmdline(int pid, char comm[])
2315 {
2316         preempt_disable();
2317         arch_spin_lock(&trace_cmdline_lock);
2318
2319         __trace_find_cmdline(pid, comm);
2320
2321         arch_spin_unlock(&trace_cmdline_lock);
2322         preempt_enable();
2323 }
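/*
 * Illustrative sketch of the typical lookup done from an output callback,
 * where "s" is a trace_seq and "entry" a trace_entry (the destination
 * buffer must be at least TASK_COMM_LEN bytes):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 */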
2324
2325 int trace_find_tgid(int pid)
2326 {
2327         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2328                 return 0;
2329
2330         return tgid_map[pid];
2331 }
2332
2333 static int trace_save_tgid(struct task_struct *tsk)
2334 {
2335         /* treat recording of idle task as a success */
2336         if (!tsk->pid)
2337                 return 1;
2338
2339         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2340                 return 0;
2341
2342         tgid_map[tsk->pid] = tsk->tgid;
2343         return 1;
2344 }
2345
2346 static bool tracing_record_taskinfo_skip(int flags)
2347 {
2348         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2349                 return true;
2350         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2351                 return true;
2352         if (!__this_cpu_read(trace_taskinfo_save))
2353                 return true;
2354         return false;
2355 }
2356
2357 /**
2358  * tracing_record_taskinfo - record the task info of a task
2359  *
2360  * @task:  task to record
2361  * @flags: TRACE_RECORD_CMDLINE for recording comm
2362  *         TRACE_RECORD_TGID for recording tgid
2363  */
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2365 {
2366         bool done;
2367
2368         if (tracing_record_taskinfo_skip(flags))
2369                 return;
2370
2371         /*
2372          * Record as much task information as possible. If some fail, continue
2373          * to try to record the others.
2374          */
2375         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2377
2378         /* If recording any information failed, retry again soon. */
2379         if (!done)
2380                 return;
2381
2382         __this_cpu_write(trace_taskinfo_save, false);
2383 }
2384
2385 /**
2386  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2387  *
2388  * @prev: previous task during sched_switch
2389  * @next: next task during sched_switch
2390  * @flags: TRACE_RECORD_CMDLINE for recording comm
2391  *         TRACE_RECORD_TGID for recording tgid
2392  */
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394                                           struct task_struct *next, int flags)
2395 {
2396         bool done;
2397
2398         if (tracing_record_taskinfo_skip(flags))
2399                 return;
2400
2401         /*
2402          * Record as much task information as possible. If some fail, continue
2403          * to try to record the others.
2404          */
2405         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2409
2410         /* If recording any information failed, retry again soon. */
2411         if (!done)
2412                 return;
2413
2414         __this_cpu_write(trace_taskinfo_save, false);
2415 }
2416
2417 /* Helpers to record a specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2419 {
2420         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2421 }
2422
2423 void tracing_record_tgid(struct task_struct *task)
2424 {
2425         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2426 }
2427
2428 /*
2429  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431  * simplifies those functions and keeps them in sync.
2432  */
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2434 {
2435         return trace_seq_has_overflowed(s) ?
2436                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2437 }
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
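/*
 * Illustrative sketch of the intended use from a trace_event output
 * callback ("my_event_trace" is a hypothetical function):
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */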
2439
2440 void
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442                              unsigned long flags, int pc)
2443 {
2444         struct task_struct *tsk = current;
2445
2446         entry->preempt_count            = pc & 0xff;
2447         entry->pid                      = (tsk) ? tsk->pid : 0;
2448         entry->type                     = type;
2449         entry->flags =
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2452 #else
2453                 TRACE_FLAG_IRQS_NOSUPPORT |
2454 #endif
2455                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2456                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2460 }
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2462
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2465                           int type,
2466                           unsigned long len,
2467                           unsigned long flags, int pc)
2468 {
2469         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2470 }
2471
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2475
2476 /**
2477  * trace_buffered_event_enable - enable buffering events
2478  *
2479  * When events are being filtered, it is quicker to use a temporary
2480  * buffer to write the event data into if there's a likely chance
2481  * that it will not be committed. The discard of the ring buffer
2482  * is not as fast as committing, and is much slower than copying
2483  * a commit.
2484  *
2485  * When an event is to be filtered, allocate per cpu buffers to
2486  * write the event data into, and if the event is filtered and discarded
2487  * it is simply dropped, otherwise, the entire data is to be committed
2488  * in one shot.
2489  */
2490 void trace_buffered_event_enable(void)
2491 {
2492         struct ring_buffer_event *event;
2493         struct page *page;
2494         int cpu;
2495
2496         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2497
2498         if (trace_buffered_event_ref++)
2499                 return;
2500
2501         for_each_tracing_cpu(cpu) {
2502                 page = alloc_pages_node(cpu_to_node(cpu),
2503                                         GFP_KERNEL | __GFP_NORETRY, 0);
2504                 if (!page)
2505                         goto failed;
2506
2507                 event = page_address(page);
2508                 memset(event, 0, sizeof(*event));
2509
2510                 per_cpu(trace_buffered_event, cpu) = event;
2511
2512                 preempt_disable();
2513                 if (cpu == smp_processor_id() &&
2514                     __this_cpu_read(trace_buffered_event) !=
2515                     per_cpu(trace_buffered_event, cpu))
2516                         WARN_ON_ONCE(1);
2517                 preempt_enable();
2518         }
2519
2520         return;
2521  failed:
2522         trace_buffered_event_disable();
2523 }
2524
2525 static void enable_trace_buffered_event(void *data)
2526 {
2527         /* Probably not needed, but do it anyway */
2528         smp_rmb();
2529         this_cpu_dec(trace_buffered_event_cnt);
2530 }
2531
2532 static void disable_trace_buffered_event(void *data)
2533 {
2534         this_cpu_inc(trace_buffered_event_cnt);
2535 }
2536
2537 /**
2538  * trace_buffered_event_disable - disable buffering events
2539  *
2540  * When a filter is removed, it is faster to not use the buffered
2541  * events, and to commit directly into the ring buffer. Free up
2542  * the temp buffers when there are no more users. This requires
2543  * special synchronization with current events.
2544  */
2545 void trace_buffered_event_disable(void)
2546 {
2547         int cpu;
2548
2549         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2550
2551         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2552                 return;
2553
2554         if (--trace_buffered_event_ref)
2555                 return;
2556
2557         preempt_disable();
2558         /* For each CPU, set the buffer as used. */
2559         smp_call_function_many(tracing_buffer_mask,
2560                                disable_trace_buffered_event, NULL, 1);
2561         preempt_enable();
2562
2563         /* Wait for all current users to finish */
2564         synchronize_rcu();
2565
2566         for_each_tracing_cpu(cpu) {
2567                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568                 per_cpu(trace_buffered_event, cpu) = NULL;
2569         }
2570         /*
2571          * Make sure trace_buffered_event is NULL before clearing
2572          * trace_buffered_event_cnt.
2573          */
2574         smp_wmb();
2575
2576         preempt_disable();
2577         /* Do the work on each cpu */
2578         smp_call_function_many(tracing_buffer_mask,
2579                                enable_trace_buffered_event, NULL, 1);
2580         preempt_enable();
2581 }
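/*
 * Illustrative sketch of the expected pairing.  Each call must happen
 * under event_mutex, as the WARN_ON_ONCE()s above enforce, and every
 * enable is eventually balanced by a disable:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */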
2582
2583 static struct trace_buffer *temp_buffer;
2584
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587                           struct trace_event_file *trace_file,
2588                           int type, unsigned long len,
2589                           unsigned long flags, int pc)
2590 {
2591         struct ring_buffer_event *entry;
2592         int val;
2593
2594         *current_rb = trace_file->tr->array_buffer.buffer;
2595
2596         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598             (entry = this_cpu_read(trace_buffered_event))) {
2599                 /* Try to use the per cpu buffer first */
2600                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2601                 if (val == 1) {
2602                         trace_event_setup(entry, type, flags, pc);
2603                         entry->array[0] = len;
2604                         return entry;
2605                 }
2606                 this_cpu_dec(trace_buffered_event_cnt);
2607         }
2608
2609         entry = __trace_buffer_lock_reserve(*current_rb,
2610                                             type, len, flags, pc);
2611         /*
2612          * If tracing is off, but we have triggers enabled
2613          * we still need to look at the event data. Use the temp_buffer
2614          * to store the trace event for the trigger to use. It's recursion
2615          * safe and will not be recorded anywhere.
2616          */
2617         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618                 *current_rb = temp_buffer;
2619                 entry = __trace_buffer_lock_reserve(*current_rb,
2620                                                     type, len, flags, pc);
2621         }
2622         return entry;
2623 }
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2625
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2628
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2630 {
2631         struct trace_event_call *event_call;
2632         struct trace_event_file *file;
2633         struct trace_event *event;
2634         unsigned long flags;
2635         struct trace_iterator *iter = tracepoint_print_iter;
2636
2637         /* We should never get here if iter is NULL */
2638         if (WARN_ON_ONCE(!iter))
2639                 return;
2640
2641         event_call = fbuffer->trace_file->event_call;
2642         if (!event_call || !event_call->event.funcs ||
2643             !event_call->event.funcs->trace)
2644                 return;
2645
2646         file = fbuffer->trace_file;
2647         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649              !filter_match_preds(file->filter, fbuffer->entry)))
2650                 return;
2651
2652         event = &fbuffer->trace_file->event_call->event;
2653
2654         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655         trace_seq_init(&iter->seq);
2656         iter->ent = fbuffer->entry;
2657         event_call->event.funcs->trace(iter, 0, event);
2658         trace_seq_putc(&iter->seq, 0);
2659         printk("%s", iter->seq.buffer);
2660
2661         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2662 }
2663
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665                              void *buffer, size_t *lenp,
2666                              loff_t *ppos)
2667 {
2668         int save_tracepoint_printk;
2669         int ret;
2670
2671         mutex_lock(&tracepoint_printk_mutex);
2672         save_tracepoint_printk = tracepoint_printk;
2673
2674         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2675
2676         /*
2677          * This will force exiting early, as tracepoint_printk
2678          * is always zero when tracepoint_print_iter is not allocated
2679          */
2680         if (!tracepoint_print_iter)
2681                 tracepoint_printk = 0;
2682
2683         if (save_tracepoint_printk == tracepoint_printk)
2684                 goto out;
2685
2686         if (tracepoint_printk)
2687                 static_key_enable(&tracepoint_printk_key.key);
2688         else
2689                 static_key_disable(&tracepoint_printk_key.key);
2690
2691  out:
2692         mutex_unlock(&tracepoint_printk_mutex);
2693
2694         return ret;
2695 }
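/*
 * Illustrative usage from userspace, assuming the kernel was booted with
 * the "tp_printk" option so that tracepoint_print_iter got allocated:
 *
 *	sysctl kernel.tracepoint_printk=1	(also send events to printk)
 *	sysctl kernel.tracepoint_printk=0	(ring buffer only again)
 */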
2696
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2698 {
2699         if (static_key_false(&tracepoint_printk_key.key))
2700                 output_printk(fbuffer);
2701
2702         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703                                     fbuffer->event, fbuffer->entry,
2704                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2705 }
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2707
2708 /*
2709  * Skip 3:
2710  *
2711  *   trace_buffer_unlock_commit_regs()
2712  *   trace_event_buffer_commit()
2713  *   trace_event_raw_event_xxx()
2714  */
2715 # define STACK_SKIP 3
2716
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718                                      struct trace_buffer *buffer,
2719                                      struct ring_buffer_event *event,
2720                                      unsigned long flags, int pc,
2721                                      struct pt_regs *regs)
2722 {
2723         __buffer_unlock_commit(buffer, event);
2724
2725         /*
2726          * If regs is not set, then skip the necessary functions.
2727          * Note, we can still get here via blktrace, wakeup tracer
2728          * and mmiotrace, but that's ok if they lose a function or
2729          * two. They are not that meaningful.
2730          */
2731         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732         ftrace_trace_userstack(buffer, flags, pc);
2733 }
2734
2735 /*
2736  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2737  */
2738 void
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740                                    struct ring_buffer_event *event)
2741 {
2742         __buffer_unlock_commit(buffer, event);
2743 }
2744
2745 static void
2746 trace_process_export(struct trace_export *export,
2747                struct ring_buffer_event *event)
2748 {
2749         struct trace_entry *entry;
2750         unsigned int size = 0;
2751
2752         entry = ring_buffer_event_data(event);
2753         size = ring_buffer_event_length(event);
2754         export->write(export, entry, size);
2755 }
2756
2757 static DEFINE_MUTEX(ftrace_export_lock);
2758
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2760
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2762
2763 static inline void ftrace_exports_enable(void)
2764 {
2765         static_branch_enable(&ftrace_exports_enabled);
2766 }
2767
2768 static inline void ftrace_exports_disable(void)
2769 {
2770         static_branch_disable(&ftrace_exports_enabled);
2771 }
2772
2773 static void ftrace_exports(struct ring_buffer_event *event)
2774 {
2775         struct trace_export *export;
2776
2777         preempt_disable_notrace();
2778
2779         export = rcu_dereference_raw_check(ftrace_exports_list);
2780         while (export) {
2781                 trace_process_export(export, event);
2782                 export = rcu_dereference_raw_check(export->next);
2783         }
2784
2785         preempt_enable_notrace();
2786 }
2787
2788 static inline void
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2790 {
2791         rcu_assign_pointer(export->next, *list);
2792         /*
2793          * We are adding the export to the list, but another
2794          * CPU might be walking that list. We need to make sure
2795          * the export->next pointer is valid before another CPU sees
2796          * the export pointer included in the list.
2797          */
2798         rcu_assign_pointer(*list, export);
2799 }
2800
2801 static inline int
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2803 {
2804         struct trace_export **p;
2805
2806         for (p = list; *p != NULL; p = &(*p)->next)
2807                 if (*p == export)
2808                         break;
2809
2810         if (*p != export)
2811                 return -1;
2812
2813         rcu_assign_pointer(*p, (*p)->next);
2814
2815         return 0;
2816 }
2817
2818 static inline void
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2820 {
2821         if (*list == NULL)
2822                 ftrace_exports_enable();
2823
2824         add_trace_export(list, export);
2825 }
2826
2827 static inline int
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2829 {
2830         int ret;
2831
2832         ret = rm_trace_export(list, export);
2833         if (*list == NULL)
2834                 ftrace_exports_disable();
2835
2836         return ret;
2837 }
2838
2839 int register_ftrace_export(struct trace_export *export)
2840 {
2841         if (WARN_ON_ONCE(!export->write))
2842                 return -1;
2843
2844         mutex_lock(&ftrace_export_lock);
2845
2846         add_ftrace_export(&ftrace_exports_list, export);
2847
2848         mutex_unlock(&ftrace_export_lock);
2849
2850         return 0;
2851 }
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
2853
2854 int unregister_ftrace_export(struct trace_export *export)
2855 {
2856         int ret;
2857
2858         mutex_lock(&ftrace_export_lock);
2859
2860         ret = rm_ftrace_export(&ftrace_exports_list, export);
2861
2862         mutex_unlock(&ftrace_export_lock);
2863
2864         return ret;
2865 }
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
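/*
 * Illustrative sketch of a minimal ftrace export; all "my_*" names are
 * hypothetical.  While registered, the ->write() callback receives the
 * raw trace entry and its length for every function trace event:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the binary entry to another transport ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */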
2867
2868 void
2869 trace_function(struct trace_array *tr,
2870                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2871                int pc)
2872 {
2873         struct trace_event_call *call = &event_function;
2874         struct trace_buffer *buffer = tr->array_buffer.buffer;
2875         struct ring_buffer_event *event;
2876         struct ftrace_entry *entry;
2877
2878         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2879                                             flags, pc);
2880         if (!event)
2881                 return;
2882         entry   = ring_buffer_event_data(event);
2883         entry->ip                       = ip;
2884         entry->parent_ip                = parent_ip;
2885
2886         if (!call_filter_check_discard(call, entry, buffer, event)) {
2887                 if (static_branch_unlikely(&ftrace_exports_enabled))
2888                         ftrace_exports(event);
2889                 __buffer_unlock_commit(buffer, event);
2890         }
2891 }
2892
2893 #ifdef CONFIG_STACKTRACE
2894
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING   4
2897
2898 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2899
2900 struct ftrace_stack {
2901         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2902 };
2903
2904
2905 struct ftrace_stacks {
2906         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2907 };
2908
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2911
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913                                  unsigned long flags,
2914                                  int skip, int pc, struct pt_regs *regs)
2915 {
2916         struct trace_event_call *call = &event_kernel_stack;
2917         struct ring_buffer_event *event;
2918         unsigned int size, nr_entries;
2919         struct ftrace_stack *fstack;
2920         struct stack_entry *entry;
2921         int stackidx;
2922
2923         /*
2924          * Add one, for this function and the call to stack_trace_save().
2925          * If regs is set, then these functions will not be in the way.
2926          */
2927 #ifndef CONFIG_UNWINDER_ORC
2928         if (!regs)
2929                 skip++;
2930 #endif
2931
2932         preempt_disable_notrace();
2933
2934         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2935
2936         /* This should never happen. If it does, yell once and skip */
2937         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2938                 goto out;
2939
2940         /*
2941          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2942          * interrupt will either see the value pre increment or post
2943          * increment. If the interrupt happens pre increment it will have
2944          * restored the counter when it returns.  We just need a barrier to
2945          * keep gcc from moving things around.
2946          */
2947         barrier();
2948
2949         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2950         size = ARRAY_SIZE(fstack->calls);
2951
2952         if (regs) {
2953                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2954                                                    size, skip);
2955         } else {
2956                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2957         }
2958
2959         size = nr_entries * sizeof(unsigned long);
2960         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2961                                             sizeof(*entry) + size, flags, pc);
2962         if (!event)
2963                 goto out;
2964         entry = ring_buffer_event_data(event);
2965
2966         memcpy(&entry->caller, fstack->calls, size);
2967         entry->size = nr_entries;
2968
2969         if (!call_filter_check_discard(call, entry, buffer, event))
2970                 __buffer_unlock_commit(buffer, event);
2971
2972  out:
2973         /* Again, don't let gcc optimize things here */
2974         barrier();
2975         __this_cpu_dec(ftrace_stack_reserve);
2976         preempt_enable_notrace();
2977
2978 }
2979
2980 static inline void ftrace_trace_stack(struct trace_array *tr,
2981                                       struct trace_buffer *buffer,
2982                                       unsigned long flags,
2983                                       int skip, int pc, struct pt_regs *regs)
2984 {
2985         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2986                 return;
2987
2988         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2989 }
2990
2991 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2992                    int pc)
2993 {
2994         struct trace_buffer *buffer = tr->array_buffer.buffer;
2995
2996         if (rcu_is_watching()) {
2997                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2998                 return;
2999         }
3000
3001         /*
3002          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3003          * but if the above rcu_is_watching() failed, then the NMI
3004          * triggered someplace critical, and rcu_irq_enter() should
3005          * not be called from NMI.
3006          */
3007         if (unlikely(in_nmi()))
3008                 return;
3009
3010         rcu_irq_enter_irqson();
3011         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3012         rcu_irq_exit_irqson();
3013 }
3014
3015 /**
3016  * trace_dump_stack - record a stack back trace in the trace buffer
3017  * @skip: Number of functions to skip (helper handlers)
3018  */
3019 void trace_dump_stack(int skip)
3020 {
3021         unsigned long flags;
3022
3023         if (tracing_disabled || tracing_selftest_running)
3024                 return;
3025
3026         local_save_flags(flags);
3027
3028 #ifndef CONFIG_UNWINDER_ORC
3029         /* Skip 1 to skip this function. */
3030         skip++;
3031 #endif
3032         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3033                              flags, skip, preempt_count(), NULL);
3034 }
3035 EXPORT_SYMBOL_GPL(trace_dump_stack);
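/*
 * Illustrative usage: a quick way to record "how did we get here?" into
 * the ring buffer instead of the console while debugging:
 *
 *	trace_dump_stack(0);
 */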
3036
3037 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3038 static DEFINE_PER_CPU(int, user_stack_count);
3039
3040 static void
3041 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3042 {
3043         struct trace_event_call *call = &event_user_stack;
3044         struct ring_buffer_event *event;
3045         struct userstack_entry *entry;
3046
3047         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3048                 return;
3049
3050         /*
3051          * NMIs cannot handle page faults, even with fixups.
3052          * Saving the user stack can (and often does) fault.
3053          */
3054         if (unlikely(in_nmi()))
3055                 return;
3056
3057         /*
3058          * prevent recursion, since the user stack tracing may
3059          * trigger other kernel events.
3060          */
3061         preempt_disable();
3062         if (__this_cpu_read(user_stack_count))
3063                 goto out;
3064
3065         __this_cpu_inc(user_stack_count);
3066
3067         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3068                                             sizeof(*entry), flags, pc);
3069         if (!event)
3070                 goto out_drop_count;
3071         entry   = ring_buffer_event_data(event);
3072
3073         entry->tgid             = current->tgid;
3074         memset(&entry->caller, 0, sizeof(entry->caller));
3075
3076         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3077         if (!call_filter_check_discard(call, entry, buffer, event))
3078                 __buffer_unlock_commit(buffer, event);
3079
3080  out_drop_count:
3081         __this_cpu_dec(user_stack_count);
3082  out:
3083         preempt_enable();
3084 }
3085 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3086 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3087                                    unsigned long flags, int pc)
3088 {
3089 }
3090 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3091
3092 #endif /* CONFIG_STACKTRACE */
3093
3094 /* created for use with alloc_percpu */
3095 struct trace_buffer_struct {
3096         int nesting;
3097         char buffer[4][TRACE_BUF_SIZE];
3098 };
3099
3100 static struct trace_buffer_struct *trace_percpu_buffer;
3101
3102 /*
3103  * This allows for lockless recording.  If we're nested too deeply, then
3104  * this returns NULL.
3105  */
3106 static char *get_trace_buf(void)
3107 {
3108         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3109
3110         if (!buffer || buffer->nesting >= 4)
3111                 return NULL;
3112
3113         buffer->nesting++;
3114
3115         /* Interrupts must see nesting incremented before we use the buffer */
3116         barrier();
3117         return &buffer->buffer[buffer->nesting][0];
3118 }
3119
3120 static void put_trace_buf(void)
3121 {
3122         /* Don't let the decrement of nesting leak before this */
3123         barrier();
3124         this_cpu_dec(trace_percpu_buffer->nesting);
3125 }
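/*
 * Illustrative sketch of the get/put pairing used by the printk-style
 * helpers below (preemption must stay disabled between the two calls):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		... copy buf into a ring buffer event ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */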
3126
3127 static int alloc_percpu_trace_buffer(void)
3128 {
3129         struct trace_buffer_struct *buffers;
3130
3131         if (trace_percpu_buffer)
3132                 return 0;
3133
3134         buffers = alloc_percpu(struct trace_buffer_struct);
3135         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3136                 return -ENOMEM;
3137
3138         trace_percpu_buffer = buffers;
3139         return 0;
3140 }
3141
3142 static int buffers_allocated;
3143
3144 void trace_printk_init_buffers(void)
3145 {
3146         if (buffers_allocated)
3147                 return;
3148
3149         if (alloc_percpu_trace_buffer())
3150                 return;
3151
3152         /* trace_printk() is for debug use only. Don't use it in production. */
3153
3154         pr_warn("\n");
3155         pr_warn("**********************************************************\n");
3156         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3157         pr_warn("**                                                      **\n");
3158         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3159         pr_warn("**                                                      **\n");
3160         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3161         pr_warn("** unsafe for production use.                           **\n");
3162         pr_warn("**                                                      **\n");
3163         pr_warn("** If you see this message and you are not debugging    **\n");
3164         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3165         pr_warn("**                                                      **\n");
3166         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3167         pr_warn("**********************************************************\n");
3168
3169         /* Expand the buffers to set size */
3170         tracing_update_buffers();
3171
3172         buffers_allocated = 1;
3173
3174         /*
3175          * trace_printk_init_buffers() can be called by modules.
3176          * If that happens, then we need to start cmdline recording
3177          * directly here. If the global_trace.array_buffer.buffer is already
3178          * allocated here, then this was called by module code.
3179          */
3180         if (global_trace.array_buffer.buffer)
3181                 tracing_start_cmdline_record();
3182 }
3183 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3184
3185 void trace_printk_start_comm(void)
3186 {
3187         /* Start tracing comms if trace printk is set */
3188         if (!buffers_allocated)
3189                 return;
3190         tracing_start_cmdline_record();
3191 }
3192
3193 static void trace_printk_start_stop_comm(int enabled)
3194 {
3195         if (!buffers_allocated)
3196                 return;
3197
3198         if (enabled)
3199                 tracing_start_cmdline_record();
3200         else
3201                 tracing_stop_cmdline_record();
3202 }
3203
3204 /**
3205  * trace_vbprintk - write binary msg to tracing buffer
3206  * @ip:    The address of the caller
3207  * @fmt:   The string format to write to the buffer
3208  * @args:  Arguments for @fmt
3209  */
3210 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3211 {
3212         struct trace_event_call *call = &event_bprint;
3213         struct ring_buffer_event *event;
3214         struct trace_buffer *buffer;
3215         struct trace_array *tr = &global_trace;
3216         struct bprint_entry *entry;
3217         unsigned long flags;
3218         char *tbuffer;
3219         int len = 0, size, pc;
3220
3221         if (unlikely(tracing_selftest_running || tracing_disabled))
3222                 return 0;
3223
3224         /* Don't pollute graph traces with trace_vprintk internals */
3225         pause_graph_tracing();
3226
3227         pc = preempt_count();
3228         preempt_disable_notrace();
3229
3230         tbuffer = get_trace_buf();
3231         if (!tbuffer) {
3232                 len = 0;
3233                 goto out_nobuffer;
3234         }
3235
3236         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3237
3238         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3239                 goto out_put;
3240
3241         local_save_flags(flags);
3242         size = sizeof(*entry) + sizeof(u32) * len;
3243         buffer = tr->array_buffer.buffer;
3244         ring_buffer_nest_start(buffer);
3245         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3246                                             flags, pc);
3247         if (!event)
3248                 goto out;
3249         entry = ring_buffer_event_data(event);
3250         entry->ip                       = ip;
3251         entry->fmt                      = fmt;
3252
3253         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3254         if (!call_filter_check_discard(call, entry, buffer, event)) {
3255                 __buffer_unlock_commit(buffer, event);
3256                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3257         }
3258
3259 out:
3260         ring_buffer_nest_end(buffer);
3261 out_put:
3262         put_trace_buf();
3263
3264 out_nobuffer:
3265         preempt_enable_notrace();
3266         unpause_graph_tracing();
3267
3268         return len;
3269 }
3270 EXPORT_SYMBOL_GPL(trace_vbprintk);
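     /*
      * Illustrative sketch only (not part of this file): trace_vbprintk()
      * is normally reached through a varargs wrapper that forwards its
      * va_list, roughly:
      *
      *	int my_bprintk(unsigned long ip, const char *fmt, ...)
      *	{
      *		va_list ap;
      *		int ret;
      *
      *		va_start(ap, fmt);
      *		ret = trace_vbprintk(ip, fmt, ap);
      *		va_end(ap);
      *		return ret;
      *	}
      *
      * ("my_bprintk" is a hypothetical name used only for the example.)
      */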
3271
3272 __printf(3, 0)
3273 static int
3274 __trace_array_vprintk(struct trace_buffer *buffer,
3275                       unsigned long ip, const char *fmt, va_list args)
3276 {
3277         struct trace_event_call *call = &event_print;
3278         struct ring_buffer_event *event;
3279         int len = 0, size, pc;
3280         struct print_entry *entry;
3281         unsigned long flags;
3282         char *tbuffer;
3283
3284         if (tracing_disabled || tracing_selftest_running)
3285                 return 0;
3286
3287         /* Don't pollute graph traces with trace_vprintk internals */
3288         pause_graph_tracing();
3289
3290         pc = preempt_count();
3291         preempt_disable_notrace();
3292
3293
3294         tbuffer = get_trace_buf();
3295         if (!tbuffer) {
3296                 len = 0;
3297                 goto out_nobuffer;
3298         }
3299
3300         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3301
3302         local_save_flags(flags);
3303         size = sizeof(*entry) + len + 1;
3304         ring_buffer_nest_start(buffer);
3305         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3306                                             flags, pc);
3307         if (!event)
3308                 goto out;
3309         entry = ring_buffer_event_data(event);
3310         entry->ip = ip;
3311
3312         memcpy(&entry->buf, tbuffer, len + 1);
3313         if (!call_filter_check_discard(call, entry, buffer, event)) {
3314                 __buffer_unlock_commit(buffer, event);
3315                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3316         }
3317
3318 out:
3319         ring_buffer_nest_end(buffer);
3320         put_trace_buf();
3321
3322 out_nobuffer:
3323         preempt_enable_notrace();
3324         unpause_graph_tracing();
3325
3326         return len;
3327 }
3328
3329 __printf(3, 0)
3330 int trace_array_vprintk(struct trace_array *tr,
3331                         unsigned long ip, const char *fmt, va_list args)
3332 {
3333         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3334 }
3335
3336 /**
3337  * trace_array_printk - Print a message to a specific instance
3338  * @tr: The instance trace_array descriptor
3339  * @ip: The instruction pointer that this is called from.
3340  * @fmt: The format to print (printf format)
3341  *
3342  * If a subsystem sets up its own instance, it may printk strings
3343  * into its tracing instance buffer using this function. Note, this
3344  * function will not write into the top level buffer (use
3345  * trace_printk() for that), as the top level buffer should only
3346  * contain events that can be individually disabled.
3347  * trace_printk() is only used for debugging a kernel, and should
3348  * never be incorporated into normal use.
3349  *
3350  * trace_array_printk() can be used, as it will not add noise to the
3351  * top level tracing buffer.
3352  *
3353  * Note, trace_array_init_printk() must be called on @tr before this
3354  * can be used.
3355  */
3356 __printf(3, 0)
3357 int trace_array_printk(struct trace_array *tr,
3358                        unsigned long ip, const char *fmt, ...)
3359 {
3360         int ret;
3361         va_list ap;
3362
3363         if (!tr)
3364                 return -ENOENT;
3365
3366         /* This is only allowed for created instances */
3367         if (tr == &global_trace)
3368                 return 0;
3369
3370         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3371                 return 0;
3372
3373         va_start(ap, fmt);
3374         ret = trace_array_vprintk(tr, ip, fmt, ap);
3375         va_end(ap);
3376         return ret;
3377 }
3378 EXPORT_SYMBOL_GPL(trace_array_printk);
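     /*
      * Illustrative example (a sketch; "my_tr" is a hypothetical instance,
      * e.g. obtained from trace_array_get_by_name(), on which
      * trace_array_init_printk() has already been called):
      *
      *	trace_array_printk(my_tr, _THIS_IP_, "probed device %d\n", id);
      */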
3379
3380 /**
3381  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3382  * @tr: The trace array to initialize the buffers for
3383  *
3384  * As trace_array_printk() only writes into instances, they are OK to
3385  * have in the kernel (unlike trace_printk()). This needs to be called
3386  * before trace_array_printk() can be used on a trace_array.
3387  */
3388 int trace_array_init_printk(struct trace_array *tr)
3389 {
3390         if (!tr)
3391                 return -ENOENT;
3392
3393         /* This is only allowed for created instances */
3394         if (tr == &global_trace)
3395                 return -EINVAL;
3396
3397         return alloc_percpu_trace_buffer();
3398 }
3399 EXPORT_SYMBOL_GPL(trace_array_init_printk);
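     /*
      * Illustrative example (a sketch): call this once during setup, before
      * the first trace_array_printk() on the instance:
      *
      *	ret = trace_array_init_printk(my_tr);
      *	if (ret)
      *		return ret;
      */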
3400
3401 __printf(3, 4)
3402 int trace_array_printk_buf(struct trace_buffer *buffer,
3403                            unsigned long ip, const char *fmt, ...)
3404 {
3405         int ret;
3406         va_list ap;
3407
3408         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3409                 return 0;
3410
3411         va_start(ap, fmt);
3412         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3413         va_end(ap);
3414         return ret;
3415 }
3416
3417 __printf(2, 0)
3418 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3419 {
3420         return trace_array_vprintk(&global_trace, ip, fmt, args);
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vprintk);
3423
3424 static void trace_iterator_increment(struct trace_iterator *iter)
3425 {
3426         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3427
3428         iter->idx++;
3429         if (buf_iter)
3430                 ring_buffer_iter_advance(buf_iter);
3431 }
3432
3433 static struct trace_entry *
3434 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3435                 unsigned long *lost_events)
3436 {
3437         struct ring_buffer_event *event;
3438         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3439
3440         if (buf_iter) {
3441                 event = ring_buffer_iter_peek(buf_iter, ts);
3442                 if (lost_events)
3443                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3444                                 (unsigned long)-1 : 0;
3445         } else {
3446                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3447                                          lost_events);
3448         }
3449
3450         if (event) {
3451                 iter->ent_size = ring_buffer_event_length(event);
3452                 return ring_buffer_event_data(event);
3453         }
3454         iter->ent_size = 0;
3455         return NULL;
3456 }
3457
3458 static struct trace_entry *
3459 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3460                   unsigned long *missing_events, u64 *ent_ts)
3461 {
3462         struct trace_buffer *buffer = iter->array_buffer->buffer;
3463         struct trace_entry *ent, *next = NULL;
3464         unsigned long lost_events = 0, next_lost = 0;
3465         int cpu_file = iter->cpu_file;
3466         u64 next_ts = 0, ts;
3467         int next_cpu = -1;
3468         int next_size = 0;
3469         int cpu;
3470
3471         /*
3472          * If we are in a per_cpu trace file, don't bother iterating over
3473          * all CPUs; peek at that CPU directly.
3474          */
3475         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3476                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3477                         return NULL;
3478                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3479                 if (ent_cpu)
3480                         *ent_cpu = cpu_file;
3481
3482                 return ent;
3483         }
3484
3485         for_each_tracing_cpu(cpu) {
3486
3487                 if (ring_buffer_empty_cpu(buffer, cpu))
3488                         continue;
3489
3490                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3491
3492                 /*
3493                  * Pick the entry with the smallest timestamp:
3494                  */
3495                 if (ent && (!next || ts < next_ts)) {
3496                         next = ent;
3497                         next_cpu = cpu;
3498                         next_ts = ts;
3499                         next_lost = lost_events;
3500                         next_size = iter->ent_size;
3501                 }
3502         }
3503
3504         iter->ent_size = next_size;
3505
3506         if (ent_cpu)
3507                 *ent_cpu = next_cpu;
3508
3509         if (ent_ts)
3510                 *ent_ts = next_ts;
3511
3512         if (missing_events)
3513                 *missing_events = next_lost;
3514
3515         return next;
3516 }
3517
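     /*
      * Fallback buffer used as iter->temp by ftrace_dump(), where kmalloc()
      * cannot be relied on. See trace_find_next_entry() below.
      */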
3518 #define STATIC_TEMP_BUF_SIZE    128
3519 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3520
3521 /* Find the next real entry, without updating the iterator itself */
3522 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3523                                           int *ent_cpu, u64 *ent_ts)
3524 {
3525         /* __find_next_entry will reset ent_size */
3526         int ent_size = iter->ent_size;
3527         struct trace_entry *entry;
3528
3529         /*
3530          * If called from ftrace_dump(), then the iter->temp buffer
3531          * will be the static_temp_buf and not created from kmalloc.
3532          * If the entry size is greater than the buffer, we cannot
3533          * save it. Just return NULL in that case. This is only
3534          * used to add markers when two consecutive events' time
3535          * stamps have a large delta. See trace_print_lat_context().
3536          */
3537         if (iter->temp == static_temp_buf &&
3538             STATIC_TEMP_BUF_SIZE < ent_size)
3539                 return NULL;
3540
3541         /*
3542          * The __find_next_entry() may call peek_next_entry(), which may
3543          * call ring_buffer_peek() that may make the contents of iter->ent
3544          * undefined. Need to copy iter->ent now.
3545          */
3546         if (iter->ent && iter->ent != iter->temp) {
3547                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3548                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3549                         kfree(iter->temp);
3550                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3551                         if (!iter->temp)
3552                                 return NULL;
3553                 }
3554                 memcpy(iter->temp, iter->ent, iter->ent_size);
3555                 iter->temp_size = iter->ent_size;
3556                 iter->ent = iter->temp;
3557         }
3558         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3559         /* Put back the original ent_size */
3560         iter->ent_size = ent_size;
3561
3562         return entry;
3563 }
3564
3565 /* Find the next real entry, and increment the iterator to the next entry */
3566 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3567 {
3568         iter->ent = __find_next_entry(iter, &iter->cpu,
3569                                       &iter->lost_events, &iter->ts);
3570
3571         if (iter->ent)
3572                 trace_iterator_increment(iter);
3573
3574         return iter->ent ? iter : NULL;
3575 }
3576
3577 static void trace_consume(struct trace_iterator *iter)
3578 {
3579         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3580                             &iter->lost_events);
3581 }
3582
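     /*
      * seq_file ->next() callback: walk the trace iterator forward until it
      * reaches position *pos, returning NULL when the buffers are exhausted.
      */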
3583 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3584 {
3585         struct trace_iterator *iter = m->private;
3586         int i = (int)*pos;
3587         void *ent;
3588
3589         WARN_ON_ONCE(iter->leftover);
3590
3591         (*pos)++;
3592
3593         /* can't go backwards */
3594         if (iter->idx > i)
3595                 return NULL;
3596
3597         if (iter->idx < 0)
3598                 ent = trace_find_next_entry_inc(iter);
3599         else
3600                 ent = iter;
3601
3602         while (ent && iter->idx < i)
3603                 ent = trace_find_next_entry_inc(iter);
3604
3605         iter->pos = *pos;
3606
3607         return ent;
3608 }
3609
3610 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3611 {
3612         struct ring_buffer_iter *buf_iter;
3613         unsigned long entries = 0;
3614         u64 ts;
3615
3616         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3617
3618         buf_iter = trace_buffer_iter(iter, cpu);
3619         if (!buf_iter)
3620                 return;
3621
3622         ring_buffer_iter_reset(buf_iter);
3623
3624         /*
3625          * With the max latency tracers, a reset may never have taken
3626          * place on a CPU. This is evident when a timestamp precedes
3627          * the start of the buffer.
3628          */
3629         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3630                 if (ts >= iter->array_buffer->time_start)
3631                         break;
3632                 entries++;
3633                 ring_buffer_iter_advance(buf_iter);
3634         }
3635
3636         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3637 }
3638
3639 /*
3640  * The current tracer is copied to avoid taking a global lock
3641  * all around.
3642  */
3643 static void *s_start(struct seq_file *m, loff_t *pos)
3644 {
3645         struct trace_iterator *iter = m->private;
3646         struct trace_array *tr = iter->tr;
3647         int cpu_file = iter->cpu_file;
3648         void *p = NULL;
3649         loff_t l = 0;
3650         int cpu;
3651
3652         /*
3653          * Copy the tracer to avoid using a global lock all around.
3654          * iter->trace is a copy of current_trace, so the name pointer
3655          * may be compared instead of calling strcmp(), as iter->trace->name
3656          * will point to the same string as current_trace->name.
3657          */
3658         mutex_lock(&trace_types_lock);
3659         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3660                 *iter->trace = *tr->current_trace;
3661         mutex_unlock(&trace_types_lock);
3662
3663 #ifdef CONFIG_TRACER_MAX_TRACE
3664         if (iter->snapshot && iter->trace->use_max_tr)
3665                 return ERR_PTR(-EBUSY);
3666 #endif
3667
3668         if (!iter->snapshot)
3669                 atomic_inc(&trace_record_taskinfo_disabled);
3670
3671         if (*pos != iter->pos) {
3672                 iter->ent = NULL;
3673                 iter->cpu = 0;
3674                 iter->idx = -1;
3675
3676                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3677                         for_each_tracing_cpu(cpu)
3678                                 tracing_iter_reset(iter, cpu);
3679                 } else
3680                         tracing_iter_reset(iter, cpu_file);
3681
3682                 iter->leftover = 0;
3683                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3684                         ;
3685
3686         } else {
3687                 /*
3688                  * If we overflowed the seq_file before, then we want
3689                  * to just reuse the trace_seq buffer again.
3690                  */
3691                 if (iter->leftover)
3692                         p = iter;
3693                 else {
3694                         l = *pos - 1;
3695                         p = s_next(m, p, &l);
3696                 }
3697         }
3698
3699         trace_event_read_lock();
3700         trace_access_lock(cpu_file);
3701         return p;
3702 }
3703
3704 static void s_stop(struct seq_file *m, void *p)
3705 {
3706         struct trace_iterator *iter = m->private;
3707
3708 #ifdef CONFIG_TRACER_MAX_TRACE
3709         if (iter->snapshot && iter->trace->use_max_tr)
3710                 return;
3711 #endif
3712
3713         if (!iter->snapshot)
3714                 atomic_dec(&trace_record_taskinfo_disabled);
3715
3716         trace_access_unlock(iter->cpu_file);
3717         trace_event_read_unlock();
3718 }
3719
3720 static void
3721 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3722                       unsigned long *entries, int cpu)
3723 {
3724         unsigned long count;
3725
3726         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3727         /*
3728          * If this buffer has skipped entries, then we hold all
3729          * entries for the trace and we need to ignore the
3730          * ones before the time stamp.
3731          */
3732         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3733                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3734                 /* total is the same as the entries */
3735                 *total = count;
3736         } else
3737                 *total = count +
3738                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3739         *entries = count;
3740 }
3741
3742 static void
3743 get_total_entries(struct array_buffer *buf,
3744                   unsigned long *total, unsigned long *entries)
3745 {
3746         unsigned long t, e;
3747         int cpu;
3748
3749         *total = 0;
3750         *entries = 0;
3751
3752         for_each_tracing_cpu(cpu) {
3753                 get_total_entries_cpu(buf, &t, &e, cpu);
3754                 *total += t;
3755                 *entries += e;
3756         }
3757 }
3758
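     /*
      * Return the number of entries currently held in @tr's ring buffer for
      * @cpu (the top-level trace array is used when @tr is NULL).
      */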
3759 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3760 {
3761         unsigned long total, entries;
3762
3763         if (!tr)
3764                 tr = &global_trace;
3765
3766         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3767
3768         return entries;
3769 }
3770
3771 unsigned long trace_total_entries(struct trace_array *tr)
3772 {
3773         unsigned long total, entries;
3774
3775         if (!tr)
3776                 tr = &global_trace;
3777
3778         get_total_entries(&tr->array_buffer, &total, &entries);
3779
3780         return entries;
3781 }
3782
3783 static void print_lat_help_header(struct seq_file *m)
3784 {
3785         seq_puts(m, "#                    _------=> CPU#            \n"
3786                     "#                   / _-----=> irqs-off        \n"
3787                     "#                  | / _----=> need-resched    \n"
3788                     "#                  || / _---=> hardirq/softirq \n"
3789                     "#                  ||| / _--=> preempt-depth   \n"
3790                     "#                  |||| /     delay            \n"
3791                     "#  cmd     pid     ||||| time  |   caller      \n"
3792                     "#     \\   /        |||||  \\    |   /         \n");
3793 }
3794
3795 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3796 {
3797         unsigned long total;
3798         unsigned long entries;
3799
3800         get_total_entries(buf, &total, &entries);
3801         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3802                    entries, total, num_online_cpus());
3803         seq_puts(m, "#\n");
3804 }
3805
3806 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3807                                    unsigned int flags)
3808 {
3809         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3810
3811         print_event_info(buf, m);
3812
3813         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3814         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3815 }
3816
3817 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3818                                        unsigned int flags)
3819 {
3820         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3821         const char *space = "            ";
3822         int prec = tgid ? 12 : 2;
3823
3824         print_event_info(buf, m);
3825
3826         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3827         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3828         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3829         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3830         seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3831         seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3832         seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3833 }
3834
3835 void
3836 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3837 {
3838         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3839         struct array_buffer *buf = iter->array_buffer;
3840         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3841         struct tracer *type = iter->trace;
3842         unsigned long entries;
3843         unsigned long total;
3844         const char *name;
3845
3846         name = type->name;
3847
3848         get_total_entries(buf, &total, &entries);
3849
3850         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3851                    name, UTS_RELEASE);
3852         seq_puts(m, "# -----------------------------------"
3853                  "---------------------------------\n");
3854         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3855                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3856                    nsecs_to_usecs(data->saved_latency),
3857                    entries,
3858                    total,
3859                    buf->cpu,
3860 #if defined(CONFIG_PREEMPT_NONE)
3861                    "server",
3862 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3863                    "desktop",
3864 #elif defined(CONFIG_PREEMPT)
3865                    "preempt",
3866 #elif defined(CONFIG_PREEMPT_RT)
3867                    "preempt_rt",
3868 #else
3869                    "unknown",
3870 #endif
3871                    /* These are reserved for later use */
3872                    0, 0, 0, 0);
3873 #ifdef CONFIG_SMP
3874         seq_printf(m, " #P:%d)\n", num_online_cpus());
3875 #else
3876         seq_puts(m, ")\n");
3877 #endif
3878         seq_puts(m, "#    -----------------\n");
3879         seq_printf(m, "#    | task: %.16s-%d "
3880                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3881                    data->comm, data->pid,
3882                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3883                    data->policy, data->rt_priority);
3884         seq_puts(m, "#    -----------------\n");
3885
3886         if (data->critical_start) {
3887                 seq_puts(m, "#  => started at: ");
3888                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3889                 trace_print_seq(m, &iter->seq);
3890                 seq_puts(m, "\n#  => ended at:   ");
3891                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3892                 trace_print_seq(m, &iter->seq);
3893                 seq_puts(m, "\n#\n");
3894         }
3895
3896         seq_puts(m, "#\n");
3897 }
3898
3899 static void test_cpu_buff_start(struct trace_iterator *iter)
3900 {
3901         struct trace_seq *s = &iter->seq;
3902         struct trace_array *tr = iter->tr;
3903
3904         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3905                 return;
3906
3907         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3908                 return;
3909
3910         if (cpumask_available(iter->started) &&
3911             cpumask_test_cpu(iter->cpu, iter->started))
3912                 return;
3913
3914         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3915                 return;
3916
3917         if (cpumask_available(iter->started))
3918                 cpumask_set_cpu(iter->cpu, iter->started);
3919
3920         /* Don't print started cpu buffer for the first entry of the trace */
3921         if (iter->idx > 1)
3922                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3923                                 iter->cpu);
3924 }
3925
3926 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3927 {
3928         struct trace_array *tr = iter->tr;
3929         struct trace_seq *s = &iter->seq;
3930         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3931         struct trace_entry *entry;
3932         struct trace_event *event;
3933
3934         entry = iter->ent;
3935
3936         test_cpu_buff_start(iter);
3937
3938         event = ftrace_find_event(entry->type);
3939
3940         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3941                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3942                         trace_print_lat_context(iter);
3943                 else
3944                         trace_print_context(iter);
3945         }
3946
3947         if (trace_seq_has_overflowed(s))
3948                 return TRACE_TYPE_PARTIAL_LINE;
3949
3950         if (event)
3951                 return event->funcs->trace(iter, sym_flags, event);
3952
3953         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3954
3955         return trace_handle_return(s);
3956 }
3957
3958 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3959 {
3960         struct trace_array *tr = iter->tr;
3961         struct trace_seq *s = &iter->seq;
3962         struct trace_entry *entry;
3963         struct trace_event *event;
3964
3965         entry = iter->ent;
3966
3967         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3968                 trace_seq_printf(s, "%d %d %llu ",
3969                                  entry->pid, iter->cpu, iter->ts);
3970
3971         if (trace_seq_has_overflowed(s))
3972                 return TRACE_TYPE_PARTIAL_LINE;
3973
3974         event = ftrace_find_event(entry->type);
3975         if (event)
3976                 return event->funcs->raw(iter, 0, event);
3977
3978         trace_seq_printf(s, "%d ?\n", entry->type);
3979
3980         return trace_handle_return(s);
3981 }
3982
3983 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3984 {
3985         struct trace_array *tr = iter->tr;
3986         struct trace_seq *s = &iter->seq;
3987         unsigned char newline = '\n';
3988         struct trace_entry *entry;
3989         struct trace_event *event;
3990
3991         entry = iter->ent;
3992
3993         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3994                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3995                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3996                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3997                 if (trace_seq_has_overflowed(s))
3998                         return TRACE_TYPE_PARTIAL_LINE;
3999         }
4000
4001         event = ftrace_find_event(entry->type);
4002         if (event) {
4003                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4004                 if (ret != TRACE_TYPE_HANDLED)
4005                         return ret;
4006         }
4007
4008         SEQ_PUT_FIELD(s, newline);
4009
4010         return trace_handle_return(s);
4011 }
4012
4013 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4014 {
4015         struct trace_array *tr = iter->tr;
4016         struct trace_seq *s = &iter->seq;
4017         struct trace_entry *entry;
4018         struct trace_event *event;
4019
4020         entry = iter->ent;
4021
4022         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4023                 SEQ_PUT_FIELD(s, entry->pid);
4024                 SEQ_PUT_FIELD(s, iter->cpu);
4025                 SEQ_PUT_FIELD(s, iter->ts);
4026                 if (trace_seq_has_overflowed(s))
4027                         return TRACE_TYPE_PARTIAL_LINE;
4028         }
4029
4030         event = ftrace_find_event(entry->type);
4031         return event ? event->funcs->binary(iter, 0, event) :
4032                 TRACE_TYPE_HANDLED;
4033 }
4034
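     /* Return 1 if the iterator has nothing left to read, 0 otherwise. */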
4035 int trace_empty(struct trace_iterator *iter)
4036 {
4037         struct ring_buffer_iter *buf_iter;
4038         int cpu;
4039
4040         /* If we are looking at one CPU buffer, only check that one */
4041         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4042                 cpu = iter->cpu_file;
4043                 buf_iter = trace_buffer_iter(iter, cpu);
4044                 if (buf_iter) {
4045                         if (!ring_buffer_iter_empty(buf_iter))
4046                                 return 0;
4047                 } else {
4048                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4049                                 return 0;
4050                 }
4051                 return 1;
4052         }
4053
4054         for_each_tracing_cpu(cpu) {
4055                 buf_iter = trace_buffer_iter(iter, cpu);
4056                 if (buf_iter) {
4057                         if (!ring_buffer_iter_empty(buf_iter))
4058                                 return 0;
4059                 } else {
4060                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4061                                 return 0;
4062                 }
4063         }
4064
4065         return 1;
4066 }
4067
4068 /*  Called with trace_event_read_lock() held. */
4069 enum print_line_t print_trace_line(struct trace_iterator *iter)
4070 {
4071         struct trace_array *tr = iter->tr;
4072         unsigned long trace_flags = tr->trace_flags;
4073         enum print_line_t ret;
4074
4075         if (iter->lost_events) {
4076                 if (iter->lost_events == (unsigned long)-1)
4077                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4078                                          iter->cpu);
4079                 else
4080                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4081                                          iter->cpu, iter->lost_events);
4082                 if (trace_seq_has_overflowed(&iter->seq))
4083                         return TRACE_TYPE_PARTIAL_LINE;
4084         }
4085
4086         if (iter->trace && iter->trace->print_line) {
4087                 ret = iter->trace->print_line(iter);
4088                 if (ret != TRACE_TYPE_UNHANDLED)
4089                         return ret;
4090         }
4091
4092         if (iter->ent->type == TRACE_BPUTS &&
4093                         trace_flags & TRACE_ITER_PRINTK &&
4094                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4095                 return trace_print_bputs_msg_only(iter);
4096
4097         if (iter->ent->type == TRACE_BPRINT &&
4098                         trace_flags & TRACE_ITER_PRINTK &&
4099                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4100                 return trace_print_bprintk_msg_only(iter);
4101
4102         if (iter->ent->type == TRACE_PRINT &&
4103                         trace_flags & TRACE_ITER_PRINTK &&
4104                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4105                 return trace_print_printk_msg_only(iter);
4106
4107         if (trace_flags & TRACE_ITER_BIN)
4108                 return print_bin_fmt(iter);
4109
4110         if (trace_flags & TRACE_ITER_HEX)
4111                 return print_hex_fmt(iter);
4112
4113         if (trace_flags & TRACE_ITER_RAW)
4114                 return print_raw_fmt(iter);
4115
4116         return print_trace_fmt(iter);
4117 }
4118
4119 void trace_latency_header(struct seq_file *m)
4120 {
4121         struct trace_iterator *iter = m->private;
4122         struct trace_array *tr = iter->tr;
4123
4124         /* print nothing if the buffers are empty */
4125         if (trace_empty(iter))
4126                 return;
4127
4128         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4129                 print_trace_header(m, iter);
4130
4131         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4132                 print_lat_help_header(m);
4133 }
4134
4135 void trace_default_header(struct seq_file *m)
4136 {
4137         struct trace_iterator *iter = m->private;
4138         struct trace_array *tr = iter->tr;
4139         unsigned long trace_flags = tr->trace_flags;
4140
4141         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4142                 return;
4143
4144         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4145                 /* print nothing if the buffers are empty */
4146                 if (trace_empty(iter))
4147                         return;
4148                 print_trace_header(m, iter);
4149                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4150                         print_lat_help_header(m);
4151         } else {
4152                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4153                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4154                                 print_func_help_header_irq(iter->array_buffer,
4155                                                            m, trace_flags);
4156                         else
4157                                 print_func_help_header(iter->array_buffer, m,
4158                                                        trace_flags);
4159                 }
4160         }
4161 }
4162
4163 static void test_ftrace_alive(struct seq_file *m)
4164 {
4165         if (!ftrace_is_dead())
4166                 return;
4167         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4168                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4169 }
4170
4171 #ifdef CONFIG_TRACER_MAX_TRACE
4172 static void show_snapshot_main_help(struct seq_file *m)
4173 {
4174         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4175                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4176                     "#                      Takes a snapshot of the main buffer.\n"
4177                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4178                     "#                      (Doesn't have to be '2'; works with any number that\n"
4179                     "#                       is not a '0' or '1')\n");
4180 }
4181
4182 static void show_snapshot_percpu_help(struct seq_file *m)
4183 {
4184         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4185 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4186         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4187                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4188 #else
4189         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4190                     "#                     Must use main snapshot file to allocate.\n");
4191 #endif
4192         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4193                     "#                      (Doesn't have to be '2'; works with any number that\n"
4194                     "#                       is not a '0' or '1')\n");
4195 }
4196
4197 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4198 {
4199         if (iter->tr->allocated_snapshot)
4200                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4201         else
4202                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4203
4204         seq_puts(m, "# Snapshot commands:\n");
4205         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4206                 show_snapshot_main_help(m);
4207         else
4208                 show_snapshot_percpu_help(m);
4209 }
4210 #else
4211 /* Should never be called */
4212 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4213 #endif
4214
4215 static int s_show(struct seq_file *m, void *v)
4216 {
4217         struct trace_iterator *iter = v;
4218         int ret;
4219
4220         if (iter->ent == NULL) {
4221                 if (iter->tr) {
4222                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4223                         seq_puts(m, "#\n");
4224                         test_ftrace_alive(m);
4225                 }
4226                 if (iter->snapshot && trace_empty(iter))
4227                         print_snapshot_help(m, iter);
4228                 else if (iter->trace && iter->trace->print_header)
4229                         iter->trace->print_header(m);
4230                 else
4231                         trace_default_header(m);
4232
4233         } else if (iter->leftover) {
4234                 /*
4235                  * If we filled the seq_file buffer earlier, we
4236                  * want to just show it now.
4237                  */
4238                 ret = trace_print_seq(m, &iter->seq);
4239
4240                 /* ret should this time be zero, but you never know */
4241                 iter->leftover = ret;
4242
4243         } else {
4244                 print_trace_line(iter);
4245                 ret = trace_print_seq(m, &iter->seq);
4246                 /*
4247                  * If we overflow the seq_file buffer, then it will
4248                  * ask us for this data again at start up.
4249                  * Use that instead.
4250                  *  ret is 0 if seq_file write succeeded.
4251                  *        -1 otherwise.
4252                  */
4253                 iter->leftover = ret;
4254         }
4255
4256         return 0;
4257 }
4258
4259 /*
4260  * Should be used after trace_array_get(); trace_types_lock
4261  * ensures that i_cdev was already initialized.
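      * trace_create_cpu_file() stores cpu + 1 in i_cdev, so a NULL i_cdev
      * (the top-level file) maps to RING_BUFFER_ALL_CPUS here.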
4262  */
4263 static inline int tracing_get_cpu(struct inode *inode)
4264 {
4265         if (inode->i_cdev) /* See trace_create_cpu_file() */
4266                 return (long)inode->i_cdev - 1;
4267         return RING_BUFFER_ALL_CPUS;
4268 }
4269
4270 static const struct seq_operations tracer_seq_ops = {
4271         .start          = s_start,
4272         .next           = s_next,
4273         .stop           = s_stop,
4274         .show           = s_show,
4275 };
4276
4277 static struct trace_iterator *
4278 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4279 {
4280         struct trace_array *tr = inode->i_private;
4281         struct trace_iterator *iter;
4282         int cpu;
4283
4284         if (tracing_disabled)
4285                 return ERR_PTR(-ENODEV);
4286
4287         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4288         if (!iter)
4289                 return ERR_PTR(-ENOMEM);
4290
4291         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4292                                     GFP_KERNEL);
4293         if (!iter->buffer_iter)
4294                 goto release;
4295
4296         /*
4297          * trace_find_next_entry() may need to save off iter->ent.
4298          * events are smaller than 128 bytes, allocate a buffer of that size.
4299          * events are less than 128, allocate a buffer of that size.
4300          * If one is greater, then trace_find_next_entry() will
4301          * allocate a new buffer to adjust for the bigger iter->ent.
4302          * It's not critical if it fails to get allocated here.
4303          */
4304         iter->temp = kmalloc(128, GFP_KERNEL);
4305         if (iter->temp)
4306                 iter->temp_size = 128;
4307
4308         /*
4309          * We make a copy of the current tracer to avoid concurrent
4310          * changes on it while we are reading.
4311          */
4312         mutex_lock(&trace_types_lock);
4313         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4314         if (!iter->trace)
4315                 goto fail;
4316
4317         *iter->trace = *tr->current_trace;
4318
4319         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4320                 goto fail;
4321
4322         iter->tr = tr;
4323
4324 #ifdef CONFIG_TRACER_MAX_TRACE
4325         /* Currently only the top directory has a snapshot */
4326         if (tr->current_trace->print_max || snapshot)
4327                 iter->array_buffer = &tr->max_buffer;
4328         else
4329 #endif
4330                 iter->array_buffer = &tr->array_buffer;
4331         iter->snapshot = snapshot;
4332         iter->pos = -1;
4333         iter->cpu_file = tracing_get_cpu(inode);
4334         mutex_init(&iter->mutex);
4335
4336         /* Notify the tracer early; before we stop tracing. */
4337         if (iter->trace->open)
4338                 iter->trace->open(iter);
4339
4340         /* Annotate start of buffers if we had overruns */
4341         if (ring_buffer_overruns(iter->array_buffer->buffer))
4342                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4343
4344         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4345         if (trace_clocks[tr->clock_id].in_ns)
4346                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4347
4348         /*
4349          * If pause-on-trace is enabled, then stop the trace while
4350          * dumping, unless this is the "snapshot" file
4351          */
4352         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4353                 tracing_stop_tr(tr);
4354
4355         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4356                 for_each_tracing_cpu(cpu) {
4357                         iter->buffer_iter[cpu] =
4358                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4359                                                          cpu, GFP_KERNEL);
4360                 }
4361                 ring_buffer_read_prepare_sync();
4362                 for_each_tracing_cpu(cpu) {
4363                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4364                         tracing_iter_reset(iter, cpu);
4365                 }
4366         } else {
4367                 cpu = iter->cpu_file;
4368                 iter->buffer_iter[cpu] =
4369                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4370                                                  cpu, GFP_KERNEL);
4371                 ring_buffer_read_prepare_sync();
4372                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4373                 tracing_iter_reset(iter, cpu);
4374         }
4375
4376         mutex_unlock(&trace_types_lock);
4377
4378         return iter;
4379
4380  fail:
4381         mutex_unlock(&trace_types_lock);
4382         kfree(iter->trace);
4383         kfree(iter->temp);
4384         kfree(iter->buffer_iter);
4385 release:
4386         seq_release_private(inode, file);
4387         return ERR_PTR(-ENOMEM);
4388 }
4389
4390 int tracing_open_generic(struct inode *inode, struct file *filp)
4391 {
4392         int ret;
4393
4394         ret = tracing_check_open_get_tr(NULL);
4395         if (ret)
4396                 return ret;
4397
4398         filp->private_data = inode->i_private;
4399         return 0;
4400 }
4401
4402 bool tracing_is_disabled(void)
4403 {
4404         return tracing_disabled ? true : false;
4405 }
4406
4407 /*
4408  * Open and update trace_array ref count.
4409  * Must have the current trace_array passed to it.
4410  */
4411 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4412 {
4413         struct trace_array *tr = inode->i_private;
4414         int ret;
4415
4416         ret = tracing_check_open_get_tr(tr);
4417         if (ret)
4418                 return ret;
4419
4420         filp->private_data = inode->i_private;
4421
4422         return 0;
4423 }
4424
4425 static int tracing_release(struct inode *inode, struct file *file)
4426 {
4427         struct trace_array *tr = inode->i_private;
4428         struct seq_file *m = file->private_data;
4429         struct trace_iterator *iter;
4430         int cpu;
4431
4432         if (!(file->f_mode & FMODE_READ)) {
4433                 trace_array_put(tr);
4434                 return 0;
4435         }
4436
4437         /* Writes do not use seq_file */
4438         iter = m->private;
4439         mutex_lock(&trace_types_lock);
4440
4441         for_each_tracing_cpu(cpu) {
4442                 if (iter->buffer_iter[cpu])
4443                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4444         }
4445
4446         if (iter->trace && iter->trace->close)
4447                 iter->trace->close(iter);
4448
4449         if (!iter->snapshot && tr->stop_count)
4450                 /* reenable tracing if it was previously enabled */
4451                 tracing_start_tr(tr);
4452
4453         __trace_array_put(tr);
4454
4455         mutex_unlock(&trace_types_lock);
4456
4457         mutex_destroy(&iter->mutex);
4458         free_cpumask_var(iter->started);
4459         kfree(iter->temp);
4460         kfree(iter->trace);
4461         kfree(iter->buffer_iter);
4462         seq_release_private(inode, file);
4463
4464         return 0;
4465 }
4466
4467 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4468 {
4469         struct trace_array *tr = inode->i_private;
4470
4471         trace_array_put(tr);
4472         return 0;
4473 }
4474
4475 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4476 {
4477         struct trace_array *tr = inode->i_private;
4478
4479         trace_array_put(tr);
4480
4481         return single_release(inode, file);
4482 }
4483
4484 static int tracing_open(struct inode *inode, struct file *file)
4485 {
4486         struct trace_array *tr = inode->i_private;
4487         struct trace_iterator *iter;
4488         int ret;
4489
4490         ret = tracing_check_open_get_tr(tr);
4491         if (ret)
4492                 return ret;
4493
4494         /* If this file was open for write, then erase contents */
4495         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4496                 int cpu = tracing_get_cpu(inode);
4497                 struct array_buffer *trace_buf = &tr->array_buffer;
4498
4499 #ifdef CONFIG_TRACER_MAX_TRACE
4500                 if (tr->current_trace->print_max)
4501                         trace_buf = &tr->max_buffer;
4502 #endif
4503
4504                 if (cpu == RING_BUFFER_ALL_CPUS)
4505                         tracing_reset_online_cpus(trace_buf);
4506                 else
4507                         tracing_reset_cpu(trace_buf, cpu);
4508         }
4509
4510         if (file->f_mode & FMODE_READ) {
4511                 iter = __tracing_open(inode, file, false);
4512                 if (IS_ERR(iter))
4513                         ret = PTR_ERR(iter);
4514                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4515                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4516         }
4517
4518         if (ret < 0)
4519                 trace_array_put(tr);
4520
4521         return ret;
4522 }
4523
4524 /*
4525  * Some tracers are not suitable for instance buffers.
4526  * A tracer is always available for the global array (toplevel)
4527  * or if it explicitly states that it is.
4528  */
4529 static bool
4530 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4531 {
4532         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4533 }
4534
4535 /* Find the next tracer that this trace array may use */
4536 static struct tracer *
4537 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4538 {
4539         while (t && !trace_ok_for_array(t, tr))
4540                 t = t->next;
4541
4542         return t;
4543 }
4544
4545 static void *
4546 t_next(struct seq_file *m, void *v, loff_t *pos)
4547 {
4548         struct trace_array *tr = m->private;
4549         struct tracer *t = v;
4550
4551         (*pos)++;
4552
4553         if (t)
4554                 t = get_tracer_for_array(tr, t->next);
4555
4556         return t;
4557 }
4558
4559 static void *t_start(struct seq_file *m, loff_t *pos)
4560 {
4561         struct trace_array *tr = m->private;
4562         struct tracer *t;
4563         loff_t l = 0;
4564
4565         mutex_lock(&trace_types_lock);
4566
4567         t = get_tracer_for_array(tr, trace_types);
4568         for (; t && l < *pos; t = t_next(m, t, &l))
4569                 ;
4570
4571         return t;
4572 }
4573
4574 static void t_stop(struct seq_file *m, void *p)
4575 {
4576         mutex_unlock(&trace_types_lock);
4577 }
4578
4579 static int t_show(struct seq_file *m, void *v)
4580 {
4581         struct tracer *t = v;
4582
4583         if (!t)
4584                 return 0;
4585
4586         seq_puts(m, t->name);
4587         if (t->next)
4588                 seq_putc(m, ' ');
4589         else
4590                 seq_putc(m, '\n');
4591
4592         return 0;
4593 }
4594
4595 static const struct seq_operations show_traces_seq_ops = {
4596         .start          = t_start,
4597         .next           = t_next,
4598         .stop           = t_stop,
4599         .show           = t_show,
4600 };
4601
4602 static int show_traces_open(struct inode *inode, struct file *file)
4603 {
4604         struct trace_array *tr = inode->i_private;
4605         struct seq_file *m;
4606         int ret;
4607
4608         ret = tracing_check_open_get_tr(tr);
4609         if (ret)
4610                 return ret;
4611
4612         ret = seq_open(file, &show_traces_seq_ops);
4613         if (ret) {
4614                 trace_array_put(tr);
4615                 return ret;
4616         }
4617
4618         m = file->private_data;
4619         m->private = tr;
4620
4621         return 0;
4622 }
4623
4624 static int show_traces_release(struct inode *inode, struct file *file)
4625 {
4626         struct trace_array *tr = inode->i_private;
4627
4628         trace_array_put(tr);
4629         return seq_release(inode, file);
4630 }
4631
4632 static ssize_t
4633 tracing_write_stub(struct file *filp, const char __user *ubuf,
4634                    size_t count, loff_t *ppos)
4635 {
4636         return count;
4637 }
4638
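     /*
      * lseek for trace files: defer to seq_lseek() when the file was opened
      * for reading; write-only opens do not use seq_file, so just reset the
      * position.
      */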
4639 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4640 {
4641         int ret;
4642
4643         if (file->f_mode & FMODE_READ)
4644                 ret = seq_lseek(file, offset, whence);
4645         else
4646                 file->f_pos = ret = 0;
4647
4648         return ret;
4649 }
4650
4651 static const struct file_operations tracing_fops = {
4652         .open           = tracing_open,
4653         .read           = seq_read,
4654         .write          = tracing_write_stub,
4655         .llseek         = tracing_lseek,
4656         .release        = tracing_release,
4657 };
4658
4659 static const struct file_operations show_traces_fops = {
4660         .open           = show_traces_open,
4661         .read           = seq_read,
4662         .llseek         = seq_lseek,
4663         .release        = show_traces_release,
4664 };
4665
4666 static ssize_t
4667 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4668                      size_t count, loff_t *ppos)
4669 {
4670         struct trace_array *tr = file_inode(filp)->i_private;
4671         char *mask_str;
4672         int len;
4673
4674         len = snprintf(NULL, 0, "%*pb\n",
4675                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4676         mask_str = kmalloc(len, GFP_KERNEL);
4677         if (!mask_str)
4678                 return -ENOMEM;
4679
4680         len = snprintf(mask_str, len, "%*pb\n",
4681                        cpumask_pr_args(tr->tracing_cpumask));
4682         if (len >= count) {
4683                 count = -EINVAL;
4684                 goto out_err;
4685         }
4686         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4687
4688 out_err:
4689         kfree(mask_str);
4690
4691         return count;
4692 }
4693
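     /*
      * Apply @tracing_cpumask_new to @tr: per-CPU recording is disabled for
      * CPUs being cleared and re-enabled for CPUs being set, then the new
      * mask is copied into the trace array.
      */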
4694 int tracing_set_cpumask(struct trace_array *tr,
4695                         cpumask_var_t tracing_cpumask_new)
4696 {
4697         int cpu;
4698
4699         if (!tr)
4700                 return -EINVAL;
4701
4702         local_irq_disable();
4703         arch_spin_lock(&tr->max_lock);
4704         for_each_tracing_cpu(cpu) {
4705                 /*
4706                  * Increase/decrease the disabled counter if we are
4707                  * about to flip a bit in the cpumask:
4708                  */
4709                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4710                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4711                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4712                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4713                 }
4714                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4715                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4716                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4717                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4718                 }
4719         }
4720         arch_spin_unlock(&tr->max_lock);
4721         local_irq_enable();
4722
4723         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4724
4725         return 0;
4726 }
4727
4728 static ssize_t
4729 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4730                       size_t count, loff_t *ppos)
4731 {
4732         struct trace_array *tr = file_inode(filp)->i_private;
4733         cpumask_var_t tracing_cpumask_new;
4734         int err;
4735
4736         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4737                 return -ENOMEM;
4738
4739         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4740         if (err)
4741                 goto err_free;
4742
4743         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4744         if (err)
4745                 goto err_free;
4746
4747         free_cpumask_var(tracing_cpumask_new);
4748
4749         return count;
4750
4751 err_free:
4752         free_cpumask_var(tracing_cpumask_new);
4753
4754         return err;
4755 }
4756
4757 static const struct file_operations tracing_cpumask_fops = {
4758         .open           = tracing_open_generic_tr,
4759         .read           = tracing_cpumask_read,
4760         .write          = tracing_cpumask_write,
4761         .release        = tracing_release_generic_tr,
4762         .llseek         = generic_file_llseek,
4763 };
4764
4765 static int tracing_trace_options_show(struct seq_file *m, void *v)
4766 {
4767         struct tracer_opt *trace_opts;
4768         struct trace_array *tr = m->private;
4769         u32 tracer_flags;
4770         int i;
4771
4772         mutex_lock(&trace_types_lock);
4773         tracer_flags = tr->current_trace->flags->val;
4774         trace_opts = tr->current_trace->flags->opts;
4775
4776         for (i = 0; trace_options[i]; i++) {
4777                 if (tr->trace_flags & (1 << i))
4778                         seq_printf(m, "%s\n", trace_options[i]);
4779                 else
4780                         seq_printf(m, "no%s\n", trace_options[i]);
4781         }
4782
4783         for (i = 0; trace_opts[i].name; i++) {
4784                 if (tracer_flags & trace_opts[i].bit)
4785                         seq_printf(m, "%s\n", trace_opts[i].name);
4786                 else
4787                         seq_printf(m, "no%s\n", trace_opts[i].name);
4788         }
4789         mutex_unlock(&trace_types_lock);
4790
4791         return 0;
4792 }
4793
4794 static int __set_tracer_option(struct trace_array *tr,
4795                                struct tracer_flags *tracer_flags,
4796                                struct tracer_opt *opts, int neg)
4797 {
4798         struct tracer *trace = tracer_flags->trace;
4799         int ret;
4800
4801         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4802         if (ret)
4803                 return ret;
4804
4805         if (neg)
4806                 tracer_flags->val &= ~opts->bit;
4807         else
4808                 tracer_flags->val |= opts->bit;
4809         return 0;
4810 }
4811
4812 /* Try to assign a tracer specific option */
4813 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4814 {
4815         struct tracer *trace = tr->current_trace;
4816         struct tracer_flags *tracer_flags = trace->flags;
4817         struct tracer_opt *opts = NULL;
4818         int i;
4819
4820         for (i = 0; tracer_flags->opts[i].name; i++) {
4821                 opts = &tracer_flags->opts[i];
4822
4823                 if (strcmp(cmp, opts->name) == 0)
4824                         return __set_tracer_option(tr, trace->flags, opts, neg);
4825         }
4826
4827         return -EINVAL;
4828 }
4829
4830 /* Some tracers require overwrite to stay enabled */
4831 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4832 {
4833         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4834                 return -1;
4835
4836         return 0;
4837 }
4838
4839 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4840 {
4841         if ((mask == TRACE_ITER_RECORD_TGID) ||
4842             (mask == TRACE_ITER_RECORD_CMD))
4843                 lockdep_assert_held(&event_mutex);
4844
4845         /* do nothing if flag is already set */
4846         if (!!(tr->trace_flags & mask) == !!enabled)
4847                 return 0;
4848
4849         /* Give the tracer a chance to approve the change */
4850         if (tr->current_trace->flag_changed)
4851                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4852                         return -EINVAL;
4853
4854         if (enabled)
4855                 tr->trace_flags |= mask;
4856         else
4857                 tr->trace_flags &= ~mask;
4858
4859         if (mask == TRACE_ITER_RECORD_CMD)
4860                 trace_event_enable_cmd_record(enabled);
4861
4862         if (mask == TRACE_ITER_RECORD_TGID) {
4863                 if (!tgid_map)
4864                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4865                                            sizeof(*tgid_map),
4866                                            GFP_KERNEL);
4867                 if (!tgid_map) {
4868                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4869                         return -ENOMEM;
4870                 }
4871
4872                 trace_event_enable_tgid_record(enabled);
4873         }
4874
4875         if (mask == TRACE_ITER_EVENT_FORK)
4876                 trace_event_follow_fork(tr, enabled);
4877
4878         if (mask == TRACE_ITER_FUNC_FORK)
4879                 ftrace_pid_follow_fork(tr, enabled);
4880
4881         if (mask == TRACE_ITER_OVERWRITE) {
4882                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4883 #ifdef CONFIG_TRACER_MAX_TRACE
4884                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4885 #endif
4886         }
4887
4888         if (mask == TRACE_ITER_PRINTK) {
4889                 trace_printk_start_stop_comm(enabled);
4890                 trace_printk_control(enabled);
4891         }
4892
4893         return 0;
4894 }
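/*
 * Usage sketch (illustrative): flags such as TRACE_ITER_RECORD_TGID are
 * toggled through the trace_options file; enabling record-tgid ends up in
 * set_tracer_flag() above, which allocates tgid_map on first use:
 *
 *   # echo record-tgid > /sys/kernel/tracing/trace_options
 *   # echo norecord-tgid > /sys/kernel/tracing/trace_options
 */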
4895
4896 int trace_set_options(struct trace_array *tr, char *option)
4897 {
4898         char *cmp;
4899         int neg = 0;
4900         int ret;
4901         size_t orig_len = strlen(option);
4902         int len;
4903
4904         cmp = strstrip(option);
4905
4906         len = str_has_prefix(cmp, "no");
4907         if (len)
4908                 neg = 1;
4909
4910         cmp += len;
4911
4912         mutex_lock(&event_mutex);
4913         mutex_lock(&trace_types_lock);
4914
4915         ret = match_string(trace_options, -1, cmp);
4916         /* If the option is not a core trace flag, try the tracer-specific options */
4917         if (ret < 0)
4918                 ret = set_tracer_option(tr, cmp, neg);
4919         else
4920                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4921
4922         mutex_unlock(&trace_types_lock);
4923         mutex_unlock(&event_mutex);
4924
4925         /*
4926          * If the first trailing whitespace is replaced with '\0' by strstrip,
4927          * turn it back into a space.
4928          */
4929         if (orig_len > strlen(option))
4930                 option[strlen(option)] = ' ';
4931
4932         return ret;
4933 }
4934
4935 static void __init apply_trace_boot_options(void)
4936 {
4937         char *buf = trace_boot_options_buf;
4938         char *option;
4939
4940         while (true) {
4941                 option = strsep(&buf, ",");
4942
4943                 if (!option)
4944                         break;
4945
4946                 if (*option)
4947                         trace_set_options(&global_trace, option);
4948
4949                 /* Put back the comma to allow this to be called again */
4950                 if (buf)
4951                         *(buf - 1) = ',';
4952         }
4953 }
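/*
 * Illustrative example (assumed kernel command line, not from this file):
 * trace_boot_options_buf is filled from the "trace_options=" boot
 * parameter, so a comma-separated list like the one below is split by
 * strsep() above and each entry is handed to trace_set_options():
 *
 *   trace_options=sym-addr,stacktrace,irq-info
 */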
4954
4955 static ssize_t
4956 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4957                         size_t cnt, loff_t *ppos)
4958 {
4959         struct seq_file *m = filp->private_data;
4960         struct trace_array *tr = m->private;
4961         char buf[64];
4962         int ret;
4963
4964         if (cnt >= sizeof(buf))
4965                 return -EINVAL;
4966
4967         if (copy_from_user(buf, ubuf, cnt))
4968                 return -EFAULT;
4969
4970         buf[cnt] = 0;
4971
4972         ret = trace_set_options(tr, buf);
4973         if (ret < 0)
4974                 return ret;
4975
4976         *ppos += cnt;
4977
4978         return cnt;
4979 }
4980
4981 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4982 {
4983         struct trace_array *tr = inode->i_private;
4984         int ret;
4985
4986         ret = tracing_check_open_get_tr(tr);
4987         if (ret)
4988                 return ret;
4989
4990         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4991         if (ret < 0)
4992                 trace_array_put(tr);
4993
4994         return ret;
4995 }
4996
4997 static const struct file_operations tracing_iter_fops = {
4998         .open           = tracing_trace_options_open,
4999         .read           = seq_read,
5000         .llseek         = seq_lseek,
5001         .release        = tracing_single_release_tr,
5002         .write          = tracing_trace_options_write,
5003 };
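/*
 * Usage sketch (illustrative): the trace_options file is listed by
 * tracing_trace_options_show() and written one option per write by
 * tracing_trace_options_write(); a "no" prefix clears the flag:
 *
 *   # cat /sys/kernel/tracing/trace_options
 *   # echo sym-addr > /sys/kernel/tracing/trace_options
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 */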
5004
5005 static const char readme_msg[] =
5006         "tracing mini-HOWTO:\n\n"
5007         "# echo 0 > tracing_on : quick way to disable tracing\n"
5008         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5009         " Important files:\n"
5010         "  trace\t\t\t- The static contents of the buffer\n"
5011         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5012         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5013         "  current_tracer\t- function and latency tracers\n"
5014         "  available_tracers\t- list of configured tracers for current_tracer\n"
5015         "  error_log\t- error log for failed commands (that support it)\n"
5016         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5017         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5018         "  trace_clock\t\t- change the clock used to order events\n"
5019         "       local:   Per cpu clock but may not be synced across CPUs\n"
5020         "      global:   Synced across CPUs but slows tracing down.\n"
5021         "     counter:   Not a clock, but just an increment\n"
5022         "      uptime:   Jiffy counter from time of boot\n"
5023         "        perf:   Same clock that perf events use\n"
5024 #ifdef CONFIG_X86_64
5025         "     x86-tsc:   TSC cycle counter\n"
5026 #endif
5027         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5028         "       delta:   Delta difference against a buffer-wide timestamp\n"
5029         "    absolute:   Absolute (standalone) timestamp\n"
5030         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5031         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5032         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5033         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5034         "\t\t\t  Remove sub-buffer with rmdir\n"
5035         "  trace_options\t\t- Set format or modify how tracing happens\n"
5036         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5037         "\t\t\t  option name\n"
5038         "  saved_cmdlines_size\t- echo a number in here to set how many comm-pid pairs are saved\n"
5039 #ifdef CONFIG_DYNAMIC_FTRACE
5040         "\n  available_filter_functions - list of functions that can be filtered on\n"
5041         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5042         "\t\t\t  functions\n"
5043         "\t     accepts: func_full_name or glob-matching-pattern\n"
5044         "\t     modules: Can select a group via module\n"
5045         "\t      Format: :mod:<module-name>\n"
5046         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5047         "\t    triggers: a command to perform when function is hit\n"
5048         "\t      Format: <function>:<trigger>[:count]\n"
5049         "\t     trigger: traceon, traceoff\n"
5050         "\t\t      enable_event:<system>:<event>\n"
5051         "\t\t      disable_event:<system>:<event>\n"
5052 #ifdef CONFIG_STACKTRACE
5053         "\t\t      stacktrace\n"
5054 #endif
5055 #ifdef CONFIG_TRACER_SNAPSHOT
5056         "\t\t      snapshot\n"
5057 #endif
5058         "\t\t      dump\n"
5059         "\t\t      cpudump\n"
5060         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5061         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5062         "\t     The first one will disable tracing every time do_fault is hit\n"
5063         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5064         "\t       The first time do_trap is hit and it disables tracing, the\n"
5065         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5066         "\t       the counter will not decrement. It only decrements when the\n"
5067         "\t       trigger did work\n"
5068         "\t     To remove trigger without count:\n"
5069         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5070         "\t     To remove trigger with a count:\n"
5071         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5072         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5073         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5074         "\t    modules: Can select a group via module command :mod:\n"
5075         "\t    Does not accept triggers\n"
5076 #endif /* CONFIG_DYNAMIC_FTRACE */
5077 #ifdef CONFIG_FUNCTION_TRACER
5078         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5079         "\t\t    (function)\n"
5080         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5081         "\t\t    (function)\n"
5082 #endif
5083 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5084         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5085         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5086         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5087 #endif
5088 #ifdef CONFIG_TRACER_SNAPSHOT
5089         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5090         "\t\t\t  snapshot buffer. Read the contents for more\n"
5091         "\t\t\t  information\n"
5092 #endif
5093 #ifdef CONFIG_STACK_TRACER
5094         "  stack_trace\t\t- Shows the max stack trace when active\n"
5095         "  stack_max_size\t- Shows current max stack size that was traced\n"
5096         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5097         "\t\t\t  new trace)\n"
5098 #ifdef CONFIG_DYNAMIC_FTRACE
5099         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5100         "\t\t\t  traces\n"
5101 #endif
5102 #endif /* CONFIG_STACK_TRACER */
5103 #ifdef CONFIG_DYNAMIC_EVENTS
5104         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5105         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5106 #endif
5107 #ifdef CONFIG_KPROBE_EVENTS
5108         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5109         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5110 #endif
5111 #ifdef CONFIG_UPROBE_EVENTS
5112         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5113         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5114 #endif
5115 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5116         "\t  accepts: event-definitions (one definition per line)\n"
5117         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5118         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5119 #ifdef CONFIG_HIST_TRIGGERS
5120         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5121 #endif
5122         "\t           -:[<group>/]<event>\n"
5123 #ifdef CONFIG_KPROBE_EVENTS
5124         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5125         "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5126 #endif
5127 #ifdef CONFIG_UPROBE_EVENTS
5128         "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5129 #endif
5130         "\t     args: <name>=fetcharg[:type]\n"
5131         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5132 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5133         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5134 #else
5135         "\t           $stack<index>, $stack, $retval, $comm,\n"
5136 #endif
5137         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5138         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5139         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5140         "\t           <type>\\[<array-size>\\]\n"
5141 #ifdef CONFIG_HIST_TRIGGERS
5142         "\t    field: <stype> <name>;\n"
5143         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5144         "\t           [unsigned] char/int/long\n"
5145 #endif
5146 #endif
5147         "  events/\t\t- Directory containing all trace event subsystems:\n"
5148         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5149         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5150         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5151         "\t\t\t  events\n"
5152         "      filter\t\t- If set, only events passing filter are traced\n"
5153         "  events/<system>/<event>/\t- Directory containing control files for\n"
5154         "\t\t\t  <event>:\n"
5155         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5156         "      filter\t\t- If set, only events passing filter are traced\n"
5157         "      trigger\t\t- If set, a command to perform when event is hit\n"
5158         "\t    Format: <trigger>[:count][if <filter>]\n"
5159         "\t   trigger: traceon, traceoff\n"
5160         "\t            enable_event:<system>:<event>\n"
5161         "\t            disable_event:<system>:<event>\n"
5162 #ifdef CONFIG_HIST_TRIGGERS
5163         "\t            enable_hist:<system>:<event>\n"
5164         "\t            disable_hist:<system>:<event>\n"
5165 #endif
5166 #ifdef CONFIG_STACKTRACE
5167         "\t\t    stacktrace\n"
5168 #endif
5169 #ifdef CONFIG_TRACER_SNAPSHOT
5170         "\t\t    snapshot\n"
5171 #endif
5172 #ifdef CONFIG_HIST_TRIGGERS
5173         "\t\t    hist (see below)\n"
5174 #endif
5175         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5176         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5177         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5178         "\t                  events/block/block_unplug/trigger\n"
5179         "\t   The first disables tracing every time block_unplug is hit.\n"
5180         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5181         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5182         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5183         "\t   Like function triggers, the counter is only decremented if it\n"
5184         "\t    enabled or disabled tracing.\n"
5185         "\t   To remove a trigger without a count:\n"
5186         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5187         "\t   To remove a trigger with a count:\n"
5188         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5189         "\t   Filters can be ignored when removing a trigger.\n"
5190 #ifdef CONFIG_HIST_TRIGGERS
5191         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5192         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5193         "\t            [:values=<field1[,field2,...]>]\n"
5194         "\t            [:sort=<field1[,field2,...]>]\n"
5195         "\t            [:size=#entries]\n"
5196         "\t            [:pause][:continue][:clear]\n"
5197         "\t            [:name=histname1]\n"
5198         "\t            [:<handler>.<action>]\n"
5199         "\t            [if <filter>]\n\n"
5200         "\t    When a matching event is hit, an entry is added to a hash\n"
5201         "\t    table using the key(s) and value(s) named, and the value of a\n"
5202         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5203         "\t    correspond to fields in the event's format description.  Keys\n"
5204         "\t    can be any field, or the special string 'stacktrace'.\n"
5205         "\t    Compound keys consisting of up to two fields can be specified\n"
5206         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5207         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5208         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5209         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5210         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5211         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5212         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5213         "\t    its histogram data will be shared with other triggers of the\n"
5214         "\t    same name, and trigger hits will update this common data.\n\n"
5215         "\t    Reading the 'hist' file for the event will dump the hash\n"
5216         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5217         "\t    triggers attached to an event, there will be a table for each\n"
5218         "\t    trigger in the output.  The table displayed for a named\n"
5219         "\t    trigger will be the same as any other instance having the\n"
5220         "\t    same name.  The default format used to display a given field\n"
5221         "\t    can be modified by appending any of the following modifiers\n"
5222         "\t    to the field name, as applicable:\n\n"
5223         "\t            .hex        display a number as a hex value\n"
5224         "\t            .sym        display an address as a symbol\n"
5225         "\t            .sym-offset display an address as a symbol and offset\n"
5226         "\t            .execname   display a common_pid as a program name\n"
5227         "\t            .syscall    display a syscall id as a syscall name\n"
5228         "\t            .log2       display log2 value rather than raw number\n"
5229         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5230         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5231         "\t    trigger or to start a hist trigger but not log any events\n"
5232         "\t    until told to do so.  'continue' can be used to start or\n"
5233         "\t    restart a paused hist trigger.\n\n"
5234         "\t    The 'clear' parameter will clear the contents of a running\n"
5235         "\t    hist trigger and leave its current paused/active state\n"
5236         "\t    unchanged.\n\n"
5237         "\t    The enable_hist and disable_hist triggers can be used to\n"
5238         "\t    have one event conditionally start and stop another event's\n"
5239         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5240         "\t    the enable_event and disable_event triggers.\n\n"
5241         "\t    Hist trigger handlers and actions are executed whenever a\n"
5242         "\t    histogram entry is added or updated.  They take the form:\n\n"
5243         "\t        <handler>.<action>\n\n"
5244         "\t    The available handlers are:\n\n"
5245         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5246         "\t        onmax(var)               - invoke if var exceeds current max\n"
5247         "\t        onchange(var)            - invoke action if var changes\n\n"
5248         "\t    The available actions are:\n\n"
5249         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5250         "\t        save(field,...)                      - save current event fields\n"
5251 #ifdef CONFIG_TRACER_SNAPSHOT
5252         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5253 #endif
5254 #ifdef CONFIG_SYNTH_EVENTS
5255         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5256         "\t  Write into this file to define/undefine new synthetic events.\n"
5257         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5258 #endif
5259 #endif
5260 ;
5261
5262 static ssize_t
5263 tracing_readme_read(struct file *filp, char __user *ubuf,
5264                        size_t cnt, loff_t *ppos)
5265 {
5266         return simple_read_from_buffer(ubuf, cnt, ppos,
5267                                         readme_msg, strlen(readme_msg));
5268 }
5269
5270 static const struct file_operations tracing_readme_fops = {
5271         .open           = tracing_open_generic,
5272         .read           = tracing_readme_read,
5273         .llseek         = generic_file_llseek,
5274 };
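/*
 * Usage sketch (illustrative): readme_msg above is exposed as the
 * read-only README file, so the mini-HOWTO can be viewed with:
 *
 *   # cat /sys/kernel/tracing/README
 */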
5275
5276 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5277 {
5278         int *ptr = v;
5279
5280         if (*pos || m->count)
5281                 ptr++;
5282
5283         (*pos)++;
5284
5285         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5286                 if (trace_find_tgid(*ptr))
5287                         return ptr;
5288         }
5289
5290         return NULL;
5291 }
5292
5293 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5294 {
5295         void *v;
5296         loff_t l = 0;
5297
5298         if (!tgid_map)
5299                 return NULL;
5300
5301         v = &tgid_map[0];
5302         while (l <= *pos) {
5303                 v = saved_tgids_next(m, v, &l);
5304                 if (!v)
5305                         return NULL;
5306         }
5307
5308         return v;
5309 }
5310
5311 static void saved_tgids_stop(struct seq_file *m, void *v)
5312 {
5313 }
5314
5315 static int saved_tgids_show(struct seq_file *m, void *v)
5316 {
5317         int pid = (int *)v - tgid_map;
5318
5319         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5320         return 0;
5321 }
5322
5323 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5324         .start          = saved_tgids_start,
5325         .stop           = saved_tgids_stop,
5326         .next           = saved_tgids_next,
5327         .show           = saved_tgids_show,
5328 };
5329
5330 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5331 {
5332         int ret;
5333
5334         ret = tracing_check_open_get_tr(NULL);
5335         if (ret)
5336                 return ret;
5337
5338         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5339 }
5340
5341
5342 static const struct file_operations tracing_saved_tgids_fops = {
5343         .open           = tracing_saved_tgids_open,
5344         .read           = seq_read,
5345         .llseek         = seq_lseek,
5346         .release        = seq_release,
5347 };
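/*
 * Output sketch (illustrative, values are made up): saved_tgids_show()
 * prints one "<pid> <tgid>" pair per line for every PID with a cached
 * TGID:
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1234 1230
 *   1235 1230
 */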
5348
5349 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5350 {
5351         unsigned int *ptr = v;
5352
5353         if (*pos || m->count)
5354                 ptr++;
5355
5356         (*pos)++;
5357
5358         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5359              ptr++) {
5360                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5361                         continue;
5362
5363                 return ptr;
5364         }
5365
5366         return NULL;
5367 }
5368
5369 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5370 {
5371         void *v;
5372         loff_t l = 0;
5373
5374         preempt_disable();
5375         arch_spin_lock(&trace_cmdline_lock);
5376
5377         v = &savedcmd->map_cmdline_to_pid[0];
5378         while (l <= *pos) {
5379                 v = saved_cmdlines_next(m, v, &l);
5380                 if (!v)
5381                         return NULL;
5382         }
5383
5384         return v;
5385 }
5386
5387 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5388 {
5389         arch_spin_unlock(&trace_cmdline_lock);
5390         preempt_enable();
5391 }
5392
5393 static int saved_cmdlines_show(struct seq_file *m, void *v)
5394 {
5395         char buf[TASK_COMM_LEN];
5396         unsigned int *pid = v;
5397
5398         __trace_find_cmdline(*pid, buf);
5399         seq_printf(m, "%d %s\n", *pid, buf);
5400         return 0;
5401 }
5402
5403 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5404         .start          = saved_cmdlines_start,
5405         .next           = saved_cmdlines_next,
5406         .stop           = saved_cmdlines_stop,
5407         .show           = saved_cmdlines_show,
5408 };
5409
5410 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5411 {
5412         int ret;
5413
5414         ret = tracing_check_open_get_tr(NULL);
5415         if (ret)
5416                 return ret;
5417
5418         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5419 }
5420
5421 static const struct file_operations tracing_saved_cmdlines_fops = {
5422         .open           = tracing_saved_cmdlines_open,
5423         .read           = seq_read,
5424         .llseek         = seq_lseek,
5425         .release        = seq_release,
5426 };
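/*
 * Output sketch (illustrative, values are made up): saved_cmdlines_show()
 * prints "<pid> <comm>" for every cached mapping:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1234 bash
 *   2087 kworker/0:1
 */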
5427
5428 static ssize_t
5429 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5430                                  size_t cnt, loff_t *ppos)
5431 {
5432         char buf[64];
5433         int r;
5434
5435         arch_spin_lock(&trace_cmdline_lock);
5436         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5437         arch_spin_unlock(&trace_cmdline_lock);
5438
5439         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5440 }
5441
5442 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5443 {
5444         kfree(s->saved_cmdlines);
5445         kfree(s->map_cmdline_to_pid);
5446         kfree(s);
5447 }
5448
5449 static int tracing_resize_saved_cmdlines(unsigned int val)
5450 {
5451         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5452
5453         s = kmalloc(sizeof(*s), GFP_KERNEL);
5454         if (!s)
5455                 return -ENOMEM;
5456
5457         if (allocate_cmdlines_buffer(val, s) < 0) {
5458                 kfree(s);
5459                 return -ENOMEM;
5460         }
5461
5462         arch_spin_lock(&trace_cmdline_lock);
5463         savedcmd_temp = savedcmd;
5464         savedcmd = s;
5465         arch_spin_unlock(&trace_cmdline_lock);
5466         free_saved_cmdlines_buffer(savedcmd_temp);
5467
5468         return 0;
5469 }
5470
5471 static ssize_t
5472 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5473                                   size_t cnt, loff_t *ppos)
5474 {
5475         unsigned long val;
5476         int ret;
5477
5478         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5479         if (ret)
5480                 return ret;
5481
5482         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5483         if (!val || val > PID_MAX_DEFAULT)
5484                 return -EINVAL;
5485
5486         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5487         if (ret < 0)
5488                 return ret;
5489
5490         *ppos += cnt;
5491
5492         return cnt;
5493 }
5494
5495 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5496         .open           = tracing_open_generic,
5497         .read           = tracing_saved_cmdlines_size_read,
5498         .write          = tracing_saved_cmdlines_size_write,
5499 };
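/*
 * Usage sketch (illustrative): writing a number between 1 and
 * PID_MAX_DEFAULT resizes the saved comm-pid cache through
 * tracing_resize_saved_cmdlines() above:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   1024
 */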
5500
5501 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5502 static union trace_eval_map_item *
5503 update_eval_map(union trace_eval_map_item *ptr)
5504 {
5505         if (!ptr->map.eval_string) {
5506                 if (ptr->tail.next) {
5507                         ptr = ptr->tail.next;
5508                         /* Set ptr to the next real item (skip head) */
5509                         ptr++;
5510                 } else
5511                         return NULL;
5512         }
5513         return ptr;
5514 }
5515
5516 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5517 {
5518         union trace_eval_map_item *ptr = v;
5519
5520         /*
5521          * Paranoid! If ptr points to end, we don't want to increment past it.
5522          * This really should never happen.
5523          */
5524         (*pos)++;
5525         ptr = update_eval_map(ptr);
5526         if (WARN_ON_ONCE(!ptr))
5527                 return NULL;
5528
5529         ptr++;
5530         ptr = update_eval_map(ptr);
5531
5532         return ptr;
5533 }
5534
5535 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5536 {
5537         union trace_eval_map_item *v;
5538         loff_t l = 0;
5539
5540         mutex_lock(&trace_eval_mutex);
5541
5542         v = trace_eval_maps;
5543         if (v)
5544                 v++;
5545
5546         while (v && l < *pos) {
5547                 v = eval_map_next(m, v, &l);
5548         }
5549
5550         return v;
5551 }
5552
5553 static void eval_map_stop(struct seq_file *m, void *v)
5554 {
5555         mutex_unlock(&trace_eval_mutex);
5556 }
5557
5558 static int eval_map_show(struct seq_file *m, void *v)
5559 {
5560         union trace_eval_map_item *ptr = v;
5561
5562         seq_printf(m, "%s %ld (%s)\n",
5563                    ptr->map.eval_string, ptr->map.eval_value,
5564                    ptr->map.system);
5565
5566         return 0;
5567 }
5568
5569 static const struct seq_operations tracing_eval_map_seq_ops = {
5570         .start          = eval_map_start,
5571         .next           = eval_map_next,
5572         .stop           = eval_map_stop,
5573         .show           = eval_map_show,
5574 };
5575
5576 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5577 {
5578         int ret;
5579
5580         ret = tracing_check_open_get_tr(NULL);
5581         if (ret)
5582                 return ret;
5583
5584         return seq_open(filp, &tracing_eval_map_seq_ops);
5585 }
5586
5587 static const struct file_operations tracing_eval_map_fops = {
5588         .open           = tracing_eval_map_open,
5589         .read           = seq_read,
5590         .llseek         = seq_lseek,
5591         .release        = seq_release,
5592 };
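/*
 * Output sketch (illustrative, names are made up): eval_map_show() prints
 * one "<eval string> <value> (<system>)" line per registered map entry:
 *
 *   # cat /sys/kernel/tracing/eval_map
 *   MY_ENUM_FOO 1 (my_subsys)
 *   MY_ENUM_BAR 2 (my_subsys)
 */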
5593
5594 static inline union trace_eval_map_item *
5595 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5596 {
5597         /* Return tail of array given the head */
5598         return ptr + ptr->head.length + 1;
5599 }
5600
5601 static void
5602 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5603                            int len)
5604 {
5605         struct trace_eval_map **stop;
5606         struct trace_eval_map **map;
5607         union trace_eval_map_item *map_array;
5608         union trace_eval_map_item *ptr;
5609
5610         stop = start + len;
5611
5612         /*
5613          * The trace_eval_maps contains the map plus a head and tail item,
5614          * where the head holds the module and length of array, and the
5615          * tail holds a pointer to the next list.
5616          */
5617         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5618         if (!map_array) {
5619                 pr_warn("Unable to allocate trace eval mapping\n");
5620                 return;
5621         }
5622
5623         mutex_lock(&trace_eval_mutex);
5624
5625         if (!trace_eval_maps)
5626                 trace_eval_maps = map_array;
5627         else {
5628                 ptr = trace_eval_maps;
5629                 for (;;) {
5630                         ptr = trace_eval_jmp_to_tail(ptr);
5631                         if (!ptr->tail.next)
5632                                 break;
5633                         ptr = ptr->tail.next;
5634
5635                 }
5636                 ptr->tail.next = map_array;
5637         }
5638         map_array->head.mod = mod;
5639         map_array->head.length = len;
5640         map_array++;
5641
5642         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5643                 map_array->map = **map;
5644                 map_array++;
5645         }
5646         memset(map_array, 0, sizeof(*map_array));
5647
5648         mutex_unlock(&trace_eval_mutex);
5649 }
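/*
 * Layout sketch (illustrative) of one map_array chunk built above: a head
 * item holding the module and length, @len copied map items, and a zeroed
 * tail item whose tail.next can chain to the next chunk:
 *
 *   [ head(mod, len) ][ map 0 ] ... [ map len-1 ][ tail(next = NULL) ]
 */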
5650
5651 static void trace_create_eval_file(struct dentry *d_tracer)
5652 {
5653         trace_create_file("eval_map", 0444, d_tracer,
5654                           NULL, &tracing_eval_map_fops);
5655 }
5656
5657 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5658 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5659 static inline void trace_insert_eval_map_file(struct module *mod,
5660                               struct trace_eval_map **start, int len) { }
5661 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5662
5663 static void trace_insert_eval_map(struct module *mod,
5664                                   struct trace_eval_map **start, int len)
5665 {
5666         struct trace_eval_map **map;
5667
5668         if (len <= 0)
5669                 return;
5670
5671         map = start;
5672
5673         trace_event_eval_update(map, len);
5674
5675         trace_insert_eval_map_file(mod, start, len);
5676 }
5677
5678 static ssize_t
5679 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5680                        size_t cnt, loff_t *ppos)
5681 {
5682         struct trace_array *tr = filp->private_data;
5683         char buf[MAX_TRACER_SIZE+2];
5684         int r;
5685
5686         mutex_lock(&trace_types_lock);
5687         r = sprintf(buf, "%s\n", tr->current_trace->name);
5688         mutex_unlock(&trace_types_lock);
5689
5690         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5691 }
5692
5693 int tracer_init(struct tracer *t, struct trace_array *tr)
5694 {
5695         tracing_reset_online_cpus(&tr->array_buffer);
5696         return t->init(tr);
5697 }
5698
5699 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5700 {
5701         int cpu;
5702
5703         for_each_tracing_cpu(cpu)
5704                 per_cpu_ptr(buf->data, cpu)->entries = val;
5705 }
5706
5707 #ifdef CONFIG_TRACER_MAX_TRACE
5708 /* resize @trace_buf's entries to the size of @size_buf's entries */
5709 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5710                                         struct array_buffer *size_buf, int cpu_id)
5711 {
5712         int cpu, ret = 0;
5713
5714         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5715                 for_each_tracing_cpu(cpu) {
5716                         ret = ring_buffer_resize(trace_buf->buffer,
5717                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5718                         if (ret < 0)
5719                                 break;
5720                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5721                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5722                 }
5723         } else {
5724                 ret = ring_buffer_resize(trace_buf->buffer,
5725                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5726                 if (ret == 0)
5727                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5728                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5729         }
5730
5731         return ret;
5732 }
5733 #endif /* CONFIG_TRACER_MAX_TRACE */
5734
5735 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5736                                         unsigned long size, int cpu)
5737 {
5738         int ret;
5739
5740         /*
5741          * If kernel or user changes the size of the ring buffer
5742          * we use the size that was given, and we can forget about
5743          * expanding it later.
5744          */
5745         ring_buffer_expanded = true;
5746
5747         /* May be called before buffers are initialized */
5748         if (!tr->array_buffer.buffer)
5749                 return 0;
5750
5751         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5752         if (ret < 0)
5753                 return ret;
5754
5755 #ifdef CONFIG_TRACER_MAX_TRACE
5756         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5757             !tr->current_trace->use_max_tr)
5758                 goto out;
5759
5760         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5761         if (ret < 0) {
5762                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5763                                                      &tr->array_buffer, cpu);
5764                 if (r < 0) {
5765                         /*
5766                          * AARGH! We are left with a max buffer of a
5767                          * different size than the main buffer!
5768                          * The max buffer is our "snapshot" buffer.
5769                          * When a tracer needs a snapshot (one of the
5770                          * latency tracers), it swaps the max buffer
5771                          * with the saved snapshot. We succeeded in
5772                          * updating the size of the main buffer, but
5773                          * failed to update the size of the max buffer.
5774                          * And when we tried to reset the main buffer to
5775                          * its original size, we failed there too. This
5776                          * is very unlikely to happen, but if it does,
5777                          * warn and kill all tracing.
5778                          */
5779                         WARN_ON(1);
5780                         tracing_disabled = 1;
5781                 }
5782                 return ret;
5783         }
5784
5785         if (cpu == RING_BUFFER_ALL_CPUS)
5786                 set_buffer_entries(&tr->max_buffer, size);
5787         else
5788                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5789
5790  out:
5791 #endif /* CONFIG_TRACER_MAX_TRACE */
5792
5793         if (cpu == RING_BUFFER_ALL_CPUS)
5794                 set_buffer_entries(&tr->array_buffer, size);
5795         else
5796                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5797
5798         return ret;
5799 }
5800
5801 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5802                                   unsigned long size, int cpu_id)
5803 {
5804         int ret = size;
5805
5806         mutex_lock(&trace_types_lock);
5807
5808         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5809                 /* make sure this cpu is enabled in the mask */
5810                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5811                         ret = -EINVAL;
5812                         goto out;
5813                 }
5814         }
5815
5816         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5817         if (ret < 0)
5818                 ret = -ENOMEM;
5819
5820 out:
5821         mutex_unlock(&trace_types_lock);
5822
5823         return ret;
5824 }
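/*
 * Usage sketch (illustrative): the buffer_size_kb files ultimately call
 * into this path; the top-level file resizes every CPU
 * (RING_BUFFER_ALL_CPUS), while the per_cpu variants resize a single
 * CPU's buffer:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1408 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 */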
5825
5826
5827 /**
5828  * tracing_update_buffers - used by tracing facility to expand ring buffers
5829  *
5830  * To save memory when tracing is never used on a system that has it
5831  * configured in, the ring buffers are set to a minimum size. But once
5832  * a user starts to use the tracing facility, they need to grow to
5833  * their default size.
5834  *
5835  * This function is to be called when a tracer is about to be used.
5836  */
5837 int tracing_update_buffers(void)
5838 {
5839         int ret = 0;
5840
5841         mutex_lock(&trace_types_lock);
5842         if (!ring_buffer_expanded)
5843                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5844                                                 RING_BUFFER_ALL_CPUS);
5845         mutex_unlock(&trace_types_lock);
5846
5847         return ret;
5848 }
5849
5850 struct trace_option_dentry;
5851
5852 static void
5853 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5854
5855 /*
5856  * Used to clear out the tracer before deletion of an instance.
5857  * Must have trace_types_lock held.
5858  */
5859 static void tracing_set_nop(struct trace_array *tr)
5860 {
5861         if (tr->current_trace == &nop_trace)
5862                 return;
5863
5864         tr->current_trace->enabled--;
5865
5866         if (tr->current_trace->reset)
5867                 tr->current_trace->reset(tr);
5868
5869         tr->current_trace = &nop_trace;
5870 }
5871
5872 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5873 {
5874         /* Only enable if the directory has been created already. */
5875         if (!tr->dir)
5876                 return;
5877
5878         create_trace_option_files(tr, t);
5879 }
5880
5881 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5882 {
5883         struct tracer *t;
5884 #ifdef CONFIG_TRACER_MAX_TRACE
5885         bool had_max_tr;
5886 #endif
5887         int ret = 0;
5888
5889         mutex_lock(&trace_types_lock);
5890
5891         if (!ring_buffer_expanded) {
5892                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5893                                                 RING_BUFFER_ALL_CPUS);
5894                 if (ret < 0)
5895                         goto out;
5896                 ret = 0;
5897         }
5898
5899         for (t = trace_types; t; t = t->next) {
5900                 if (strcmp(t->name, buf) == 0)
5901                         break;
5902         }
5903         if (!t) {
5904                 ret = -EINVAL;
5905                 goto out;
5906         }
5907         if (t == tr->current_trace)
5908                 goto out;
5909
5910 #ifdef CONFIG_TRACER_SNAPSHOT
5911         if (t->use_max_tr) {
5912                 arch_spin_lock(&tr->max_lock);
5913                 if (tr->cond_snapshot)
5914                         ret = -EBUSY;
5915                 arch_spin_unlock(&tr->max_lock);
5916                 if (ret)
5917                         goto out;
5918         }
5919 #endif
5920         /* Some tracers won't work on kernel command line */
5921         if (system_state < SYSTEM_RUNNING && t->noboot) {
5922                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5923                         t->name);
5924                 goto out;
5925         }
5926
5927         /* Some tracers are only allowed for the top level buffer */
5928         if (!trace_ok_for_array(t, tr)) {
5929                 ret = -EINVAL;
5930                 goto out;
5931         }
5932
5933         /* If trace pipe files are being read, we can't change the tracer */
5934         if (tr->trace_ref) {
5935                 ret = -EBUSY;
5936                 goto out;
5937         }
5938
5939         trace_branch_disable();
5940
5941         tr->current_trace->enabled--;
5942
5943         if (tr->current_trace->reset)
5944                 tr->current_trace->reset(tr);
5945
5946         /* Current trace needs to be nop_trace before synchronize_rcu */
5947         tr->current_trace = &nop_trace;
5948
5949 #ifdef CONFIG_TRACER_MAX_TRACE
5950         had_max_tr = tr->allocated_snapshot;
5951
5952         if (had_max_tr && !t->use_max_tr) {
5953                 /*
5954                  * We need to make sure that update_max_tr() sees that
5955                  * current_trace changed to nop_trace to keep it from
5956                  * swapping the buffers after we resize it.
5957                  * update_max_tr() is called with interrupts disabled,
5958                  * so a synchronize_rcu() is sufficient.
5959                  */
5960                 synchronize_rcu();
5961                 free_snapshot(tr);
5962         }
5963 #endif
5964
5965 #ifdef CONFIG_TRACER_MAX_TRACE
5966         if (t->use_max_tr && !had_max_tr) {
5967                 ret = tracing_alloc_snapshot_instance(tr);
5968                 if (ret < 0)
5969                         goto out;
5970         }
5971 #endif
5972
5973         if (t->init) {
5974                 ret = tracer_init(t, tr);
5975                 if (ret)
5976                         goto out;
5977         }
5978
5979         tr->current_trace = t;
5980         tr->current_trace->enabled++;
5981         trace_branch_enable(tr);
5982  out:
5983         mutex_unlock(&trace_types_lock);
5984
5985         return ret;
5986 }
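/*
 * Usage sketch (illustrative): tracing_set_tracer() backs the
 * current_tracer file; the name written must match an entry in
 * available_tracers:
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 */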
5987
5988 static ssize_t
5989 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5990                         size_t cnt, loff_t *ppos)
5991 {
5992         struct trace_array *tr = filp->private_data;
5993         char buf[MAX_TRACER_SIZE+1];
5994         int i;
5995         size_t ret;
5996         int err;
5997
5998         ret = cnt;
5999
6000         if (cnt > MAX_TRACER_SIZE)
6001                 cnt = MAX_TRACER_SIZE;
6002
6003         if (copy_from_user(buf, ubuf, cnt))
6004                 return -EFAULT;
6005
6006         buf[cnt] = 0;
6007
6008         /* strip ending whitespace. */
6009         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6010                 buf[i] = 0;
6011
6012         err = tracing_set_tracer(tr, buf);
6013         if (err)
6014                 return err;
6015
6016         *ppos += ret;
6017
6018         return ret;
6019 }
6020
6021 static ssize_t
6022 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6023                    size_t cnt, loff_t *ppos)
6024 {
6025         char buf[64];
6026         int r;
6027
6028         r = snprintf(buf, sizeof(buf), "%ld\n",
6029                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6030         if (r > sizeof(buf))
6031                 r = sizeof(buf);
6032         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6033 }
6034
6035 static ssize_t
6036 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6037                     size_t cnt, loff_t *ppos)
6038 {
6039         unsigned long val;
6040         int ret;
6041
6042         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6043         if (ret)
6044                 return ret;
6045
6046         *ptr = val * 1000;
6047
6048         return cnt;
6049 }
6050
6051 static ssize_t
6052 tracing_thresh_read(struct file *filp, char __user *ubuf,
6053                     size_t cnt, loff_t *ppos)
6054 {
6055         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6056 }
6057
6058 static ssize_t
6059 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6060                      size_t cnt, loff_t *ppos)
6061 {
6062         struct trace_array *tr = filp->private_data;
6063         int ret;
6064
6065         mutex_lock(&trace_types_lock);
6066         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6067         if (ret < 0)
6068                 goto out;
6069
6070         if (tr->current_trace->update_thresh) {
6071                 ret = tr->current_trace->update_thresh(tr);
6072                 if (ret < 0)
6073                         goto out;
6074         }
6075
6076         ret = cnt;
6077 out:
6078         mutex_unlock(&trace_types_lock);
6079
6080         return ret;
6081 }
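/*
 * Usage sketch (illustrative): tracing_thresh is read and written in
 * microseconds; tracing_nsecs_write() above multiplies by 1000 before
 * storing nanoseconds:
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh    (100 usecs)
 *   # cat /sys/kernel/tracing/tracing_thresh
 *   100
 */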
6082
6083 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6084
6085 static ssize_t
6086 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6087                      size_t cnt, loff_t *ppos)
6088 {
6089         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6090 }
6091
6092 static ssize_t
6093 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6094                       size_t cnt, loff_t *ppos)
6095 {
6096         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6097 }
6098
6099 #endif
6100
6101 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6102 {
6103         struct trace_array *tr = inode->i_private;
6104         struct trace_iterator *iter;
6105         int ret;
6106
6107         ret = tracing_check_open_get_tr(tr);
6108         if (ret)
6109                 return ret;
6110
6111         mutex_lock(&trace_types_lock);
6112
6113         /* create a buffer to store the information to pass to userspace */
6114         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6115         if (!iter) {
6116                 ret = -ENOMEM;
6117                 __trace_array_put(tr);
6118                 goto out;
6119         }
6120
6121         trace_seq_init(&iter->seq);
6122         iter->trace = tr->current_trace;
6123
6124         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6125                 ret = -ENOMEM;
6126                 goto fail;
6127         }
6128
6129         /* trace pipe does not show start of buffer */
6130         cpumask_setall(iter->started);
6131
6132         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6133                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6134
6135         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6136         if (trace_clocks[tr->clock_id].in_ns)
6137                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6138
6139         iter->tr = tr;
6140         iter->array_buffer = &tr->array_buffer;
6141         iter->cpu_file = tracing_get_cpu(inode);
6142         mutex_init(&iter->mutex);
6143         filp->private_data = iter;
6144
6145         if (iter->trace->pipe_open)
6146                 iter->trace->pipe_open(iter);
6147
6148         nonseekable_open(inode, filp);
6149
6150         tr->trace_ref++;
6151 out:
6152         mutex_unlock(&trace_types_lock);
6153         return ret;
6154
6155 fail:
6156         kfree(iter);
6157         __trace_array_put(tr);
6158         mutex_unlock(&trace_types_lock);
6159         return ret;
6160 }
6161
6162 static int tracing_release_pipe(struct inode *inode, struct file *file)
6163 {
6164         struct trace_iterator *iter = file->private_data;
6165         struct trace_array *tr = inode->i_private;
6166
6167         mutex_lock(&trace_types_lock);
6168
6169         tr->trace_ref--;
6170
6171         if (iter->trace->pipe_close)
6172                 iter->trace->pipe_close(iter);
6173
6174         mutex_unlock(&trace_types_lock);
6175
6176         free_cpumask_var(iter->started);
6177         mutex_destroy(&iter->mutex);
6178         kfree(iter);
6179
6180         trace_array_put(tr);
6181
6182         return 0;
6183 }
6184
6185 static __poll_t
6186 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6187 {
6188         struct trace_array *tr = iter->tr;
6189
6190         /* Iterators are static, they should be filled or empty */
6191         if (trace_buffer_iter(iter, iter->cpu_file))
6192                 return EPOLLIN | EPOLLRDNORM;
6193
6194         if (tr->trace_flags & TRACE_ITER_BLOCK)
6195                 /*
6196                  * Always select as readable when in blocking mode
6197                  */
6198                 return EPOLLIN | EPOLLRDNORM;
6199         else
6200                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6201                                              filp, poll_table);
6202 }
6203
6204 static __poll_t
6205 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6206 {
6207         struct trace_iterator *iter = filp->private_data;
6208
6209         return trace_poll(iter, filp, poll_table);
6210 }
6211
6212 /* Must be called with iter->mutex held. */
6213 static int tracing_wait_pipe(struct file *filp)
6214 {
6215         struct trace_iterator *iter = filp->private_data;
6216         int ret;
6217
6218         while (trace_empty(iter)) {
6219
6220                 if ((filp->f_flags & O_NONBLOCK)) {
6221                         return -EAGAIN;
6222                 }
6223
6224                 /*
6225                  * We block until we read something and tracing is disabled.
6226                  * We still block if tracing is disabled, but we have never
6227                  * read anything. This allows a user to cat this file, and
6228                  * then enable tracing. But after we have read something,
6229                  * we give an EOF when tracing is again disabled.
6230                  *
6231                  * iter->pos will be 0 if we haven't read anything.
6232                  */
6233                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6234                         break;
6235
6236                 mutex_unlock(&iter->mutex);
6237
6238                 ret = wait_on_pipe(iter, 0);
6239
6240                 mutex_lock(&iter->mutex);
6241
6242                 if (ret)
6243                         return ret;
6244         }
6245
6246         return 1;
6247 }
6248
6249 /*
6250  * Consumer reader.
6251  */
6252 static ssize_t
6253 tracing_read_pipe(struct file *filp, char __user *ubuf,
6254                   size_t cnt, loff_t *ppos)
6255 {
6256         struct trace_iterator *iter = filp->private_data;
6257         ssize_t sret;
6258
6259         /*
6260          * Avoid more than one consumer on a single file descriptor.
6261          * This is just a matter of trace coherency; the ring buffer itself
6262          * is protected.
6263          */
6264         mutex_lock(&iter->mutex);
6265
6266         /* return any leftover data */
6267         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6268         if (sret != -EBUSY)
6269                 goto out;
6270
6271         trace_seq_init(&iter->seq);
6272
6273         if (iter->trace->read) {
6274                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6275                 if (sret)
6276                         goto out;
6277         }
6278
6279 waitagain:
6280         sret = tracing_wait_pipe(filp);
6281         if (sret <= 0)
6282                 goto out;
6283
6284         /* stop when tracing is finished */
6285         if (trace_empty(iter)) {
6286                 sret = 0;
6287                 goto out;
6288         }
6289
6290         if (cnt >= PAGE_SIZE)
6291                 cnt = PAGE_SIZE - 1;
6292
6293         /* reset all but tr, trace, and overruns */
6294         memset(&iter->seq, 0,
6295                sizeof(struct trace_iterator) -
6296                offsetof(struct trace_iterator, seq));
6297         cpumask_clear(iter->started);
6298         trace_seq_init(&iter->seq);
6299         iter->pos = -1;
6300
6301         trace_event_read_lock();
6302         trace_access_lock(iter->cpu_file);
6303         while (trace_find_next_entry_inc(iter) != NULL) {
6304                 enum print_line_t ret;
6305                 int save_len = iter->seq.seq.len;
6306
6307                 ret = print_trace_line(iter);
6308                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6309                         /* don't print partial lines */
6310                         iter->seq.seq.len = save_len;
6311                         break;
6312                 }
6313                 if (ret != TRACE_TYPE_NO_CONSUME)
6314                         trace_consume(iter);
6315
6316                 if (trace_seq_used(&iter->seq) >= cnt)
6317                         break;
6318
6319                 /*
6320                  * Setting the full flag means we reached the trace_seq buffer
6321                  * size and should have left via the partial-line condition
6322                  * above. One of the trace_seq_* functions is not used properly.
6323                  */
6324                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6325                           iter->ent->type);
6326         }
6327         trace_access_unlock(iter->cpu_file);
6328         trace_event_read_unlock();
6329
6330         /* Now copy what we have to the user */
6331         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6332         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6333                 trace_seq_init(&iter->seq);
6334
6335         /*
6336          * If there was nothing to send to user, in spite of consuming trace
6337          * entries, go back to wait for more entries.
6338          */
6339         if (sret == -EBUSY)
6340                 goto waitagain;
6341
6342 out:
6343         mutex_unlock(&iter->mutex);
6344
6345         return sret;
6346 }
6347
6348 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6349                                      unsigned int idx)
6350 {
6351         __free_page(spd->pages[idx]);
6352 }
6353
6354 static size_t
6355 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6356 {
6357         size_t count;
6358         int save_len;
6359         int ret;
6360
6361         /* Seq buffer is page-sized, exactly what we need. */
6362         for (;;) {
6363                 save_len = iter->seq.seq.len;
6364                 ret = print_trace_line(iter);
6365
6366                 if (trace_seq_has_overflowed(&iter->seq)) {
6367                         iter->seq.seq.len = save_len;
6368                         break;
6369                 }
6370
6371                 /*
6372                  * This should not be hit, because it should only
6373                  * be set if the iter->seq overflowed. But check it
6374                  * anyway to be safe.
6375                  */
6376                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6377                         iter->seq.seq.len = save_len;
6378                         break;
6379                 }
6380
6381                 count = trace_seq_used(&iter->seq) - save_len;
6382                 if (rem < count) {
6383                         rem = 0;
6384                         iter->seq.seq.len = save_len;
6385                         break;
6386                 }
6387
6388                 if (ret != TRACE_TYPE_NO_CONSUME)
6389                         trace_consume(iter);
6390                 rem -= count;
6391                 if (!trace_find_next_entry_inc(iter)) {
6392                         rem = 0;
6393                         iter->ent = NULL;
6394                         break;
6395                 }
6396         }
6397
6398         return rem;
6399 }
6400
6401 static ssize_t tracing_splice_read_pipe(struct file *filp,
6402                                         loff_t *ppos,
6403                                         struct pipe_inode_info *pipe,
6404                                         size_t len,
6405                                         unsigned int flags)
6406 {
6407         struct page *pages_def[PIPE_DEF_BUFFERS];
6408         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6409         struct trace_iterator *iter = filp->private_data;
6410         struct splice_pipe_desc spd = {
6411                 .pages          = pages_def,
6412                 .partial        = partial_def,
6413                 .nr_pages       = 0, /* This gets updated below. */
6414                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6415                 .ops            = &default_pipe_buf_ops,
6416                 .spd_release    = tracing_spd_release_pipe,
6417         };
6418         ssize_t ret;
6419         size_t rem;
6420         unsigned int i;
6421
6422         if (splice_grow_spd(pipe, &spd))
6423                 return -ENOMEM;
6424
6425         mutex_lock(&iter->mutex);
6426
6427         if (iter->trace->splice_read) {
6428                 ret = iter->trace->splice_read(iter, filp,
6429                                                ppos, pipe, len, flags);
6430                 if (ret)
6431                         goto out_err;
6432         }
6433
6434         ret = tracing_wait_pipe(filp);
6435         if (ret <= 0)
6436                 goto out_err;
6437
6438         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6439                 ret = -EFAULT;
6440                 goto out_err;
6441         }
6442
6443         trace_event_read_lock();
6444         trace_access_lock(iter->cpu_file);
6445
6446         /* Fill as many pages as possible. */
6447         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6448                 spd.pages[i] = alloc_page(GFP_KERNEL);
6449                 if (!spd.pages[i])
6450                         break;
6451
6452                 rem = tracing_fill_pipe_page(rem, iter);
6453
6454                 /* Copy the data into the page, so we can start over. */
6455                 ret = trace_seq_to_buffer(&iter->seq,
6456                                           page_address(spd.pages[i]),
6457                                           trace_seq_used(&iter->seq));
6458                 if (ret < 0) {
6459                         __free_page(spd.pages[i]);
6460                         break;
6461                 }
6462                 spd.partial[i].offset = 0;
6463                 spd.partial[i].len = trace_seq_used(&iter->seq);
6464
6465                 trace_seq_init(&iter->seq);
6466         }
6467
6468         trace_access_unlock(iter->cpu_file);
6469         trace_event_read_unlock();
6470         mutex_unlock(&iter->mutex);
6471
6472         spd.nr_pages = i;
6473
6474         if (i)
6475                 ret = splice_to_pipe(pipe, &spd);
6476         else
6477                 ret = 0;
6478 out:
6479         splice_shrink_spd(&spd);
6480         return ret;
6481
6482 out_err:
6483         mutex_unlock(&iter->mutex);
6484         goto out;
6485 }
6486
6487 static ssize_t
6488 tracing_entries_read(struct file *filp, char __user *ubuf,
6489                      size_t cnt, loff_t *ppos)
6490 {
6491         struct inode *inode = file_inode(filp);
6492         struct trace_array *tr = inode->i_private;
6493         int cpu = tracing_get_cpu(inode);
6494         char buf[64];
6495         int r = 0;
6496         ssize_t ret;
6497
6498         mutex_lock(&trace_types_lock);
6499
6500         if (cpu == RING_BUFFER_ALL_CPUS) {
6501                 int cpu, buf_size_same;
6502                 unsigned long size;
6503
6504                 size = 0;
6505                 buf_size_same = 1;
6506                 /* check if all cpu sizes are same */
6507                 for_each_tracing_cpu(cpu) {
6508                         /* fill in the size from first enabled cpu */
6509                         if (size == 0)
6510                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6511                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6512                                 buf_size_same = 0;
6513                                 break;
6514                         }
6515                 }
6516
6517                 if (buf_size_same) {
6518                         if (!ring_buffer_expanded)
6519                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6520                                             size >> 10,
6521                                             trace_buf_size >> 10);
6522                         else
6523                                 r = sprintf(buf, "%lu\n", size >> 10);
6524                 } else
6525                         r = sprintf(buf, "X\n");
6526         } else
6527                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6528
6529         mutex_unlock(&trace_types_lock);
6530
6531         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6532         return ret;
6533 }
6534
6535 static ssize_t
6536 tracing_entries_write(struct file *filp, const char __user *ubuf,
6537                       size_t cnt, loff_t *ppos)
6538 {
6539         struct inode *inode = file_inode(filp);
6540         struct trace_array *tr = inode->i_private;
6541         unsigned long val;
6542         int ret;
6543
6544         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6545         if (ret)
6546                 return ret;
6547
6548         /* must have at least 1 entry */
6549         if (!val)
6550                 return -EINVAL;
6551
6552         /* value is in KB */
6553         val <<= 10;
6554         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6555         if (ret < 0)
6556                 return ret;
6557
6558         *ppos += cnt;
6559
6560         return cnt;
6561 }
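
/*
 * Illustrative userspace sketch, not part of the original file: the value
 * written above is parsed as kilobytes and applied to the ring buffer(s)
 * selected by the file's inode (one CPU or all of them).  The tracefs
 * path is an assumption for the example.
 */
#if 0	/* example only, never compiled */
static void example_resize_ring_buffers(void)
{
        int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);

        if (fd >= 0) {
                write(fd, "1024", 4);   /* 1024 KB per CPU */
                close(fd);
        }
}
#endif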
6562
6563 static ssize_t
6564 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6565                                 size_t cnt, loff_t *ppos)
6566 {
6567         struct trace_array *tr = filp->private_data;
6568         char buf[64];
6569         int r, cpu;
6570         unsigned long size = 0, expanded_size = 0;
6571
6572         mutex_lock(&trace_types_lock);
6573         for_each_tracing_cpu(cpu) {
6574                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6575                 if (!ring_buffer_expanded)
6576                         expanded_size += trace_buf_size >> 10;
6577         }
6578         if (ring_buffer_expanded)
6579                 r = sprintf(buf, "%lu\n", size);
6580         else
6581                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6582         mutex_unlock(&trace_types_lock);
6583
6584         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6585 }
6586
6587 static ssize_t
6588 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6589                           size_t cnt, loff_t *ppos)
6590 {
6591         /*
6592          * There is no need to read what the user has written; this function
6593          * only exists so that using "echo" on this file does not return an error.
6594          */
6595
6596         *ppos += cnt;
6597
6598         return cnt;
6599 }
6600
6601 static int
6602 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6603 {
6604         struct trace_array *tr = inode->i_private;
6605
6606         /* Disable tracing? */
6607         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6608                 tracer_tracing_off(tr);
6609         /* resize the ring buffer to 0 */
6610         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6611
6612         trace_array_put(tr);
6613
6614         return 0;
6615 }
6616
6617 static ssize_t
6618 tracing_mark_write(struct file *filp, const char __user *ubuf,
6619                                         size_t cnt, loff_t *fpos)
6620 {
6621         struct trace_array *tr = filp->private_data;
6622         struct ring_buffer_event *event;
6623         enum event_trigger_type tt = ETT_NONE;
6624         struct trace_buffer *buffer;
6625         struct print_entry *entry;
6626         unsigned long irq_flags;
6627         ssize_t written;
6628         int size;
6629         int len;
6630
6631 /* Used in tracing_mark_raw_write() as well */
6632 #define FAULTED_STR "<faulted>"
6633 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6634
6635         if (tracing_disabled)
6636                 return -EINVAL;
6637
6638         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6639                 return -EINVAL;
6640
6641         if (cnt > TRACE_BUF_SIZE)
6642                 cnt = TRACE_BUF_SIZE;
6643
6644         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6645
6646         local_save_flags(irq_flags);
6647         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6648
6649         /* If the write is shorter than "<faulted>", make sure we can still store that string */
6650         if (cnt < FAULTED_SIZE)
6651                 size += FAULTED_SIZE - cnt;
6652
6653         buffer = tr->array_buffer.buffer;
6654         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6655                                             irq_flags, preempt_count());
6656         if (unlikely(!event))
6657                 /* Ring buffer disabled, return as if not open for write */
6658                 return -EBADF;
6659
6660         entry = ring_buffer_event_data(event);
6661         entry->ip = _THIS_IP_;
6662
6663         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6664         if (len) {
6665                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6666                 cnt = FAULTED_SIZE;
6667                 written = -EFAULT;
6668         } else
6669                 written = cnt;
6670         len = cnt;
6671
6672         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6673                 /* do not add \n before testing triggers, but add \0 */
6674                 entry->buf[cnt] = '\0';
6675                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6676         }
6677
6678         if (entry->buf[cnt - 1] != '\n') {
6679                 entry->buf[cnt] = '\n';
6680                 entry->buf[cnt + 1] = '\0';
6681         } else
6682                 entry->buf[cnt] = '\0';
6683
6684         __buffer_unlock_commit(buffer, event);
6685
6686         if (tt)
6687                 event_triggers_post_call(tr->trace_marker_file, tt);
6688
6689         if (written > 0)
6690                 *fpos += written;
6691
6692         return written;
6693 }
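
/*
 * Illustrative userspace sketch, not part of the original file: writing to
 * trace_marker injects the text into the ring buffer as a print event via
 * the function above.  The tracefs path and message are assumptions for
 * the example.
 */
#if 0	/* example only, never compiled */
static void example_write_trace_marker(void)
{
        const char *msg = "hello from userspace";
        int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

        if (fd >= 0) {
                write(fd, msg, strlen(msg));
                close(fd);
        }
}
#endif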
6694
6695 /* Limit it for now to 3K (including tag) */
6696 #define RAW_DATA_MAX_SIZE (1024*3)
6697
6698 static ssize_t
6699 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6700                                         size_t cnt, loff_t *fpos)
6701 {
6702         struct trace_array *tr = filp->private_data;
6703         struct ring_buffer_event *event;
6704         struct trace_buffer *buffer;
6705         struct raw_data_entry *entry;
6706         unsigned long irq_flags;
6707         ssize_t written;
6708         int size;
6709         int len;
6710
6711 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6712
6713         if (tracing_disabled)
6714                 return -EINVAL;
6715
6716         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6717                 return -EINVAL;
6718
6719         /* The marker must at least have a tag id */
6720         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6721                 return -EINVAL;
6722
6723         if (cnt > TRACE_BUF_SIZE)
6724                 cnt = TRACE_BUF_SIZE;
6725
6726         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6727
6728         local_save_flags(irq_flags);
6729         size = sizeof(*entry) + cnt;
6730         if (cnt < FAULT_SIZE_ID)
6731                 size += FAULT_SIZE_ID - cnt;
6732
6733         buffer = tr->array_buffer.buffer;
6734         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6735                                             irq_flags, preempt_count());
6736         if (!event)
6737                 /* Ring buffer disabled, return as if not open for write */
6738                 return -EBADF;
6739
6740         entry = ring_buffer_event_data(event);
6741
6742         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6743         if (len) {
6744                 entry->id = -1;
6745                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6746                 written = -EFAULT;
6747         } else
6748                 written = cnt;
6749
6750         __buffer_unlock_commit(buffer, event);
6751
6752         if (written > 0)
6753                 *fpos += written;
6754
6755         return written;
6756 }
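
/*
 * Illustrative userspace sketch, not part of the original file: a raw
 * marker write must start with an unsigned int tag (consumed as
 * entry->id above) followed by opaque payload bytes.  The tag value,
 * payload and tracefs path are assumptions for the example.
 */
#if 0	/* example only, never compiled */
static void example_write_trace_marker_raw(void)
{
        struct {
                unsigned int id;        /* tag for a post-processing tool */
                char payload[8];
        } raw = { .id = 1234, .payload = "rawdata" };
        int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

        if (fd >= 0) {
                write(fd, &raw, sizeof(raw));
                close(fd);
        }
}
#endif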
6757
6758 static int tracing_clock_show(struct seq_file *m, void *v)
6759 {
6760         struct trace_array *tr = m->private;
6761         int i;
6762
6763         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6764                 seq_printf(m,
6765                         "%s%s%s%s", i ? " " : "",
6766                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6767                         i == tr->clock_id ? "]" : "");
6768         seq_putc(m, '\n');
6769
6770         return 0;
6771 }
6772
6773 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6774 {
6775         int i;
6776
6777         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6778                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6779                         break;
6780         }
6781         if (i == ARRAY_SIZE(trace_clocks))
6782                 return -EINVAL;
6783
6784         mutex_lock(&trace_types_lock);
6785
6786         tr->clock_id = i;
6787
6788         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6789
6790         /*
6791          * New clock may not be consistent with the previous clock.
6792          * Reset the buffer so that it doesn't have incomparable timestamps.
6793          */
6794         tracing_reset_online_cpus(&tr->array_buffer);
6795
6796 #ifdef CONFIG_TRACER_MAX_TRACE
6797         if (tr->max_buffer.buffer)
6798                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6799         tracing_reset_online_cpus(&tr->max_buffer);
6800 #endif
6801
6802         mutex_unlock(&trace_types_lock);
6803
6804         return 0;
6805 }
6806
6807 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6808                                    size_t cnt, loff_t *fpos)
6809 {
6810         struct seq_file *m = filp->private_data;
6811         struct trace_array *tr = m->private;
6812         char buf[64];
6813         const char *clockstr;
6814         int ret;
6815
6816         if (cnt >= sizeof(buf))
6817                 return -EINVAL;
6818
6819         if (copy_from_user(buf, ubuf, cnt))
6820                 return -EFAULT;
6821
6822         buf[cnt] = 0;
6823
6824         clockstr = strstrip(buf);
6825
6826         ret = tracing_set_clock(tr, clockstr);
6827         if (ret)
6828                 return ret;
6829
6830         *fpos += cnt;
6831
6832         return cnt;
6833 }
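
/*
 * Illustrative userspace sketch, not part of the original file: a clock is
 * selected by writing one of the names listed by tracing_clock_show(),
 * e.g. "mono".  The tracefs path is an assumption for the example.
 */
#if 0	/* example only, never compiled */
static void example_select_trace_clock(void)
{
        int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);

        if (fd >= 0) {
                write(fd, "mono", 4);
                close(fd);
        }
}
#endif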
6834
6835 static int tracing_clock_open(struct inode *inode, struct file *file)
6836 {
6837         struct trace_array *tr = inode->i_private;
6838         int ret;
6839
6840         ret = tracing_check_open_get_tr(tr);
6841         if (ret)
6842                 return ret;
6843
6844         ret = single_open(file, tracing_clock_show, inode->i_private);
6845         if (ret < 0)
6846                 trace_array_put(tr);
6847
6848         return ret;
6849 }
6850
6851 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6852 {
6853         struct trace_array *tr = m->private;
6854
6855         mutex_lock(&trace_types_lock);
6856
6857         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6858                 seq_puts(m, "delta [absolute]\n");
6859         else
6860                 seq_puts(m, "[delta] absolute\n");
6861
6862         mutex_unlock(&trace_types_lock);
6863
6864         return 0;
6865 }
6866
6867 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6868 {
6869         struct trace_array *tr = inode->i_private;
6870         int ret;
6871
6872         ret = tracing_check_open_get_tr(tr);
6873         if (ret)
6874                 return ret;
6875
6876         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6877         if (ret < 0)
6878                 trace_array_put(tr);
6879
6880         return ret;
6881 }
6882
6883 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6884 {
6885         int ret = 0;
6886
6887         mutex_lock(&trace_types_lock);
6888
6889         if (abs && tr->time_stamp_abs_ref++)
6890                 goto out;
6891
6892         if (!abs) {
6893                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6894                         ret = -EINVAL;
6895                         goto out;
6896                 }
6897
6898                 if (--tr->time_stamp_abs_ref)
6899                         goto out;
6900         }
6901
6902         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6903
6904 #ifdef CONFIG_TRACER_MAX_TRACE
6905         if (tr->max_buffer.buffer)
6906                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6907 #endif
6908  out:
6909         mutex_unlock(&trace_types_lock);
6910
6911         return ret;
6912 }
6913
6914 struct ftrace_buffer_info {
6915         struct trace_iterator   iter;
6916         void                    *spare;
6917         unsigned int            spare_cpu;
6918         unsigned int            read;
6919 };
6920
6921 #ifdef CONFIG_TRACER_SNAPSHOT
6922 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6923 {
6924         struct trace_array *tr = inode->i_private;
6925         struct trace_iterator *iter;
6926         struct seq_file *m;
6927         int ret;
6928
6929         ret = tracing_check_open_get_tr(tr);
6930         if (ret)
6931                 return ret;
6932
6933         if (file->f_mode & FMODE_READ) {
6934                 iter = __tracing_open(inode, file, true);
6935                 if (IS_ERR(iter))
6936                         ret = PTR_ERR(iter);
6937         } else {
6938                 /* Writes still need the seq_file to hold the private data */
6939                 ret = -ENOMEM;
6940                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6941                 if (!m)
6942                         goto out;
6943                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6944                 if (!iter) {
6945                         kfree(m);
6946                         goto out;
6947                 }
6948                 ret = 0;
6949
6950                 iter->tr = tr;
6951                 iter->array_buffer = &tr->max_buffer;
6952                 iter->cpu_file = tracing_get_cpu(inode);
6953                 m->private = iter;
6954                 file->private_data = m;
6955         }
6956 out:
6957         if (ret < 0)
6958                 trace_array_put(tr);
6959
6960         return ret;
6961 }
6962
6963 static ssize_t
6964 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6965                        loff_t *ppos)
6966 {
6967         struct seq_file *m = filp->private_data;
6968         struct trace_iterator *iter = m->private;
6969         struct trace_array *tr = iter->tr;
6970         unsigned long val;
6971         int ret;
6972
6973         ret = tracing_update_buffers();
6974         if (ret < 0)
6975                 return ret;
6976
6977         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6978         if (ret)
6979                 return ret;
6980
6981         mutex_lock(&trace_types_lock);
6982
6983         if (tr->current_trace->use_max_tr) {
6984                 ret = -EBUSY;
6985                 goto out;
6986         }
6987
6988         arch_spin_lock(&tr->max_lock);
6989         if (tr->cond_snapshot)
6990                 ret = -EBUSY;
6991         arch_spin_unlock(&tr->max_lock);
6992         if (ret)
6993                 goto out;
6994
6995         switch (val) {
6996         case 0:
6997                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6998                         ret = -EINVAL;
6999                         break;
7000                 }
7001                 if (tr->allocated_snapshot)
7002                         free_snapshot(tr);
7003                 break;
7004         case 1:
7005 /* Only allow per-cpu swap if the ring buffer supports it */
7006 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7007                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7008                         ret = -EINVAL;
7009                         break;
7010                 }
7011 #endif
7012                 if (tr->allocated_snapshot)
7013                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7014                                         &tr->array_buffer, iter->cpu_file);
7015                 else
7016                         ret = tracing_alloc_snapshot_instance(tr);
7017                 if (ret < 0)
7018                         break;
7019                 local_irq_disable();
7020                 /* Now, we're going to swap */
7021                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7022                         update_max_tr(tr, current, smp_processor_id(), NULL);
7023                 else
7024                         update_max_tr_single(tr, current, iter->cpu_file);
7025                 local_irq_enable();
7026                 break;
7027         default:
7028                 if (tr->allocated_snapshot) {
7029                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7030                                 tracing_reset_online_cpus(&tr->max_buffer);
7031                         else
7032                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7033                 }
7034                 break;
7035         }
7036
7037         if (ret >= 0) {
7038                 *ppos += cnt;
7039                 ret = cnt;
7040         }
7041 out:
7042         mutex_unlock(&trace_types_lock);
7043         return ret;
7044 }
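
/*
 * Illustrative usage, not part of the original file, matching the switch
 * above: writing "1" to the snapshot file allocates the snapshot buffer if
 * needed and swaps it with the live buffer, "0" frees it, and any other
 * value clears its contents.  The tracefs path is an assumption for the
 * example.
 */
#if 0	/* example only, never compiled */
static void example_take_snapshot(void)
{
        int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);

        if (fd >= 0) {
                write(fd, "1", 1);      /* case 1 above: swap in a snapshot */
                close(fd);
        }
}
#endif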
7045
7046 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7047 {
7048         struct seq_file *m = file->private_data;
7049         int ret;
7050
7051         ret = tracing_release(inode, file);
7052
7053         if (file->f_mode & FMODE_READ)
7054                 return ret;
7055
7056         /* If write only, the seq_file is just a stub */
7057         if (m)
7058                 kfree(m->private);
7059         kfree(m);
7060
7061         return 0;
7062 }
7063
7064 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7065 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7066                                     size_t count, loff_t *ppos);
7067 static int tracing_buffers_release(struct inode *inode, struct file *file);
7068 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7069                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7070
7071 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7072 {
7073         struct ftrace_buffer_info *info;
7074         int ret;
7075
7076         /* The following checks for tracefs lockdown */
7077         ret = tracing_buffers_open(inode, filp);
7078         if (ret < 0)
7079                 return ret;
7080
7081         info = filp->private_data;
7082
7083         if (info->iter.trace->use_max_tr) {
7084                 tracing_buffers_release(inode, filp);
7085                 return -EBUSY;
7086         }
7087
7088         info->iter.snapshot = true;
7089         info->iter.array_buffer = &info->iter.tr->max_buffer;
7090
7091         return ret;
7092 }
7093
7094 #endif /* CONFIG_TRACER_SNAPSHOT */
7095
7096
7097 static const struct file_operations tracing_thresh_fops = {
7098         .open           = tracing_open_generic,
7099         .read           = tracing_thresh_read,
7100         .write          = tracing_thresh_write,
7101         .llseek         = generic_file_llseek,
7102 };
7103
7104 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7105 static const struct file_operations tracing_max_lat_fops = {
7106         .open           = tracing_open_generic,
7107         .read           = tracing_max_lat_read,
7108         .write          = tracing_max_lat_write,
7109         .llseek         = generic_file_llseek,
7110 };
7111 #endif
7112
7113 static const struct file_operations set_tracer_fops = {
7114         .open           = tracing_open_generic,
7115         .read           = tracing_set_trace_read,
7116         .write          = tracing_set_trace_write,
7117         .llseek         = generic_file_llseek,
7118 };
7119
7120 static const struct file_operations tracing_pipe_fops = {
7121         .open           = tracing_open_pipe,
7122         .poll           = tracing_poll_pipe,
7123         .read           = tracing_read_pipe,
7124         .splice_read    = tracing_splice_read_pipe,
7125         .release        = tracing_release_pipe,
7126         .llseek         = no_llseek,
7127 };
7128
7129 static const struct file_operations tracing_entries_fops = {
7130         .open           = tracing_open_generic_tr,
7131         .read           = tracing_entries_read,
7132         .write          = tracing_entries_write,
7133         .llseek         = generic_file_llseek,
7134         .release        = tracing_release_generic_tr,
7135 };
7136
7137 static const struct file_operations tracing_total_entries_fops = {
7138         .open           = tracing_open_generic_tr,
7139         .read           = tracing_total_entries_read,
7140         .llseek         = generic_file_llseek,
7141         .release        = tracing_release_generic_tr,
7142 };
7143
7144 static const struct file_operations tracing_free_buffer_fops = {
7145         .open           = tracing_open_generic_tr,
7146         .write          = tracing_free_buffer_write,
7147         .release        = tracing_free_buffer_release,
7148 };
7149
7150 static const struct file_operations tracing_mark_fops = {
7151         .open           = tracing_open_generic_tr,
7152         .write          = tracing_mark_write,
7153         .llseek         = generic_file_llseek,
7154         .release        = tracing_release_generic_tr,
7155 };
7156
7157 static const struct file_operations tracing_mark_raw_fops = {
7158         .open           = tracing_open_generic_tr,
7159         .write          = tracing_mark_raw_write,
7160         .llseek         = generic_file_llseek,
7161         .release        = tracing_release_generic_tr,
7162 };
7163
7164 static const struct file_operations trace_clock_fops = {
7165         .open           = tracing_clock_open,
7166         .read           = seq_read,
7167         .llseek         = seq_lseek,
7168         .release        = tracing_single_release_tr,
7169         .write          = tracing_clock_write,
7170 };
7171
7172 static const struct file_operations trace_time_stamp_mode_fops = {
7173         .open           = tracing_time_stamp_mode_open,
7174         .read           = seq_read,
7175         .llseek         = seq_lseek,
7176         .release        = tracing_single_release_tr,
7177 };
7178
7179 #ifdef CONFIG_TRACER_SNAPSHOT
7180 static const struct file_operations snapshot_fops = {
7181         .open           = tracing_snapshot_open,
7182         .read           = seq_read,
7183         .write          = tracing_snapshot_write,
7184         .llseek         = tracing_lseek,
7185         .release        = tracing_snapshot_release,
7186 };
7187
7188 static const struct file_operations snapshot_raw_fops = {
7189         .open           = snapshot_raw_open,
7190         .read           = tracing_buffers_read,
7191         .release        = tracing_buffers_release,
7192         .splice_read    = tracing_buffers_splice_read,
7193         .llseek         = no_llseek,
7194 };
7195
7196 #endif /* CONFIG_TRACER_SNAPSHOT */
7197
7198 #define TRACING_LOG_ERRS_MAX    8
7199 #define TRACING_LOG_LOC_MAX     128
7200
7201 #define CMD_PREFIX "  Command: "
7202
7203 struct err_info {
7204         const char      **errs; /* ptr to loc-specific array of err strings */
7205         u8              type;   /* index into errs -> specific err string */
7206         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7207         u64             ts;
7208 };
7209
7210 struct tracing_log_err {
7211         struct list_head        list;
7212         struct err_info         info;
7213         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7214         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7215 };
7216
7217 static DEFINE_MUTEX(tracing_err_log_lock);
7218
7219 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7220 {
7221         struct tracing_log_err *err;
7222
7223         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7224                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7225                 if (!err)
7226                         err = ERR_PTR(-ENOMEM);
7227                 tr->n_err_log_entries++;
7228
7229                 return err;
7230         }
7231
7232         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7233         list_del(&err->list);
7234
7235         return err;
7236 }
7237
7238 /**
7239  * err_pos - find the position of a string within a command for error careting
7240  * @cmd: The tracing command that caused the error
7241  * @str: The string to position the caret at within @cmd
7242  *
7243  * Finds the position of the first occurrence of @str within @cmd.  The
7244  * return value can be passed to tracing_log_err() for caret placement
7245  * within @cmd.
7246  *
7247  * Returns the index within @cmd of the first occurrence of @str or 0
7248  * if @str was not found.
7249  */
7250 unsigned int err_pos(char *cmd, const char *str)
7251 {
7252         char *found;
7253
7254         if (WARN_ON(!strlen(cmd)))
7255                 return 0;
7256
7257         found = strstr(cmd, str);
7258         if (found)
7259                 return found - cmd;
7260
7261         return 0;
7262 }
7263
7264 /**
7265  * tracing_log_err - write an error to the tracing error log
7266  * @tr: The associated trace array for the error (NULL for top level array)
7267  * @loc: A string describing where the error occurred
7268  * @cmd: The tracing command that caused the error
7269  * @errs: The array of loc-specific static error strings
7270  * @type: The index into errs[], which produces the specific static err string
7271  * @pos: The position the caret should be placed in the cmd
7272  *
7273  * Writes an error into tracing/error_log of the form:
7274  *
7275  * <loc>: error: <text>
7276  *   Command: <cmd>
7277  *              ^
7278  *
7279  * tracing/error_log is a small log file containing the last
7280  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7281  * unless there has been a tracing error, and the error log can be
7282  * cleared and have its memory freed by writing the empty string in
7283  * truncation mode to it, i.e. echo > tracing/error_log.
7284  *
7285  * NOTE: the @errs array along with the @type param are used to
7286  * produce a static error string - this string is not copied and saved
7287  * when the error is logged - only a pointer to it is saved.  See
7288  * existing callers for examples of how static strings are typically
7289  * defined for use with tracing_log_err().
7290  */
7291 void tracing_log_err(struct trace_array *tr,
7292                      const char *loc, const char *cmd,
7293                      const char **errs, u8 type, u8 pos)
7294 {
7295         struct tracing_log_err *err;
7296
7297         if (!tr)
7298                 tr = &global_trace;
7299
7300         mutex_lock(&tracing_err_log_lock);
7301         err = get_tracing_log_err(tr);
7302         if (PTR_ERR(err) == -ENOMEM) {
7303                 mutex_unlock(&tracing_err_log_lock);
7304                 return;
7305         }
7306
7307         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7308         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7309
7310         err->info.errs = errs;
7311         err->info.type = type;
7312         err->info.pos = pos;
7313         err->info.ts = local_clock();
7314
7315         list_add_tail(&err->list, &tr->err_log);
7316         mutex_unlock(&tracing_err_log_lock);
7317 }
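
/*
 * Illustrative in-kernel sketch, not part of the original file: how a
 * caller typically pairs a static, loc-specific error-string array with
 * tracing_log_err() and err_pos().  All names below (example_cmd_errs,
 * example_report_error, the "example" location and the "badword" token)
 * are hypothetical.
 */
#if 0	/* example only, never compiled */
static const char *example_cmd_errs[] = {
        "Unknown keyword",      /* type 0 */
        "Missing argument",     /* type 1 */
};

static void example_report_error(struct trace_array *tr, char *cmd)
{
        /* Place the caret under the first occurrence of "badword" in cmd */
        tracing_log_err(tr, "example", cmd, example_cmd_errs,
                        0, err_pos(cmd, "badword"));
}
#endif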
7318
7319 static void clear_tracing_err_log(struct trace_array *tr)
7320 {
7321         struct tracing_log_err *err, *next;
7322
7323         mutex_lock(&tracing_err_log_lock);
7324         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7325                 list_del(&err->list);
7326                 kfree(err);
7327         }
7328
7329         tr->n_err_log_entries = 0;
7330         mutex_unlock(&tracing_err_log_lock);
7331 }
7332
7333 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7334 {
7335         struct trace_array *tr = m->private;
7336
7337         mutex_lock(&tracing_err_log_lock);
7338
7339         return seq_list_start(&tr->err_log, *pos);
7340 }
7341
7342 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7343 {
7344         struct trace_array *tr = m->private;
7345
7346         return seq_list_next(v, &tr->err_log, pos);
7347 }
7348
7349 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7350 {
7351         mutex_unlock(&tracing_err_log_lock);
7352 }
7353
7354 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7355 {
7356         u8 i;
7357
7358         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7359                 seq_putc(m, ' ');
7360         for (i = 0; i < pos; i++)
7361                 seq_putc(m, ' ');
7362         seq_puts(m, "^\n");
7363 }
7364
7365 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7366 {
7367         struct tracing_log_err *err = v;
7368
7369         if (err) {
7370                 const char *err_text = err->info.errs[err->info.type];
7371                 u64 sec = err->info.ts;
7372                 u32 nsec;
7373
7374                 nsec = do_div(sec, NSEC_PER_SEC);
7375                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7376                            err->loc, err_text);
7377                 seq_printf(m, "%s", err->cmd);
7378                 tracing_err_log_show_pos(m, err->info.pos);
7379         }
7380
7381         return 0;
7382 }
7383
7384 static const struct seq_operations tracing_err_log_seq_ops = {
7385         .start  = tracing_err_log_seq_start,
7386         .next   = tracing_err_log_seq_next,
7387         .stop   = tracing_err_log_seq_stop,
7388         .show   = tracing_err_log_seq_show
7389 };
7390
7391 static int tracing_err_log_open(struct inode *inode, struct file *file)
7392 {
7393         struct trace_array *tr = inode->i_private;
7394         int ret = 0;
7395
7396         ret = tracing_check_open_get_tr(tr);
7397         if (ret)
7398                 return ret;
7399
7400         /* If this file was opened for write, then erase contents */
7401         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7402                 clear_tracing_err_log(tr);
7403
7404         if (file->f_mode & FMODE_READ) {
7405                 ret = seq_open(file, &tracing_err_log_seq_ops);
7406                 if (!ret) {
7407                         struct seq_file *m = file->private_data;
7408                         m->private = tr;
7409                 } else {
7410                         trace_array_put(tr);
7411                 }
7412         }
7413         return ret;
7414 }
7415
7416 static ssize_t tracing_err_log_write(struct file *file,
7417                                      const char __user *buffer,
7418                                      size_t count, loff_t *ppos)
7419 {
7420         return count;
7421 }
7422
7423 static int tracing_err_log_release(struct inode *inode, struct file *file)
7424 {
7425         struct trace_array *tr = inode->i_private;
7426
7427         trace_array_put(tr);
7428
7429         if (file->f_mode & FMODE_READ)
7430                 seq_release(inode, file);
7431
7432         return 0;
7433 }
7434
7435 static const struct file_operations tracing_err_log_fops = {
7436         .open           = tracing_err_log_open,
7437         .write          = tracing_err_log_write,
7438         .read           = seq_read,
7439         .llseek         = seq_lseek,
7440         .release        = tracing_err_log_release,
7441 };
7442
7443 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7444 {
7445         struct trace_array *tr = inode->i_private;
7446         struct ftrace_buffer_info *info;
7447         int ret;
7448
7449         ret = tracing_check_open_get_tr(tr);
7450         if (ret)
7451                 return ret;
7452
7453         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7454         if (!info) {
7455                 trace_array_put(tr);
7456                 return -ENOMEM;
7457         }
7458
7459         mutex_lock(&trace_types_lock);
7460
7461         info->iter.tr           = tr;
7462         info->iter.cpu_file     = tracing_get_cpu(inode);
7463         info->iter.trace        = tr->current_trace;
7464         info->iter.array_buffer = &tr->array_buffer;
7465         info->spare             = NULL;
7466         /* Force reading ring buffer for first read */
7467         info->read              = (unsigned int)-1;
7468
7469         filp->private_data = info;
7470
7471         tr->trace_ref++;
7472
7473         mutex_unlock(&trace_types_lock);
7474
7475         ret = nonseekable_open(inode, filp);
7476         if (ret < 0)
7477                 trace_array_put(tr);
7478
7479         return ret;
7480 }
7481
7482 static __poll_t
7483 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7484 {
7485         struct ftrace_buffer_info *info = filp->private_data;
7486         struct trace_iterator *iter = &info->iter;
7487
7488         return trace_poll(iter, filp, poll_table);
7489 }
7490
7491 static ssize_t
7492 tracing_buffers_read(struct file *filp, char __user *ubuf,
7493                      size_t count, loff_t *ppos)
7494 {
7495         struct ftrace_buffer_info *info = filp->private_data;
7496         struct trace_iterator *iter = &info->iter;
7497         ssize_t ret = 0;
7498         ssize_t size;
7499
7500         if (!count)
7501                 return 0;
7502
7503 #ifdef CONFIG_TRACER_MAX_TRACE
7504         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7505                 return -EBUSY;
7506 #endif
7507
7508         if (!info->spare) {
7509                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7510                                                           iter->cpu_file);
7511                 if (IS_ERR(info->spare)) {
7512                         ret = PTR_ERR(info->spare);
7513                         info->spare = NULL;
7514                 } else {
7515                         info->spare_cpu = iter->cpu_file;
7516                 }
7517         }
7518         if (!info->spare)
7519                 return ret;
7520
7521         /* Do we have previous read data to read? */
7522         if (info->read < PAGE_SIZE)
7523                 goto read;
7524
7525  again:
7526         trace_access_lock(iter->cpu_file);
7527         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7528                                     &info->spare,
7529                                     count,
7530                                     iter->cpu_file, 0);
7531         trace_access_unlock(iter->cpu_file);
7532
7533         if (ret < 0) {
7534                 if (trace_empty(iter)) {
7535                         if ((filp->f_flags & O_NONBLOCK))
7536                                 return -EAGAIN;
7537
7538                         ret = wait_on_pipe(iter, 0);
7539                         if (ret)
7540                                 return ret;
7541
7542                         goto again;
7543                 }
7544                 return 0;
7545         }
7546
7547         info->read = 0;
7548  read:
7549         size = PAGE_SIZE - info->read;
7550         if (size > count)
7551                 size = count;
7552
7553         ret = copy_to_user(ubuf, info->spare + info->read, size);
7554         if (ret == size)
7555                 return -EFAULT;
7556
7557         size -= ret;
7558
7559         *ppos += size;
7560         info->read += size;
7561
7562         return size;
7563 }
7564
7565 static int tracing_buffers_release(struct inode *inode, struct file *file)
7566 {
7567         struct ftrace_buffer_info *info = file->private_data;
7568         struct trace_iterator *iter = &info->iter;
7569
7570         mutex_lock(&trace_types_lock);
7571
7572         iter->tr->trace_ref--;
7573
7574         __trace_array_put(iter->tr);
7575
7576         if (info->spare)
7577                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7578                                            info->spare_cpu, info->spare);
7579         kvfree(info);
7580
7581         mutex_unlock(&trace_types_lock);
7582
7583         return 0;
7584 }
7585
7586 struct buffer_ref {
7587         struct trace_buffer     *buffer;
7588         void                    *page;
7589         int                     cpu;
7590         refcount_t              refcount;
7591 };
7592
7593 static void buffer_ref_release(struct buffer_ref *ref)
7594 {
7595         if (!refcount_dec_and_test(&ref->refcount))
7596                 return;
7597         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7598         kfree(ref);
7599 }
7600
7601 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7602                                     struct pipe_buffer *buf)
7603 {
7604         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7605
7606         buffer_ref_release(ref);
7607         buf->private = 0;
7608 }
7609
7610 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7611                                 struct pipe_buffer *buf)
7612 {
7613         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7614
7615         if (refcount_read(&ref->refcount) > INT_MAX/2)
7616                 return false;
7617
7618         refcount_inc(&ref->refcount);
7619         return true;
7620 }
7621
7622 /* Pipe buffer operations for a buffer. */
7623 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7624         .release                = buffer_pipe_buf_release,
7625         .get                    = buffer_pipe_buf_get,
7626 };
7627
7628 /*
7629  * Callback from splice_to_pipe(), used to release any pages left
7630  * at the end of the spd in case we errored out while filling the pipe.
7631  */
7632 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7633 {
7634         struct buffer_ref *ref =
7635                 (struct buffer_ref *)spd->partial[i].private;
7636
7637         buffer_ref_release(ref);
7638         spd->partial[i].private = 0;
7639 }
7640
7641 static ssize_t
7642 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7643                             struct pipe_inode_info *pipe, size_t len,
7644                             unsigned int flags)
7645 {
7646         struct ftrace_buffer_info *info = file->private_data;
7647         struct trace_iterator *iter = &info->iter;
7648         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7649         struct page *pages_def[PIPE_DEF_BUFFERS];
7650         struct splice_pipe_desc spd = {
7651                 .pages          = pages_def,
7652                 .partial        = partial_def,
7653                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7654                 .ops            = &buffer_pipe_buf_ops,
7655                 .spd_release    = buffer_spd_release,
7656         };
7657         struct buffer_ref *ref;
7658         int entries, i;
7659         ssize_t ret = 0;
7660
7661 #ifdef CONFIG_TRACER_MAX_TRACE
7662         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7663                 return -EBUSY;
7664 #endif
7665
7666         if (*ppos & (PAGE_SIZE - 1))
7667                 return -EINVAL;
7668
7669         if (len & (PAGE_SIZE - 1)) {
7670                 if (len < PAGE_SIZE)
7671                         return -EINVAL;
7672                 len &= PAGE_MASK;
7673         }
7674
7675         if (splice_grow_spd(pipe, &spd))
7676                 return -ENOMEM;
7677
7678  again:
7679         trace_access_lock(iter->cpu_file);
7680         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7681
7682         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7683                 struct page *page;
7684                 int r;
7685
7686                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7687                 if (!ref) {
7688                         ret = -ENOMEM;
7689                         break;
7690                 }
7691
7692                 refcount_set(&ref->refcount, 1);
7693                 ref->buffer = iter->array_buffer->buffer;
7694                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7695                 if (IS_ERR(ref->page)) {
7696                         ret = PTR_ERR(ref->page);
7697                         ref->page = NULL;
7698                         kfree(ref);
7699                         break;
7700                 }
7701                 ref->cpu = iter->cpu_file;
7702
7703                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7704                                           len, iter->cpu_file, 1);
7705                 if (r < 0) {
7706                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7707                                                    ref->page);
7708                         kfree(ref);
7709                         break;
7710                 }
7711
7712                 page = virt_to_page(ref->page);
7713
7714                 spd.pages[i] = page;
7715                 spd.partial[i].len = PAGE_SIZE;
7716                 spd.partial[i].offset = 0;
7717                 spd.partial[i].private = (unsigned long)ref;
7718                 spd.nr_pages++;
7719                 *ppos += PAGE_SIZE;
7720
7721                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7722         }
7723
7724         trace_access_unlock(iter->cpu_file);
7725         spd.nr_pages = i;
7726
7727         /* did we read anything? */
7728         if (!spd.nr_pages) {
7729                 if (ret)
7730                         goto out;
7731
7732                 ret = -EAGAIN;
7733                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7734                         goto out;
7735
7736                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7737                 if (ret)
7738                         goto out;
7739
7740                 goto again;
7741         }
7742
7743         ret = splice_to_pipe(pipe, &spd);
7744 out:
7745         splice_shrink_spd(&spd);
7746
7747         return ret;
7748 }
7749
7750 static const struct file_operations tracing_buffers_fops = {
7751         .open           = tracing_buffers_open,
7752         .read           = tracing_buffers_read,
7753         .poll           = tracing_buffers_poll,
7754         .release        = tracing_buffers_release,
7755         .splice_read    = tracing_buffers_splice_read,
7756         .llseek         = no_llseek,
7757 };
7758
7759 static ssize_t
7760 tracing_stats_read(struct file *filp, char __user *ubuf,
7761                    size_t count, loff_t *ppos)
7762 {
7763         struct inode *inode = file_inode(filp);
7764         struct trace_array *tr = inode->i_private;
7765         struct array_buffer *trace_buf = &tr->array_buffer;
7766         int cpu = tracing_get_cpu(inode);
7767         struct trace_seq *s;
7768         unsigned long cnt;
7769         unsigned long long t;
7770         unsigned long usec_rem;
7771
7772         s = kmalloc(sizeof(*s), GFP_KERNEL);
7773         if (!s)
7774                 return -ENOMEM;
7775
7776         trace_seq_init(s);
7777
7778         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7779         trace_seq_printf(s, "entries: %ld\n", cnt);
7780
7781         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7782         trace_seq_printf(s, "overrun: %ld\n", cnt);
7783
7784         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7785         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7786
7787         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7788         trace_seq_printf(s, "bytes: %ld\n", cnt);
7789
7790         if (trace_clocks[tr->clock_id].in_ns) {
7791                 /* local or global for trace_clock */
7792                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7793                 usec_rem = do_div(t, USEC_PER_SEC);
7794                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7795                                                                 t, usec_rem);
7796
7797                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7798                 usec_rem = do_div(t, USEC_PER_SEC);
7799                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7800         } else {
7801                 /* counter or tsc mode for trace_clock */
7802                 trace_seq_printf(s, "oldest event ts: %llu\n",
7803                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7804
7805                 trace_seq_printf(s, "now ts: %llu\n",
7806                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7807         }
7808
7809         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7810         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7811
7812         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7813         trace_seq_printf(s, "read events: %ld\n", cnt);
7814
7815         count = simple_read_from_buffer(ubuf, count, ppos,
7816                                         s->buffer, trace_seq_used(s));
7817
7818         kfree(s);
7819
7820         return count;
7821 }
7822
7823 static const struct file_operations tracing_stats_fops = {
7824         .open           = tracing_open_generic_tr,
7825         .read           = tracing_stats_read,
7826         .llseek         = generic_file_llseek,
7827         .release        = tracing_release_generic_tr,
7828 };
7829
7830 #ifdef CONFIG_DYNAMIC_FTRACE
7831
7832 static ssize_t
7833 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7834                   size_t cnt, loff_t *ppos)
7835 {
7836         ssize_t ret;
7837         char *buf;
7838         int r;
7839
7840         /* 256 should be plenty to hold the amount needed */
7841         buf = kmalloc(256, GFP_KERNEL);
7842         if (!buf)
7843                 return -ENOMEM;
7844
7845         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7846                       ftrace_update_tot_cnt,
7847                       ftrace_number_of_pages,
7848                       ftrace_number_of_groups);
7849
7850         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7851         kfree(buf);
7852         return ret;
7853 }
7854
7855 static const struct file_operations tracing_dyn_info_fops = {
7856         .open           = tracing_open_generic,
7857         .read           = tracing_read_dyn_info,
7858         .llseek         = generic_file_llseek,
7859 };
7860 #endif /* CONFIG_DYNAMIC_FTRACE */
7861
7862 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7863 static void
7864 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7865                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7866                 void *data)
7867 {
7868         tracing_snapshot_instance(tr);
7869 }
7870
7871 static void
7872 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7873                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7874                       void *data)
7875 {
7876         struct ftrace_func_mapper *mapper = data;
7877         long *count = NULL;
7878
7879         if (mapper)
7880                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7881
7882         if (count) {
7883
7884                 if (*count <= 0)
7885                         return;
7886
7887                 (*count)--;
7888         }
7889
7890         tracing_snapshot_instance(tr);
7891 }
7892
7893 static int
7894 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7895                       struct ftrace_probe_ops *ops, void *data)
7896 {
7897         struct ftrace_func_mapper *mapper = data;
7898         long *count = NULL;
7899
7900         seq_printf(m, "%ps:", (void *)ip);
7901
7902         seq_puts(m, "snapshot");
7903
7904         if (mapper)
7905                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7906
7907         if (count)
7908                 seq_printf(m, ":count=%ld\n", *count);
7909         else
7910                 seq_puts(m, ":unlimited\n");
7911
7912         return 0;
7913 }
7914
7915 static int
7916 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7917                      unsigned long ip, void *init_data, void **data)
7918 {
7919         struct ftrace_func_mapper *mapper = *data;
7920
7921         if (!mapper) {
7922                 mapper = allocate_ftrace_func_mapper();
7923                 if (!mapper)
7924                         return -ENOMEM;
7925                 *data = mapper;
7926         }
7927
7928         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7929 }
7930
7931 static void
7932 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7933                      unsigned long ip, void *data)
7934 {
7935         struct ftrace_func_mapper *mapper = data;
7936
7937         if (!ip) {
7938                 if (!mapper)
7939                         return;
7940                 free_ftrace_func_mapper(mapper, NULL);
7941                 return;
7942         }
7943
7944         ftrace_func_mapper_remove_ip(mapper, ip);
7945 }
7946
7947 static struct ftrace_probe_ops snapshot_probe_ops = {
7948         .func                   = ftrace_snapshot,
7949         .print                  = ftrace_snapshot_print,
7950 };
7951
7952 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7953         .func                   = ftrace_count_snapshot,
7954         .print                  = ftrace_snapshot_print,
7955         .init                   = ftrace_snapshot_init,
7956         .free                   = ftrace_snapshot_free,
7957 };
7958
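/*
 * Editorial note (illustrative, not part of the original source): the
 * "snapshot" function command registered below is used through the
 * set_ftrace_filter tracefs file, e.g.:
 *
 *   echo 'do_sys_open:snapshot' > set_ftrace_filter      # every hit
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter    # first 5 hits only
 *   echo '!do_sys_open:snapshot' >> set_ftrace_filter    # remove the probe
 *
 * The optional count is parsed by the callback below and carried in the
 * probe's data pointer.
 */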
7959 static int
7960 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7961                                char *glob, char *cmd, char *param, int enable)
7962 {
7963         struct ftrace_probe_ops *ops;
7964         void *count = (void *)-1;
7965         char *number;
7966         int ret;
7967
7968         if (!tr)
7969                 return -ENODEV;
7970
7971         /* hash funcs only work with set_ftrace_filter */
7972         if (!enable)
7973                 return -EINVAL;
7974
7975         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7976
7977         if (glob[0] == '!')
7978                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7979
7980         if (!param)
7981                 goto out_reg;
7982
7983         number = strsep(&param, ":");
7984
7985         if (!strlen(number))
7986                 goto out_reg;
7987
7988         /*
7989          * We use the callback data field (which is a pointer)
7990          * as our counter.
7991          */
7992         ret = kstrtoul(number, 0, (unsigned long *)&count);
7993         if (ret)
7994                 return ret;
7995
7996  out_reg:
7997         ret = tracing_alloc_snapshot_instance(tr);
7998         if (ret < 0)
7999                 goto out;
8000
8001         ret = register_ftrace_function_probe(glob, tr, ops, count);
8002
8003  out:
8004         return ret < 0 ? ret : 0;
8005 }
8006
8007 static struct ftrace_func_command ftrace_snapshot_cmd = {
8008         .name                   = "snapshot",
8009         .func                   = ftrace_trace_snapshot_callback,
8010 };
8011
8012 static __init int register_snapshot_cmd(void)
8013 {
8014         return register_ftrace_command(&ftrace_snapshot_cmd);
8015 }
8016 #else
8017 static inline __init int register_snapshot_cmd(void) { return 0; }
8018 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8019
8020 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8021 {
8022         if (WARN_ON(!tr->dir))
8023                 return ERR_PTR(-ENODEV);
8024
8025         /* Top directory uses NULL as the parent */
8026         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8027                 return NULL;
8028
8029         /* All sub buffers have a descriptor */
8030         return tr->dir;
8031 }
8032
8033 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8034 {
8035         struct dentry *d_tracer;
8036
8037         if (tr->percpu_dir)
8038                 return tr->percpu_dir;
8039
8040         d_tracer = tracing_get_dentry(tr);
8041         if (IS_ERR(d_tracer))
8042                 return NULL;
8043
8044         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8045
8046         MEM_FAIL(!tr->percpu_dir,
8047                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8048
8049         return tr->percpu_dir;
8050 }
8051
8052 static struct dentry *
8053 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8054                       void *data, long cpu, const struct file_operations *fops)
8055 {
8056         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8057
8058         if (ret) /* See tracing_get_cpu() */
8059                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8060         return ret;
8061 }
8062
8063 static void
8064 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8065 {
8066         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8067         struct dentry *d_cpu;
8068         char cpu_dir[30]; /* 30 characters should be more than enough */
8069
8070         if (!d_percpu)
8071                 return;
8072
8073         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8074         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8075         if (!d_cpu) {
8076                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8077                 return;
8078         }
8079
8080         /* per cpu trace_pipe */
8081         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8082                                 tr, cpu, &tracing_pipe_fops);
8083
8084         /* per cpu trace */
8085         trace_create_cpu_file("trace", 0644, d_cpu,
8086                                 tr, cpu, &tracing_fops);
8087
8088         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8089                                 tr, cpu, &tracing_buffers_fops);
8090
8091         trace_create_cpu_file("stats", 0444, d_cpu,
8092                                 tr, cpu, &tracing_stats_fops);
8093
8094         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8095                                 tr, cpu, &tracing_entries_fops);
8096
8097 #ifdef CONFIG_TRACER_SNAPSHOT
8098         trace_create_cpu_file("snapshot", 0644, d_cpu,
8099                                 tr, cpu, &snapshot_fops);
8100
8101         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8102                                 tr, cpu, &snapshot_raw_fops);
8103 #endif
8104 }
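
/*
 * Editorial note (illustrative, not part of the original source): the
 * function above gives every CPU its own directory under the instance's
 * tracefs mount, e.g.:
 *
 *   /sys/kernel/tracing/per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,
 *                                     stats,buffer_size_kb,snapshot,...}
 *
 * Reading per_cpu/cpuN/trace shows only that CPU's events, while the top
 * level "trace" file interleaves all CPUs.
 */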
8105
8106 #ifdef CONFIG_FTRACE_SELFTEST
8107 /* Let selftest have access to static functions in this file */
8108 #include "trace_selftest.c"
8109 #endif
8110
8111 static ssize_t
8112 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8113                         loff_t *ppos)
8114 {
8115         struct trace_option_dentry *topt = filp->private_data;
8116         char *buf;
8117
8118         if (topt->flags->val & topt->opt->bit)
8119                 buf = "1\n";
8120         else
8121                 buf = "0\n";
8122
8123         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8124 }
8125
8126 static ssize_t
8127 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8128                          loff_t *ppos)
8129 {
8130         struct trace_option_dentry *topt = filp->private_data;
8131         unsigned long val;
8132         int ret;
8133
8134         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8135         if (ret)
8136                 return ret;
8137
8138         if (val != 0 && val != 1)
8139                 return -EINVAL;
8140
8141         if (!!(topt->flags->val & topt->opt->bit) != val) {
8142                 mutex_lock(&trace_types_lock);
8143                 ret = __set_tracer_option(topt->tr, topt->flags,
8144                                           topt->opt, !val);
8145                 mutex_unlock(&trace_types_lock);
8146                 if (ret)
8147                         return ret;
8148         }
8149
8150         *ppos += cnt;
8151
8152         return cnt;
8153 }
8154
8155
8156 static const struct file_operations trace_options_fops = {
8157         .open = tracing_open_generic,
8158         .read = trace_options_read,
8159         .write = trace_options_write,
8160         .llseek = generic_file_llseek,
8161 };
8162
8163 /*
8164  * In order to pass in both the trace_array descriptor as well as the index
8165  * of the flag that the trace option file represents, the trace_array
8166  * has a character array, trace_flags_index[], which holds the index of
8167  * the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8168  * The address of the element for a given flag is what is passed to that
8169  * flag option file's read/write callbacks.
8170  *
8171  * In order to extract both the index and the trace_array descriptor,
8172  * get_tr_index() uses the following algorithm.
8173  *
8174  *   idx = *ptr;
8175  *
8176  * As the pointer points at the element for this flag, and every element
8177  * stores its own index (remember index[1] == 1), *ptr is that index.
8178  *
8179  * Then, subtracting that index from the pointer gets us back to the
8180  * start of the array:
8181  *
8182  *   ptr - idx == &index[0]
8183  *
8184  * and a simple container_of() on that pointer gives us the trace_array
8185  * descriptor.
8186  */
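
/*
 * Worked example (editorial, illustrative only): if an option file was
 * created with data == &tr->trace_flags_index[3], then
 *
 *   *data == 3                                  (index of the flag bit)
 *   data - 3 == &tr->trace_flags_index[0]       (start of the array)
 *   container_of(data - 3, struct trace_array,
 *                trace_flags_index) == tr       (owning descriptor)
 *
 * which is exactly what get_tr_index() computes below.
 */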
8187 static void get_tr_index(void *data, struct trace_array **ptr,
8188                          unsigned int *pindex)
8189 {
8190         *pindex = *(unsigned char *)data;
8191
8192         *ptr = container_of(data - *pindex, struct trace_array,
8193                             trace_flags_index);
8194 }
8195
8196 static ssize_t
8197 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8198                         loff_t *ppos)
8199 {
8200         void *tr_index = filp->private_data;
8201         struct trace_array *tr;
8202         unsigned int index;
8203         char *buf;
8204
8205         get_tr_index(tr_index, &tr, &index);
8206
8207         if (tr->trace_flags & (1 << index))
8208                 buf = "1\n";
8209         else
8210                 buf = "0\n";
8211
8212         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8213 }
8214
8215 static ssize_t
8216 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8217                          loff_t *ppos)
8218 {
8219         void *tr_index = filp->private_data;
8220         struct trace_array *tr;
8221         unsigned int index;
8222         unsigned long val;
8223         int ret;
8224
8225         get_tr_index(tr_index, &tr, &index);
8226
8227         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8228         if (ret)
8229                 return ret;
8230
8231         if (val != 0 && val != 1)
8232                 return -EINVAL;
8233
8234         mutex_lock(&event_mutex);
8235         mutex_lock(&trace_types_lock);
8236         ret = set_tracer_flag(tr, 1 << index, val);
8237         mutex_unlock(&trace_types_lock);
8238         mutex_unlock(&event_mutex);
8239
8240         if (ret < 0)
8241                 return ret;
8242
8243         *ppos += cnt;
8244
8245         return cnt;
8246 }
8247
8248 static const struct file_operations trace_options_core_fops = {
8249         .open = tracing_open_generic,
8250         .read = trace_options_core_read,
8251         .write = trace_options_core_write,
8252         .llseek = generic_file_llseek,
8253 };
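
/*
 * Illustrative usage (editorial note, not part of the original source):
 * the core option files created with these fops live in the instance's
 * "options" directory and take a simple boolean, e.g.:
 *
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *   echo 0 > /sys/kernel/tracing/options/overwrite
 *
 * Anything other than 0 or 1 is rejected with -EINVAL by
 * trace_options_core_write() above.
 */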
8254
8255 struct dentry *trace_create_file(const char *name,
8256                                  umode_t mode,
8257                                  struct dentry *parent,
8258                                  void *data,
8259                                  const struct file_operations *fops)
8260 {
8261         struct dentry *ret;
8262
8263         ret = tracefs_create_file(name, mode, parent, data, fops);
8264         if (!ret)
8265                 pr_warn("Could not create tracefs '%s' entry\n", name);
8266
8267         return ret;
8268 }
8269
8270
8271 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8272 {
8273         struct dentry *d_tracer;
8274
8275         if (tr->options)
8276                 return tr->options;
8277
8278         d_tracer = tracing_get_dentry(tr);
8279         if (IS_ERR(d_tracer))
8280                 return NULL;
8281
8282         tr->options = tracefs_create_dir("options", d_tracer);
8283         if (!tr->options) {
8284                 pr_warn("Could not create tracefs directory 'options'\n");
8285                 return NULL;
8286         }
8287
8288         return tr->options;
8289 }
8290
8291 static void
8292 create_trace_option_file(struct trace_array *tr,
8293                          struct trace_option_dentry *topt,
8294                          struct tracer_flags *flags,
8295                          struct tracer_opt *opt)
8296 {
8297         struct dentry *t_options;
8298
8299         t_options = trace_options_init_dentry(tr);
8300         if (!t_options)
8301                 return;
8302
8303         topt->flags = flags;
8304         topt->opt = opt;
8305         topt->tr = tr;
8306
8307         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8308                                     &trace_options_fops);
8309
8310 }
8311
8312 static void
8313 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8314 {
8315         struct trace_option_dentry *topts;
8316         struct trace_options *tr_topts;
8317         struct tracer_flags *flags;
8318         struct tracer_opt *opts;
8319         int cnt;
8320         int i;
8321
8322         if (!tracer)
8323                 return;
8324
8325         flags = tracer->flags;
8326
8327         if (!flags || !flags->opts)
8328                 return;
8329
8330         /*
8331          * If this is an instance, only create flags for tracers
8332          * the instance may have.
8333          */
8334         if (!trace_ok_for_array(tracer, tr))
8335                 return;
8336
8337         for (i = 0; i < tr->nr_topts; i++) {
8338                 /* Make sure there are no duplicate flags. */
8339                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8340                         return;
8341         }
8342
8343         opts = flags->opts;
8344
8345         for (cnt = 0; opts[cnt].name; cnt++)
8346                 ;
8347
8348         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8349         if (!topts)
8350                 return;
8351
8352         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8353                             GFP_KERNEL);
8354         if (!tr_topts) {
8355                 kfree(topts);
8356                 return;
8357         }
8358
8359         tr->topts = tr_topts;
8360         tr->topts[tr->nr_topts].tracer = tracer;
8361         tr->topts[tr->nr_topts].topts = topts;
8362         tr->nr_topts++;
8363
8364         for (cnt = 0; opts[cnt].name; cnt++) {
8365                 create_trace_option_file(tr, &topts[cnt], flags,
8366                                          &opts[cnt]);
8367                 MEM_FAIL(topts[cnt].entry == NULL,
8368                           "Failed to create trace option: %s",
8369                           opts[cnt].name);
8370         }
8371 }
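
/*
 * Editorial note (illustrative, not part of the original source): the
 * tracer specific flags created here appear next to the core flags in
 * the same "options" directory once the tracer is available to the
 * instance; for example, the function tracer contributes
 * options/func_stack_trace. If a tracer's flag set was already added for
 * this instance, the WARN_ON_ONCE() above fires and no duplicate files
 * are created.
 */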
8372
8373 static struct dentry *
8374 create_trace_option_core_file(struct trace_array *tr,
8375                               const char *option, long index)
8376 {
8377         struct dentry *t_options;
8378
8379         t_options = trace_options_init_dentry(tr);
8380         if (!t_options)
8381                 return NULL;
8382
8383         return trace_create_file(option, 0644, t_options,
8384                                  (void *)&tr->trace_flags_index[index],
8385                                  &trace_options_core_fops);
8386 }
8387
8388 static void create_trace_options_dir(struct trace_array *tr)
8389 {
8390         struct dentry *t_options;
8391         bool top_level = tr == &global_trace;
8392         int i;
8393
8394         t_options = trace_options_init_dentry(tr);
8395         if (!t_options)
8396                 return;
8397
8398         for (i = 0; trace_options[i]; i++) {
8399                 if (top_level ||
8400                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8401                         create_trace_option_core_file(tr, trace_options[i], i);
8402         }
8403 }
8404
8405 static ssize_t
8406 rb_simple_read(struct file *filp, char __user *ubuf,
8407                size_t cnt, loff_t *ppos)
8408 {
8409         struct trace_array *tr = filp->private_data;
8410         char buf[64];
8411         int r;
8412
8413         r = tracer_tracing_is_on(tr);
8414         r = sprintf(buf, "%d\n", r);
8415
8416         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8417 }
8418
8419 static ssize_t
8420 rb_simple_write(struct file *filp, const char __user *ubuf,
8421                 size_t cnt, loff_t *ppos)
8422 {
8423         struct trace_array *tr = filp->private_data;
8424         struct trace_buffer *buffer = tr->array_buffer.buffer;
8425         unsigned long val;
8426         int ret;
8427
8428         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8429         if (ret)
8430                 return ret;
8431
8432         if (buffer) {
8433                 mutex_lock(&trace_types_lock);
8434                 if (!!val == tracer_tracing_is_on(tr)) {
8435                         val = 0; /* do nothing */
8436                 } else if (val) {
8437                         tracer_tracing_on(tr);
8438                         if (tr->current_trace->start)
8439                                 tr->current_trace->start(tr);
8440                 } else {
8441                         tracer_tracing_off(tr);
8442                         if (tr->current_trace->stop)
8443                                 tr->current_trace->stop(tr);
8444                 }
8445                 mutex_unlock(&trace_types_lock);
8446         }
8447
8448         (*ppos)++;
8449
8450         return cnt;
8451 }
8452
8453 static const struct file_operations rb_simple_fops = {
8454         .open           = tracing_open_generic_tr,
8455         .read           = rb_simple_read,
8456         .write          = rb_simple_write,
8457         .release        = tracing_release_generic_tr,
8458         .llseek         = default_llseek,
8459 };
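
/*
 * Illustrative usage (editorial note, not part of the original source):
 * these fops back the per-instance "tracing_on" file, which toggles the
 * ring buffer without changing the current tracer:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *   echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *   cat /sys/kernel/tracing/tracing_on         # query the current state
 */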
8460
8461 static ssize_t
8462 buffer_percent_read(struct file *filp, char __user *ubuf,
8463                     size_t cnt, loff_t *ppos)
8464 {
8465         struct trace_array *tr = filp->private_data;
8466         char buf[64];
8467         int r;
8468
8469         r = tr->buffer_percent;
8470         r = sprintf(buf, "%d\n", r);
8471
8472         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8473 }
8474
8475 static ssize_t
8476 buffer_percent_write(struct file *filp, const char __user *ubuf,
8477                      size_t cnt, loff_t *ppos)
8478 {
8479         struct trace_array *tr = filp->private_data;
8480         unsigned long val;
8481         int ret;
8482
8483         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8484         if (ret)
8485                 return ret;
8486
8487         if (val > 100)
8488                 return -EINVAL;
8489
8490         if (!val)
8491                 val = 1;
8492
8493         tr->buffer_percent = val;
8494
8495         (*ppos)++;
8496
8497         return cnt;
8498 }
8499
8500 static const struct file_operations buffer_percent_fops = {
8501         .open           = tracing_open_generic_tr,
8502         .read           = buffer_percent_read,
8503         .write          = buffer_percent_write,
8504         .release        = tracing_release_generic_tr,
8505         .llseek         = default_llseek,
8506 };
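
/*
 * Illustrative usage (editorial note, not part of the original source):
 * "buffer_percent" sets how full the ring buffer must be before a blocked
 * reader is woken up. Per buffer_percent_write() above, values above 100
 * are rejected and 0 is bumped to 1:
 *
 *   echo 50 > /sys/kernel/tracing/buffer_percent
 */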
8507
8508 static struct dentry *trace_instance_dir;
8509
8510 static void
8511 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8512
8513 static int
8514 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8515 {
8516         enum ring_buffer_flags rb_flags;
8517
8518         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8519
8520         buf->tr = tr;
8521
8522         buf->buffer = ring_buffer_alloc(size, rb_flags);
8523         if (!buf->buffer)
8524                 return -ENOMEM;
8525
8526         buf->data = alloc_percpu(struct trace_array_cpu);
8527         if (!buf->data) {
8528                 ring_buffer_free(buf->buffer);
8529                 buf->buffer = NULL;
8530                 return -ENOMEM;
8531         }
8532
8533         /* Allocate the first page for all buffers */
8534         set_buffer_entries(&tr->array_buffer,
8535                            ring_buffer_size(tr->array_buffer.buffer, 0));
8536
8537         return 0;
8538 }
8539
8540 static int allocate_trace_buffers(struct trace_array *tr, int size)
8541 {
8542         int ret;
8543
8544         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8545         if (ret)
8546                 return ret;
8547
8548 #ifdef CONFIG_TRACER_MAX_TRACE
8549         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8550                                     allocate_snapshot ? size : 1);
8551         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8552                 ring_buffer_free(tr->array_buffer.buffer);
8553                 tr->array_buffer.buffer = NULL;
8554                 free_percpu(tr->array_buffer.data);
8555                 tr->array_buffer.data = NULL;
8556                 return -ENOMEM;
8557         }
8558         tr->allocated_snapshot = allocate_snapshot;
8559
8560         /*
8561          * Only the top level trace array gets its snapshot allocated
8562          * from the kernel command line.
8563          */
8564         allocate_snapshot = false;
8565 #endif
8566
8567         return 0;
8568 }
8569
8570 static void free_trace_buffer(struct array_buffer *buf)
8571 {
8572         if (buf->buffer) {
8573                 ring_buffer_free(buf->buffer);
8574                 buf->buffer = NULL;
8575                 free_percpu(buf->data);
8576                 buf->data = NULL;
8577         }
8578 }
8579
8580 static void free_trace_buffers(struct trace_array *tr)
8581 {
8582         if (!tr)
8583                 return;
8584
8585         free_trace_buffer(&tr->array_buffer);
8586
8587 #ifdef CONFIG_TRACER_MAX_TRACE
8588         free_trace_buffer(&tr->max_buffer);
8589 #endif
8590 }
8591
8592 static void init_trace_flags_index(struct trace_array *tr)
8593 {
8594         int i;
8595
8596         /* Used by the trace options files */
8597         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8598                 tr->trace_flags_index[i] = i;
8599 }
8600
8601 static void __update_tracer_options(struct trace_array *tr)
8602 {
8603         struct tracer *t;
8604
8605         for (t = trace_types; t; t = t->next)
8606                 add_tracer_options(tr, t);
8607 }
8608
8609 static void update_tracer_options(struct trace_array *tr)
8610 {
8611         mutex_lock(&trace_types_lock);
8612         __update_tracer_options(tr);
8613         mutex_unlock(&trace_types_lock);
8614 }
8615
8616 /* Must have trace_types_lock held */
8617 struct trace_array *trace_array_find(const char *instance)
8618 {
8619         struct trace_array *tr, *found = NULL;
8620
8621         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8622                 if (tr->name && strcmp(tr->name, instance) == 0) {
8623                         found = tr;
8624                         break;
8625                 }
8626         }
8627
8628         return found;
8629 }
8630
8631 struct trace_array *trace_array_find_get(const char *instance)
8632 {
8633         struct trace_array *tr;
8634
8635         mutex_lock(&trace_types_lock);
8636         tr = trace_array_find(instance);
8637         if (tr)
8638                 tr->ref++;
8639         mutex_unlock(&trace_types_lock);
8640
8641         return tr;
8642 }
8643
8644 static int trace_array_create_dir(struct trace_array *tr)
8645 {
8646         int ret;
8647
8648         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8649         if (!tr->dir)
8650                 return -EINVAL;
8651
8652         ret = event_trace_add_tracer(tr->dir, tr);
8653         if (ret)
8654                 tracefs_remove(tr->dir);
8655
8656         init_tracer_tracefs(tr, tr->dir);
8657         __update_tracer_options(tr);
8658
8659         return ret;
8660 }
8661
8662 static struct trace_array *trace_array_create(const char *name)
8663 {
8664         struct trace_array *tr;
8665         int ret;
8666
8667         ret = -ENOMEM;
8668         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8669         if (!tr)
8670                 return ERR_PTR(ret);
8671
8672         tr->name = kstrdup(name, GFP_KERNEL);
8673         if (!tr->name)
8674                 goto out_free_tr;
8675
8676         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8677                 goto out_free_tr;
8678
8679         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8680
8681         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8682
8683         raw_spin_lock_init(&tr->start_lock);
8684
8685         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8686
8687         tr->current_trace = &nop_trace;
8688
8689         INIT_LIST_HEAD(&tr->systems);
8690         INIT_LIST_HEAD(&tr->events);
8691         INIT_LIST_HEAD(&tr->hist_vars);
8692         INIT_LIST_HEAD(&tr->err_log);
8693
8694         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8695                 goto out_free_tr;
8696
8697         if (ftrace_allocate_ftrace_ops(tr) < 0)
8698                 goto out_free_tr;
8699
8700         ftrace_init_trace_array(tr);
8701
8702         init_trace_flags_index(tr);
8703
8704         if (trace_instance_dir) {
8705                 ret = trace_array_create_dir(tr);
8706                 if (ret)
8707                         goto out_free_tr;
8708         } else
8709                 __trace_early_add_events(tr);
8710
8711         list_add(&tr->list, &ftrace_trace_arrays);
8712
8713         tr->ref++;
8714
8715         return tr;
8716
8717  out_free_tr:
8718         ftrace_free_ftrace_ops(tr);
8719         free_trace_buffers(tr);
8720         free_cpumask_var(tr->tracing_cpumask);
8721         kfree(tr->name);
8722         kfree(tr);
8723
8724         return ERR_PTR(ret);
8725 }
8726
8727 static int instance_mkdir(const char *name)
8728 {
8729         struct trace_array *tr;
8730         int ret;
8731
8732         mutex_lock(&event_mutex);
8733         mutex_lock(&trace_types_lock);
8734
8735         ret = -EEXIST;
8736         if (trace_array_find(name))
8737                 goto out_unlock;
8738
8739         tr = trace_array_create(name);
8740
8741         ret = PTR_ERR_OR_ZERO(tr);
8742
8743 out_unlock:
8744         mutex_unlock(&trace_types_lock);
8745         mutex_unlock(&event_mutex);
8746         return ret;
8747 }
8748
8749 /**
8750  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8751  * @name: The name of the trace array to be looked up/created.
8752  *
8753  * Returns a pointer to the trace array with the given name, or
8754  * NULL if it cannot be created.
8755  *
8756  * NOTE: This function increments the reference counter associated with the
8757  * trace array returned. This makes sure it cannot be freed while in use.
8758  * Use trace_array_put() once the trace array is no longer needed.
8759  * If the trace_array is to be freed, trace_array_destroy() needs to
8760  * be called after the trace_array_put(), or simply let user space delete
8761  * it from the tracefs instances directory. But until the
8762  * trace_array_put() is called, user space cannot delete it.
8763  *
8764  */
8765 struct trace_array *trace_array_get_by_name(const char *name)
8766 {
8767         struct trace_array *tr;
8768
8769         mutex_lock(&event_mutex);
8770         mutex_lock(&trace_types_lock);
8771
8772         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8773                 if (tr->name && strcmp(tr->name, name) == 0)
8774                         goto out_unlock;
8775         }
8776
8777         tr = trace_array_create(name);
8778
8779         if (IS_ERR(tr))
8780                 tr = NULL;
8781 out_unlock:
8782         if (tr)
8783                 tr->ref++;
8784
8785         mutex_unlock(&trace_types_lock);
8786         mutex_unlock(&event_mutex);
8787         return tr;
8788 }
8789 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
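
/*
 * Minimal in-kernel usage sketch (editorial, illustrative only; the
 * instance name is made up):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (!tr)
 *           return -ENOMEM;
 *   ... use tr, e.g. with trace_array_printk() ...
 *   trace_array_put(tr);
 *
 * trace_array_destroy() may additionally be called after the put if the
 * instance itself should go away, as the kernel-doc above describes.
 */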
8790
8791 static int __remove_instance(struct trace_array *tr)
8792 {
8793         int i;
8794
8795         /* Reference counter for a newly created trace array = 1. */
8796         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8797                 return -EBUSY;
8798
8799         list_del(&tr->list);
8800
8801         /* Disable all the flags that were enabled coming in */
8802         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8803                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8804                         set_tracer_flag(tr, 1 << i, 0);
8805         }
8806
8807         tracing_set_nop(tr);
8808         clear_ftrace_function_probes(tr);
8809         event_trace_del_tracer(tr);
8810         ftrace_clear_pids(tr);
8811         ftrace_destroy_function_files(tr);
8812         tracefs_remove(tr->dir);
8813         free_trace_buffers(tr);
8814
8815         for (i = 0; i < tr->nr_topts; i++) {
8816                 kfree(tr->topts[i].topts);
8817         }
8818         kfree(tr->topts);
8819
8820         free_cpumask_var(tr->tracing_cpumask);
8821         kfree(tr->name);
8822         kfree(tr);
8823
8824         return 0;
8825 }
8826
8827 int trace_array_destroy(struct trace_array *this_tr)
8828 {
8829         struct trace_array *tr;
8830         int ret;
8831
8832         if (!this_tr)
8833                 return -EINVAL;
8834
8835         mutex_lock(&event_mutex);
8836         mutex_lock(&trace_types_lock);
8837
8838         ret = -ENODEV;
8839
8840         /* Make sure the trace array exists before destroying it. */
8841         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8842                 if (tr == this_tr) {
8843                         ret = __remove_instance(tr);
8844                         break;
8845                 }
8846         }
8847
8848         mutex_unlock(&trace_types_lock);
8849         mutex_unlock(&event_mutex);
8850
8851         return ret;
8852 }
8853 EXPORT_SYMBOL_GPL(trace_array_destroy);
8854
8855 static int instance_rmdir(const char *name)
8856 {
8857         struct trace_array *tr;
8858         int ret;
8859
8860         mutex_lock(&event_mutex);
8861         mutex_lock(&trace_types_lock);
8862
8863         ret = -ENODEV;
8864         tr = trace_array_find(name);
8865         if (tr)
8866                 ret = __remove_instance(tr);
8867
8868         mutex_unlock(&trace_types_lock);
8869         mutex_unlock(&event_mutex);
8870
8871         return ret;
8872 }
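
/*
 * Illustrative usage (editorial note, not part of the original source):
 * instance_mkdir() and instance_rmdir() above are what run when user
 * space manipulates the "instances" directory:
 *
 *   mkdir /sys/kernel/tracing/instances/foo    # trace_array_create("foo")
 *   rmdir /sys/kernel/tracing/instances/foo    # __remove_instance()
 *
 * The rmdir fails with -EBUSY while the instance is still referenced or
 * has open trace files.
 */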
8873
8874 static __init void create_trace_instances(struct dentry *d_tracer)
8875 {
8876         struct trace_array *tr;
8877
8878         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8879                                                          instance_mkdir,
8880                                                          instance_rmdir);
8881         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8882                 return;
8883
8884         mutex_lock(&event_mutex);
8885         mutex_lock(&trace_types_lock);
8886
8887         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8888                 if (!tr->name)
8889                         continue;
8890                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8891                              "Failed to create instance directory\n"))
8892                         break;
8893         }
8894
8895         mutex_unlock(&trace_types_lock);
8896         mutex_unlock(&event_mutex);
8897 }
8898
8899 static void
8900 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8901 {
8902         struct trace_event_file *file;
8903         int cpu;
8904
8905         trace_create_file("available_tracers", 0444, d_tracer,
8906                         tr, &show_traces_fops);
8907
8908         trace_create_file("current_tracer", 0644, d_tracer,
8909                         tr, &set_tracer_fops);
8910
8911         trace_create_file("tracing_cpumask", 0644, d_tracer,
8912                           tr, &tracing_cpumask_fops);
8913
8914         trace_create_file("trace_options", 0644, d_tracer,
8915                           tr, &tracing_iter_fops);
8916
8917         trace_create_file("trace", 0644, d_tracer,
8918                           tr, &tracing_fops);
8919
8920         trace_create_file("trace_pipe", 0444, d_tracer,
8921                           tr, &tracing_pipe_fops);
8922
8923         trace_create_file("buffer_size_kb", 0644, d_tracer,
8924                           tr, &tracing_entries_fops);
8925
8926         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8927                           tr, &tracing_total_entries_fops);
8928
8929         trace_create_file("free_buffer", 0200, d_tracer,
8930                           tr, &tracing_free_buffer_fops);
8931
8932         trace_create_file("trace_marker", 0220, d_tracer,
8933                           tr, &tracing_mark_fops);
8934
8935         file = __find_event_file(tr, "ftrace", "print");
8936         if (file && file->dir)
8937                 trace_create_file("trigger", 0644, file->dir, file,
8938                                   &event_trigger_fops);
8939         tr->trace_marker_file = file;
8940
8941         trace_create_file("trace_marker_raw", 0220, d_tracer,
8942                           tr, &tracing_mark_raw_fops);
8943
8944         trace_create_file("trace_clock", 0644, d_tracer, tr,
8945                           &trace_clock_fops);
8946
8947         trace_create_file("tracing_on", 0644, d_tracer,
8948                           tr, &rb_simple_fops);
8949
8950         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8951                           &trace_time_stamp_mode_fops);
8952
8953         tr->buffer_percent = 50;
8954
8955         trace_create_file("buffer_percent", 0444, d_tracer,
8956                         tr, &buffer_percent_fops);
8957
8958         create_trace_options_dir(tr);
8959
8960 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8961         trace_create_maxlat_file(tr, d_tracer);
8962 #endif
8963
8964         if (ftrace_create_function_files(tr, d_tracer))
8965                 MEM_FAIL(1, "Could not allocate function filter files");
8966
8967 #ifdef CONFIG_TRACER_SNAPSHOT
8968         trace_create_file("snapshot", 0644, d_tracer,
8969                           tr, &snapshot_fops);
8970 #endif
8971
8972         trace_create_file("error_log", 0644, d_tracer,
8973                           tr, &tracing_err_log_fops);
8974
8975         for_each_tracing_cpu(cpu)
8976                 tracing_init_tracefs_percpu(tr, cpu);
8977
8978         ftrace_init_tracefs(tr, d_tracer);
8979 }
8980
8981 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8982 {
8983         struct vfsmount *mnt;
8984         struct file_system_type *type;
8985
8986         /*
8987          * To maintain backward compatibility for tools that mount
8988          * debugfs to get to the tracing facility, tracefs is automatically
8989          * mounted to the debugfs/tracing directory.
8990          */
8991         type = get_fs_type("tracefs");
8992         if (!type)
8993                 return NULL;
8994         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8995         put_filesystem(type);
8996         if (IS_ERR(mnt))
8997                 return NULL;
8998         mntget(mnt);
8999
9000         return mnt;
9001 }
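
/*
 * Editorial note (illustrative, not part of the original source): with the
 * automount above in place, legacy paths keep working, e.g.:
 *
 *   mount -t debugfs nodev /sys/kernel/debug
 *   ls /sys/kernel/debug/tracing       # triggers the tracefs automount
 *
 * which ends up equivalent to mounting tracefs directly on
 * /sys/kernel/tracing.
 */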
9002
9003 /**
9004  * tracing_init_dentry - initialize top level trace array
9005  *
9006  * This is called when creating files or directories in the tracing
9007  * directory. It is called via fs_initcall() by any of the boot up code.
9008  * It returns 0 on success, or a negative error if the tracing directory
9009  * could not be set up.
9009  */
9010 int tracing_init_dentry(void)
9011 {
9012         struct trace_array *tr = &global_trace;
9013
9014         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9015                 pr_warn("Tracing disabled due to lockdown\n");
9016                 return -EPERM;
9017         }
9018
9019         /* The top level trace array uses NULL as parent */
9020         if (tr->dir)
9021                 return 0;
9022
9023         if (WARN_ON(!tracefs_initialized()))
9024                 return -ENODEV;
9025
9026         /*
9027          * As there may still be users that expect the tracing
9028          * files to exist in debugfs/tracing, we must automount
9029          * the tracefs file system there, so older tools still
9030          * work with the newer kernel.
9031          */
9032         tr->dir = debugfs_create_automount("tracing", NULL,
9033                                            trace_automount, NULL);
9034
9035         return 0;
9036 }
9037
9038 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9039 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9040
9041 static void __init trace_eval_init(void)
9042 {
9043         int len;
9044
9045         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9046         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9047 }
9048
9049 #ifdef CONFIG_MODULES
9050 static void trace_module_add_evals(struct module *mod)
9051 {
9052         if (!mod->num_trace_evals)
9053                 return;
9054
9055         /*
9056          * Modules with bad taint do not have events created, so do
9057          * not bother with their eval maps (enums) either.
9058          */
9059         if (trace_module_has_bad_taint(mod))
9060                 return;
9061
9062         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9063 }
9064
9065 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9066 static void trace_module_remove_evals(struct module *mod)
9067 {
9068         union trace_eval_map_item *map;
9069         union trace_eval_map_item **last = &trace_eval_maps;
9070
9071         if (!mod->num_trace_evals)
9072                 return;
9073
9074         mutex_lock(&trace_eval_mutex);
9075
9076         map = trace_eval_maps;
9077
9078         while (map) {
9079                 if (map->head.mod == mod)
9080                         break;
9081                 map = trace_eval_jmp_to_tail(map);
9082                 last = &map->tail.next;
9083                 map = map->tail.next;
9084         }
9085         if (!map)
9086                 goto out;
9087
9088         *last = trace_eval_jmp_to_tail(map)->tail.next;
9089         kfree(map);
9090  out:
9091         mutex_unlock(&trace_eval_mutex);
9092 }
9093 #else
9094 static inline void trace_module_remove_evals(struct module *mod) { }
9095 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9096
9097 static int trace_module_notify(struct notifier_block *self,
9098                                unsigned long val, void *data)
9099 {
9100         struct module *mod = data;
9101
9102         switch (val) {
9103         case MODULE_STATE_COMING:
9104                 trace_module_add_evals(mod);
9105                 break;
9106         case MODULE_STATE_GOING:
9107                 trace_module_remove_evals(mod);
9108                 break;
9109         }
9110
9111         return 0;
9112 }
9113
9114 static struct notifier_block trace_module_nb = {
9115         .notifier_call = trace_module_notify,
9116         .priority = 0,
9117 };
9118 #endif /* CONFIG_MODULES */
9119
9120 static __init int tracer_init_tracefs(void)
9121 {
9122         int ret;
9123
9124         trace_access_lock_init();
9125
9126         ret = tracing_init_dentry();
9127         if (ret)
9128                 return 0;
9129
9130         event_trace_init();
9131
9132         init_tracer_tracefs(&global_trace, NULL);
9133         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9134
9135         trace_create_file("tracing_thresh", 0644, NULL,
9136                         &global_trace, &tracing_thresh_fops);
9137
9138         trace_create_file("README", 0444, NULL,
9139                         NULL, &tracing_readme_fops);
9140
9141         trace_create_file("saved_cmdlines", 0444, NULL,
9142                         NULL, &tracing_saved_cmdlines_fops);
9143
9144         trace_create_file("saved_cmdlines_size", 0644, NULL,
9145                           NULL, &tracing_saved_cmdlines_size_fops);
9146
9147         trace_create_file("saved_tgids", 0444, NULL,
9148                         NULL, &tracing_saved_tgids_fops);
9149
9150         trace_eval_init();
9151
9152         trace_create_eval_file(NULL);
9153
9154 #ifdef CONFIG_MODULES
9155         register_module_notifier(&trace_module_nb);
9156 #endif
9157
9158 #ifdef CONFIG_DYNAMIC_FTRACE
9159         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9160                         NULL, &tracing_dyn_info_fops);
9161 #endif
9162
9163         create_trace_instances(NULL);
9164
9165         update_tracer_options(&global_trace);
9166
9167         return 0;
9168 }
9169
9170 static int trace_panic_handler(struct notifier_block *this,
9171                                unsigned long event, void *unused)
9172 {
9173         if (ftrace_dump_on_oops)
9174                 ftrace_dump(ftrace_dump_on_oops);
9175         return NOTIFY_OK;
9176 }
9177
9178 static struct notifier_block trace_panic_notifier = {
9179         .notifier_call  = trace_panic_handler,
9180         .next           = NULL,
9181         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9182 };
9183
9184 static int trace_die_handler(struct notifier_block *self,
9185                              unsigned long val,
9186                              void *data)
9187 {
9188         switch (val) {
9189         case DIE_OOPS:
9190                 if (ftrace_dump_on_oops)
9191                         ftrace_dump(ftrace_dump_on_oops);
9192                 break;
9193         default:
9194                 break;
9195         }
9196         return NOTIFY_OK;
9197 }
9198
9199 static struct notifier_block trace_die_notifier = {
9200         .notifier_call = trace_die_handler,
9201         .priority = 200
9202 };
9203
9204 /*
9205  * printk is limited to a max of 1024; we really don't need it that big.
9206  * Nothing should be printing 1000 characters anyway.
9207  */
9208 #define TRACE_MAX_PRINT         1000
9209
9210 /*
9211  * Define here KERN_TRACE so that we have one place to modify
9212  * it if we decide to change what log level the ftrace dump
9213  * should be at.
9214  */
9215 #define KERN_TRACE              KERN_EMERG
9216
9217 void
9218 trace_printk_seq(struct trace_seq *s)
9219 {
9220         /* Probably should print a warning here. */
9221         if (s->seq.len >= TRACE_MAX_PRINT)
9222                 s->seq.len = TRACE_MAX_PRINT;
9223
9224         /*
9225          * More paranoid code. Although the buffer size is set to
9226          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9227          * an extra layer of protection.
9228          */
9229         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9230                 s->seq.len = s->seq.size - 1;
9231
9232         /* should already be NUL terminated, but we are paranoid. */
9233         s->buffer[s->seq.len] = 0;
9234
9235         printk(KERN_TRACE "%s", s->buffer);
9236
9237         trace_seq_init(s);
9238 }
9239
9240 void trace_init_global_iter(struct trace_iterator *iter)
9241 {
9242         iter->tr = &global_trace;
9243         iter->trace = iter->tr->current_trace;
9244         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9245         iter->array_buffer = &global_trace.array_buffer;
9246
9247         if (iter->trace && iter->trace->open)
9248                 iter->trace->open(iter);
9249
9250         /* Annotate start of buffers if we had overruns */
9251         if (ring_buffer_overruns(iter->array_buffer->buffer))
9252                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9253
9254         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9255         if (trace_clocks[iter->tr->clock_id].in_ns)
9256                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9257 }
9258
9259 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9260 {
9261         /* use static because iter can be a bit big for the stack */
9262         static struct trace_iterator iter;
9263         static atomic_t dump_running;
9264         struct trace_array *tr = &global_trace;
9265         unsigned int old_userobj;
9266         unsigned long flags;
9267         int cnt = 0, cpu;
9268
9269         /* Only allow one dump user at a time. */
9270         if (atomic_inc_return(&dump_running) != 1) {
9271                 atomic_dec(&dump_running);
9272                 return;
9273         }
9274
9275         /*
9276          * Always turn off tracing when we dump.
9277          * We don't need to show trace output of what happens
9278          * between multiple crashes.
9279          *
9280          * If the user does a sysrq-z, then they can re-enable
9281          * tracing with echo 1 > tracing_on.
9282          */
9283         tracing_off();
9284
9285         local_irq_save(flags);
9286         printk_nmi_direct_enter();
9287
9288         /* Simulate the iterator */
9289         trace_init_global_iter(&iter);
9290         /* Cannot use kmalloc for iter.temp */
9291         iter.temp = static_temp_buf;
9292         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9293
9294         for_each_tracing_cpu(cpu) {
9295                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9296         }
9297
9298         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9299
9300         /* don't look at user memory in panic mode */
9301         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9302
9303         switch (oops_dump_mode) {
9304         case DUMP_ALL:
9305                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9306                 break;
9307         case DUMP_ORIG:
9308                 iter.cpu_file = raw_smp_processor_id();
9309                 break;
9310         case DUMP_NONE:
9311                 goto out_enable;
9312         default:
9313                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9314                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9315         }
9316
9317         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9318
9319         /* Did function tracer already get disabled? */
9320         if (ftrace_is_dead()) {
9321                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9322                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9323         }
9324
9325         /*
9326          * We need to stop all tracing on all CPUs to read
9327          * the next buffer. This is a bit expensive, but is
9328          * not done often. We read all that we can,
9329          * and then release the locks again.
9330          */
9331
9332         while (!trace_empty(&iter)) {
9333
9334                 if (!cnt)
9335                         printk(KERN_TRACE "---------------------------------\n");
9336
9337                 cnt++;
9338
9339                 trace_iterator_reset(&iter);
9340                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9341
9342                 if (trace_find_next_entry_inc(&iter) != NULL) {
9343                         int ret;
9344
9345                         ret = print_trace_line(&iter);
9346                         if (ret != TRACE_TYPE_NO_CONSUME)
9347                                 trace_consume(&iter);
9348                 }
9349                 touch_nmi_watchdog();
9350
9351                 trace_printk_seq(&iter.seq);
9352         }
9353
9354         if (!cnt)
9355                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9356         else
9357                 printk(KERN_TRACE "---------------------------------\n");
9358
9359  out_enable:
9360         tr->trace_flags |= old_userobj;
9361
9362         for_each_tracing_cpu(cpu) {
9363                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9364         }
9365         atomic_dec(&dump_running);
9366         printk_nmi_direct_exit();
9367         local_irq_restore(flags);
9368 }
9369 EXPORT_SYMBOL_GPL(ftrace_dump);
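
/*
 * Minimal usage sketch (editorial, illustrative only): kernel code that
 * wants the trace buffers dumped to the console on a fatal error can call
 *
 *   ftrace_dump(DUMP_ALL);     dump every CPU's buffer
 *   ftrace_dump(DUMP_ORIG);    or only the CPU that hit the problem
 *
 * Note that the dump turns tracing off; re-enable it afterwards with
 * "echo 1 > tracing_on" if needed.
 */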
9370
9371 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9372 {
9373         char **argv;
9374         int argc, ret;
9375
9376         argc = 0;
9377         ret = 0;
9378         argv = argv_split(GFP_KERNEL, buf, &argc);
9379         if (!argv)
9380                 return -ENOMEM;
9381
9382         if (argc)
9383                 ret = createfn(argc, argv);
9384
9385         argv_free(argv);
9386
9387         return ret;
9388 }
9389
9390 #define WRITE_BUFSIZE  4096
9391
9392 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9393                                 size_t count, loff_t *ppos,
9394                                 int (*createfn)(int, char **))
9395 {
9396         char *kbuf, *buf, *tmp;
9397         int ret = 0;
9398         size_t done = 0;
9399         size_t size;
9400
9401         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9402         if (!kbuf)
9403                 return -ENOMEM;
9404
9405         while (done < count) {
9406                 size = count - done;
9407
9408                 if (size >= WRITE_BUFSIZE)
9409                         size = WRITE_BUFSIZE - 1;
9410
9411                 if (copy_from_user(kbuf, buffer + done, size)) {
9412                         ret = -EFAULT;
9413                         goto out;
9414                 }
9415                 kbuf[size] = '\0';
9416                 buf = kbuf;
9417                 do {
9418                         tmp = strchr(buf, '\n');
9419                         if (tmp) {
9420                                 *tmp = '\0';
9421                                 size = tmp - buf + 1;
9422                         } else {
9423                                 size = strlen(buf);
9424                                 if (done + size < count) {
9425                                         if (buf != kbuf)
9426                                                 break;
9427                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9428                                         pr_warn("Line length is too long: Should be less than %d\n",
9429                                                 WRITE_BUFSIZE - 2);
9430                                         ret = -EINVAL;
9431                                         goto out;
9432                                 }
9433                         }
9434                         done += size;
9435
9436                         /* Remove comments */
9437                         tmp = strchr(buf, '#');
9438
9439                         if (tmp)
9440                                 *tmp = '\0';
9441
9442                         ret = trace_run_command(buf, createfn);
9443                         if (ret)
9444                                 goto out;
9445                         buf += size;
9446
9447                 } while (done < count);
9448         }
9449         ret = done;
9450
9451 out:
9452         kfree(kbuf);
9453
9454         return ret;
9455 }
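
/*
 * Editorial note (illustrative, not part of the original source):
 * trace_parse_run_command() is the common write handler behind command
 * style tracefs files such as kprobe_events and synthetic_events. A write
 * may contain several newline separated commands; '#' comments are
 * stripped and each remaining line is handed to createfn, e.g.:
 *
 *   echo 'p:myprobe do_sys_open' > kprobe_events
 *
 * Lines longer than WRITE_BUFSIZE - 2 are rejected with -EINVAL.
 */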
9456
9457 __init static int tracer_alloc_buffers(void)
9458 {
9459         int ring_buf_size;
9460         int ret = -ENOMEM;
9461
9462
9463         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9464                 pr_warn("Tracing disabled due to lockdown\n");
9465                 return -EPERM;
9466         }
9467
9468         /*
9469          * Make sure we don't accidentally add more trace options
9470          * than we have bits for.
9471          */
9472         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9473
9474         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9475                 goto out;
9476
9477         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9478                 goto out_free_buffer_mask;
9479
9480         /* Only allocate trace_printk buffers if a trace_printk exists */
9481         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9482                 /* Must be called before global_trace.buffer is allocated */
9483                 trace_printk_init_buffers();
9484
9485         /* To save memory, keep the ring buffer size to its minimum */
9486         if (ring_buffer_expanded)
9487                 ring_buf_size = trace_buf_size;
9488         else
9489                 ring_buf_size = 1;
9490
9491         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9492         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9493
9494         raw_spin_lock_init(&global_trace.start_lock);
9495
9496         /*
9497          * The prepare callback allocates some memory for the ring buffer. We
9498          * don't free the buffer if the CPU goes down. If we were to free
9499          * the buffer, then the user would lose any trace that was in the
9500          * buffer. The memory will be removed once the "instance" is removed.
9501          */
9502         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9503                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9504                                       NULL);
9505         if (ret < 0)
9506                 goto out_free_cpumask;
9507         /* Used for event triggers */
9508         ret = -ENOMEM;
9509         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9510         if (!temp_buffer)
9511                 goto out_rm_hp_state;
9512
9513         if (trace_create_savedcmd() < 0)
9514                 goto out_free_temp_buffer;
9515
9516         /* TODO: make the number of buffers hot pluggable with CPUs */
9517         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9518                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9519                 goto out_free_savedcmd;
9520         }
9521
9522         if (global_trace.buffer_disabled)
9523                 tracing_off();
9524
9525         if (trace_boot_clock) {
9526                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9527                 if (ret < 0)
9528                         pr_warn("Trace clock %s not defined, going back to default\n",
9529                                 trace_boot_clock);
9530         }
9531
9532         /*
9533          * register_tracer() might reference current_trace, so it
9534          * needs to be set before we register anything. This is
9535          * just a bootstrap of current_trace anyway.
9536          */
9537         global_trace.current_trace = &nop_trace;
9538
9539         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9540
9541         ftrace_init_global_array_ops(&global_trace);
9542
9543         init_trace_flags_index(&global_trace);
9544
9545         register_tracer(&nop_trace);
9546
9547         /* Function tracing may start here (via kernel command line) */
9548         init_function_trace();
9549
9550         /* All seems OK, enable tracing */
9551         tracing_disabled = 0;
9552
9553         atomic_notifier_chain_register(&panic_notifier_list,
9554                                        &trace_panic_notifier);
9555
9556         register_die_notifier(&trace_die_notifier);
9557
9558         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9559
9560         INIT_LIST_HEAD(&global_trace.systems);
9561         INIT_LIST_HEAD(&global_trace.events);
9562         INIT_LIST_HEAD(&global_trace.hist_vars);
9563         INIT_LIST_HEAD(&global_trace.err_log);
9564         list_add(&global_trace.list, &ftrace_trace_arrays);
9565
9566         apply_trace_boot_options();
9567
9568         register_snapshot_cmd();
9569
9570         return 0;
9571
9572 out_free_savedcmd:
9573         free_saved_cmdlines_buffer(savedcmd);
9574 out_free_temp_buffer:
9575         ring_buffer_free(temp_buffer);
9576 out_rm_hp_state:
9577         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9578 out_free_cpumask:
9579         free_cpumask_var(global_trace.tracing_cpumask);
9580 out_free_buffer_mask:
9581         free_cpumask_var(tracing_buffer_mask);
9582 out:
9583         return ret;
9584 }
9585
9586 void __init early_trace_init(void)
9587 {
9588         if (tracepoint_printk) {
9589                 tracepoint_print_iter =
9590                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9591                 if (MEM_FAIL(!tracepoint_print_iter,
9592                              "Failed to allocate trace iterator\n"))
9593                         tracepoint_printk = 0;
9594                 else
9595                         static_key_enable(&tracepoint_printk_key.key);
9596         }
9597         tracer_alloc_buffers();
9598 }
9599
9600 void __init trace_init(void)
9601 {
9602         trace_event_init();
9603 }
9604
9605 __init static int clear_boot_tracer(void)
9606 {
9607         /*
9608          * The name of the default boot-up tracer lives in a buffer that
9609          * is in an init section and will be freed. This function is called
9610          * at late_initcall time. If the boot tracer was never registered
9611          * by then, clear the pointer to prevent a later registration from
9612          * accessing the buffer that is about to be freed.
9613          */
9614         if (!default_bootup_tracer)
9615                 return 0;
9616
9617         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9618                default_bootup_tracer);
9619         default_bootup_tracer = NULL;
9620
9621         return 0;
9622 }
9623
9624 fs_initcall(tracer_init_tracefs);
9625 late_initcall_sync(clear_boot_tracer);
9626
9627 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9628 __init static int tracing_set_default_clock(void)
9629 {
9630         /* sched_clock_stable() is determined in late_initcall */
9631         if (!trace_boot_clock && !sched_clock_stable()) {
9632                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9633                         pr_warn("Can not set tracing clock due to lockdown\n");
9634                         return -EPERM;
9635                 }
9636
9637                 printk(KERN_WARNING
9638                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9639                        "If you want to keep using the local clock, then add:\n"
9640                        "  \"trace_clock=local\"\n"
9641                        "on the kernel command line\n");
9642                 tracing_set_clock(&global_trace, "global");
9643         }
9644
9645         return 0;
9646 }
9647 late_initcall_sync(tracing_set_default_clock);
9648 #endif