kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/seq_file.h>
21 #include <linux/notifier.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/trace.h>
45 #include <linux/sched/clock.h>
46 #include <linux/sched/rt.h>
47
48 #include "trace.h"
49 #include "trace_output.h"
50
51 /*
52  * On boot up, the ring buffer is set to the minimum size, so that
53  * we do not waste memory on systems that are not using tracing.
54  */
55 bool ring_buffer_expanded;
56
57 /*
58  * We need to change this state when a selftest is running.
59  * A selftest will look into the ring buffer to count the
60  * entries inserted during the selftest, although concurrent
61  * insertions into the ring buffer, such as trace_printk(), could occur
62  * at the same time, giving false positive or negative results.
63  */
64 static bool __read_mostly tracing_selftest_running;
65
66 /*
67  * If a tracer is running, we do not want to run SELFTEST.
68  */
69 bool __read_mostly tracing_selftest_disabled;
70
71 /* Pipe tracepoints to printk */
72 struct trace_iterator *tracepoint_print_iter;
73 int tracepoint_printk;
74 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76 /* For tracers that don't implement custom flags */
77 static struct tracer_opt dummy_tracer_opt[] = {
78         { }
79 };
80
81 static int
82 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83 {
84         return 0;
85 }
86
87 /*
88  * To prevent the comm cache from being overwritten when no
89  * tracing is active, only save the comm when a trace event
90  * occurs.
91  */
92 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94 /*
95  * Kill all tracing for good (never come back).
96  * It is initialized to 1 but will turn to zero if the initialization
97  * of the tracer is successful. But that is the only place that sets
98  * this back to zero.
99  */
100 static int tracing_disabled = 1;
101
102 cpumask_var_t __read_mostly     tracing_buffer_mask;
103
104 /*
105  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106  *
107  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
108  * is set, then ftrace_dump is called. This will output the contents
109  * of the ftrace buffers to the console.  This is very useful for
110  * capturing traces that lead to crashes and outputting them to a
111  * serial console.
112  *
113  * It is off by default, but you can enable it either by specifying
114  * "ftrace_dump_on_oops" on the kernel command line, or by setting
115  * /proc/sys/kernel/ftrace_dump_on_oops.
116  * Set 1 if you want to dump buffers of all CPUs
117  * Set 2 if you want to dump the buffer of the CPU that triggered oops
118  */
119
120 enum ftrace_dump_mode ftrace_dump_on_oops;
121
122 /* When set, tracing will stop when a WARN*() is hit */
123 int __disable_trace_on_warning;
124
125 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
126 /* Map of enums to their values, for "eval_map" file */
127 struct trace_eval_map_head {
128         struct module                   *mod;
129         unsigned long                   length;
130 };
131
132 union trace_eval_map_item;
133
134 struct trace_eval_map_tail {
135         /*
136          * "end" is first and points to NULL as it must be different
137          * from "mod" or "eval_string"
138          */
139         union trace_eval_map_item       *next;
140         const char                      *end;   /* points to NULL */
141 };
142
143 static DEFINE_MUTEX(trace_eval_mutex);
144
145 /*
146  * The trace_eval_maps are saved in an array with two extra elements,
147  * one at the beginning, and one at the end. The beginning item contains
148  * the count of the saved maps (head.length), and the module they
149  * belong to if not built in (head.mod). The ending item contains a
150  * pointer to the next array of saved eval_map items.
151  */
152 union trace_eval_map_item {
153         struct trace_eval_map           map;
154         struct trace_eval_map_head      head;
155         struct trace_eval_map_tail      tail;
156 };
157
158 static union trace_eval_map_item *trace_eval_maps;
159 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163 #define MAX_TRACER_SIZE         100
164 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165 static char *default_bootup_tracer;
166
167 static bool allocate_snapshot;
168
169 static int __init set_cmdline_ftrace(char *str)
170 {
171         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172         default_bootup_tracer = bootup_tracer_buf;
173         /* We are using ftrace early, expand it */
174         ring_buffer_expanded = true;
175         return 1;
176 }
177 __setup("ftrace=", set_cmdline_ftrace);
178
179 static int __init set_ftrace_dump_on_oops(char *str)
180 {
181         if (*str++ != '=' || !*str) {
182                 ftrace_dump_on_oops = DUMP_ALL;
183                 return 1;
184         }
185
186         if (!strcmp("orig_cpu", str)) {
187                 ftrace_dump_on_oops = DUMP_ORIG;
188                 return 1;
189         }
190
191         return 0;
192 }
193 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195 static int __init stop_trace_on_warning(char *str)
196 {
197         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198                 __disable_trace_on_warning = 1;
199         return 1;
200 }
201 __setup("traceoff_on_warning", stop_trace_on_warning);
202
203 static int __init boot_alloc_snapshot(char *str)
204 {
205         allocate_snapshot = true;
206         /* We also need the main ring buffer expanded */
207         ring_buffer_expanded = true;
208         return 1;
209 }
210 __setup("alloc_snapshot", boot_alloc_snapshot);
211
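/*
 * Illustrative note (not part of the original file): the boot parameters
 * handled above can be combined on the kernel command line, for example
 * (assuming the function_graph tracer is built in):
 *
 *	ftrace=function_graph ftrace_dump_on_oops=orig_cpu alloc_snapshot
 *
 * which selects the tracer at boot, dumps only the oops-triggering CPU's
 * buffer on a crash, and pre-allocates the snapshot buffer.
 */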
212
213 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215 static int __init set_trace_boot_options(char *str)
216 {
217         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218         return 0;
219 }
220 __setup("trace_options=", set_trace_boot_options);
221
222 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223 static char *trace_boot_clock __initdata;
224
225 static int __init set_trace_boot_clock(char *str)
226 {
227         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228         trace_boot_clock = trace_boot_clock_buf;
229         return 0;
230 }
231 __setup("trace_clock=", set_trace_boot_clock);
232
233 static int __init set_tracepoint_printk(char *str)
234 {
235         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236                 tracepoint_printk = 1;
237         return 1;
238 }
239 __setup("tp_printk", set_tracepoint_printk);
240
241 unsigned long long ns2usecs(u64 nsec)
242 {
243         nsec += 500;
244         do_div(nsec, 1000);
245         return nsec;
246 }
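/*
 * For illustration: ns2usecs() rounds to the nearest microsecond, so
 * ns2usecs(1499) == 1 ((1499 + 500) / 1000) and ns2usecs(1500) == 2
 * ((1500 + 500) / 1000).
 */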
247
248 /* trace_flags holds trace_options default values */
249 #define TRACE_DEFAULT_FLAGS                                             \
250         (FUNCTION_DEFAULT_FLAGS |                                       \
251          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
252          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
253          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
254          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256 /* trace_options that are only supported by global_trace */
257 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
258                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260 /* trace_flags that are default zero for instances */
261 #define ZEROED_TRACE_FLAGS \
262         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264 /*
265  * The global_trace is the descriptor that holds the top-level tracing
266  * buffers for the live tracing.
267  */
268 static struct trace_array global_trace = {
269         .trace_flags = TRACE_DEFAULT_FLAGS,
270 };
271
272 LIST_HEAD(ftrace_trace_arrays);
273
274 int trace_array_get(struct trace_array *this_tr)
275 {
276         struct trace_array *tr;
277         int ret = -ENODEV;
278
279         mutex_lock(&trace_types_lock);
280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281                 if (tr == this_tr) {
282                         tr->ref++;
283                         ret = 0;
284                         break;
285                 }
286         }
287         mutex_unlock(&trace_types_lock);
288
289         return ret;
290 }
291
292 static void __trace_array_put(struct trace_array *this_tr)
293 {
294         WARN_ON(!this_tr->ref);
295         this_tr->ref--;
296 }
297
298 void trace_array_put(struct trace_array *this_tr)
299 {
300         mutex_lock(&trace_types_lock);
301         __trace_array_put(this_tr);
302         mutex_unlock(&trace_types_lock);
303 }
304
305 int call_filter_check_discard(struct trace_event_call *call, void *rec,
306                               struct ring_buffer *buffer,
307                               struct ring_buffer_event *event)
308 {
309         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310             !filter_match_preds(call->filter, rec)) {
311                 __trace_event_discard_commit(buffer, event);
312                 return 1;
313         }
314
315         return 0;
316 }
317
318 void trace_free_pid_list(struct trace_pid_list *pid_list)
319 {
320         vfree(pid_list->pids);
321         kfree(pid_list);
322 }
323
324 /**
325  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326  * @filtered_pids: The list of pids to check
327  * @search_pid: The PID to find in @filtered_pids
328  *
329  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330  */
331 bool
332 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 {
334         /*
335          * If pid_max changed after filtered_pids was created, we
336          * by default ignore all pids greater than the previous pid_max.
337          */
338         if (search_pid >= filtered_pids->pid_max)
339                 return false;
340
341         return test_bit(search_pid, filtered_pids->pids);
342 }
343
344 /**
345  * trace_ignore_this_task - should a task be ignored for tracing
346  * @filtered_pids: The list of pids to check
347  * @task: The task that should be ignored if not filtered
348  *
349  * Checks if @task should be traced or not from @filtered_pids.
350  * Returns true if @task should *NOT* be traced.
351  * Returns false if @task should be traced.
352  */
353 bool
354 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 {
356         /*
357          * Return false, because if filtered_pids does not exist,
358          * all pids are good to trace.
359          */
360         if (!filtered_pids)
361                 return false;
362
363         return !trace_find_filtered_pid(filtered_pids, task->pid);
364 }
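/*
 * Illustrative sketch (not part of the original file): a typical caller
 * checks trace_ignore_this_task() before recording anything for a task.
 * The function name below is hypothetical.
 */
static void example_record_switch(struct trace_pid_list *pid_list,
				  struct task_struct *next)
{
	if (trace_ignore_this_task(pid_list, next))
		return;

	/* ... record the event for @next ... */
}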
365
366 /**
367  * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368  * @pid_list: The list to modify
369  * @self: The current task for fork or NULL for exit
370  * @task: The task to add or remove
371  *
372  * If adding a task, if @self is defined, the task is only added if @self
373  * is also included in @pid_list. This happens on fork and tasks should
374  * only be added when the parent is listed. If @self is NULL, then the
375  * @task pid will be removed from the list, which would happen on exit
376  * of a task.
377  */
378 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379                                   struct task_struct *self,
380                                   struct task_struct *task)
381 {
382         if (!pid_list)
383                 return;
384
385         /* For forks, we only add if the forking task is listed */
386         if (self) {
387                 if (!trace_find_filtered_pid(pid_list, self->pid))
388                         return;
389         }
390
391         /* Sorry, but we don't support pid_max changing after setting */
392         if (task->pid >= pid_list->pid_max)
393                 return;
394
395         /* "self" is set for forks, and NULL for exits */
396         if (self)
397                 set_bit(task->pid, pid_list->pids);
398         else
399                 clear_bit(task->pid, pid_list->pids);
400 }
401
402 /**
403  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404  * @pid_list: The pid list to show
405  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
406  * @pos: The position of the file
407  *
408  * This is used by the seq_file "next" operation to iterate the pids
409  * listed in a trace_pid_list structure.
410  *
411  * Returns the pid+1 as we want to display pid of zero, but NULL would
412  * stop the iteration.
413  */
414 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415 {
416         unsigned long pid = (unsigned long)v;
417
418         (*pos)++;
419
420         /* pid already is +1 of the actual previous bit */
421         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423         /* Return pid + 1 to allow zero to be represented */
424         if (pid < pid_list->pid_max)
425                 return (void *)(pid + 1);
426
427         return NULL;
428 }
429
430 /**
431  * trace_pid_start - Used for seq_file to start reading pid lists
432  * @pid_list: The pid list to show
433  * @pos: The position of the file
434  *
435  * This is used by seq_file "start" operation to start the iteration
436  * of listing pids.
437  *
438  * Returns the pid+1 as we want to display pid of zero, but NULL would
439  * stop the iteration.
440  */
441 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442 {
443         unsigned long pid;
444         loff_t l = 0;
445
446         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447         if (pid >= pid_list->pid_max)
448                 return NULL;
449
450         /* Return pid + 1 so that zero can be the exit value */
451         for (pid++; pid && l < *pos;
452              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453                 ;
454         return (void *)pid;
455 }
456
457 /**
458  * trace_pid_show - show the current pid in seq_file processing
459  * @m: The seq_file structure to write into
460  * @v: A void pointer of the pid (+1) value to display
461  *
462  * Can be directly used by seq_file operations to display the current
463  * pid value.
464  */
465 int trace_pid_show(struct seq_file *m, void *v)
466 {
467         unsigned long pid = (unsigned long)v - 1;
468
469         seq_printf(m, "%lu\n", pid);
470         return 0;
471 }
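/*
 * Illustrative sketch (not part of the original file): trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are meant to back seq_file
 * operations for a pid-list file. The example_* names are hypothetical
 * and assume the pid list was stashed in m->private.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};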
472
473 /* 128 should be much more than enough */
474 #define PID_BUF_SIZE            127
475
476 int trace_pid_write(struct trace_pid_list *filtered_pids,
477                     struct trace_pid_list **new_pid_list,
478                     const char __user *ubuf, size_t cnt)
479 {
480         struct trace_pid_list *pid_list;
481         struct trace_parser parser;
482         unsigned long val;
483         int nr_pids = 0;
484         ssize_t read = 0;
485         ssize_t ret = 0;
486         loff_t pos;
487         pid_t pid;
488
489         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490                 return -ENOMEM;
491
492         /*
493          * Always recreate a new array. The write is an all or nothing
494          * operation. Always create a new array when adding new pids by
495          * the user. If the operation fails, then the current list is
496          * not modified.
497          */
498         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499         if (!pid_list) {
500                 trace_parser_put(&parser);
501                 return -ENOMEM;
502         }
503
504         pid_list->pid_max = READ_ONCE(pid_max);
505
506         /* Only truncating will shrink pid_max */
507         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
508                 pid_list->pid_max = filtered_pids->pid_max;
509
510         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
511         if (!pid_list->pids) {
512                 trace_parser_put(&parser);
513                 kfree(pid_list);
514                 return -ENOMEM;
515         }
516
517         if (filtered_pids) {
518                 /* copy the current bits to the new max */
519                 for_each_set_bit(pid, filtered_pids->pids,
520                                  filtered_pids->pid_max) {
521                         set_bit(pid, pid_list->pids);
522                         nr_pids++;
523                 }
524         }
525
526         while (cnt > 0) {
527
528                 pos = 0;
529
530                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
531                 if (ret < 0 || !trace_parser_loaded(&parser))
532                         break;
533
534                 read += ret;
535                 ubuf += ret;
536                 cnt -= ret;
537
538                 ret = -EINVAL;
539                 if (kstrtoul(parser.buffer, 0, &val))
540                         break;
541                 if (val >= pid_list->pid_max)
542                         break;
543
544                 pid = (pid_t)val;
545
546                 set_bit(pid, pid_list->pids);
547                 nr_pids++;
548
549                 trace_parser_clear(&parser);
550                 ret = 0;
551         }
552         trace_parser_put(&parser);
553
554         if (ret < 0) {
555                 trace_free_pid_list(pid_list);
556                 return ret;
557         }
558
559         if (!nr_pids) {
560                 /* Cleared the list of pids */
561                 trace_free_pid_list(pid_list);
562                 read = ret;
563                 pid_list = NULL;
564         }
565
566         *new_pid_list = pid_list;
567
568         return read;
569 }
570
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573         u64 ts;
574
575         /* Early boot up does not have a buffer yet */
576         if (!buf->buffer)
577                 return trace_clock_local();
578
579         ts = ring_buffer_time_stamp(buf->buffer, cpu);
580         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581
582         return ts;
583 }
584
585 u64 ftrace_now(int cpu)
586 {
587         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled" to be used in fast paths such as for
595  * the irqsoff tracer. But it may be inaccurate due to races. If you
596  * need to know the accurate state, use tracing_is_on() which is a little
597  * slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601         /*
602          * For quick access (irqsoff uses this in fast path), just
603          * return the mirror variable of the state of the ring buffer.
604          * It's a little racy, but we don't really care.
605          */
606         smp_rmb();
607         return !global_trace.buffer_disabled;
608 }
609
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low number of 16384.
616  * If a dump on oops happens, it is much appreciated not to have
617  * to wait for all that output. In any case, this is configurable
618  * both at boot time and at run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
621
622 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer            *trace_types __read_mostly;
626
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631
632 /*
633  * serialize access to the ring buffer
634  *
635  * The ring buffer serializes readers, but that is only low-level protection.
636  * The validity of the events (returned by ring_buffer_peek(), etc.)
637  * is not protected by the ring buffer.
638  *
639  * The content of events may become garbage if we allow another process to
640  * consume these events concurrently:
641  *   A) the page holding the consumed events may become a normal page
642  *      (not a reader page) in the ring buffer, and this page will be rewritten
643  *      by the event producer.
644  *   B) the page holding the consumed events may become a page for splice_read,
645  *      and this page will be returned to the system.
646  *
647  * These primitives allow multiple processes to access different CPU ring
648  * buffers concurrently.
649  *
650  * These primitives don't distinguish read-only and read-consume access.
651  * Multiple read-only accesses are also serialized.
652  */
653
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657
658 static inline void trace_access_lock(int cpu)
659 {
660         if (cpu == RING_BUFFER_ALL_CPUS) {
661                 /* gain it for accessing the whole ring buffer. */
662                 down_write(&all_cpu_access_lock);
663         } else {
664                 /* gain it for accessing a cpu ring buffer. */
665
666                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667                 down_read(&all_cpu_access_lock);
668
669                 /* Secondly block other access to this @cpu ring buffer. */
670                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671         }
672 }
673
674 static inline void trace_access_unlock(int cpu)
675 {
676         if (cpu == RING_BUFFER_ALL_CPUS) {
677                 up_write(&all_cpu_access_lock);
678         } else {
679                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680                 up_read(&all_cpu_access_lock);
681         }
682 }
683
684 static inline void trace_access_lock_init(void)
685 {
686         int cpu;
687
688         for_each_possible_cpu(cpu)
689                 mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691
692 #else
693
694 static DEFINE_MUTEX(access_lock);
695
696 static inline void trace_access_lock(int cpu)
697 {
698         (void)cpu;
699         mutex_lock(&access_lock);
700 }
701
702 static inline void trace_access_unlock(int cpu)
703 {
704         (void)cpu;
705         mutex_unlock(&access_lock);
706 }
707
708 static inline void trace_access_lock_init(void)
709 {
710 }
711
712 #endif
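/*
 * Illustrative sketch (not part of the original file): a reader that
 * consumes events from one CPU buffer wraps the ring buffer calls in the
 * access lock described above. The function name is hypothetical.
 */
static void example_consume_cpu(struct trace_array *tr, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	while ((event = ring_buffer_consume(tr->trace_buffer.buffer, cpu,
					    &ts, NULL))) {
		/* ... decode and print the event ... */
	}
	trace_access_unlock(cpu);
}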
713
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716                                  unsigned long flags,
717                                  int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719                                       struct ring_buffer *buffer,
720                                       unsigned long flags,
721                                       int skip, int pc, struct pt_regs *regs);
722
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725                                         unsigned long flags,
726                                         int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730                                       struct ring_buffer *buffer,
731                                       unsigned long flags,
732                                       int skip, int pc, struct pt_regs *regs)
733 {
734 }
735
736 #endif
737
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740                   int type, unsigned long flags, int pc)
741 {
742         struct trace_entry *ent = ring_buffer_event_data(event);
743
744         tracing_generic_entry_update(ent, flags, pc);
745         ent->type = type;
746 }
747
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750                           int type,
751                           unsigned long len,
752                           unsigned long flags, int pc)
753 {
754         struct ring_buffer_event *event;
755
756         event = ring_buffer_lock_reserve(buffer, len);
757         if (event != NULL)
758                 trace_event_setup(event, type, flags, pc);
759
760         return event;
761 }
762
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765         if (tr->trace_buffer.buffer)
766                 ring_buffer_record_on(tr->trace_buffer.buffer);
767         /*
768          * This flag is looked at when buffers haven't been allocated
769          * yet, or by some tracers (like irqsoff), that just want to
770          * know if the ring buffer has been disabled, but it can handle
771          * races of where it gets disabled but we still do a record.
772          * As the check is in the fast path of the tracers, it is more
773          * important to be fast than accurate.
774          */
775         tr->buffer_disabled = 0;
776         /* Make the flag seen by readers */
777         smp_wmb();
778 }
779
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788         tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791
792
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796         __this_cpu_write(trace_taskinfo_save, true);
797
798         /* If this is the temp buffer, we need to commit fully */
799         if (this_cpu_read(trace_buffered_event) == event) {
800                 /* Length is in event->array[0] */
801                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
802                 /* Release the temp buffer */
803                 this_cpu_dec(trace_buffered_event_cnt);
804         } else
805                 ring_buffer_unlock_commit(buffer, event);
806 }
807
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:    The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816         struct ring_buffer_event *event;
817         struct ring_buffer *buffer;
818         struct print_entry *entry;
819         unsigned long irq_flags;
820         int alloc;
821         int pc;
822
823         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824                 return 0;
825
826         pc = preempt_count();
827
828         if (unlikely(tracing_selftest_running || tracing_disabled))
829                 return 0;
830
831         alloc = sizeof(*entry) + size + 2; /* possible \n added */
832
833         local_save_flags(irq_flags);
834         buffer = global_trace.trace_buffer.buffer;
835         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836                                             irq_flags, pc);
837         if (!event)
838                 return 0;
839
840         entry = ring_buffer_event_data(event);
841         entry->ip = ip;
842
843         memcpy(&entry->buf, str, size);
844
845         /* Add a newline if necessary */
846         if (entry->buf[size - 1] != '\n') {
847                 entry->buf[size] = '\n';
848                 entry->buf[size + 1] = '\0';
849         } else
850                 entry->buf[size] = '\0';
851
852         __buffer_unlock_commit(buffer, event);
853         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854
855         return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:    The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866         struct ring_buffer_event *event;
867         struct ring_buffer *buffer;
868         struct bputs_entry *entry;
869         unsigned long irq_flags;
870         int size = sizeof(struct bputs_entry);
871         int pc;
872
873         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874                 return 0;
875
876         pc = preempt_count();
877
878         if (unlikely(tracing_selftest_running || tracing_disabled))
879                 return 0;
880
881         local_save_flags(irq_flags);
882         buffer = global_trace.trace_buffer.buffer;
883         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884                                             irq_flags, pc);
885         if (!event)
886                 return 0;
887
888         entry = ring_buffer_event_data(event);
889         entry->ip                       = ip;
890         entry->str                      = str;
891
892         __buffer_unlock_commit(buffer, event);
893         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894
895         return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
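/*
 * Illustrative note (not part of the original file): __trace_puts() and
 * __trace_bputs() above are normally reached through the trace_puts()
 * macro (see linux/kernel.h), which picks one of them based on whether
 * the string is a build-time constant and supplies the caller address:
 *
 *	trace_puts("reached the slow path\n");
 */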
898
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902         struct tracer *tracer = tr->current_trace;
903         unsigned long flags;
904
905         if (in_nmi()) {
906                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907                 internal_trace_puts("*** snapshot is being ignored        ***\n");
908                 return;
909         }
910
911         if (!tr->allocated_snapshot) {
912                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913                 internal_trace_puts("*** stopping trace here!   ***\n");
914                 tracing_off();
915                 return;
916         }
917
918         /* Note, snapshot can not be used when the tracer uses it */
919         if (tracer->use_max_tr) {
920                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922                 return;
923         }
924
925         local_irq_save(flags);
926         update_max_tr(tr, current, smp_processor_id());
927         local_irq_restore(flags);
928 }
929
930 /**
931  * tracing_snapshot - take a snapshot of the current buffer.
932  *
933  * This causes a swap between the snapshot buffer and the current live
934  * tracing buffer. You can use this to take snapshots of the live
935  * trace when some condition is triggered, but continue to trace.
936  *
937  * Note, make sure to allocate the snapshot either with
938  * tracing_snapshot_alloc(), or manually with:
939  *      echo 1 > /sys/kernel/debug/tracing/snapshot
940  *
941  * If the snapshot buffer is not allocated, it will stop tracing.
942  * Basically making a permanent snapshot.
943  */
944 void tracing_snapshot(void)
945 {
946         struct trace_array *tr = &global_trace;
947
948         tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
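/*
 * Illustrative sketch (not part of the original file): allocate the
 * snapshot buffer once from a context that may sleep, then snapshot when
 * an interesting condition hits. The function names are hypothetical.
 */
static int example_monitor_init(void)
{
	/* May sleep; do this once from process context. */
	return tracing_alloc_snapshot();
}

static void example_monitor_hit(void)
{
	/* Safe from atomic context once the buffer is allocated. */
	tracing_snapshot();
}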
951
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953                                         struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958         int ret;
959
960         if (!tr->allocated_snapshot) {
961
962                 /* allocate spare buffer */
963                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
964                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965                 if (ret < 0)
966                         return ret;
967
968                 tr->allocated_snapshot = true;
969         }
970
971         return 0;
972 }
973
974 static void free_snapshot(struct trace_array *tr)
975 {
976         /*
977          * We don't free the ring buffer; instead, we resize it because
978          * the max_tr ring buffer has some state (e.g. ring->clock) that
979          * we want to preserve.
980          */
981         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982         set_buffer_entries(&tr->max_buffer, 1);
983         tracing_reset_online_cpus(&tr->max_buffer);
984         tr->allocated_snapshot = false;
985 }
986
987 /**
988  * tracing_alloc_snapshot - allocate snapshot buffer.
989  *
990  * This only allocates the snapshot buffer if it isn't already
991  * allocated - it doesn't also take a snapshot.
992  *
993  * This is meant to be used in cases where the snapshot buffer needs
994  * to be set up for events that can't sleep but need to be able to
995  * trigger a snapshot.
996  */
997 int tracing_alloc_snapshot(void)
998 {
999         struct trace_array *tr = &global_trace;
1000         int ret;
1001
1002         ret = tracing_alloc_snapshot_instance(tr);
1003         WARN_ON(ret < 0);
1004
1005         return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008
1009 /**
1010  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011  *
1012  * This is similar to tracing_snapshot(), but it will allocate the
1013  * snapshot buffer if it isn't already allocated. Use this only
1014  * where it is safe to sleep, as the allocation may sleep.
1015  *
1016  * This causes a swap between the snapshot buffer and the current live
1017  * tracing buffer. You can use this to take snapshots of the live
1018  * trace when some condition is triggered, but continue to trace.
1019  */
1020 void tracing_snapshot_alloc(void)
1021 {
1022         int ret;
1023
1024         ret = tracing_alloc_snapshot();
1025         if (ret < 0)
1026                 return;
1027
1028         tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040         return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045         /* Give warning */
1046         tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053         if (tr->trace_buffer.buffer)
1054                 ring_buffer_record_off(tr->trace_buffer.buffer);
1055         /*
1056          * This flag is looked at when buffers haven't been allocated
1057          * yet, or by some tracers (like irqsoff), that just want to
1058          * know if the ring buffer has been disabled, but it can handle
1059          * races of where it gets disabled but we still do a record.
1060          * As the check is in the fast path of the tracers, it is more
1061          * important to be fast than accurate.
1062          */
1063         tr->buffer_disabled = 1;
1064         /* Make the flag seen by readers */
1065         smp_wmb();
1066 }
1067
1068 /**
1069  * tracing_off - turn off tracing buffers
1070  *
1071  * This function stops the tracing buffers from recording data.
1072  * It does not disable any overhead the tracers themselves may
1073  * be causing. This function simply causes all recording to
1074  * the ring buffers to fail.
1075  */
1076 void tracing_off(void)
1077 {
1078         tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081
1082 void disable_trace_on_warning(void)
1083 {
1084         if (__disable_trace_on_warning)
1085                 tracing_off();
1086 }
1087
1088 /**
1089  * tracer_tracing_is_on - show real state of ring buffer enabled
1090  * @tr: the trace array to check whether its ring buffer is enabled
1091  *
1092  * Shows real state of the ring buffer if it is enabled or not.
1093  */
1094 bool tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096         if (tr->trace_buffer.buffer)
1097                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098         return !tr->buffer_disabled;
1099 }
1100
1101 /**
1102  * tracing_is_on - show state of ring buffers enabled
1103  */
1104 int tracing_is_on(void)
1105 {
1106         return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109
1110 static int __init set_buf_size(char *str)
1111 {
1112         unsigned long buf_size;
1113
1114         if (!str)
1115                 return 0;
1116         buf_size = memparse(str, &str);
1117         /* nr_entries can not be zero */
1118         if (buf_size == 0)
1119                 return 0;
1120         trace_buf_size = buf_size;
1121         return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127         unsigned long threshold;
1128         int ret;
1129
1130         if (!str)
1131                 return 0;
1132         ret = kstrtoul(str, 0, &threshold);
1133         if (ret < 0)
1134                 return 0;
1135         tracing_thresh = threshold * 1000;
1136         return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142         return nsecs / 1000;
1143 }
1144
1145 /*
1146  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149  * of strings in the order that the evals (enum) were defined.
1150  */
1151 #undef C
1152 #define C(a, b) b
1153
1154 /* These must match the bit positions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156         TRACE_FLAGS
1157         NULL
1158 };
1159
1160 static struct {
1161         u64 (*func)(void);
1162         const char *name;
1163         int in_ns;              /* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165         { trace_clock_local,            "local",        1 },
1166         { trace_clock_global,           "global",       1 },
1167         { trace_clock_counter,          "counter",      0 },
1168         { trace_clock_jiffies,          "uptime",       0 },
1169         { trace_clock,                  "perf",         1 },
1170         { ktime_get_mono_fast_ns,       "mono",         1 },
1171         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1172         { ktime_get_boot_fast_ns,       "boot",         1 },
1173         ARCH_TRACE_CLOCKS
1174 };
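/*
 * Illustrative note (not part of the original file): the clock used by a
 * trace instance can be selected from this table at boot time with the
 * "trace_clock=<name>" parameter handled above, or at run time, e.g.:
 *
 *	echo mono > /sys/kernel/debug/tracing/trace_clock
 */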
1175
1176 bool trace_clock_in_ns(struct trace_array *tr)
1177 {
1178         if (trace_clocks[tr->clock_id].in_ns)
1179                 return true;
1180
1181         return false;
1182 }
1183
1184 /*
1185  * trace_parser_get_init - gets the buffer for trace parser
1186  */
1187 int trace_parser_get_init(struct trace_parser *parser, int size)
1188 {
1189         memset(parser, 0, sizeof(*parser));
1190
1191         parser->buffer = kmalloc(size, GFP_KERNEL);
1192         if (!parser->buffer)
1193                 return 1;
1194
1195         parser->size = size;
1196         return 0;
1197 }
1198
1199 /*
1200  * trace_parser_put - frees the buffer for trace parser
1201  */
1202 void trace_parser_put(struct trace_parser *parser)
1203 {
1204         kfree(parser->buffer);
1205         parser->buffer = NULL;
1206 }
1207
1208 /*
1209  * trace_get_user - reads the user input string separated by space
1210  * (matched by isspace(ch))
1211  *
1212  * For each string found the 'struct trace_parser' is updated,
1213  * and the function returns.
1214  *
1215  * Returns number of bytes read.
1216  *
1217  * See kernel/trace/trace.h for 'struct trace_parser' details.
1218  */
1219 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1220         size_t cnt, loff_t *ppos)
1221 {
1222         char ch;
1223         size_t read = 0;
1224         ssize_t ret;
1225
1226         if (!*ppos)
1227                 trace_parser_clear(parser);
1228
1229         ret = get_user(ch, ubuf++);
1230         if (ret)
1231                 goto out;
1232
1233         read++;
1234         cnt--;
1235
1236         /*
1237          * The parser is not finished with the last write,
1238          * continue reading the user input without skipping spaces.
1239          */
1240         if (!parser->cont) {
1241                 /* skip white space */
1242                 while (cnt && isspace(ch)) {
1243                         ret = get_user(ch, ubuf++);
1244                         if (ret)
1245                                 goto out;
1246                         read++;
1247                         cnt--;
1248                 }
1249
1250                 parser->idx = 0;
1251
1252                 /* only spaces were written */
1253                 if (isspace(ch) || !ch) {
1254                         *ppos += read;
1255                         ret = read;
1256                         goto out;
1257                 }
1258         }
1259
1260         /* read the non-space input */
1261         while (cnt && !isspace(ch) && ch) {
1262                 if (parser->idx < parser->size - 1)
1263                         parser->buffer[parser->idx++] = ch;
1264                 else {
1265                         ret = -EINVAL;
1266                         goto out;
1267                 }
1268                 ret = get_user(ch, ubuf++);
1269                 if (ret)
1270                         goto out;
1271                 read++;
1272                 cnt--;
1273         }
1274
1275         /* We either got finished input or we have to wait for another call. */
1276         if (isspace(ch) || !ch) {
1277                 parser->buffer[parser->idx] = 0;
1278                 parser->cont = false;
1279         } else if (parser->idx < parser->size - 1) {
1280                 parser->cont = true;
1281                 parser->buffer[parser->idx++] = ch;
1282                 /* Make sure the parsed string always terminates with '\0'. */
1283                 parser->buffer[parser->idx] = 0;
1284         } else {
1285                 ret = -EINVAL;
1286                 goto out;
1287         }
1288
1289         *ppos += read;
1290         ret = read;
1291
1292 out:
1293         return ret;
1294 }
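/*
 * Illustrative sketch (not part of the original file): the usual pattern
 * for consuming user input with the parser is the loop used by
 * trace_pid_write() above. The function name and buffer size here are
 * hypothetical.
 */
static ssize_t example_parse_values(const char __user *ubuf, size_t cnt)
{
	struct trace_parser parser;
	unsigned long val;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	while (cnt > 0) {
		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0 || !trace_parser_loaded(&parser))
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		/* ... use @val ... */

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	return ret < 0 ? ret : read;
}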
1295
1296 /* TODO add a seq_buf_to_buffer() */
1297 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1298 {
1299         int len;
1300
1301         if (trace_seq_used(s) <= s->seq.readpos)
1302                 return -EBUSY;
1303
1304         len = trace_seq_used(s) - s->seq.readpos;
1305         if (cnt > len)
1306                 cnt = len;
1307         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1308
1309         s->seq.readpos += cnt;
1310         return cnt;
1311 }
1312
1313 unsigned long __read_mostly     tracing_thresh;
1314
1315 #ifdef CONFIG_TRACER_MAX_TRACE
1316 /*
1317  * Copy the new maximum trace into the separate maximum-trace
1318  * structure. (this way the maximum trace is permanently saved,
1319  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1320  */
1321 static void
1322 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1323 {
1324         struct trace_buffer *trace_buf = &tr->trace_buffer;
1325         struct trace_buffer *max_buf = &tr->max_buffer;
1326         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1327         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1328
1329         max_buf->cpu = cpu;
1330         max_buf->time_start = data->preempt_timestamp;
1331
1332         max_data->saved_latency = tr->max_latency;
1333         max_data->critical_start = data->critical_start;
1334         max_data->critical_end = data->critical_end;
1335
1336         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1337         max_data->pid = tsk->pid;
1338         /*
1339          * If tsk == current, then use current_uid(), as that does not use
1340          * RCU. The irq tracer can be called out of RCU scope.
1341          */
1342         if (tsk == current)
1343                 max_data->uid = current_uid();
1344         else
1345                 max_data->uid = task_uid(tsk);
1346
1347         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1348         max_data->policy = tsk->policy;
1349         max_data->rt_priority = tsk->rt_priority;
1350
1351         /* record this tasks comm */
1352         tracing_record_cmdline(tsk);
1353 }
1354
1355 /**
1356  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1357  * @tr: tracer
1358  * @tsk: the task with the latency
1359  * @cpu: The cpu that initiated the trace.
1360  *
1361  * Flip the buffers between the @tr and the max_tr and record information
1362  * about which task was the cause of this latency.
1363  */
1364 void
1365 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1366 {
1367         if (tr->stop_count)
1368                 return;
1369
1370         WARN_ON_ONCE(!irqs_disabled());
1371
1372         if (!tr->allocated_snapshot) {
1373                 /* Only the nop tracer should hit this when disabling */
1374                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1375                 return;
1376         }
1377
1378         arch_spin_lock(&tr->max_lock);
1379
1380         /* Inherit the recordable setting from trace_buffer */
1381         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1382                 ring_buffer_record_on(tr->max_buffer.buffer);
1383         else
1384                 ring_buffer_record_off(tr->max_buffer.buffer);
1385
1386         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1387
1388         __update_max_tr(tr, tsk, cpu);
1389         arch_spin_unlock(&tr->max_lock);
1390 }
1391
1392 /**
1393  * update_max_tr_single - only copy one trace over, and reset the rest
1394  * @tr: tracer
1395  * @tsk: task with the latency
1396  * @cpu: the cpu of the buffer to copy.
1397  *
1398  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1399  */
1400 void
1401 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1402 {
1403         int ret;
1404
1405         if (tr->stop_count)
1406                 return;
1407
1408         WARN_ON_ONCE(!irqs_disabled());
1409         if (!tr->allocated_snapshot) {
1410                 /* Only the nop tracer should hit this when disabling */
1411                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1412                 return;
1413         }
1414
1415         arch_spin_lock(&tr->max_lock);
1416
1417         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1418
1419         if (ret == -EBUSY) {
1420                 /*
1421                  * We failed to swap the buffer due to a commit taking
1422                  * place on this CPU. We fail to record, but we reset
1423                  * the max trace buffer (no one writes directly to it)
1424                  * and flag that it failed.
1425                  */
1426                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1427                         "Failed to swap buffers due to commit in progress\n");
1428         }
1429
1430         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1431
1432         __update_max_tr(tr, tsk, cpu);
1433         arch_spin_unlock(&tr->max_lock);
1434 }
1435 #endif /* CONFIG_TRACER_MAX_TRACE */
1436
1437 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1438 {
1439         /* Iterators are static, they should be filled or empty */
1440         if (trace_buffer_iter(iter, iter->cpu_file))
1441                 return 0;
1442
1443         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1444                                 full);
1445 }
1446
1447 #ifdef CONFIG_FTRACE_STARTUP_TEST
1448 static bool selftests_can_run;
1449
1450 struct trace_selftests {
1451         struct list_head                list;
1452         struct tracer                   *type;
1453 };
1454
1455 static LIST_HEAD(postponed_selftests);
1456
1457 static int save_selftest(struct tracer *type)
1458 {
1459         struct trace_selftests *selftest;
1460
1461         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1462         if (!selftest)
1463                 return -ENOMEM;
1464
1465         selftest->type = type;
1466         list_add(&selftest->list, &postponed_selftests);
1467         return 0;
1468 }
1469
1470 static int run_tracer_selftest(struct tracer *type)
1471 {
1472         struct trace_array *tr = &global_trace;
1473         struct tracer *saved_tracer = tr->current_trace;
1474         int ret;
1475
1476         if (!type->selftest || tracing_selftest_disabled)
1477                 return 0;
1478
1479         /*
1480          * If a tracer registers early in boot up (before scheduling is
1481          * initialized and such), then do not run its selftests yet.
1482          * Instead, run it a little later in the boot process.
1483          */
1484         if (!selftests_can_run)
1485                 return save_selftest(type);
1486
1487         /*
1488          * Run a selftest on this tracer.
1489          * Here we reset the trace buffer, and set the current
1490          * tracer to be this tracer. The tracer can then run some
1491          * internal tracing to verify that everything is in order.
1492          * If we fail, we do not register this tracer.
1493          */
1494         tracing_reset_online_cpus(&tr->trace_buffer);
1495
1496         tr->current_trace = type;
1497
1498 #ifdef CONFIG_TRACER_MAX_TRACE
1499         if (type->use_max_tr) {
1500                 /* If we expanded the buffers, make sure the max is expanded too */
1501                 if (ring_buffer_expanded)
1502                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1503                                            RING_BUFFER_ALL_CPUS);
1504                 tr->allocated_snapshot = true;
1505         }
1506 #endif
1507
1508         /* the test is responsible for initializing and enabling */
1509         pr_info("Testing tracer %s: ", type->name);
1510         ret = type->selftest(type, tr);
1511         /* the test is responsible for resetting too */
1512         tr->current_trace = saved_tracer;
1513         if (ret) {
1514                 printk(KERN_CONT "FAILED!\n");
1515                 /* Add the warning after printing 'FAILED' */
1516                 WARN_ON(1);
1517                 return -1;
1518         }
1519         /* Only reset on passing, to avoid touching corrupted buffers */
1520         tracing_reset_online_cpus(&tr->trace_buffer);
1521
1522 #ifdef CONFIG_TRACER_MAX_TRACE
1523         if (type->use_max_tr) {
1524                 tr->allocated_snapshot = false;
1525
1526                 /* Shrink the max buffer again */
1527                 if (ring_buffer_expanded)
1528                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1529                                            RING_BUFFER_ALL_CPUS);
1530         }
1531 #endif
1532
1533         printk(KERN_CONT "PASSED\n");
1534         return 0;
1535 }
1536
1537 static __init int init_trace_selftests(void)
1538 {
1539         struct trace_selftests *p, *n;
1540         struct tracer *t, **last;
1541         int ret;
1542
1543         selftests_can_run = true;
1544
1545         mutex_lock(&trace_types_lock);
1546
1547         if (list_empty(&postponed_selftests))
1548                 goto out;
1549
1550         pr_info("Running postponed tracer tests:\n");
1551
1552         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1553                 ret = run_tracer_selftest(p->type);
1554                 /* If the test fails, then warn and remove from available_tracers */
1555                 if (ret < 0) {
1556                         WARN(1, "tracer: %s failed selftest, disabling\n",
1557                              p->type->name);
1558                         last = &trace_types;
1559                         for (t = trace_types; t; t = t->next) {
1560                                 if (t == p->type) {
1561                                         *last = t->next;
1562                                         break;
1563                                 }
1564                                 last = &t->next;
1565                         }
1566                 }
1567                 list_del(&p->list);
1568                 kfree(p);
1569         }
1570
1571  out:
1572         mutex_unlock(&trace_types_lock);
1573
1574         return 0;
1575 }
1576 core_initcall(init_trace_selftests);
1577 #else
1578 static inline int run_tracer_selftest(struct tracer *type)
1579 {
1580         return 0;
1581 }
1582 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1583
1584 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1585
1586 static void __init apply_trace_boot_options(void);
1587
1588 /**
1589  * register_tracer - register a tracer with the ftrace system.
1590  * @type: the plugin for the tracer
1591  *
1592  * Register a new plugin tracer.
1593  */
1594 int __init register_tracer(struct tracer *type)
1595 {
1596         struct tracer *t;
1597         int ret = 0;
1598
1599         if (!type->name) {
1600                 pr_info("Tracer must have a name\n");
1601                 return -1;
1602         }
1603
1604         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1605                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1606                 return -1;
1607         }
1608
1609         mutex_lock(&trace_types_lock);
1610
1611         tracing_selftest_running = true;
1612
1613         for (t = trace_types; t; t = t->next) {
1614                 if (strcmp(type->name, t->name) == 0) {
1615                         /* already found */
1616                         pr_info("Tracer %s already registered\n",
1617                                 type->name);
1618                         ret = -1;
1619                         goto out;
1620                 }
1621         }
1622
1623         if (!type->set_flag)
1624                 type->set_flag = &dummy_set_flag;
1625         if (!type->flags) {
1626                 /* allocate a dummy tracer_flags */
1627                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1628                 if (!type->flags) {
1629                         ret = -ENOMEM;
1630                         goto out;
1631                 }
1632                 type->flags->val = 0;
1633                 type->flags->opts = dummy_tracer_opt;
1634         } else
1635                 if (!type->flags->opts)
1636                         type->flags->opts = dummy_tracer_opt;
1637
1638         /* store the tracer for __set_tracer_option */
1639         type->flags->trace = type;
1640
1641         ret = run_tracer_selftest(type);
1642         if (ret < 0)
1643                 goto out;
1644
1645         type->next = trace_types;
1646         trace_types = type;
1647         add_tracer_options(&global_trace, type);
1648
1649  out:
1650         tracing_selftest_running = false;
1651         mutex_unlock(&trace_types_lock);
1652
1653         if (ret || !default_bootup_tracer)
1654                 goto out_unlock;
1655
1656         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1657                 goto out_unlock;
1658
1659         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1660         /* Do we want this tracer to start on bootup? */
1661         tracing_set_tracer(&global_trace, type->name);
1662         default_bootup_tracer = NULL;
1663
1664         apply_trace_boot_options();
1665
1666         /* Disable other selftests, since this will break them. */
1667         tracing_selftest_disabled = true;
1668 #ifdef CONFIG_FTRACE_STARTUP_TEST
1669         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1670                type->name);
1671 #endif
1672
1673  out_unlock:
1674         return ret;
1675 }
1676
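/*
 * A minimal sketch of how a tracer plugin registers itself.  The names
 * below are hypothetical and the block is not built; it only illustrates
 * the struct tracer callbacks that register_tracer() expects.
 */
#if 0	/* illustrative sketch only, not built */
static int example_tracer_init(struct trace_array *tr)
{
	/* Arm whatever hooks this tracer needs. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo example_tracer_init(). */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif
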
1677 void tracing_reset(struct trace_buffer *buf, int cpu)
1678 {
1679         struct ring_buffer *buffer = buf->buffer;
1680
1681         if (!buffer)
1682                 return;
1683
1684         ring_buffer_record_disable(buffer);
1685
1686         /* Make sure all commits have finished */
1687         synchronize_sched();
1688         ring_buffer_reset_cpu(buffer, cpu);
1689
1690         ring_buffer_record_enable(buffer);
1691 }
1692
1693 void tracing_reset_online_cpus(struct trace_buffer *buf)
1694 {
1695         struct ring_buffer *buffer = buf->buffer;
1696         int cpu;
1697
1698         if (!buffer)
1699                 return;
1700
1701         ring_buffer_record_disable(buffer);
1702
1703         /* Make sure all commits have finished */
1704         synchronize_sched();
1705
1706         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1707
1708         for_each_online_cpu(cpu)
1709                 ring_buffer_reset_cpu(buffer, cpu);
1710
1711         ring_buffer_record_enable(buffer);
1712 }
1713
1714 /* Must have trace_types_lock held */
1715 void tracing_reset_all_online_cpus(void)
1716 {
1717         struct trace_array *tr;
1718
1719         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1720                 if (!tr->clear_trace)
1721                         continue;
1722                 tr->clear_trace = false;
1723                 tracing_reset_online_cpus(&tr->trace_buffer);
1724 #ifdef CONFIG_TRACER_MAX_TRACE
1725                 tracing_reset_online_cpus(&tr->max_buffer);
1726 #endif
1727         }
1728 }
1729
1730 static int *tgid_map;
1731
1732 #define SAVED_CMDLINES_DEFAULT 128
1733 #define NO_CMDLINE_MAP UINT_MAX
1734 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1735 struct saved_cmdlines_buffer {
1736         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1737         unsigned *map_cmdline_to_pid;
1738         unsigned cmdline_num;
1739         int cmdline_idx;
1740         char *saved_cmdlines;
1741 };
1742 static struct saved_cmdlines_buffer *savedcmd;
1743
1744 /* temporary disable recording */
1745 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1746
1747 static inline char *get_saved_cmdlines(int idx)
1748 {
1749         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1750 }
1751
1752 static inline void set_cmdline(int idx, const char *cmdline)
1753 {
1754         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1755 }
1756
1757 static int allocate_cmdlines_buffer(unsigned int val,
1758                                     struct saved_cmdlines_buffer *s)
1759 {
1760         s->map_cmdline_to_pid = kmalloc_array(val,
1761                                               sizeof(*s->map_cmdline_to_pid),
1762                                               GFP_KERNEL);
1763         if (!s->map_cmdline_to_pid)
1764                 return -ENOMEM;
1765
1766         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1767         if (!s->saved_cmdlines) {
1768                 kfree(s->map_cmdline_to_pid);
1769                 return -ENOMEM;
1770         }
1771
1772         s->cmdline_idx = 0;
1773         s->cmdline_num = val;
1774         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1775                sizeof(s->map_pid_to_cmdline));
1776         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1777                val * sizeof(*s->map_cmdline_to_pid));
1778
1779         return 0;
1780 }
1781
1782 static int trace_create_savedcmd(void)
1783 {
1784         int ret;
1785
1786         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1787         if (!savedcmd)
1788                 return -ENOMEM;
1789
1790         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1791         if (ret < 0) {
1792                 kfree(savedcmd);
1793                 savedcmd = NULL;
1794                 return -ENOMEM;
1795         }
1796
1797         return 0;
1798 }
1799
1800 int is_tracing_stopped(void)
1801 {
1802         return global_trace.stop_count;
1803 }
1804
1805 /**
1806  * tracing_start - quick start of the tracer
1807  *
1808  * If tracing is enabled but was stopped by tracing_stop,
1809  * this will start the tracer back up.
1810  */
1811 void tracing_start(void)
1812 {
1813         struct ring_buffer *buffer;
1814         unsigned long flags;
1815
1816         if (tracing_disabled)
1817                 return;
1818
1819         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1820         if (--global_trace.stop_count) {
1821                 if (global_trace.stop_count < 0) {
1822                         /* Someone screwed up their debugging */
1823                         WARN_ON_ONCE(1);
1824                         global_trace.stop_count = 0;
1825                 }
1826                 goto out;
1827         }
1828
1829         /* Prevent the buffers from switching */
1830         arch_spin_lock(&global_trace.max_lock);
1831
1832         buffer = global_trace.trace_buffer.buffer;
1833         if (buffer)
1834                 ring_buffer_record_enable(buffer);
1835
1836 #ifdef CONFIG_TRACER_MAX_TRACE
1837         buffer = global_trace.max_buffer.buffer;
1838         if (buffer)
1839                 ring_buffer_record_enable(buffer);
1840 #endif
1841
1842         arch_spin_unlock(&global_trace.max_lock);
1843
1844  out:
1845         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1846 }
1847
1848 static void tracing_start_tr(struct trace_array *tr)
1849 {
1850         struct ring_buffer *buffer;
1851         unsigned long flags;
1852
1853         if (tracing_disabled)
1854                 return;
1855
1856         /* If global, we need to also start the max tracer */
1857         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1858                 return tracing_start();
1859
1860         raw_spin_lock_irqsave(&tr->start_lock, flags);
1861
1862         if (--tr->stop_count) {
1863                 if (tr->stop_count < 0) {
1864                         /* Someone screwed up their debugging */
1865                         WARN_ON_ONCE(1);
1866                         tr->stop_count = 0;
1867                 }
1868                 goto out;
1869         }
1870
1871         buffer = tr->trace_buffer.buffer;
1872         if (buffer)
1873                 ring_buffer_record_enable(buffer);
1874
1875  out:
1876         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1877 }
1878
1879 /**
1880  * tracing_stop - quick stop of the tracer
1881  *
1882  * Light weight way to stop tracing. Use in conjunction with
1883  * tracing_start.
1884  */
1885 void tracing_stop(void)
1886 {
1887         struct ring_buffer *buffer;
1888         unsigned long flags;
1889
1890         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1891         if (global_trace.stop_count++)
1892                 goto out;
1893
1894         /* Prevent the buffers from switching */
1895         arch_spin_lock(&global_trace.max_lock);
1896
1897         buffer = global_trace.trace_buffer.buffer;
1898         if (buffer)
1899                 ring_buffer_record_disable(buffer);
1900
1901 #ifdef CONFIG_TRACER_MAX_TRACE
1902         buffer = global_trace.max_buffer.buffer;
1903         if (buffer)
1904                 ring_buffer_record_disable(buffer);
1905 #endif
1906
1907         arch_spin_unlock(&global_trace.max_lock);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1911 }
1912
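/*
 * A sketch (hypothetical caller, not built) of how the stop/start pair
 * above is meant to be used: the calls nest via stop_count, so recording
 * only resumes once every tracing_stop() has been matched by a
 * tracing_start().
 */
#if 0	/* illustrative sketch only, not built */
static void example_untraced_section(void)
{
	tracing_stop();
	/* ... work whose events should not land in the ring buffer ... */
	tracing_start();
}
#endif
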
1913 static void tracing_stop_tr(struct trace_array *tr)
1914 {
1915         struct ring_buffer *buffer;
1916         unsigned long flags;
1917
1918         /* If global, we need to also stop the max tracer */
1919         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1920                 return tracing_stop();
1921
1922         raw_spin_lock_irqsave(&tr->start_lock, flags);
1923         if (tr->stop_count++)
1924                 goto out;
1925
1926         buffer = tr->trace_buffer.buffer;
1927         if (buffer)
1928                 ring_buffer_record_disable(buffer);
1929
1930  out:
1931         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1932 }
1933
1934 static int trace_save_cmdline(struct task_struct *tsk)
1935 {
1936         unsigned pid, idx;
1937
1938         /* treat recording of idle task as a success */
1939         if (!tsk->pid)
1940                 return 1;
1941
1942         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1943                 return 0;
1944
1945         /*
1946          * It's not the end of the world if we don't get
1947          * the lock, but we also don't want to spin
1948          * nor do we want to disable interrupts,
1949          * so if we miss here, then better luck next time.
1950          */
1951         if (!arch_spin_trylock(&trace_cmdline_lock))
1952                 return 0;
1953
1954         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1955         if (idx == NO_CMDLINE_MAP) {
1956                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1957
1958                 /*
1959                  * Check whether the cmdline buffer at idx has a pid
1960                  * mapped. We are going to overwrite that entry so we
1961                  * need to clear the map_pid_to_cmdline. Otherwise we
1962                  * would read the new comm for the old pid.
1963                  */
1964                 pid = savedcmd->map_cmdline_to_pid[idx];
1965                 if (pid != NO_CMDLINE_MAP)
1966                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1967
1968                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1969                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1970
1971                 savedcmd->cmdline_idx = idx;
1972         }
1973
1974         set_cmdline(idx, tsk->comm);
1975
1976         arch_spin_unlock(&trace_cmdline_lock);
1977
1978         return 1;
1979 }
1980
1981 static void __trace_find_cmdline(int pid, char comm[])
1982 {
1983         unsigned map;
1984
1985         if (!pid) {
1986                 strcpy(comm, "<idle>");
1987                 return;
1988         }
1989
1990         if (WARN_ON_ONCE(pid < 0)) {
1991                 strcpy(comm, "<XXX>");
1992                 return;
1993         }
1994
1995         if (pid > PID_MAX_DEFAULT) {
1996                 strcpy(comm, "<...>");
1997                 return;
1998         }
1999
2000         map = savedcmd->map_pid_to_cmdline[pid];
2001         if (map != NO_CMDLINE_MAP)
2002                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2003         else
2004                 strcpy(comm, "<...>");
2005 }
2006
2007 void trace_find_cmdline(int pid, char comm[])
2008 {
2009         preempt_disable();
2010         arch_spin_lock(&trace_cmdline_lock);
2011
2012         __trace_find_cmdline(pid, comm);
2013
2014         arch_spin_unlock(&trace_cmdline_lock);
2015         preempt_enable();
2016 }
2017
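/*
 * A sketch (hypothetical, not built) of the lookup side of the cmdline
 * cache: the resolved comm falls back to "<...>" when the pid was never
 * saved or its slot has since been reused.
 */
#if 0	/* illustrative sketch only, not built */
static void example_show_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif
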
2018 int trace_find_tgid(int pid)
2019 {
2020         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2021                 return 0;
2022
2023         return tgid_map[pid];
2024 }
2025
2026 static int trace_save_tgid(struct task_struct *tsk)
2027 {
2028         /* treat recording of idle task as a success */
2029         if (!tsk->pid)
2030                 return 1;
2031
2032         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2033                 return 0;
2034
2035         tgid_map[tsk->pid] = tsk->tgid;
2036         return 1;
2037 }
2038
2039 static bool tracing_record_taskinfo_skip(int flags)
2040 {
2041         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2042                 return true;
2043         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2044                 return true;
2045         if (!__this_cpu_read(trace_taskinfo_save))
2046                 return true;
2047         return false;
2048 }
2049
2050 /**
2051  * tracing_record_taskinfo - record the task info of a task
2052  *
2053  * @task:  task to record
2054  * @flags: TRACE_RECORD_CMDLINE for recording comm
2055  *         TRACE_RECORD_TGID for recording tgid
2056  */
2057 void tracing_record_taskinfo(struct task_struct *task, int flags)
2058 {
2059         bool done;
2060
2061         if (tracing_record_taskinfo_skip(flags))
2062                 return;
2063
2064         /*
2065          * Record as much task information as possible. If some fail, continue
2066          * to try to record the others.
2067          */
2068         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2069         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2070
2071         /* If recording any information failed, retry again soon. */
2072         if (!done)
2073                 return;
2074
2075         __this_cpu_write(trace_taskinfo_save, false);
2076 }
2077
2078 /**
2079  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2080  *
2081  * @prev:  previous task during sched_switch
2082  * @next:  next task during sched_switch
2083  * @flags: TRACE_RECORD_CMDLINE for recording comm
2084  *         TRACE_RECORD_TGID for recording tgid
2085  */
2086 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2087                                           struct task_struct *next, int flags)
2088 {
2089         bool done;
2090
2091         if (tracing_record_taskinfo_skip(flags))
2092                 return;
2093
2094         /*
2095          * Record as much task information as possible. If some fail, continue
2096          * to try to record the others.
2097          */
2098         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2099         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2100         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2101         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2102
2103         /* If recording any information failed, retry again soon. */
2104         if (!done)
2105                 return;
2106
2107         __this_cpu_write(trace_taskinfo_save, false);
2108 }
2109
2110 /* Helpers to record a specific task information */
2111 void tracing_record_cmdline(struct task_struct *task)
2112 {
2113         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2114 }
2115
2116 void tracing_record_tgid(struct task_struct *task)
2117 {
2118         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2119 }
2120
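/*
 * The two wrappers above are just fixed-flag versions of
 * tracing_record_taskinfo().  A caller that wants both pieces of task
 * info can OR the flags together, as in this sketch (not built).
 */
#if 0	/* illustrative sketch only, not built */
static void example_record_current(void)
{
	tracing_record_taskinfo(current,
				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
}
#endif
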
2121 /*
2122  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2123  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2124  * simplifies those functions and keeps them in sync.
2125  */
2126 enum print_line_t trace_handle_return(struct trace_seq *s)
2127 {
2128         return trace_seq_has_overflowed(s) ?
2129                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2130 }
2131 EXPORT_SYMBOL_GPL(trace_handle_return);
2132
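/*
 * A sketch of the pattern trace_handle_return() exists for: an output
 * callback (as in trace_output.c) writes into iter->seq and lets the
 * helper above fold the overflow check into its return value.  The
 * callback name here is hypothetical and the block is not built.
 */
#if 0	/* illustrative sketch only, not built */
static enum print_line_t example_event_print(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event\n");

	return trace_handle_return(s);
}
#endif
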
2133 void
2134 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2135                              int pc)
2136 {
2137         struct task_struct *tsk = current;
2138
2139         entry->preempt_count            = pc & 0xff;
2140         entry->pid                      = (tsk) ? tsk->pid : 0;
2141         entry->flags =
2142 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2143                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2144 #else
2145                 TRACE_FLAG_IRQS_NOSUPPORT |
2146 #endif
2147                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2148                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2149                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2150                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2151                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2152 }
2153 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2154
2155 struct ring_buffer_event *
2156 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2157                           int type,
2158                           unsigned long len,
2159                           unsigned long flags, int pc)
2160 {
2161         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2162 }
2163
2164 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2165 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2166 static int trace_buffered_event_ref;
2167
2168 /**
2169  * trace_buffered_event_enable - enable buffering events
2170  *
2171  * When events are being filtered, it is quicker to write the event
2172  * data into a temporary buffer if there is a likely chance that it
2173  * will not be committed. Discarding an event from the ring buffer
2174  * is not as fast as committing it, and is much slower than copying
2175  * the data into a temporary buffer first.
2176  *
2177  * When an event is to be filtered, allocate per-cpu buffers to
2178  * write the event data into. If the event is filtered and discarded,
2179  * it is simply dropped; otherwise, the entire data is committed
2180  * in one shot.
2181  */
2182 void trace_buffered_event_enable(void)
2183 {
2184         struct ring_buffer_event *event;
2185         struct page *page;
2186         int cpu;
2187
2188         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2189
2190         if (trace_buffered_event_ref++)
2191                 return;
2192
2193         for_each_tracing_cpu(cpu) {
2194                 page = alloc_pages_node(cpu_to_node(cpu),
2195                                         GFP_KERNEL | __GFP_NORETRY, 0);
2196                 if (!page)
2197                         goto failed;
2198
2199                 event = page_address(page);
2200                 memset(event, 0, sizeof(*event));
2201
2202                 per_cpu(trace_buffered_event, cpu) = event;
2203
2204                 preempt_disable();
2205                 if (cpu == smp_processor_id() &&
2206                     this_cpu_read(trace_buffered_event) !=
2207                     per_cpu(trace_buffered_event, cpu))
2208                         WARN_ON_ONCE(1);
2209                 preempt_enable();
2210         }
2211
2212         return;
2213  failed:
2214         trace_buffered_event_disable();
2215 }
2216
2217 static void enable_trace_buffered_event(void *data)
2218 {
2219         /* Probably not needed, but do it anyway */
2220         smp_rmb();
2221         this_cpu_dec(trace_buffered_event_cnt);
2222 }
2223
2224 static void disable_trace_buffered_event(void *data)
2225 {
2226         this_cpu_inc(trace_buffered_event_cnt);
2227 }
2228
2229 /**
2230  * trace_buffered_event_disable - disable buffering events
2231  *
2232  * When a filter is removed, it is faster to not use the buffered
2233  * events, and to commit directly into the ring buffer. Free up
2234  * the temp buffers when there are no more users. This requires
2235  * special synchronization with current events.
2236  */
2237 void trace_buffered_event_disable(void)
2238 {
2239         int cpu;
2240
2241         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2242
2243         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2244                 return;
2245
2246         if (--trace_buffered_event_ref)
2247                 return;
2248
2249         preempt_disable();
2250         /* For each CPU, set the buffer as used. */
2251         smp_call_function_many(tracing_buffer_mask,
2252                                disable_trace_buffered_event, NULL, 1);
2253         preempt_enable();
2254
2255         /* Wait for all current users to finish */
2256         synchronize_sched();
2257
2258         for_each_tracing_cpu(cpu) {
2259                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2260                 per_cpu(trace_buffered_event, cpu) = NULL;
2261         }
2262         /*
2263          * Make sure trace_buffered_event is NULL before clearing
2264          * trace_buffered_event_cnt.
2265          */
2266         smp_wmb();
2267
2268         preempt_disable();
2269         /* Do the work on each cpu */
2270         smp_call_function_many(tracing_buffer_mask,
2271                                enable_trace_buffered_event, NULL, 1);
2272         preempt_enable();
2273 }
2274
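/*
 * A sketch (hypothetical caller, not built) of the reference-counted
 * pairing the two functions above expect.  Both sides must run under
 * event_mutex, which is what the WARN_ON_ONCE() checks enforce.
 */
#if 0	/* illustrative sketch only, not built */
static void example_update_filter_state(bool filtering)
{
	lockdep_assert_held(&event_mutex);

	if (filtering)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
}
#endif
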
2275 static struct ring_buffer *temp_buffer;
2276
2277 struct ring_buffer_event *
2278 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2279                           struct trace_event_file *trace_file,
2280                           int type, unsigned long len,
2281                           unsigned long flags, int pc)
2282 {
2283         struct ring_buffer_event *entry;
2284         int val;
2285
2286         *current_rb = trace_file->tr->trace_buffer.buffer;
2287
2288         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2289              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2290             (entry = this_cpu_read(trace_buffered_event))) {
2291                 /* Try to use the per cpu buffer first */
2292                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2293                 if (val == 1) {
2294                         trace_event_setup(entry, type, flags, pc);
2295                         entry->array[0] = len;
2296                         return entry;
2297                 }
2298                 this_cpu_dec(trace_buffered_event_cnt);
2299         }
2300
2301         entry = __trace_buffer_lock_reserve(*current_rb,
2302                                             type, len, flags, pc);
2303         /*
2304          * If tracing is off, but we have triggers enabled
2305          * we still need to look at the event data. Use the temp_buffer
2306          * to store the trace event for the trigger to use. It's recursion
2307          * safe and will not be recorded anywhere.
2308          */
2309         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2310                 *current_rb = temp_buffer;
2311                 entry = __trace_buffer_lock_reserve(*current_rb,
2312                                                     type, len, flags, pc);
2313         }
2314         return entry;
2315 }
2316 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2317
2318 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2319 static DEFINE_MUTEX(tracepoint_printk_mutex);
2320
2321 static void output_printk(struct trace_event_buffer *fbuffer)
2322 {
2323         struct trace_event_call *event_call;
2324         struct trace_event *event;
2325         unsigned long flags;
2326         struct trace_iterator *iter = tracepoint_print_iter;
2327
2328         /* We should never get here if iter is NULL */
2329         if (WARN_ON_ONCE(!iter))
2330                 return;
2331
2332         event_call = fbuffer->trace_file->event_call;
2333         if (!event_call || !event_call->event.funcs ||
2334             !event_call->event.funcs->trace)
2335                 return;
2336
2337         event = &fbuffer->trace_file->event_call->event;
2338
2339         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2340         trace_seq_init(&iter->seq);
2341         iter->ent = fbuffer->entry;
2342         event_call->event.funcs->trace(iter, 0, event);
2343         trace_seq_putc(&iter->seq, 0);
2344         printk("%s", iter->seq.buffer);
2345
2346         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2347 }
2348
2349 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2350                              void __user *buffer, size_t *lenp,
2351                              loff_t *ppos)
2352 {
2353         int save_tracepoint_printk;
2354         int ret;
2355
2356         mutex_lock(&tracepoint_printk_mutex);
2357         save_tracepoint_printk = tracepoint_printk;
2358
2359         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2360
2361         /*
2362          * This will force exiting early, as tracepoint_printk
2363          * is always zero when tracepoint_print_iter is not allocated.
2364          */
2365         if (!tracepoint_print_iter)
2366                 tracepoint_printk = 0;
2367
2368         if (save_tracepoint_printk == tracepoint_printk)
2369                 goto out;
2370
2371         if (tracepoint_printk)
2372                 static_key_enable(&tracepoint_printk_key.key);
2373         else
2374                 static_key_disable(&tracepoint_printk_key.key);
2375
2376  out:
2377         mutex_unlock(&tracepoint_printk_mutex);
2378
2379         return ret;
2380 }
2381
2382 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2383 {
2384         if (static_key_false(&tracepoint_printk_key.key))
2385                 output_printk(fbuffer);
2386
2387         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2388                                     fbuffer->event, fbuffer->entry,
2389                                     fbuffer->flags, fbuffer->pc);
2390 }
2391 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2392
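/*
 * For context, a sketch (not built, names hypothetical) of the
 * reserve/fill/commit flow that the generated trace_event_raw_event_*()
 * handlers follow: trace_event_buffer_reserve() lands in
 * trace_event_buffer_lock_reserve() above, and trace_event_buffer_commit()
 * finishes the event off.
 */
#if 0	/* illustrative sketch only, not built */
static void example_raw_event_handler(void *__data, int value)
{
	struct trace_event_file *trace_file = __data;
	struct trace_event_buffer fbuffer;
	struct example_entry {
		struct trace_entry	ent;
		int			value;
	} *entry;

	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
					   sizeof(*entry));
	if (!entry)
		return;

	entry->value = value;

	trace_event_buffer_commit(&fbuffer);
}
#endif
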
2393 /*
2394  * Skip 3:
2395  *
2396  *   trace_buffer_unlock_commit_regs()
2397  *   trace_event_buffer_commit()
2398  *   trace_event_raw_event_xxx()
2399  */
2400 # define STACK_SKIP 3
2401
2402 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2403                                      struct ring_buffer *buffer,
2404                                      struct ring_buffer_event *event,
2405                                      unsigned long flags, int pc,
2406                                      struct pt_regs *regs)
2407 {
2408         __buffer_unlock_commit(buffer, event);
2409
2410         /*
2411          * If regs is not set, then skip the necessary functions.
2412          * Note, we can still get here via blktrace, wakeup tracer
2413          * and mmiotrace, but that's ok if they lose a function or
2414          * two. They are not that meaningful.
2415          */
2416         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2417         ftrace_trace_userstack(buffer, flags, pc);
2418 }
2419
2420 /*
2421  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2422  */
2423 void
2424 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2425                                    struct ring_buffer_event *event)
2426 {
2427         __buffer_unlock_commit(buffer, event);
2428 }
2429
2430 static void
2431 trace_process_export(struct trace_export *export,
2432                struct ring_buffer_event *event)
2433 {
2434         struct trace_entry *entry;
2435         unsigned int size = 0;
2436
2437         entry = ring_buffer_event_data(event);
2438         size = ring_buffer_event_length(event);
2439         export->write(export, entry, size);
2440 }
2441
2442 static DEFINE_MUTEX(ftrace_export_lock);
2443
2444 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2445
2446 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2447
2448 static inline void ftrace_exports_enable(void)
2449 {
2450         static_branch_enable(&ftrace_exports_enabled);
2451 }
2452
2453 static inline void ftrace_exports_disable(void)
2454 {
2455         static_branch_disable(&ftrace_exports_enabled);
2456 }
2457
2458 void ftrace_exports(struct ring_buffer_event *event)
2459 {
2460         struct trace_export *export;
2461
2462         preempt_disable_notrace();
2463
2464         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2465         while (export) {
2466                 trace_process_export(export, event);
2467                 export = rcu_dereference_raw_notrace(export->next);
2468         }
2469
2470         preempt_enable_notrace();
2471 }
2472
2473 static inline void
2474 add_trace_export(struct trace_export **list, struct trace_export *export)
2475 {
2476         rcu_assign_pointer(export->next, *list);
2477         /*
2478          * We are adding export to the list, but another
2479          * CPU might be walking that list. We need to make sure
2480          * the export->next pointer is valid before another CPU sees
2481          * the export pointer inserted into the list.
2482          */
2483         rcu_assign_pointer(*list, export);
2484 }
2485
2486 static inline int
2487 rm_trace_export(struct trace_export **list, struct trace_export *export)
2488 {
2489         struct trace_export **p;
2490
2491         for (p = list; *p != NULL; p = &(*p)->next)
2492                 if (*p == export)
2493                         break;
2494
2495         if (*p != export)
2496                 return -1;
2497
2498         rcu_assign_pointer(*p, (*p)->next);
2499
2500         return 0;
2501 }
2502
2503 static inline void
2504 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2505 {
2506         if (*list == NULL)
2507                 ftrace_exports_enable();
2508
2509         add_trace_export(list, export);
2510 }
2511
2512 static inline int
2513 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2514 {
2515         int ret;
2516
2517         ret = rm_trace_export(list, export);
2518         if (*list == NULL)
2519                 ftrace_exports_disable();
2520
2521         return ret;
2522 }
2523
2524 int register_ftrace_export(struct trace_export *export)
2525 {
2526         if (WARN_ON_ONCE(!export->write))
2527                 return -1;
2528
2529         mutex_lock(&ftrace_export_lock);
2530
2531         add_ftrace_export(&ftrace_exports_list, export);
2532
2533         mutex_unlock(&ftrace_export_lock);
2534
2535         return 0;
2536 }
2537 EXPORT_SYMBOL_GPL(register_ftrace_export);
2538
2539 int unregister_ftrace_export(struct trace_export *export)
2540 {
2541         int ret;
2542
2543         mutex_lock(&ftrace_export_lock);
2544
2545         ret = rm_ftrace_export(&ftrace_exports_list, export);
2546
2547         mutex_unlock(&ftrace_export_lock);
2548
2549         return ret;
2550 }
2551 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2552
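/*
 * A sketch of a trace export client (hypothetical names, not built):
 * the ->write() callback receives the raw entry and its length exactly
 * as trace_process_export() passes them above.
 */
#if 0	/* illustrative sketch only, not built */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Push the raw trace entry to wherever it needs to go. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}
#endif
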
2553 void
2554 trace_function(struct trace_array *tr,
2555                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2556                int pc)
2557 {
2558         struct trace_event_call *call = &event_function;
2559         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2560         struct ring_buffer_event *event;
2561         struct ftrace_entry *entry;
2562
2563         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2564                                             flags, pc);
2565         if (!event)
2566                 return;
2567         entry   = ring_buffer_event_data(event);
2568         entry->ip                       = ip;
2569         entry->parent_ip                = parent_ip;
2570
2571         if (!call_filter_check_discard(call, entry, buffer, event)) {
2572                 if (static_branch_unlikely(&ftrace_exports_enabled))
2573                         ftrace_exports(event);
2574                 __buffer_unlock_commit(buffer, event);
2575         }
2576 }
2577
2578 #ifdef CONFIG_STACKTRACE
2579
2580 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2581 struct ftrace_stack {
2582         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2583 };
2584
2585 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2586 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2587
2588 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2589                                  unsigned long flags,
2590                                  int skip, int pc, struct pt_regs *regs)
2591 {
2592         struct trace_event_call *call = &event_kernel_stack;
2593         struct ring_buffer_event *event;
2594         struct stack_entry *entry;
2595         struct stack_trace trace;
2596         int use_stack;
2597         int size = FTRACE_STACK_ENTRIES;
2598
2599         trace.nr_entries        = 0;
2600         trace.skip              = skip;
2601
2602         /*
2603          * Add one, for this function and the call to save_stack_trace().
2604          * If regs is set, then these functions will not be in the way.
2605          */
2606 #ifndef CONFIG_UNWINDER_ORC
2607         if (!regs)
2608                 trace.skip++;
2609 #endif
2610
2611         /*
2612          * Since events can happen in NMIs, there's no safe way to
2613          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
2614          * or NMI comes in, it will just have to use the default
2615          * FTRACE_STACK_SIZE.
2616          */
2617         preempt_disable_notrace();
2618
2619         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2620         /*
2621          * We don't need any atomic variables, just a barrier.
2622          * If an interrupt comes in, we don't care, because it would
2623          * have exited and put the counter back to what we want.
2624          * We just need a barrier to keep gcc from moving things
2625          * around.
2626          */
2627         barrier();
2628         if (use_stack == 1) {
2629                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2630                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2631
2632                 if (regs)
2633                         save_stack_trace_regs(regs, &trace);
2634                 else
2635                         save_stack_trace(&trace);
2636
2637                 if (trace.nr_entries > size)
2638                         size = trace.nr_entries;
2639         } else
2640                 /* From now on, use_stack is a boolean */
2641                 use_stack = 0;
2642
2643         size *= sizeof(unsigned long);
2644
2645         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2646                                             sizeof(*entry) + size, flags, pc);
2647         if (!event)
2648                 goto out;
2649         entry = ring_buffer_event_data(event);
2650
2651         memset(&entry->caller, 0, size);
2652
2653         if (use_stack)
2654                 memcpy(&entry->caller, trace.entries,
2655                        trace.nr_entries * sizeof(unsigned long));
2656         else {
2657                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2658                 trace.entries           = entry->caller;
2659                 if (regs)
2660                         save_stack_trace_regs(regs, &trace);
2661                 else
2662                         save_stack_trace(&trace);
2663         }
2664
2665         entry->size = trace.nr_entries;
2666
2667         if (!call_filter_check_discard(call, entry, buffer, event))
2668                 __buffer_unlock_commit(buffer, event);
2669
2670  out:
2671         /* Again, don't let gcc optimize things here */
2672         barrier();
2673         __this_cpu_dec(ftrace_stack_reserve);
2674         preempt_enable_notrace();
2675
2676 }
2677
2678 static inline void ftrace_trace_stack(struct trace_array *tr,
2679                                       struct ring_buffer *buffer,
2680                                       unsigned long flags,
2681                                       int skip, int pc, struct pt_regs *regs)
2682 {
2683         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2684                 return;
2685
2686         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2687 }
2688
2689 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2690                    int pc)
2691 {
2692         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2693
2694         if (rcu_is_watching()) {
2695                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2696                 return;
2697         }
2698
2699         /*
2700          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2701          * but if the above rcu_is_watching() failed, then the NMI
2702          * triggered someplace critical, and rcu_irq_enter() should
2703          * not be called from NMI.
2704          */
2705         if (unlikely(in_nmi()))
2706                 return;
2707
2708         rcu_irq_enter_irqson();
2709         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2710         rcu_irq_exit_irqson();
2711 }
2712
2713 /**
2714  * trace_dump_stack - record a stack back trace in the trace buffer
2715  * @skip: Number of functions to skip (helper handlers)
2716  */
2717 void trace_dump_stack(int skip)
2718 {
2719         unsigned long flags;
2720
2721         if (tracing_disabled || tracing_selftest_running)
2722                 return;
2723
2724         local_save_flags(flags);
2725
2726 #ifndef CONFIG_UNWINDER_ORC
2727         /* Skip 1 to skip this function. */
2728         skip++;
2729 #endif
2730         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2731                              flags, skip, preempt_count(), NULL);
2732 }
2733
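/*
 * Typical ad-hoc use of trace_dump_stack(), as a sketch (not built):
 * drop a kernel backtrace into the trace buffer from a suspicious path
 * without stopping the machine.
 */
#if 0	/* illustrative sketch only, not built */
static void example_debug_backtrace(void)
{
	/* Record how we got here; 0 means skip no extra frames. */
	trace_dump_stack(0);
}
#endif
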
2734 static DEFINE_PER_CPU(int, user_stack_count);
2735
2736 void
2737 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2738 {
2739         struct trace_event_call *call = &event_user_stack;
2740         struct ring_buffer_event *event;
2741         struct userstack_entry *entry;
2742         struct stack_trace trace;
2743
2744         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2745                 return;
2746
2747         /*
2748          * NMIs can not handle page faults, even with fix ups.
2749          * Saving the user stack can (and often does) fault.
2750          */
2751         if (unlikely(in_nmi()))
2752                 return;
2753
2754         /*
2755          * prevent recursion, since the user stack tracing may
2756          * trigger other kernel events.
2757          */
2758         preempt_disable();
2759         if (__this_cpu_read(user_stack_count))
2760                 goto out;
2761
2762         __this_cpu_inc(user_stack_count);
2763
2764         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2765                                             sizeof(*entry), flags, pc);
2766         if (!event)
2767                 goto out_drop_count;
2768         entry   = ring_buffer_event_data(event);
2769
2770         entry->tgid             = current->tgid;
2771         memset(&entry->caller, 0, sizeof(entry->caller));
2772
2773         trace.nr_entries        = 0;
2774         trace.max_entries       = FTRACE_STACK_ENTRIES;
2775         trace.skip              = 0;
2776         trace.entries           = entry->caller;
2777
2778         save_stack_trace_user(&trace);
2779         if (!call_filter_check_discard(call, entry, buffer, event))
2780                 __buffer_unlock_commit(buffer, event);
2781
2782  out_drop_count:
2783         __this_cpu_dec(user_stack_count);
2784  out:
2785         preempt_enable();
2786 }
2787
2788 #ifdef UNUSED
2789 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2790 {
2791         ftrace_trace_userstack(tr, flags, preempt_count());
2792         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2793 #endif /* UNUSED */
2794
2795 #endif /* CONFIG_STACKTRACE */
2796
2797 /* created for use with alloc_percpu */
2798 struct trace_buffer_struct {
2799         int nesting;
2800         char buffer[4][TRACE_BUF_SIZE];
2801 };
2802
2803 static struct trace_buffer_struct *trace_percpu_buffer;
2804
2805 /*
2806  * This allows for lockless recording.  If we're nested too deeply, then
2807  * this returns NULL.
2808  */
2809 static char *get_trace_buf(void)
2810 {
2811         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2812
2813         if (!buffer || buffer->nesting >= 4)
2814                 return NULL;
2815
2816         buffer->nesting++;
2817
2818         /* Interrupts must see nesting incremented before we use the buffer */
2819         barrier();
2820         return &buffer->buffer[buffer->nesting][0];
2821 }
2822
2823 static void put_trace_buf(void)
2824 {
2825         /* Don't let the decrement of nesting leak before this */
2826         barrier();
2827         this_cpu_dec(trace_percpu_buffer->nesting);
2828 }
2829
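/*
 * A sketch (hypothetical, not built) of the discipline the two helpers
 * above require, mirroring trace_vbprintk()/__trace_array_vprintk()
 * below: preemption disabled around the window, and every successful
 * get_trace_buf() paired with a put_trace_buf().
 */
#if 0	/* illustrative sketch only, not built */
static void example_use_scratch_buffer(const char *msg)
{
	char *tbuffer;

	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (tbuffer) {
		strlcpy(tbuffer, msg, TRACE_BUF_SIZE);
		/* ... hand tbuffer to whatever consumes it ... */
		put_trace_buf();
	}

	preempt_enable_notrace();
}
#endif
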
2830 static int alloc_percpu_trace_buffer(void)
2831 {
2832         struct trace_buffer_struct *buffers;
2833
2834         buffers = alloc_percpu(struct trace_buffer_struct);
2835         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2836                 return -ENOMEM;
2837
2838         trace_percpu_buffer = buffers;
2839         return 0;
2840 }
2841
2842 static int buffers_allocated;
2843
2844 void trace_printk_init_buffers(void)
2845 {
2846         if (buffers_allocated)
2847                 return;
2848
2849         if (alloc_percpu_trace_buffer())
2850                 return;
2851
2852         /* trace_printk() is for debug use only. Don't use it in production. */
2853
2854         pr_warn("\n");
2855         pr_warn("**********************************************************\n");
2856         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2857         pr_warn("**                                                      **\n");
2858         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2859         pr_warn("**                                                      **\n");
2860         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2861         pr_warn("** unsafe for production use.                           **\n");
2862         pr_warn("**                                                      **\n");
2863         pr_warn("** If you see this message and you are not debugging    **\n");
2864         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2865         pr_warn("**                                                      **\n");
2866         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2867         pr_warn("**********************************************************\n");
2868
2869         /* Expand the buffers to set size */
2870         tracing_update_buffers();
2871
2872         buffers_allocated = 1;
2873
2874         /*
2875          * trace_printk_init_buffers() can be called by modules.
2876          * If that happens, then we need to start cmdline recording
2877          * directly here. If the global_trace.buffer is already
2878          * allocated here, then this was called by module code.
2879          */
2880         if (global_trace.trace_buffer.buffer)
2881                 tracing_start_cmdline_record();
2882 }
2883
2884 void trace_printk_start_comm(void)
2885 {
2886         /* Start tracing comms if trace printk is set */
2887         if (!buffers_allocated)
2888                 return;
2889         tracing_start_cmdline_record();
2890 }
2891
2892 static void trace_printk_start_stop_comm(int enabled)
2893 {
2894         if (!buffers_allocated)
2895                 return;
2896
2897         if (enabled)
2898                 tracing_start_cmdline_record();
2899         else
2900                 tracing_stop_cmdline_record();
2901 }
2902
2903 /**
2904  * trace_vbprintk - write binary msg to tracing buffer
2905  *
2906  */
2907 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2908 {
2909         struct trace_event_call *call = &event_bprint;
2910         struct ring_buffer_event *event;
2911         struct ring_buffer *buffer;
2912         struct trace_array *tr = &global_trace;
2913         struct bprint_entry *entry;
2914         unsigned long flags;
2915         char *tbuffer;
2916         int len = 0, size, pc;
2917
2918         if (unlikely(tracing_selftest_running || tracing_disabled))
2919                 return 0;
2920
2921         /* Don't pollute graph traces with trace_vprintk internals */
2922         pause_graph_tracing();
2923
2924         pc = preempt_count();
2925         preempt_disable_notrace();
2926
2927         tbuffer = get_trace_buf();
2928         if (!tbuffer) {
2929                 len = 0;
2930                 goto out_nobuffer;
2931         }
2932
2933         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2934
2935         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2936                 goto out;
2937
2938         local_save_flags(flags);
2939         size = sizeof(*entry) + sizeof(u32) * len;
2940         buffer = tr->trace_buffer.buffer;
2941         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2942                                             flags, pc);
2943         if (!event)
2944                 goto out;
2945         entry = ring_buffer_event_data(event);
2946         entry->ip                       = ip;
2947         entry->fmt                      = fmt;
2948
2949         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2950         if (!call_filter_check_discard(call, entry, buffer, event)) {
2951                 __buffer_unlock_commit(buffer, event);
2952                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2953         }
2954
2955 out:
2956         put_trace_buf();
2957
2958 out_nobuffer:
2959         preempt_enable_notrace();
2960         unpause_graph_tracing();
2961
2962         return len;
2963 }
2964 EXPORT_SYMBOL_GPL(trace_vbprintk);
2965
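/*
 * For context, a sketch (not built): trace_vbprintk() is normally
 * reached from the trace_printk() macro when the format is a build-time
 * constant, via __trace_bprintk().  Only the binary arguments are copied
 * into the ring buffer; the format string itself is kept by reference.
 */
#if 0	/* illustrative sketch only, not built */
static void example_debug_point(int cpu, u64 delta)
{
	/* Constant format: ends up in trace_vbprintk() above. */
	trace_printk("cpu=%d delta=%llu\n", cpu, delta);
}
#endif
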
2966 __printf(3, 0)
2967 static int
2968 __trace_array_vprintk(struct ring_buffer *buffer,
2969                       unsigned long ip, const char *fmt, va_list args)
2970 {
2971         struct trace_event_call *call = &event_print;
2972         struct ring_buffer_event *event;
2973         int len = 0, size, pc;
2974         struct print_entry *entry;
2975         unsigned long flags;
2976         char *tbuffer;
2977
2978         if (tracing_disabled || tracing_selftest_running)
2979                 return 0;
2980
2981         /* Don't pollute graph traces with trace_vprintk internals */
2982         pause_graph_tracing();
2983
2984         pc = preempt_count();
2985         preempt_disable_notrace();
2986
2987
2988         tbuffer = get_trace_buf();
2989         if (!tbuffer) {
2990                 len = 0;
2991                 goto out_nobuffer;
2992         }
2993
2994         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2995
2996         local_save_flags(flags);
2997         size = sizeof(*entry) + len + 1;
2998         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2999                                             flags, pc);
3000         if (!event)
3001                 goto out;
3002         entry = ring_buffer_event_data(event);
3003         entry->ip = ip;
3004
3005         memcpy(&entry->buf, tbuffer, len + 1);
3006         if (!call_filter_check_discard(call, entry, buffer, event)) {
3007                 __buffer_unlock_commit(buffer, event);
3008                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3009         }
3010
3011 out:
3012         put_trace_buf();
3013
3014 out_nobuffer:
3015         preempt_enable_notrace();
3016         unpause_graph_tracing();
3017
3018         return len;
3019 }
3020
3021 __printf(3, 0)
3022 int trace_array_vprintk(struct trace_array *tr,
3023                         unsigned long ip, const char *fmt, va_list args)
3024 {
3025         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3026 }
3027
3028 __printf(3, 0)
3029 int trace_array_printk(struct trace_array *tr,
3030                        unsigned long ip, const char *fmt, ...)
3031 {
3032         int ret;
3033         va_list ap;
3034
3035         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3036                 return 0;
3037
3038         va_start(ap, fmt);
3039         ret = trace_array_vprintk(tr, ip, fmt, ap);
3040         va_end(ap);
3041         return ret;
3042 }
3043
3044 __printf(3, 4)
3045 int trace_array_printk_buf(struct ring_buffer *buffer,
3046                            unsigned long ip, const char *fmt, ...)
3047 {
3048         int ret;
3049         va_list ap;
3050
3051         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052                 return 0;
3053
3054         va_start(ap, fmt);
3055         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3056         va_end(ap);
3057         return ret;
3058 }
3059
3060 __printf(2, 0)
3061 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3062 {
3063         return trace_array_vprintk(&global_trace, ip, fmt, args);
3064 }
3065 EXPORT_SYMBOL_GPL(trace_vprintk);
3066
3067 static void trace_iterator_increment(struct trace_iterator *iter)
3068 {
3069         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3070
3071         iter->idx++;
3072         if (buf_iter)
3073                 ring_buffer_read(buf_iter, NULL);
3074 }
3075
3076 static struct trace_entry *
3077 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3078                 unsigned long *lost_events)
3079 {
3080         struct ring_buffer_event *event;
3081         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3082
3083         if (buf_iter)
3084                 event = ring_buffer_iter_peek(buf_iter, ts);
3085         else
3086                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3087                                          lost_events);
3088
3089         if (event) {
3090                 iter->ent_size = ring_buffer_event_length(event);
3091                 return ring_buffer_event_data(event);
3092         }
3093         iter->ent_size = 0;
3094         return NULL;
3095 }
3096
3097 static struct trace_entry *
3098 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3099                   unsigned long *missing_events, u64 *ent_ts)
3100 {
3101         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3102         struct trace_entry *ent, *next = NULL;
3103         unsigned long lost_events = 0, next_lost = 0;
3104         int cpu_file = iter->cpu_file;
3105         u64 next_ts = 0, ts;
3106         int next_cpu = -1;
3107         int next_size = 0;
3108         int cpu;
3109
3110         /*
3111          * If we are in a per_cpu trace file, don't bother iterating over
3112          * all CPUs; just peek at that one CPU directly.
3113          */
3114         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3115                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3116                         return NULL;
3117                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3118                 if (ent_cpu)
3119                         *ent_cpu = cpu_file;
3120
3121                 return ent;
3122         }
3123
3124         for_each_tracing_cpu(cpu) {
3125
3126                 if (ring_buffer_empty_cpu(buffer, cpu))
3127                         continue;
3128
3129                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3130
3131                 /*
3132                  * Pick the entry with the smallest timestamp:
3133                  */
3134                 if (ent && (!next || ts < next_ts)) {
3135                         next = ent;
3136                         next_cpu = cpu;
3137                         next_ts = ts;
3138                         next_lost = lost_events;
3139                         next_size = iter->ent_size;
3140                 }
3141         }
3142
3143         iter->ent_size = next_size;
3144
3145         if (ent_cpu)
3146                 *ent_cpu = next_cpu;
3147
3148         if (ent_ts)
3149                 *ent_ts = next_ts;
3150
3151         if (missing_events)
3152                 *missing_events = next_lost;
3153
3154         return next;
3155 }
3156
3157 /* Find the next real entry, without updating the iterator itself */
3158 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3159                                           int *ent_cpu, u64 *ent_ts)
3160 {
3161         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3162 }
3163
3164 /* Find the next real entry, and increment the iterator to the next entry */
3165 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3166 {
3167         iter->ent = __find_next_entry(iter, &iter->cpu,
3168                                       &iter->lost_events, &iter->ts);
3169
3170         if (iter->ent)
3171                 trace_iterator_increment(iter);
3172
3173         return iter->ent ? iter : NULL;
3174 }
3175
3176 static void trace_consume(struct trace_iterator *iter)
3177 {
3178         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3179                             &iter->lost_events);
3180 }
3181
3182 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3183 {
3184         struct trace_iterator *iter = m->private;
3185         int i = (int)*pos;
3186         void *ent;
3187
3188         WARN_ON_ONCE(iter->leftover);
3189
3190         (*pos)++;
3191
3192         /* can't go backwards */
3193         if (iter->idx > i)
3194                 return NULL;
3195
3196         if (iter->idx < 0)
3197                 ent = trace_find_next_entry_inc(iter);
3198         else
3199                 ent = iter;
3200
3201         while (ent && iter->idx < i)
3202                 ent = trace_find_next_entry_inc(iter);
3203
3204         iter->pos = *pos;
3205
3206         return ent;
3207 }
3208
3209 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3210 {
3211         struct ring_buffer_event *event;
3212         struct ring_buffer_iter *buf_iter;
3213         unsigned long entries = 0;
3214         u64 ts;
3215
3216         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3217
3218         buf_iter = trace_buffer_iter(iter, cpu);
3219         if (!buf_iter)
3220                 return;
3221
3222         ring_buffer_iter_reset(buf_iter);
3223
3224         /*
3225          * With the max latency tracers it can happen that a reset
3226          * never took place on a cpu. This is evident by the
3227          * timestamp being before the start of the buffer.
3228          */
3229         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3230                 if (ts >= iter->trace_buffer->time_start)
3231                         break;
3232                 entries++;
3233                 ring_buffer_read(buf_iter, NULL);
3234         }
3235
3236         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3237 }
3238
3239 /*
3240  * The current tracer is copied to avoid using a global lock
3241  * all around.
3242  */
3243 static void *s_start(struct seq_file *m, loff_t *pos)
3244 {
3245         struct trace_iterator *iter = m->private;
3246         struct trace_array *tr = iter->tr;
3247         int cpu_file = iter->cpu_file;
3248         void *p = NULL;
3249         loff_t l = 0;
3250         int cpu;
3251
3252         /*
3253          * Copy the tracer to avoid using a global lock all around.
3254          * iter->trace is a copy of current_trace; the name pointer
3255          * may be compared instead of using strcmp(), as iter->trace->name
3256          * will point to the same string as current_trace->name.
3257          */
3258         mutex_lock(&trace_types_lock);
3259         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3260                 *iter->trace = *tr->current_trace;
3261         mutex_unlock(&trace_types_lock);
3262
3263 #ifdef CONFIG_TRACER_MAX_TRACE
3264         if (iter->snapshot && iter->trace->use_max_tr)
3265                 return ERR_PTR(-EBUSY);
3266 #endif
3267
3268         if (!iter->snapshot)
3269                 atomic_inc(&trace_record_taskinfo_disabled);
3270
3271         if (*pos != iter->pos) {
3272                 iter->ent = NULL;
3273                 iter->cpu = 0;
3274                 iter->idx = -1;
3275
3276                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3277                         for_each_tracing_cpu(cpu)
3278                                 tracing_iter_reset(iter, cpu);
3279                 } else
3280                         tracing_iter_reset(iter, cpu_file);
3281
3282                 iter->leftover = 0;
3283                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3284                         ;
3285
3286         } else {
3287                 /*
3288                  * If we overflowed the seq_file before, then we want
3289                  * to just reuse the trace_seq buffer again.
3290                  */
3291                 if (iter->leftover)
3292                         p = iter;
3293                 else {
3294                         l = *pos - 1;
3295                         p = s_next(m, p, &l);
3296                 }
3297         }
3298
3299         trace_event_read_lock();
3300         trace_access_lock(cpu_file);
3301         return p;
3302 }
3303
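/*
 * seq_file .stop: undo what s_start() did - drop the access locks and
 * re-enable task info recording when not reading a snapshot.
 */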
3304 static void s_stop(struct seq_file *m, void *p)
3305 {
3306         struct trace_iterator *iter = m->private;
3307
3308 #ifdef CONFIG_TRACER_MAX_TRACE
3309         if (iter->snapshot && iter->trace->use_max_tr)
3310                 return;
3311 #endif
3312
3313         if (!iter->snapshot)
3314                 atomic_dec(&trace_record_taskinfo_disabled);
3315
3316         trace_access_unlock(iter->cpu_file);
3317         trace_event_read_unlock();
3318 }
3319
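/*
 * Sum the per-cpu entry counts of @buf: @entries is what can still be
 * read, @total also includes overwritten (overrun) events unless the
 * cpu had skipped entries.
 */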
3320 static void
3321 get_total_entries(struct trace_buffer *buf,
3322                   unsigned long *total, unsigned long *entries)
3323 {
3324         unsigned long count;
3325         int cpu;
3326
3327         *total = 0;
3328         *entries = 0;
3329
3330         for_each_tracing_cpu(cpu) {
3331                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3332                 /*
3333                  * If this buffer has skipped entries, then we hold all
3334                  * entries for the trace and we need to ignore the
3335                  * ones before the time stamp.
3336                  */
3337                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3338                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3339                         /* total is the same as the entries */
3340                         *total += count;
3341                 } else
3342                         *total += count +
3343                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3344                 *entries += count;
3345         }
3346 }
3347
3348 static void print_lat_help_header(struct seq_file *m)
3349 {
3350         seq_puts(m, "#                  _------=> CPU#            \n"
3351                     "#                 / _-----=> irqs-off        \n"
3352                     "#                | / _----=> need-resched    \n"
3353                     "#                || / _---=> hardirq/softirq \n"
3354                     "#                ||| / _--=> preempt-depth   \n"
3355                     "#                |||| /     delay            \n"
3356                     "#  cmd     pid   ||||| time  |   caller      \n"
3357                     "#     \\   /      |||||  \\    |   /         \n");
3358 }
3359
3360 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3361 {
3362         unsigned long total;
3363         unsigned long entries;
3364
3365         get_total_entries(buf, &total, &entries);
3366         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3367                    entries, total, num_online_cpus());
3368         seq_puts(m, "#\n");
3369 }
3370
3371 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3372                                    unsigned int flags)
3373 {
3374         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375
3376         print_event_info(buf, m);
3377
3378         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3379         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3380 }
3381
3382 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3383                                        unsigned int flags)
3384 {
3385         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3386         const char tgid_space[] = "          ";
3387         const char space[] = "  ";
3388
3389         print_event_info(buf, m);
3390
3391         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3392                    tgid ? tgid_space : space);
3393         seq_printf(m, "#                          %s / _----=> need-resched\n",
3394                    tgid ? tgid_space : space);
3395         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3396                    tgid ? tgid_space : space);
3397         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3398                    tgid ? tgid_space : space);
3399         seq_printf(m, "#                          %s||| /     delay\n",
3400                    tgid ? tgid_space : space);
3401         seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3402                    tgid ? "   TGID   " : space);
3403         seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3404                    tgid ? "     |    " : space);
3405 }
3406
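/*
 * Print the latency-format banner: tracer name, kernel release, latency,
 * entry counts, the task that triggered the trace and, if recorded, the
 * critical section boundaries.
 */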
3407 void
3408 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3409 {
3410         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3411         struct trace_buffer *buf = iter->trace_buffer;
3412         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3413         struct tracer *type = iter->trace;
3414         unsigned long entries;
3415         unsigned long total;
3416         const char *name = type->name;
3419
3420         get_total_entries(buf, &total, &entries);
3421
3422         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3423                    name, UTS_RELEASE);
3424         seq_puts(m, "# -----------------------------------"
3425                  "---------------------------------\n");
3426         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3427                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3428                    nsecs_to_usecs(data->saved_latency),
3429                    entries,
3430                    total,
3431                    buf->cpu,
3432 #if defined(CONFIG_PREEMPT_NONE)
3433                    "server",
3434 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3435                    "desktop",
3436 #elif defined(CONFIG_PREEMPT)
3437                    "preempt",
3438 #else
3439                    "unknown",
3440 #endif
3441                    /* These are reserved for later use */
3442                    0, 0, 0, 0);
3443 #ifdef CONFIG_SMP
3444         seq_printf(m, " #P:%d)\n", num_online_cpus());
3445 #else
3446         seq_puts(m, ")\n");
3447 #endif
3448         seq_puts(m, "#    -----------------\n");
3449         seq_printf(m, "#    | task: %.16s-%d "
3450                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3451                    data->comm, data->pid,
3452                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3453                    data->policy, data->rt_priority);
3454         seq_puts(m, "#    -----------------\n");
3455
3456         if (data->critical_start) {
3457                 seq_puts(m, "#  => started at: ");
3458                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3459                 trace_print_seq(m, &iter->seq);
3460                 seq_puts(m, "\n#  => ended at:   ");
3461                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3462                 trace_print_seq(m, &iter->seq);
3463                 seq_puts(m, "\n#\n");
3464         }
3465
3466         seq_puts(m, "#\n");
3467 }
3468
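/*
 * If buffer annotation is enabled, note the first time output is
 * produced from a given CPU's buffer.
 */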
3469 static void test_cpu_buff_start(struct trace_iterator *iter)
3470 {
3471         struct trace_seq *s = &iter->seq;
3472         struct trace_array *tr = iter->tr;
3473
3474         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3475                 return;
3476
3477         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3478                 return;
3479
3480         if (cpumask_available(iter->started) &&
3481             cpumask_test_cpu(iter->cpu, iter->started))
3482                 return;
3483
3484         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3485                 return;
3486
3487         if (cpumask_available(iter->started))
3488                 cpumask_set_cpu(iter->cpu, iter->started);
3489
3490         /* Don't print started cpu buffer for the first entry of the trace */
3491         if (iter->idx > 1)
3492                 trace_seq_printf(s, "##### CPU %u buffer started #####\n",
3493                                 iter->cpu);
3494 }
3495
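/* Default human-readable formatting of a single trace entry */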
3496 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3497 {
3498         struct trace_array *tr = iter->tr;
3499         struct trace_seq *s = &iter->seq;
3500         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3501         struct trace_entry *entry;
3502         struct trace_event *event;
3503
3504         entry = iter->ent;
3505
3506         test_cpu_buff_start(iter);
3507
3508         event = ftrace_find_event(entry->type);
3509
3510         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3511                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3512                         trace_print_lat_context(iter);
3513                 else
3514                         trace_print_context(iter);
3515         }
3516
3517         if (trace_seq_has_overflowed(s))
3518                 return TRACE_TYPE_PARTIAL_LINE;
3519
3520         if (event)
3521                 return event->funcs->trace(iter, sym_flags, event);
3522
3523         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3524
3525         return trace_handle_return(s);
3526 }
3527
3528 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3529 {
3530         struct trace_array *tr = iter->tr;
3531         struct trace_seq *s = &iter->seq;
3532         struct trace_entry *entry;
3533         struct trace_event *event;
3534
3535         entry = iter->ent;
3536
3537         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3538                 trace_seq_printf(s, "%d %d %llu ",
3539                                  entry->pid, iter->cpu, iter->ts);
3540
3541         if (trace_seq_has_overflowed(s))
3542                 return TRACE_TYPE_PARTIAL_LINE;
3543
3544         event = ftrace_find_event(entry->type);
3545         if (event)
3546                 return event->funcs->raw(iter, 0, event);
3547
3548         trace_seq_printf(s, "%d ?\n", entry->type);
3549
3550         return trace_handle_return(s);
3551 }
3552
3553 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3554 {
3555         struct trace_array *tr = iter->tr;
3556         struct trace_seq *s = &iter->seq;
3557         unsigned char newline = '\n';
3558         struct trace_entry *entry;
3559         struct trace_event *event;
3560
3561         entry = iter->ent;
3562
3563         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3564                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3565                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3566                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3567                 if (trace_seq_has_overflowed(s))
3568                         return TRACE_TYPE_PARTIAL_LINE;
3569         }
3570
3571         event = ftrace_find_event(entry->type);
3572         if (event) {
3573                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3574                 if (ret != TRACE_TYPE_HANDLED)
3575                         return ret;
3576         }
3577
3578         SEQ_PUT_FIELD(s, newline);
3579
3580         return trace_handle_return(s);
3581 }
3582
3583 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3584 {
3585         struct trace_array *tr = iter->tr;
3586         struct trace_seq *s = &iter->seq;
3587         struct trace_entry *entry;
3588         struct trace_event *event;
3589
3590         entry = iter->ent;
3591
3592         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3593                 SEQ_PUT_FIELD(s, entry->pid);
3594                 SEQ_PUT_FIELD(s, iter->cpu);
3595                 SEQ_PUT_FIELD(s, iter->ts);
3596                 if (trace_seq_has_overflowed(s))
3597                         return TRACE_TYPE_PARTIAL_LINE;
3598         }
3599
3600         event = ftrace_find_event(entry->type);
3601         return event ? event->funcs->binary(iter, 0, event) :
3602                 TRACE_TYPE_HANDLED;
3603 }
3604
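/* Return 1 if the buffer(s) covered by the iterator contain no entries */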
3605 int trace_empty(struct trace_iterator *iter)
3606 {
3607         struct ring_buffer_iter *buf_iter;
3608         int cpu;
3609
3610         /* If we are looking at one CPU buffer, only check that one */
3611         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3612                 cpu = iter->cpu_file;
3613                 buf_iter = trace_buffer_iter(iter, cpu);
3614                 if (buf_iter) {
3615                         if (!ring_buffer_iter_empty(buf_iter))
3616                                 return 0;
3617                 } else {
3618                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619                                 return 0;
3620                 }
3621                 return 1;
3622         }
3623
3624         for_each_tracing_cpu(cpu) {
3625                 buf_iter = trace_buffer_iter(iter, cpu);
3626                 if (buf_iter) {
3627                         if (!ring_buffer_iter_empty(buf_iter))
3628                                 return 0;
3629                 } else {
3630                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3631                                 return 0;
3632                 }
3633         }
3634
3635         return 1;
3636 }
3637
3638 /*  Called with trace_event_read_lock() held. */
3639 enum print_line_t print_trace_line(struct trace_iterator *iter)
3640 {
3641         struct trace_array *tr = iter->tr;
3642         unsigned long trace_flags = tr->trace_flags;
3643         enum print_line_t ret;
3644
3645         if (iter->lost_events) {
3646                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3647                                  iter->cpu, iter->lost_events);
3648                 if (trace_seq_has_overflowed(&iter->seq))
3649                         return TRACE_TYPE_PARTIAL_LINE;
3650         }
3651
3652         if (iter->trace && iter->trace->print_line) {
3653                 ret = iter->trace->print_line(iter);
3654                 if (ret != TRACE_TYPE_UNHANDLED)
3655                         return ret;
3656         }
3657
3658         if (iter->ent->type == TRACE_BPUTS &&
3659                         trace_flags & TRACE_ITER_PRINTK &&
3660                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661                 return trace_print_bputs_msg_only(iter);
3662
3663         if (iter->ent->type == TRACE_BPRINT &&
3664                         trace_flags & TRACE_ITER_PRINTK &&
3665                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666                 return trace_print_bprintk_msg_only(iter);
3667
3668         if (iter->ent->type == TRACE_PRINT &&
3669                         trace_flags & TRACE_ITER_PRINTK &&
3670                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3671                 return trace_print_printk_msg_only(iter);
3672
3673         if (trace_flags & TRACE_ITER_BIN)
3674                 return print_bin_fmt(iter);
3675
3676         if (trace_flags & TRACE_ITER_HEX)
3677                 return print_hex_fmt(iter);
3678
3679         if (trace_flags & TRACE_ITER_RAW)
3680                 return print_raw_fmt(iter);
3681
3682         return print_trace_fmt(iter);
3683 }
3684
3685 void trace_latency_header(struct seq_file *m)
3686 {
3687         struct trace_iterator *iter = m->private;
3688         struct trace_array *tr = iter->tr;
3689
3690         /* print nothing if the buffers are empty */
3691         if (trace_empty(iter))
3692                 return;
3693
3694         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3695                 print_trace_header(m, iter);
3696
3697         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3698                 print_lat_help_header(m);
3699 }
3700
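/*
 * Print the header block appropriate for the current output flags:
 * the latency format header, or the plain function header with or
 * without irq info.
 */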
3701 void trace_default_header(struct seq_file *m)
3702 {
3703         struct trace_iterator *iter = m->private;
3704         struct trace_array *tr = iter->tr;
3705         unsigned long trace_flags = tr->trace_flags;
3706
3707         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3708                 return;
3709
3710         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3711                 /* print nothing if the buffers are empty */
3712                 if (trace_empty(iter))
3713                         return;
3714                 print_trace_header(m, iter);
3715                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3716                         print_lat_help_header(m);
3717         } else {
3718                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3719                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3720                                 print_func_help_header_irq(iter->trace_buffer,
3721                                                            m, trace_flags);
3722                         else
3723                                 print_func_help_header(iter->trace_buffer, m,
3724                                                        trace_flags);
3725                 }
3726         }
3727 }
3728
3729 static void test_ftrace_alive(struct seq_file *m)
3730 {
3731         if (!ftrace_is_dead())
3732                 return;
3733         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3734                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3735 }
3736
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738 static void show_snapshot_main_help(struct seq_file *m)
3739 {
3740         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3741                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3742                     "#                      Takes a snapshot of the main buffer.\n"
3743                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3744                     "#                      (Doesn't have to be '2'; works with any number that\n"
3745                     "#                       is not a '0' or '1')\n");
3746 }
3747
3748 static void show_snapshot_percpu_help(struct seq_file *m)
3749 {
3750         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3751 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3752         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3753                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3754 #else
3755         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3756                     "#                     Must use main snapshot file to allocate.\n");
3757 #endif
3758         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3759                     "#                      (Doesn't have to be '2'; works with any number that\n"
3760                     "#                       is not a '0' or '1')\n");
3761 }
3762
3763 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3764 {
3765         if (iter->tr->allocated_snapshot)
3766                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3767         else
3768                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3769
3770         seq_puts(m, "# Snapshot commands:\n");
3771         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3772                 show_snapshot_main_help(m);
3773         else
3774                 show_snapshot_percpu_help(m);
3775 }
3776 #else
3777 /* Should never be called */
3778 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3779 #endif
3780
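/*
 * seq_file .show: print the headers on the first call, replay a leftover
 * trace_seq if the previous line overflowed the seq_file, otherwise
 * format the current entry.
 */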
3781 static int s_show(struct seq_file *m, void *v)
3782 {
3783         struct trace_iterator *iter = v;
3784         int ret;
3785
3786         if (iter->ent == NULL) {
3787                 if (iter->tr) {
3788                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3789                         seq_puts(m, "#\n");
3790                         test_ftrace_alive(m);
3791                 }
3792                 if (iter->snapshot && trace_empty(iter))
3793                         print_snapshot_help(m, iter);
3794                 else if (iter->trace && iter->trace->print_header)
3795                         iter->trace->print_header(m);
3796                 else
3797                         trace_default_header(m);
3798
3799         } else if (iter->leftover) {
3800                 /*
3801                  * If we filled the seq_file buffer earlier, we
3802                  * want to just show it now.
3803                  */
3804                 ret = trace_print_seq(m, &iter->seq);
3805
3806                 /* ret should this time be zero, but you never know */
3807                 iter->leftover = ret;
3808
3809         } else {
3810                 print_trace_line(iter);
3811                 ret = trace_print_seq(m, &iter->seq);
3812                 /*
3813                  * If we overflow the seq_file buffer, then it will
3814                  * ask us for this data again at start up.
3815                  * Use that instead.
3816                  *  ret is 0 if seq_file write succeeded.
3817                  *        -1 otherwise.
3818                  */
3819                 iter->leftover = ret;
3820         }
3821
3822         return 0;
3823 }
3824
3825 /*
3826  * Should be used after trace_array_get(), trace_types_lock
3827  * ensures that i_cdev was already initialized.
3828  */
3829 static inline int tracing_get_cpu(struct inode *inode)
3830 {
3831         if (inode->i_cdev) /* See trace_create_cpu_file() */
3832                 return (long)inode->i_cdev - 1;
3833         return RING_BUFFER_ALL_CPUS;
3834 }
3835
3836 static const struct seq_operations tracer_seq_ops = {
3837         .start          = s_start,
3838         .next           = s_next,
3839         .stop           = s_stop,
3840         .show           = s_show,
3841 };
3842
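/*
 * Set up a trace_iterator for reading the "trace" file: copy the current
 * tracer, pick the right buffer, stop tracing (unless reading a snapshot)
 * and prepare a ring buffer iterator for each requested CPU.
 */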
3843 static struct trace_iterator *
3844 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3845 {
3846         struct trace_array *tr = inode->i_private;
3847         struct trace_iterator *iter;
3848         int cpu;
3849
3850         if (tracing_disabled)
3851                 return ERR_PTR(-ENODEV);
3852
3853         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3854         if (!iter)
3855                 return ERR_PTR(-ENOMEM);
3856
3857         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3858                                     GFP_KERNEL);
3859         if (!iter->buffer_iter)
3860                 goto release;
3861
3862         /*
3863          * We make a copy of the current tracer to avoid concurrent
3864          * changes on it while we are reading.
3865          */
3866         mutex_lock(&trace_types_lock);
3867         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3868         if (!iter->trace)
3869                 goto fail;
3870
3871         *iter->trace = *tr->current_trace;
3872
3873         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3874                 goto fail;
3875
3876         iter->tr = tr;
3877
3878 #ifdef CONFIG_TRACER_MAX_TRACE
3879         /* Currently only the top directory has a snapshot */
3880         if (tr->current_trace->print_max || snapshot)
3881                 iter->trace_buffer = &tr->max_buffer;
3882         else
3883 #endif
3884                 iter->trace_buffer = &tr->trace_buffer;
3885         iter->snapshot = snapshot;
3886         iter->pos = -1;
3887         iter->cpu_file = tracing_get_cpu(inode);
3888         mutex_init(&iter->mutex);
3889
3890         /* Notify the tracer early; before we stop tracing. */
3891         if (iter->trace && iter->trace->open)
3892                 iter->trace->open(iter);
3893
3894         /* Annotate start of buffers if we had overruns */
3895         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3896                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3897
3898         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3899         if (trace_clocks[tr->clock_id].in_ns)
3900                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3901
3902         /* stop the trace while dumping if we are not opening "snapshot" */
3903         if (!iter->snapshot)
3904                 tracing_stop_tr(tr);
3905
3906         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3907                 for_each_tracing_cpu(cpu) {
3908                         iter->buffer_iter[cpu] =
3909                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
3910                                                          cpu, GFP_KERNEL);
3911                 }
3912                 ring_buffer_read_prepare_sync();
3913                 for_each_tracing_cpu(cpu) {
3914                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3915                         tracing_iter_reset(iter, cpu);
3916                 }
3917         } else {
3918                 cpu = iter->cpu_file;
3919                 iter->buffer_iter[cpu] =
3920                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
3921                                                  cpu, GFP_KERNEL);
3922                 ring_buffer_read_prepare_sync();
3923                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3924                 tracing_iter_reset(iter, cpu);
3925         }
3926
3927         mutex_unlock(&trace_types_lock);
3928
3929         return iter;
3930
3931  fail:
3932         mutex_unlock(&trace_types_lock);
3933         kfree(iter->trace);
3934         kfree(iter->buffer_iter);
3935 release:
3936         seq_release_private(inode, file);
3937         return ERR_PTR(-ENOMEM);
3938 }
3939
3940 int tracing_open_generic(struct inode *inode, struct file *filp)
3941 {
3942         if (tracing_disabled)
3943                 return -ENODEV;
3944
3945         filp->private_data = inode->i_private;
3946         return 0;
3947 }
3948
3949 bool tracing_is_disabled(void)
3950 {
3951         return tracing_disabled ? true : false;
3952 }
3953
3954 /*
3955  * Open and update trace_array ref count.
3956  * Must have the current trace_array passed to it.
3957  */
3958 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3959 {
3960         struct trace_array *tr = inode->i_private;
3961
3962         if (tracing_disabled)
3963                 return -ENODEV;
3964
3965         if (trace_array_get(tr) < 0)
3966                 return -ENODEV;
3967
3968         filp->private_data = inode->i_private;
3969
3970         return 0;
3971 }
3972
3973 static int tracing_release(struct inode *inode, struct file *file)
3974 {
3975         struct trace_array *tr = inode->i_private;
3976         struct seq_file *m = file->private_data;
3977         struct trace_iterator *iter;
3978         int cpu;
3979
3980         if (!(file->f_mode & FMODE_READ)) {
3981                 trace_array_put(tr);
3982                 return 0;
3983         }
3984
3985         /* Writes do not use seq_file */
3986         iter = m->private;
3987         mutex_lock(&trace_types_lock);
3988
3989         for_each_tracing_cpu(cpu) {
3990                 if (iter->buffer_iter[cpu])
3991                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3992         }
3993
3994         if (iter->trace && iter->trace->close)
3995                 iter->trace->close(iter);
3996
3997         if (!iter->snapshot)
3998                 /* reenable tracing if it was previously enabled */
3999                 tracing_start_tr(tr);
4000
4001         __trace_array_put(tr);
4002
4003         mutex_unlock(&trace_types_lock);
4004
4005         mutex_destroy(&iter->mutex);
4006         free_cpumask_var(iter->started);
4007         kfree(iter->trace);
4008         kfree(iter->buffer_iter);
4009         seq_release_private(inode, file);
4010
4011         return 0;
4012 }
4013
4014 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4015 {
4016         struct trace_array *tr = inode->i_private;
4017
4018         trace_array_put(tr);
4019         return 0;
4020 }
4021
4022 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4023 {
4024         struct trace_array *tr = inode->i_private;
4025
4026         trace_array_put(tr);
4027
4028         return single_release(inode, file);
4029 }
4030
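/*
 * Open handler for the "trace" file.  Opening with O_TRUNC for write
 * clears the buffer(s); opening for read builds the seq_file iterator.
 */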
4031 static int tracing_open(struct inode *inode, struct file *file)
4032 {
4033         struct trace_array *tr = inode->i_private;
4034         struct trace_iterator *iter;
4035         int ret = 0;
4036
4037         if (trace_array_get(tr) < 0)
4038                 return -ENODEV;
4039
4040         /* If this file was open for write, then erase contents */
4041         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4042                 int cpu = tracing_get_cpu(inode);
4043                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4044
4045 #ifdef CONFIG_TRACER_MAX_TRACE
4046                 if (tr->current_trace->print_max)
4047                         trace_buf = &tr->max_buffer;
4048 #endif
4049
4050                 if (cpu == RING_BUFFER_ALL_CPUS)
4051                         tracing_reset_online_cpus(trace_buf);
4052                 else
4053                         tracing_reset(trace_buf, cpu);
4054         }
4055
4056         if (file->f_mode & FMODE_READ) {
4057                 iter = __tracing_open(inode, file, false);
4058                 if (IS_ERR(iter))
4059                         ret = PTR_ERR(iter);
4060                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4061                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4062         }
4063
4064         if (ret < 0)
4065                 trace_array_put(tr);
4066
4067         return ret;
4068 }
4069
4070 /*
4071  * Some tracers are not suitable for instance buffers.
4072  * A tracer is always available for the global array (toplevel)
4073  * or if it explicitly states that it is.
4074  */
4075 static bool
4076 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4077 {
4078         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4079 }
4080
4081 /* Find the next tracer that this trace array may use */
4082 static struct tracer *
4083 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4084 {
4085         while (t && !trace_ok_for_array(t, tr))
4086                 t = t->next;
4087
4088         return t;
4089 }
4090
4091 static void *
4092 t_next(struct seq_file *m, void *v, loff_t *pos)
4093 {
4094         struct trace_array *tr = m->private;
4095         struct tracer *t = v;
4096
4097         (*pos)++;
4098
4099         if (t)
4100                 t = get_tracer_for_array(tr, t->next);
4101
4102         return t;
4103 }
4104
4105 static void *t_start(struct seq_file *m, loff_t *pos)
4106 {
4107         struct trace_array *tr = m->private;
4108         struct tracer *t;
4109         loff_t l = 0;
4110
4111         mutex_lock(&trace_types_lock);
4112
4113         t = get_tracer_for_array(tr, trace_types);
4114         for (; t && l < *pos; t = t_next(m, t, &l))
4115                 ;
4116
4117         return t;
4118 }
4119
4120 static void t_stop(struct seq_file *m, void *p)
4121 {
4122         mutex_unlock(&trace_types_lock);
4123 }
4124
4125 static int t_show(struct seq_file *m, void *v)
4126 {
4127         struct tracer *t = v;
4128
4129         if (!t)
4130                 return 0;
4131
4132         seq_puts(m, t->name);
4133         if (t->next)
4134                 seq_putc(m, ' ');
4135         else
4136                 seq_putc(m, '\n');
4137
4138         return 0;
4139 }
4140
4141 static const struct seq_operations show_traces_seq_ops = {
4142         .start          = t_start,
4143         .next           = t_next,
4144         .stop           = t_stop,
4145         .show           = t_show,
4146 };
4147
4148 static int show_traces_open(struct inode *inode, struct file *file)
4149 {
4150         struct trace_array *tr = inode->i_private;
4151         struct seq_file *m;
4152         int ret;
4153
4154         if (tracing_disabled)
4155                 return -ENODEV;
4156
4157         ret = seq_open(file, &show_traces_seq_ops);
4158         if (ret)
4159                 return ret;
4160
4161         m = file->private_data;
4162         m->private = tr;
4163
4164         return 0;
4165 }
4166
4167 static ssize_t
4168 tracing_write_stub(struct file *filp, const char __user *ubuf,
4169                    size_t count, loff_t *ppos)
4170 {
4171         return count;
4172 }
4173
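/*
 * Seeking is only supported for readers going through seq_file;
 * writers simply get their file position reset to 0.
 */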
4174 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4175 {
4176         int ret;
4177
4178         if (file->f_mode & FMODE_READ)
4179                 ret = seq_lseek(file, offset, whence);
4180         else
4181                 file->f_pos = ret = 0;
4182
4183         return ret;
4184 }
4185
4186 static const struct file_operations tracing_fops = {
4187         .open           = tracing_open,
4188         .read           = seq_read,
4189         .write          = tracing_write_stub,
4190         .llseek         = tracing_lseek,
4191         .release        = tracing_release,
4192 };
4193
4194 static const struct file_operations show_traces_fops = {
4195         .open           = show_traces_open,
4196         .read           = seq_read,
4197         .release        = seq_release,
4198         .llseek         = seq_lseek,
4199 };
4200
4201 static ssize_t
4202 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4203                      size_t count, loff_t *ppos)
4204 {
4205         struct trace_array *tr = file_inode(filp)->i_private;
4206         char *mask_str;
4207         int len;
4208
4209         len = snprintf(NULL, 0, "%*pb\n",
4210                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4211         mask_str = kmalloc(len, GFP_KERNEL);
4212         if (!mask_str)
4213                 return -ENOMEM;
4214
4215         len = snprintf(mask_str, len, "%*pb\n",
4216                        cpumask_pr_args(tr->tracing_cpumask));
4217         if (len >= count) {
4218                 count = -EINVAL;
4219                 goto out_err;
4220         }
4221         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4222
4223 out_err:
4224         kfree(mask_str);
4225
4226         return count;
4227 }
4228
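/*
 * Parse a new tracing cpumask from user space and enable/disable
 * per-cpu recording so the buffers match the requested mask.
 */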
4229 static ssize_t
4230 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4231                       size_t count, loff_t *ppos)
4232 {
4233         struct trace_array *tr = file_inode(filp)->i_private;
4234         cpumask_var_t tracing_cpumask_new;
4235         int err, cpu;
4236
4237         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4238                 return -ENOMEM;
4239
4240         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4241         if (err)
4242                 goto err_unlock;
4243
4244         local_irq_disable();
4245         arch_spin_lock(&tr->max_lock);
4246         for_each_tracing_cpu(cpu) {
4247                 /*
4248                  * Increase/decrease the disabled counter if we are
4249                  * about to flip a bit in the cpumask:
4250                  */
4251                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4252                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4253                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4254                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4255                 }
4256                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4257                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4258                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4259                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4260                 }
4261         }
4262         arch_spin_unlock(&tr->max_lock);
4263         local_irq_enable();
4264
4265         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4266         free_cpumask_var(tracing_cpumask_new);
4267
4268         return count;
4269
4270 err_unlock:
4271         free_cpumask_var(tracing_cpumask_new);
4272
4273         return err;
4274 }
4275
4276 static const struct file_operations tracing_cpumask_fops = {
4277         .open           = tracing_open_generic_tr,
4278         .read           = tracing_cpumask_read,
4279         .write          = tracing_cpumask_write,
4280         .release        = tracing_release_generic_tr,
4281         .llseek         = generic_file_llseek,
4282 };
4283
4284 static int tracing_trace_options_show(struct seq_file *m, void *v)
4285 {
4286         struct tracer_opt *trace_opts;
4287         struct trace_array *tr = m->private;
4288         u32 tracer_flags;
4289         int i;
4290
4291         mutex_lock(&trace_types_lock);
4292         tracer_flags = tr->current_trace->flags->val;
4293         trace_opts = tr->current_trace->flags->opts;
4294
4295         for (i = 0; trace_options[i]; i++) {
4296                 if (tr->trace_flags & (1 << i))
4297                         seq_printf(m, "%s\n", trace_options[i]);
4298                 else
4299                         seq_printf(m, "no%s\n", trace_options[i]);
4300         }
4301
4302         for (i = 0; trace_opts[i].name; i++) {
4303                 if (tracer_flags & trace_opts[i].bit)
4304                         seq_printf(m, "%s\n", trace_opts[i].name);
4305                 else
4306                         seq_printf(m, "no%s\n", trace_opts[i].name);
4307         }
4308         mutex_unlock(&trace_types_lock);
4309
4310         return 0;
4311 }
4312
4313 static int __set_tracer_option(struct trace_array *tr,
4314                                struct tracer_flags *tracer_flags,
4315                                struct tracer_opt *opts, int neg)
4316 {
4317         struct tracer *trace = tracer_flags->trace;
4318         int ret;
4319
4320         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4321         if (ret)
4322                 return ret;
4323
4324         if (neg)
4325                 tracer_flags->val &= ~opts->bit;
4326         else
4327                 tracer_flags->val |= opts->bit;
4328         return 0;
4329 }
4330
4331 /* Try to assign a tracer specific option */
4332 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4333 {
4334         struct tracer *trace = tr->current_trace;
4335         struct tracer_flags *tracer_flags = trace->flags;
4336         struct tracer_opt *opts = NULL;
4337         int i;
4338
4339         for (i = 0; tracer_flags->opts[i].name; i++) {
4340                 opts = &tracer_flags->opts[i];
4341
4342                 if (strcmp(cmp, opts->name) == 0)
4343                         return __set_tracer_option(tr, trace->flags, opts, neg);
4344         }
4345
4346         return -EINVAL;
4347 }
4348
4349 /* Some tracers require overwrite to stay enabled */
4350 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4351 {
4352         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4353                 return -1;
4354
4355         return 0;
4356 }
4357
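/*
 * Set or clear one of the core trace flags.  The current tracer may
 * veto the change, and several flags have side effects (cmdline/tgid
 * recording, fork following, buffer overwrite mode, trace_printk).
 */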
4358 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4359 {
4360         /* do nothing if the flag already matches the requested state */
4361         if (!!(tr->trace_flags & mask) == !!enabled)
4362                 return 0;
4363
4364         /* Give the tracer a chance to approve the change */
4365         if (tr->current_trace->flag_changed)
4366                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4367                         return -EINVAL;
4368
4369         if (enabled)
4370                 tr->trace_flags |= mask;
4371         else
4372                 tr->trace_flags &= ~mask;
4373
4374         if (mask == TRACE_ITER_RECORD_CMD)
4375                 trace_event_enable_cmd_record(enabled);
4376
4377         if (mask == TRACE_ITER_RECORD_TGID) {
4378                 if (!tgid_map)
4379                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4380                                            sizeof(*tgid_map),
4381                                            GFP_KERNEL);
4382                 if (!tgid_map) {
4383                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4384                         return -ENOMEM;
4385                 }
4386
4387                 trace_event_enable_tgid_record(enabled);
4388         }
4389
4390         if (mask == TRACE_ITER_EVENT_FORK)
4391                 trace_event_follow_fork(tr, enabled);
4392
4393         if (mask == TRACE_ITER_FUNC_FORK)
4394                 ftrace_pid_follow_fork(tr, enabled);
4395
4396         if (mask == TRACE_ITER_OVERWRITE) {
4397                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4398 #ifdef CONFIG_TRACER_MAX_TRACE
4399                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4400 #endif
4401         }
4402
4403         if (mask == TRACE_ITER_PRINTK) {
4404                 trace_printk_start_stop_comm(enabled);
4405                 trace_printk_control(enabled);
4406         }
4407
4408         return 0;
4409 }
4410
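/*
 * Apply one option name from the "trace_options" file or the boot
 * command line: a leading "no" clears the flag, otherwise it is set.
 * Names that are not core options are tried as tracer-specific options.
 */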
4411 static int trace_set_options(struct trace_array *tr, char *option)
4412 {
4413         char *cmp;
4414         int neg = 0;
4415         int ret;
4416         size_t orig_len = strlen(option);
4417
4418         cmp = strstrip(option);
4419
4420         if (strncmp(cmp, "no", 2) == 0) {
4421                 neg = 1;
4422                 cmp += 2;
4423         }
4424
4425         mutex_lock(&trace_types_lock);
4426
4427         ret = match_string(trace_options, -1, cmp);
4428         /* If the name is not a core option, try the tracer-specific options */
4429         if (ret < 0)
4430                 ret = set_tracer_option(tr, cmp, neg);
4431         else
4432                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4433
4434         mutex_unlock(&trace_types_lock);
4435
4436         /*
4437          * If the first trailing whitespace is replaced with '\0' by strstrip,
4438          * turn it back into a space.
4439          */
4440         if (orig_len > strlen(option))
4441                 option[strlen(option)] = ' ';
4442
4443         return ret;
4444 }
4445
4446 static void __init apply_trace_boot_options(void)
4447 {
4448         char *buf = trace_boot_options_buf;
4449         char *option;
4450
4451         while (true) {
4452                 option = strsep(&buf, ",");
4453
4454                 if (!option)
4455                         break;
4456
4457                 if (*option)
4458                         trace_set_options(&global_trace, option);
4459
4460                 /* Put back the comma to allow this to be called again */
4461                 if (buf)
4462                         *(buf - 1) = ',';
4463         }
4464 }
4465
4466 static ssize_t
4467 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4468                         size_t cnt, loff_t *ppos)
4469 {
4470         struct seq_file *m = filp->private_data;
4471         struct trace_array *tr = m->private;
4472         char buf[64];
4473         int ret;
4474
4475         if (cnt >= sizeof(buf))
4476                 return -EINVAL;
4477
4478         if (copy_from_user(buf, ubuf, cnt))
4479                 return -EFAULT;
4480
4481         buf[cnt] = 0;
4482
4483         ret = trace_set_options(tr, buf);
4484         if (ret < 0)
4485                 return ret;
4486
4487         *ppos += cnt;
4488
4489         return cnt;
4490 }
4491
4492 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4493 {
4494         struct trace_array *tr = inode->i_private;
4495         int ret;
4496
4497         if (tracing_disabled)
4498                 return -ENODEV;
4499
4500         if (trace_array_get(tr) < 0)
4501                 return -ENODEV;
4502
4503         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4504         if (ret < 0)
4505                 trace_array_put(tr);
4506
4507         return ret;
4508 }
4509
4510 static const struct file_operations tracing_iter_fops = {
4511         .open           = tracing_trace_options_open,
4512         .read           = seq_read,
4513         .llseek         = seq_lseek,
4514         .release        = tracing_single_release_tr,
4515         .write          = tracing_trace_options_write,
4516 };
4517
4518 static const char readme_msg[] =
4519         "tracing mini-HOWTO:\n\n"
4520         "# echo 0 > tracing_on : quick way to disable tracing\n"
4521         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4522         " Important files:\n"
4523         "  trace\t\t\t- The static contents of the buffer\n"
4524         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4525         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4526         "  current_tracer\t- function and latency tracers\n"
4527         "  available_tracers\t- list of configured tracers for current_tracer\n"
4528         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4529         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4530         "  trace_clock\t\t- change the clock used to order events\n"
4531         "       local:   Per cpu clock but may not be synced across CPUs\n"
4532         "      global:   Synced across CPUs but slows tracing down.\n"
4533         "     counter:   Not a clock, but just an increment\n"
4534         "      uptime:   Jiffy counter from time of boot\n"
4535         "        perf:   Same clock that perf events use\n"
4536 #ifdef CONFIG_X86_64
4537         "     x86-tsc:   TSC cycle counter\n"
4538 #endif
4539         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4540         "       delta:   Delta difference against a buffer-wide timestamp\n"
4541         "    absolute:   Absolute (standalone) timestamp\n"
4542         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4543         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4544         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4545         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4546         "\t\t\t  Remove sub-buffer with rmdir\n"
4547         "  trace_options\t\t- Set format or modify how tracing happens\n"
4548         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4549         "\t\t\t  option name\n"
4550         "  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
4551 #ifdef CONFIG_DYNAMIC_FTRACE
4552         "\n  available_filter_functions - list of functions that can be filtered on\n"
4553         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4554         "\t\t\t  functions\n"
4555         "\t     accepts: func_full_name or glob-matching-pattern\n"
4556         "\t     modules: Can select a group via module\n"
4557         "\t      Format: :mod:<module-name>\n"
4558         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4559         "\t    triggers: a command to perform when function is hit\n"
4560         "\t      Format: <function>:<trigger>[:count]\n"
4561         "\t     trigger: traceon, traceoff\n"
4562         "\t\t      enable_event:<system>:<event>\n"
4563         "\t\t      disable_event:<system>:<event>\n"
4564 #ifdef CONFIG_STACKTRACE
4565         "\t\t      stacktrace\n"
4566 #endif
4567 #ifdef CONFIG_TRACER_SNAPSHOT
4568         "\t\t      snapshot\n"
4569 #endif
4570         "\t\t      dump\n"
4571         "\t\t      cpudump\n"
4572         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4573         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4574         "\t     The first one will disable tracing every time do_fault is hit\n"
4575         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4576         "\t       The first time do_trap is hit and it disables tracing, the\n"
4577         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4578         "\t       the counter will not decrement. It only decrements when the\n"
4579         "\t       trigger did work\n"
4580         "\t     To remove trigger without count:\n"
4581         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4582         "\t     To remove trigger with a count:\n"
4583         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4584         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4585         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4586         "\t    modules: Can select a group via module command :mod:\n"
4587         "\t    Does not accept triggers\n"
4588 #endif /* CONFIG_DYNAMIC_FTRACE */
4589 #ifdef CONFIG_FUNCTION_TRACER
4590         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4591         "\t\t    (function)\n"
4592 #endif
4593 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4594         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4595         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4596         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4597 #endif
4598 #ifdef CONFIG_TRACER_SNAPSHOT
4599         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4600         "\t\t\t  snapshot buffer. Read the contents for more\n"
4601         "\t\t\t  information\n"
4602 #endif
4603 #ifdef CONFIG_STACK_TRACER
4604         "  stack_trace\t\t- Shows the max stack trace when active\n"
4605         "  stack_max_size\t- Shows current max stack size that was traced\n"
4606         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4607         "\t\t\t  new trace)\n"
4608 #ifdef CONFIG_DYNAMIC_FTRACE
4609         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4610         "\t\t\t  traces\n"
4611 #endif
4612 #endif /* CONFIG_STACK_TRACER */
4613 #ifdef CONFIG_KPROBE_EVENTS
4614         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4615         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4616 #endif
4617 #ifdef CONFIG_UPROBE_EVENTS
4618         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4619         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4620 #endif
4621 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4622         "\t  accepts: event-definitions (one definition per line)\n"
4623         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4624         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4625         "\t           -:[<group>/]<event>\n"
4626 #ifdef CONFIG_KPROBE_EVENTS
4627         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4628         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4629 #endif
4630 #ifdef CONFIG_UPROBE_EVENTS
4631         "\t    place: <path>:<offset>\n"
4632 #endif
4633         "\t     args: <name>=fetcharg[:type]\n"
4634         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4635         "\t           $stack<index>, $stack, $retval, $comm\n"
4636         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4637         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4638 #endif
4639         "  events/\t\t- Directory containing all trace event subsystems:\n"
4640         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4641         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4642         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4643         "\t\t\t  events\n"
4644         "      filter\t\t- If set, only events passing filter are traced\n"
4645         "  events/<system>/<event>/\t- Directory containing control files for\n"
4646         "\t\t\t  <event>:\n"
4647         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4648         "      filter\t\t- If set, only events passing filter are traced\n"
4649         "      trigger\t\t- If set, a command to perform when event is hit\n"
4650         "\t    Format: <trigger>[:count][if <filter>]\n"
4651         "\t   trigger: traceon, traceoff\n"
4652         "\t            enable_event:<system>:<event>\n"
4653         "\t            disable_event:<system>:<event>\n"
4654 #ifdef CONFIG_HIST_TRIGGERS
4655         "\t            enable_hist:<system>:<event>\n"
4656         "\t            disable_hist:<system>:<event>\n"
4657 #endif
4658 #ifdef CONFIG_STACKTRACE
4659         "\t\t    stacktrace\n"
4660 #endif
4661 #ifdef CONFIG_TRACER_SNAPSHOT
4662         "\t\t    snapshot\n"
4663 #endif
4664 #ifdef CONFIG_HIST_TRIGGERS
4665         "\t\t    hist (see below)\n"
4666 #endif
4667         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4668         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4669         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4670         "\t                  events/block/block_unplug/trigger\n"
4671         "\t   The first disables tracing every time block_unplug is hit.\n"
4672         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4673         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4674         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4675         "\t   Like function triggers, the counter is only decremented if it\n"
4676         "\t    enabled or disabled tracing.\n"
4677         "\t   To remove a trigger without a count:\n"
4678         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4679         "\t   To remove a trigger with a count:\n"
4680         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4681         "\t   Filters can be ignored when removing a trigger.\n"
4682 #ifdef CONFIG_HIST_TRIGGERS
4683         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4684         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4685         "\t            [:values=<field1[,field2,...]>]\n"
4686         "\t            [:sort=<field1[,field2,...]>]\n"
4687         "\t            [:size=#entries]\n"
4688         "\t            [:pause][:continue][:clear]\n"
4689         "\t            [:name=histname1]\n"
4690         "\t            [if <filter>]\n\n"
4691         "\t    When a matching event is hit, an entry is added to a hash\n"
4692         "\t    table using the key(s) and value(s) named, and the value of a\n"
4693         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4694         "\t    correspond to fields in the event's format description.  Keys\n"
4695         "\t    can be any field, or the special string 'stacktrace'.\n"
4696         "\t    Compound keys consisting of up to two fields can be specified\n"
4697         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4698         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4699         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4700         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4701         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4702         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4703         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4704         "\t    its histogram data will be shared with other triggers of the\n"
4705         "\t    same name, and trigger hits will update this common data.\n\n"
4706         "\t    Reading the 'hist' file for the event will dump the hash\n"
4707         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4708         "\t    triggers attached to an event, there will be a table for each\n"
4709         "\t    trigger in the output.  The table displayed for a named\n"
4710         "\t    trigger will be the same as any other instance having the\n"
4711         "\t    same name.  The default format used to display a given field\n"
4712         "\t    can be modified by appending any of the following modifiers\n"
4713         "\t    to the field name, as applicable:\n\n"
4714         "\t            .hex        display a number as a hex value\n"
4715         "\t            .sym        display an address as a symbol\n"
4716         "\t            .sym-offset display an address as a symbol and offset\n"
4717         "\t            .execname   display a common_pid as a program name\n"
4718         "\t            .syscall    display a syscall id as a syscall name\n"
4719         "\t            .log2       display log2 value rather than raw number\n"
4720         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4721         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4722         "\t    trigger or to start a hist trigger but not log any events\n"
4723         "\t    until told to do so.  'continue' can be used to start or\n"
4724         "\t    restart a paused hist trigger.\n\n"
4725         "\t    The 'clear' parameter will clear the contents of a running\n"
4726         "\t    hist trigger and leave its current paused/active state\n"
4727         "\t    unchanged.\n\n"
4728         "\t    The enable_hist and disable_hist triggers can be used to\n"
4729         "\t    have one event conditionally start and stop another event's\n"
4730         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4731         "\t    the enable_event and disable_event triggers.\n"
4732 #endif
4733 ;
4734
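/*
 * A small illustrative hist-trigger session (not part of the help text
 * above; the kmem:kmalloc event and its call_site/bytes_req fields are
 * assumed to exist on the running kernel, paths are relative to the
 * tracefs mount point):
 *
 *   # cd /sys/kernel/tracing
 *   # echo 'hist:keys=call_site.sym:values=bytes_req' > \
 *         events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 */
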
4735 static ssize_t
4736 tracing_readme_read(struct file *filp, char __user *ubuf,
4737                        size_t cnt, loff_t *ppos)
4738 {
4739         return simple_read_from_buffer(ubuf, cnt, ppos,
4740                                         readme_msg, strlen(readme_msg));
4741 }
4742
4743 static const struct file_operations tracing_readme_fops = {
4744         .open           = tracing_open_generic,
4745         .read           = tracing_readme_read,
4746         .llseek         = generic_file_llseek,
4747 };
4748
4749 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4750 {
4751         int *ptr = v;
4752
4753         if (*pos || m->count)
4754                 ptr++;
4755
4756         (*pos)++;
4757
4758         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4759                 if (trace_find_tgid(*ptr))
4760                         return ptr;
4761         }
4762
4763         return NULL;
4764 }
4765
4766 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4767 {
4768         void *v;
4769         loff_t l = 0;
4770
4771         if (!tgid_map)
4772                 return NULL;
4773
4774         v = &tgid_map[0];
4775         while (l <= *pos) {
4776                 v = saved_tgids_next(m, v, &l);
4777                 if (!v)
4778                         return NULL;
4779         }
4780
4781         return v;
4782 }
4783
4784 static void saved_tgids_stop(struct seq_file *m, void *v)
4785 {
4786 }
4787
4788 static int saved_tgids_show(struct seq_file *m, void *v)
4789 {
4790         int pid = (int *)v - tgid_map;
4791
4792         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4793         return 0;
4794 }
4795
4796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4797         .start          = saved_tgids_start,
4798         .stop           = saved_tgids_stop,
4799         .next           = saved_tgids_next,
4800         .show           = saved_tgids_show,
4801 };
4802
4803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4804 {
4805         if (tracing_disabled)
4806                 return -ENODEV;
4807
4808         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4809 }
4810
4811
4812 static const struct file_operations tracing_saved_tgids_fops = {
4813         .open           = tracing_saved_tgids_open,
4814         .read           = seq_read,
4815         .llseek         = seq_lseek,
4816         .release        = seq_release,
4817 };
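
/*
 * Example of what the resulting saved_tgids file looks like when read
 * (values illustrative only; entries exist only for tasks whose tgid was
 * recorded, e.g. with the record-tgid trace option set):
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1 1
 *   1341 1341
 */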
4818
4819 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4820 {
4821         unsigned int *ptr = v;
4822
4823         if (*pos || m->count)
4824                 ptr++;
4825
4826         (*pos)++;
4827
4828         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4829              ptr++) {
4830                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4831                         continue;
4832
4833                 return ptr;
4834         }
4835
4836         return NULL;
4837 }
4838
4839 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4840 {
4841         void *v;
4842         loff_t l = 0;
4843
4844         preempt_disable();
4845         arch_spin_lock(&trace_cmdline_lock);
4846
4847         v = &savedcmd->map_cmdline_to_pid[0];
4848         while (l <= *pos) {
4849                 v = saved_cmdlines_next(m, v, &l);
4850                 if (!v)
4851                         return NULL;
4852         }
4853
4854         return v;
4855 }
4856
4857 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4858 {
4859         arch_spin_unlock(&trace_cmdline_lock);
4860         preempt_enable();
4861 }
4862
4863 static int saved_cmdlines_show(struct seq_file *m, void *v)
4864 {
4865         char buf[TASK_COMM_LEN];
4866         unsigned int *pid = v;
4867
4868         __trace_find_cmdline(*pid, buf);
4869         seq_printf(m, "%d %s\n", *pid, buf);
4870         return 0;
4871 }
4872
4873 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4874         .start          = saved_cmdlines_start,
4875         .next           = saved_cmdlines_next,
4876         .stop           = saved_cmdlines_stop,
4877         .show           = saved_cmdlines_show,
4878 };
4879
4880 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4881 {
4882         if (tracing_disabled)
4883                 return -ENODEV;
4884
4885         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4886 }
4887
4888 static const struct file_operations tracing_saved_cmdlines_fops = {
4889         .open           = tracing_saved_cmdlines_open,
4890         .read           = seq_read,
4891         .llseek         = seq_lseek,
4892         .release        = seq_release,
4893 };
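
/*
 * The saved_cmdlines file produced by these ops prints one "<pid> <comm>"
 * pair per cached entry, roughly (values illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   1 systemd
 *   2 kthreadd
 */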
4894
4895 static ssize_t
4896 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4897                                  size_t cnt, loff_t *ppos)
4898 {
4899         char buf[64];
4900         int r;
4901
4902         arch_spin_lock(&trace_cmdline_lock);
4903         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4904         arch_spin_unlock(&trace_cmdline_lock);
4905
4906         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4907 }
4908
4909 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4910 {
4911         kfree(s->saved_cmdlines);
4912         kfree(s->map_cmdline_to_pid);
4913         kfree(s);
4914 }
4915
4916 static int tracing_resize_saved_cmdlines(unsigned int val)
4917 {
4918         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4919
4920         s = kmalloc(sizeof(*s), GFP_KERNEL);
4921         if (!s)
4922                 return -ENOMEM;
4923
4924         if (allocate_cmdlines_buffer(val, s) < 0) {
4925                 kfree(s);
4926                 return -ENOMEM;
4927         }
4928
4929         arch_spin_lock(&trace_cmdline_lock);
4930         savedcmd_temp = savedcmd;
4931         savedcmd = s;
4932         arch_spin_unlock(&trace_cmdline_lock);
4933         free_saved_cmdlines_buffer(savedcmd_temp);
4934
4935         return 0;
4936 }
4937
4938 static ssize_t
4939 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4940                                   size_t cnt, loff_t *ppos)
4941 {
4942         unsigned long val;
4943         int ret;
4944
4945         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4946         if (ret)
4947                 return ret;
4948
4949         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
4950         if (!val || val > PID_MAX_DEFAULT)
4951                 return -EINVAL;
4952
4953         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4954         if (ret < 0)
4955                 return ret;
4956
4957         *ppos += cnt;
4958
4959         return cnt;
4960 }
4961
4962 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4963         .open           = tracing_open_generic,
4964         .read           = tracing_saved_cmdlines_size_read,
4965         .write          = tracing_saved_cmdlines_size_write,
4966 };
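
/*
 * Rough usage sketch for the saved_cmdlines_size file backed by the ops
 * above (the initial value shown is illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The write path rejects 0 and anything above PID_MAX_DEFAULT.
 */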
4967
4968 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4969 static union trace_eval_map_item *
4970 update_eval_map(union trace_eval_map_item *ptr)
4971 {
4972         if (!ptr->map.eval_string) {
4973                 if (ptr->tail.next) {
4974                         ptr = ptr->tail.next;
4975                         /* Set ptr to the next real item (skip head) */
4976                         ptr++;
4977                 } else
4978                         return NULL;
4979         }
4980         return ptr;
4981 }
4982
4983 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4984 {
4985         union trace_eval_map_item *ptr = v;
4986
4987         /*
4988          * Paranoid! If ptr points to end, we don't want to increment past it.
4989          * This really should never happen.
4990          */
4991         ptr = update_eval_map(ptr);
4992         if (WARN_ON_ONCE(!ptr))
4993                 return NULL;
4994
4995         ptr++;
4996
4997         (*pos)++;
4998
4999         ptr = update_eval_map(ptr);
5000
5001         return ptr;
5002 }
5003
5004 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5005 {
5006         union trace_eval_map_item *v;
5007         loff_t l = 0;
5008
5009         mutex_lock(&trace_eval_mutex);
5010
5011         v = trace_eval_maps;
5012         if (v)
5013                 v++;
5014
5015         while (v && l < *pos) {
5016                 v = eval_map_next(m, v, &l);
5017         }
5018
5019         return v;
5020 }
5021
5022 static void eval_map_stop(struct seq_file *m, void *v)
5023 {
5024         mutex_unlock(&trace_eval_mutex);
5025 }
5026
5027 static int eval_map_show(struct seq_file *m, void *v)
5028 {
5029         union trace_eval_map_item *ptr = v;
5030
5031         seq_printf(m, "%s %ld (%s)\n",
5032                    ptr->map.eval_string, ptr->map.eval_value,
5033                    ptr->map.system);
5034
5035         return 0;
5036 }
5037
5038 static const struct seq_operations tracing_eval_map_seq_ops = {
5039         .start          = eval_map_start,
5040         .next           = eval_map_next,
5041         .stop           = eval_map_stop,
5042         .show           = eval_map_show,
5043 };
5044
5045 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5046 {
5047         if (tracing_disabled)
5048                 return -ENODEV;
5049
5050         return seq_open(filp, &tracing_eval_map_seq_ops);
5051 }
5052
5053 static const struct file_operations tracing_eval_map_fops = {
5054         .open           = tracing_eval_map_open,
5055         .read           = seq_read,
5056         .llseek         = seq_lseek,
5057         .release        = seq_release,
5058 };
5059
5060 static inline union trace_eval_map_item *
5061 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5062 {
5063         /* Return tail of array given the head */
5064         return ptr + ptr->head.length + 1;
5065 }
5066
5067 static void
5068 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5069                            int len)
5070 {
5071         struct trace_eval_map **stop;
5072         struct trace_eval_map **map;
5073         union trace_eval_map_item *map_array;
5074         union trace_eval_map_item *ptr;
5075
5076         stop = start + len;
5077
5078         /*
5079          * The trace_eval_maps list is built from arrays that contain the
5080          * maps plus a head and a tail item: the head holds the module and
5081          * the length of the array, and the tail points to the next array.
5082          */
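        /*
         * Illustrative layout of one allocation (len == 3 shown):
         *
         *   [ head: mod,len ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
         *
         * The memset() at the end of this function zeroes the tail item,
         * so tail.next stays NULL until another module's array is chained
         * onto it.
         */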
5083         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5084         if (!map_array) {
5085                 pr_warn("Unable to allocate trace eval mapping\n");
5086                 return;
5087         }
5088
5089         mutex_lock(&trace_eval_mutex);
5090
5091         if (!trace_eval_maps)
5092                 trace_eval_maps = map_array;
5093         else {
5094                 ptr = trace_eval_maps;
5095                 for (;;) {
5096                         ptr = trace_eval_jmp_to_tail(ptr);
5097                         if (!ptr->tail.next)
5098                                 break;
5099                         ptr = ptr->tail.next;
5100
5101                 }
5102                 ptr->tail.next = map_array;
5103         }
5104         map_array->head.mod = mod;
5105         map_array->head.length = len;
5106         map_array++;
5107
5108         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5109                 map_array->map = **map;
5110                 map_array++;
5111         }
5112         memset(map_array, 0, sizeof(*map_array));
5113
5114         mutex_unlock(&trace_eval_mutex);
5115 }
5116
5117 static void trace_create_eval_file(struct dentry *d_tracer)
5118 {
5119         trace_create_file("eval_map", 0444, d_tracer,
5120                           NULL, &tracing_eval_map_fops);
5121 }
5122
5123 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5124 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5125 static inline void trace_insert_eval_map_file(struct module *mod,
5126                               struct trace_eval_map **start, int len) { }
5127 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5128
5129 static void trace_insert_eval_map(struct module *mod,
5130                                   struct trace_eval_map **start, int len)
5131 {
5132         struct trace_eval_map **map;
5133
5134         if (len <= 0)
5135                 return;
5136
5137         map = start;
5138
5139         trace_event_eval_update(map, len);
5140
5141         trace_insert_eval_map_file(mod, start, len);
5142 }
5143
5144 static ssize_t
5145 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5146                        size_t cnt, loff_t *ppos)
5147 {
5148         struct trace_array *tr = filp->private_data;
5149         char buf[MAX_TRACER_SIZE+2];
5150         int r;
5151
5152         mutex_lock(&trace_types_lock);
5153         r = sprintf(buf, "%s\n", tr->current_trace->name);
5154         mutex_unlock(&trace_types_lock);
5155
5156         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5157 }
5158
5159 int tracer_init(struct tracer *t, struct trace_array *tr)
5160 {
5161         tracing_reset_online_cpus(&tr->trace_buffer);
5162         return t->init(tr);
5163 }
5164
5165 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5166 {
5167         int cpu;
5168
5169         for_each_tracing_cpu(cpu)
5170                 per_cpu_ptr(buf->data, cpu)->entries = val;
5171 }
5172
5173 #ifdef CONFIG_TRACER_MAX_TRACE
5174 /* resize @trace_buf's per-CPU entry counts to match @size_buf's entries */
5175 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5176                                         struct trace_buffer *size_buf, int cpu_id)
5177 {
5178         int cpu, ret = 0;
5179
5180         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5181                 for_each_tracing_cpu(cpu) {
5182                         ret = ring_buffer_resize(trace_buf->buffer,
5183                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5184                         if (ret < 0)
5185                                 break;
5186                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5187                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5188                 }
5189         } else {
5190                 ret = ring_buffer_resize(trace_buf->buffer,
5191                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5192                 if (ret == 0)
5193                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5194                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5195         }
5196
5197         return ret;
5198 }
5199 #endif /* CONFIG_TRACER_MAX_TRACE */
5200
5201 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5202                                         unsigned long size, int cpu)
5203 {
5204         int ret;
5205
5206         /*
5207          * If the kernel or user changes the size of the ring buffer,
5208          * we use the size that was given, and we can forget about
5209          * expanding it later.
5210          */
5211         ring_buffer_expanded = true;
5212
5213         /* May be called before buffers are initialized */
5214         if (!tr->trace_buffer.buffer)
5215                 return 0;
5216
5217         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5218         if (ret < 0)
5219                 return ret;
5220
5221 #ifdef CONFIG_TRACER_MAX_TRACE
5222         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5223             !tr->current_trace->use_max_tr)
5224                 goto out;
5225
5226         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5227         if (ret < 0) {
5228                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5229                                                      &tr->trace_buffer, cpu);
5230                 if (r < 0) {
5231                         /*
5232                          * AARGH! We are left with different
5233                          * size max buffer!!!!
5234                          * The max buffer is our "snapshot" buffer.
5235                          * When a tracer needs a snapshot (one of the
5236                          * latency tracers), it swaps the max buffer
5237                          * with the saved snapshot. We succeeded in updating
5238                          * the size of the main buffer, but failed to
5239                          * update the size of the max buffer. But when we tried
5240                          * to reset the main buffer to the original size, we
5241                          * failed there too. This is very unlikely to
5242                          * happen, but if it does, warn and kill all
5243                          * tracing.
5244                          */
5245                         WARN_ON(1);
5246                         tracing_disabled = 1;
5247                 }
5248                 return ret;
5249         }
5250
5251         if (cpu == RING_BUFFER_ALL_CPUS)
5252                 set_buffer_entries(&tr->max_buffer, size);
5253         else
5254                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5255
5256  out:
5257 #endif /* CONFIG_TRACER_MAX_TRACE */
5258
5259         if (cpu == RING_BUFFER_ALL_CPUS)
5260                 set_buffer_entries(&tr->trace_buffer, size);
5261         else
5262                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5263
5264         return ret;
5265 }
5266
5267 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5268                                           unsigned long size, int cpu_id)
5269 {
5270         int ret = size;
5271
5272         mutex_lock(&trace_types_lock);
5273
5274         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5275                 /* make sure this cpu is enabled in the mask */
5276                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5277                         ret = -EINVAL;
5278                         goto out;
5279                 }
5280         }
5281
5282         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5283         if (ret < 0)
5284                 ret = -ENOMEM;
5285
5286 out:
5287         mutex_unlock(&trace_types_lock);
5288
5289         return ret;
5290 }
5291
5292
5293 /**
5294  * tracing_update_buffers - used by tracing facility to expand ring buffers
5295  *
5296  * To save memory on systems where tracing is configured in but never
5297  * used, the ring buffers start out at a minimum size. Once a user
5298  * starts to use the tracing facility, they need to grow to their
5299  * default size.
5300  *
5301  * This function is to be called when a tracer is about to be used.
5302  */
5303 int tracing_update_buffers(void)
5304 {
5305         int ret = 0;
5306
5307         mutex_lock(&trace_types_lock);
5308         if (!ring_buffer_expanded)
5309                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5310                                                 RING_BUFFER_ALL_CPUS);
5311         mutex_unlock(&trace_types_lock);
5312
5313         return ret;
5314 }
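
/*
 * Minimal caller sketch: paths that are about to start generating events
 * (see e.g. tracing_snapshot_write() below) call this first so that the
 * first real use of tracing pays the cost of growing the buffers:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */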
5315
5316 struct trace_option_dentry;
5317
5318 static void
5319 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5320
5321 /*
5322  * Used to clear out the tracer before deletion of an instance.
5323  * Must have trace_types_lock held.
5324  */
5325 static void tracing_set_nop(struct trace_array *tr)
5326 {
5327         if (tr->current_trace == &nop_trace)
5328                 return;
5329
5330         tr->current_trace->enabled--;
5331
5332         if (tr->current_trace->reset)
5333                 tr->current_trace->reset(tr);
5334
5335         tr->current_trace = &nop_trace;
5336 }
5337
5338 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5339 {
5340         /* Only enable if the directory has been created already. */
5341         if (!tr->dir)
5342                 return;
5343
5344         create_trace_option_files(tr, t);
5345 }
5346
5347 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5348 {
5349         struct tracer *t;
5350 #ifdef CONFIG_TRACER_MAX_TRACE
5351         bool had_max_tr;
5352 #endif
5353         int ret = 0;
5354
5355         mutex_lock(&trace_types_lock);
5356
5357         if (!ring_buffer_expanded) {
5358                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5359                                                 RING_BUFFER_ALL_CPUS);
5360                 if (ret < 0)
5361                         goto out;
5362                 ret = 0;
5363         }
5364
5365         for (t = trace_types; t; t = t->next) {
5366                 if (strcmp(t->name, buf) == 0)
5367                         break;
5368         }
5369         if (!t) {
5370                 ret = -EINVAL;
5371                 goto out;
5372         }
5373         if (t == tr->current_trace)
5374                 goto out;
5375
5376         /* Some tracers won't work on the kernel command line */
5377         if (system_state < SYSTEM_RUNNING && t->noboot) {
5378                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5379                         t->name);
5380                 goto out;
5381         }
5382
5383         /* Some tracers are only allowed for the top level buffer */
5384         if (!trace_ok_for_array(t, tr)) {
5385                 ret = -EINVAL;
5386                 goto out;
5387         }
5388
5389         /* If trace pipe files are being read, we can't change the tracer */
5390         if (tr->current_trace->ref) {
5391                 ret = -EBUSY;
5392                 goto out;
5393         }
5394
5395         trace_branch_disable();
5396
5397         tr->current_trace->enabled--;
5398
5399         if (tr->current_trace->reset)
5400                 tr->current_trace->reset(tr);
5401
5402         /* Current trace needs to be nop_trace before synchronize_sched */
5403         tr->current_trace = &nop_trace;
5404
5405 #ifdef CONFIG_TRACER_MAX_TRACE
5406         had_max_tr = tr->allocated_snapshot;
5407
5408         if (had_max_tr && !t->use_max_tr) {
5409                 /*
5410                  * We need to make sure that the update_max_tr sees that
5411                  * current_trace changed to nop_trace to keep it from
5412                  * swapping the buffers after we resize it.
5413                  * The update_max_tr is called with interrupts disabled,
5414                  * so a synchronize_sched() is sufficient.
5415                  */
5416                 synchronize_sched();
5417                 free_snapshot(tr);
5418         }
5419 #endif
5420
5421 #ifdef CONFIG_TRACER_MAX_TRACE
5422         if (t->use_max_tr && !had_max_tr) {
5423                 ret = tracing_alloc_snapshot_instance(tr);
5424                 if (ret < 0)
5425                         goto out;
5426         }
5427 #endif
5428
5429         if (t->init) {
5430                 ret = tracer_init(t, tr);
5431                 if (ret)
5432                         goto out;
5433         }
5434
5435         tr->current_trace = t;
5436         tr->current_trace->enabled++;
5437         trace_branch_enable(tr);
5438  out:
5439         mutex_unlock(&trace_types_lock);
5440
5441         return ret;
5442 }
5443
5444 static ssize_t
5445 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5446                         size_t cnt, loff_t *ppos)
5447 {
5448         struct trace_array *tr = filp->private_data;
5449         char buf[MAX_TRACER_SIZE+1];
5450         int i;
5451         size_t ret;
5452         int err;
5453
5454         ret = cnt;
5455
5456         if (cnt > MAX_TRACER_SIZE)
5457                 cnt = MAX_TRACER_SIZE;
5458
5459         if (copy_from_user(buf, ubuf, cnt))
5460                 return -EFAULT;
5461
5462         buf[cnt] = 0;
5463
5464         /* strip trailing whitespace. */
5465         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5466                 buf[i] = 0;
5467
5468         err = tracing_set_tracer(tr, buf);
5469         if (err)
5470                 return err;
5471
5472         *ppos += ret;
5473
5474         return ret;
5475 }
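
/*
 * From userspace this write handler is reached through the current_tracer
 * file; a minimal sketch (the set of tracers depends on the kernel
 * configuration, "nop" is always present):
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 *   # echo function > /sys/kernel/tracing/current_tracer
 */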
5476
5477 static ssize_t
5478 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5479                    size_t cnt, loff_t *ppos)
5480 {
5481         char buf[64];
5482         int r;
5483
5484         r = snprintf(buf, sizeof(buf), "%ld\n",
5485                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5486         if (r > sizeof(buf))
5487                 r = sizeof(buf);
5488         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5489 }
5490
5491 static ssize_t
5492 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5493                     size_t cnt, loff_t *ppos)
5494 {
5495         unsigned long val;
5496         int ret;
5497
5498         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5499         if (ret)
5500                 return ret;
5501
5502         *ptr = val * 1000;
5503
5504         return cnt;
5505 }
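
/*
 * Note on units for the helpers above: the files they back (e.g.
 * tracing_thresh, tracing_max_latency) are read and written in
 * microseconds, while the stored value is in nanoseconds; hence the
 * "*ptr = val * 1000" on write and nsecs_to_usecs() on read. For
 * example, "echo 100 > tracing_thresh" stores 100000 ns.
 */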
5506
5507 static ssize_t
5508 tracing_thresh_read(struct file *filp, char __user *ubuf,
5509                     size_t cnt, loff_t *ppos)
5510 {
5511         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5512 }
5513
5514 static ssize_t
5515 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5516                      size_t cnt, loff_t *ppos)
5517 {
5518         struct trace_array *tr = filp->private_data;
5519         int ret;
5520
5521         mutex_lock(&trace_types_lock);
5522         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5523         if (ret < 0)
5524                 goto out;
5525
5526         if (tr->current_trace->update_thresh) {
5527                 ret = tr->current_trace->update_thresh(tr);
5528                 if (ret < 0)
5529                         goto out;
5530         }
5531
5532         ret = cnt;
5533 out:
5534         mutex_unlock(&trace_types_lock);
5535
5536         return ret;
5537 }
5538
5539 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5540
5541 static ssize_t
5542 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5543                      size_t cnt, loff_t *ppos)
5544 {
5545         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5546 }
5547
5548 static ssize_t
5549 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5550                       size_t cnt, loff_t *ppos)
5551 {
5552         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5553 }
5554
5555 #endif
5556
5557 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5558 {
5559         struct trace_array *tr = inode->i_private;
5560         struct trace_iterator *iter;
5561         int ret = 0;
5562
5563         if (tracing_disabled)
5564                 return -ENODEV;
5565
5566         if (trace_array_get(tr) < 0)
5567                 return -ENODEV;
5568
5569         mutex_lock(&trace_types_lock);
5570
5571         /* create a buffer to store the information to pass to userspace */
5572         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5573         if (!iter) {
5574                 ret = -ENOMEM;
5575                 __trace_array_put(tr);
5576                 goto out;
5577         }
5578
5579         trace_seq_init(&iter->seq);
5580         iter->trace = tr->current_trace;
5581
5582         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5583                 ret = -ENOMEM;
5584                 goto fail;
5585         }
5586
5587         /* trace pipe does not show start of buffer */
5588         cpumask_setall(iter->started);
5589
5590         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5591                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5592
5593         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5594         if (trace_clocks[tr->clock_id].in_ns)
5595                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5596
5597         iter->tr = tr;
5598         iter->trace_buffer = &tr->trace_buffer;
5599         iter->cpu_file = tracing_get_cpu(inode);
5600         mutex_init(&iter->mutex);
5601         filp->private_data = iter;
5602
5603         if (iter->trace->pipe_open)
5604                 iter->trace->pipe_open(iter);
5605
5606         nonseekable_open(inode, filp);
5607
5608         tr->current_trace->ref++;
5609 out:
5610         mutex_unlock(&trace_types_lock);
5611         return ret;
5612
5613 fail:
5614         kfree(iter);
5615         __trace_array_put(tr);
5616         mutex_unlock(&trace_types_lock);
5617         return ret;
5618 }
5619
5620 static int tracing_release_pipe(struct inode *inode, struct file *file)
5621 {
5622         struct trace_iterator *iter = file->private_data;
5623         struct trace_array *tr = inode->i_private;
5624
5625         mutex_lock(&trace_types_lock);
5626
5627         tr->current_trace->ref--;
5628
5629         if (iter->trace->pipe_close)
5630                 iter->trace->pipe_close(iter);
5631
5632         mutex_unlock(&trace_types_lock);
5633
5634         free_cpumask_var(iter->started);
5635         mutex_destroy(&iter->mutex);
5636         kfree(iter);
5637
5638         trace_array_put(tr);
5639
5640         return 0;
5641 }
5642
5643 static __poll_t
5644 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5645 {
5646         struct trace_array *tr = iter->tr;
5647
5648         /* Iterators are static; they should be either filled or empty */
5649         if (trace_buffer_iter(iter, iter->cpu_file))
5650                 return EPOLLIN | EPOLLRDNORM;
5651
5652         if (tr->trace_flags & TRACE_ITER_BLOCK)
5653                 /*
5654                  * Always select as readable when in blocking mode
5655                  */
5656                 return EPOLLIN | EPOLLRDNORM;
5657         else
5658                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5659                                              filp, poll_table);
5660 }
5661
5662 static __poll_t
5663 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5664 {
5665         struct trace_iterator *iter = filp->private_data;
5666
5667         return trace_poll(iter, filp, poll_table);
5668 }
5669
5670 /* Must be called with iter->mutex held. */
5671 static int tracing_wait_pipe(struct file *filp)
5672 {
5673         struct trace_iterator *iter = filp->private_data;
5674         int ret;
5675
5676         while (trace_empty(iter)) {
5677
5678                 if ((filp->f_flags & O_NONBLOCK)) {
5679                         return -EAGAIN;
5680                 }
5681
5682                 /*
5683                  * We block until we have read something and tracing has been
5684                  * disabled. We keep blocking if tracing is disabled but we
5685                  * have never read anything. This allows a user to cat this
5686                  * file and then enable tracing. But after we have read
5687                  * something, we give an EOF when tracing is again disabled.
5688                  *
5689                  * iter->pos will be 0 if we haven't read anything.
5690                  */
5691                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5692                         break;
5693
5694                 mutex_unlock(&iter->mutex);
5695
5696                 ret = wait_on_pipe(iter, false);
5697
5698                 mutex_lock(&iter->mutex);
5699
5700                 if (ret)
5701                         return ret;
5702         }
5703
5704         return 1;
5705 }
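
/*
 * These blocking semantics are what make "cat trace_pipe" usable as a
 * live consumer; a typical session (sketch):
 *
 *   # echo 1 > /sys/kernel/tracing/tracing_on
 *   # cat /sys/kernel/tracing/trace_pipe     (blocks, streams new events)
 */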
5706
5707 /*
5708  * Consumer reader.
5709  */
5710 static ssize_t
5711 tracing_read_pipe(struct file *filp, char __user *ubuf,
5712                   size_t cnt, loff_t *ppos)
5713 {
5714         struct trace_iterator *iter = filp->private_data;
5715         ssize_t sret;
5716
5717         /*
5718          * Avoid more than one consumer on a single file descriptor.
5719          * This is just a matter of trace coherency; the ring buffer
5720          * itself is protected.
5721          */
5722         mutex_lock(&iter->mutex);
5723
5724         /* return any leftover data */
5725         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5726         if (sret != -EBUSY)
5727                 goto out;
5728
5729         trace_seq_init(&iter->seq);
5730
5731         if (iter->trace->read) {
5732                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5733                 if (sret)
5734                         goto out;
5735         }
5736
5737 waitagain:
5738         sret = tracing_wait_pipe(filp);
5739         if (sret <= 0)
5740                 goto out;
5741
5742         /* stop when tracing is finished */
5743         if (trace_empty(iter)) {
5744                 sret = 0;
5745                 goto out;
5746         }
5747
5748         if (cnt >= PAGE_SIZE)
5749                 cnt = PAGE_SIZE - 1;
5750
5751         /* reset all but tr, trace, and overruns */
5752         memset(&iter->seq, 0,
5753                sizeof(struct trace_iterator) -
5754                offsetof(struct trace_iterator, seq));
5755         cpumask_clear(iter->started);
5756         iter->pos = -1;
5757
5758         trace_event_read_lock();
5759         trace_access_lock(iter->cpu_file);
5760         while (trace_find_next_entry_inc(iter) != NULL) {
5761                 enum print_line_t ret;
5762                 int save_len = iter->seq.seq.len;
5763
5764                 ret = print_trace_line(iter);
5765                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5766                         /* don't print partial lines */
5767                         iter->seq.seq.len = save_len;
5768                         break;
5769                 }
5770                 if (ret != TRACE_TYPE_NO_CONSUME)
5771                         trace_consume(iter);
5772
5773                 if (trace_seq_used(&iter->seq) >= cnt)
5774                         break;
5775
5776                 /*
5777                  * Setting the full flag means we reached the trace_seq buffer
5778                  * size and we should have left via the partial output check above.
5779                  * One of the trace_seq_* functions is not used properly.
5780                  */
5781                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5782                           iter->ent->type);
5783         }
5784         trace_access_unlock(iter->cpu_file);
5785         trace_event_read_unlock();
5786
5787         /* Now copy what we have to the user */
5788         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5789         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5790                 trace_seq_init(&iter->seq);
5791
5792         /*
5793          * If there was nothing to send to user, in spite of consuming trace
5794          * entries, go back to wait for more entries.
5795          */
5796         if (sret == -EBUSY)
5797                 goto waitagain;
5798
5799 out:
5800         mutex_unlock(&iter->mutex);
5801
5802         return sret;
5803 }
5804
5805 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5806                                      unsigned int idx)
5807 {
5808         __free_page(spd->pages[idx]);
5809 }
5810
5811 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5812         .can_merge              = 0,
5813         .confirm                = generic_pipe_buf_confirm,
5814         .release                = generic_pipe_buf_release,
5815         .steal                  = generic_pipe_buf_steal,
5816         .get                    = generic_pipe_buf_get,
5817 };
5818
5819 static size_t
5820 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5821 {
5822         size_t count;
5823         int save_len;
5824         int ret;
5825
5826         /* Seq buffer is page-sized, exactly what we need. */
5827         for (;;) {
5828                 save_len = iter->seq.seq.len;
5829                 ret = print_trace_line(iter);
5830
5831                 if (trace_seq_has_overflowed(&iter->seq)) {
5832                         iter->seq.seq.len = save_len;
5833                         break;
5834                 }
5835
5836                 /*
5837                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5838                  * should only be returned if iter->seq overflowed. But
5839                  * check it anyway to be safe.
5840                  */
5841                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5842                         iter->seq.seq.len = save_len;
5843                         break;
5844                 }
5845
5846                 count = trace_seq_used(&iter->seq) - save_len;
5847                 if (rem < count) {
5848                         rem = 0;
5849                         iter->seq.seq.len = save_len;
5850                         break;
5851                 }
5852
5853                 if (ret != TRACE_TYPE_NO_CONSUME)
5854                         trace_consume(iter);
5855                 rem -= count;
5856                 if (!trace_find_next_entry_inc(iter))   {
5857                         rem = 0;
5858                         iter->ent = NULL;
5859                         break;
5860                 }
5861         }
5862
5863         return rem;
5864 }
5865
5866 static ssize_t tracing_splice_read_pipe(struct file *filp,
5867                                         loff_t *ppos,
5868                                         struct pipe_inode_info *pipe,
5869                                         size_t len,
5870                                         unsigned int flags)
5871 {
5872         struct page *pages_def[PIPE_DEF_BUFFERS];
5873         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5874         struct trace_iterator *iter = filp->private_data;
5875         struct splice_pipe_desc spd = {
5876                 .pages          = pages_def,
5877                 .partial        = partial_def,
5878                 .nr_pages       = 0, /* This gets updated below. */
5879                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5880                 .ops            = &tracing_pipe_buf_ops,
5881                 .spd_release    = tracing_spd_release_pipe,
5882         };
5883         ssize_t ret;
5884         size_t rem;
5885         unsigned int i;
5886
5887         if (splice_grow_spd(pipe, &spd))
5888                 return -ENOMEM;
5889
5890         mutex_lock(&iter->mutex);
5891
5892         if (iter->trace->splice_read) {
5893                 ret = iter->trace->splice_read(iter, filp,
5894                                                ppos, pipe, len, flags);
5895                 if (ret)
5896                         goto out_err;
5897         }
5898
5899         ret = tracing_wait_pipe(filp);
5900         if (ret <= 0)
5901                 goto out_err;
5902
5903         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5904                 ret = -EFAULT;
5905                 goto out_err;
5906         }
5907
5908         trace_event_read_lock();
5909         trace_access_lock(iter->cpu_file);
5910
5911         /* Fill as many pages as possible. */
5912         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5913                 spd.pages[i] = alloc_page(GFP_KERNEL);
5914                 if (!spd.pages[i])
5915                         break;
5916
5917                 rem = tracing_fill_pipe_page(rem, iter);
5918
5919                 /* Copy the data into the page, so we can start over. */
5920                 ret = trace_seq_to_buffer(&iter->seq,
5921                                           page_address(spd.pages[i]),
5922                                           trace_seq_used(&iter->seq));
5923                 if (ret < 0) {
5924                         __free_page(spd.pages[i]);
5925                         break;
5926                 }
5927                 spd.partial[i].offset = 0;
5928                 spd.partial[i].len = trace_seq_used(&iter->seq);
5929
5930                 trace_seq_init(&iter->seq);
5931         }
5932
5933         trace_access_unlock(iter->cpu_file);
5934         trace_event_read_unlock();
5935         mutex_unlock(&iter->mutex);
5936
5937         spd.nr_pages = i;
5938
5939         if (i)
5940                 ret = splice_to_pipe(pipe, &spd);
5941         else
5942                 ret = 0;
5943 out:
5944         splice_shrink_spd(&spd);
5945         return ret;
5946
5947 out_err:
5948         mutex_unlock(&iter->mutex);
5949         goto out;
5950 }
5951
5952 static ssize_t
5953 tracing_entries_read(struct file *filp, char __user *ubuf,
5954                      size_t cnt, loff_t *ppos)
5955 {
5956         struct inode *inode = file_inode(filp);
5957         struct trace_array *tr = inode->i_private;
5958         int cpu = tracing_get_cpu(inode);
5959         char buf[64];
5960         int r = 0;
5961         ssize_t ret;
5962
5963         mutex_lock(&trace_types_lock);
5964
5965         if (cpu == RING_BUFFER_ALL_CPUS) {
5966                 int cpu, buf_size_same;
5967                 unsigned long size;
5968
5969                 size = 0;
5970                 buf_size_same = 1;
5971                 /* check if all cpu sizes are the same */
5972                 for_each_tracing_cpu(cpu) {
5973                         /* fill in the size from first enabled cpu */
5974                         if (size == 0)
5975                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5976                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5977                                 buf_size_same = 0;
5978                                 break;
5979                         }
5980                 }
5981
5982                 if (buf_size_same) {
5983                         if (!ring_buffer_expanded)
5984                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5985                                             size >> 10,
5986                                             trace_buf_size >> 10);
5987                         else
5988                                 r = sprintf(buf, "%lu\n", size >> 10);
5989                 } else
5990                         r = sprintf(buf, "X\n");
5991         } else
5992                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5993
5994         mutex_unlock(&trace_types_lock);
5995
5996         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5997         return ret;
5998 }
5999
6000 static ssize_t
6001 tracing_entries_write(struct file *filp, const char __user *ubuf,
6002                       size_t cnt, loff_t *ppos)
6003 {
6004         struct inode *inode = file_inode(filp);
6005         struct trace_array *tr = inode->i_private;
6006         unsigned long val;
6007         int ret;
6008
6009         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6010         if (ret)
6011                 return ret;
6012
6013         /* must have at least 1 entry */
6014         if (!val)
6015                 return -EINVAL;
6016
6017         /* value is in KB */
6018         val <<= 10;
6019         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6020         if (ret < 0)
6021                 return ret;
6022
6023         *ppos += cnt;
6024
6025         return cnt;
6026 }
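
/*
 * These read/write handlers back the buffer_size_kb files; the value is
 * per-CPU and in KiB. Sketch (numbers illustrative):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *   4096
 */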
6027
6028 static ssize_t
6029 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6030                                 size_t cnt, loff_t *ppos)
6031 {
6032         struct trace_array *tr = filp->private_data;
6033         char buf[64];
6034         int r, cpu;
6035         unsigned long size = 0, expanded_size = 0;
6036
6037         mutex_lock(&trace_types_lock);
6038         for_each_tracing_cpu(cpu) {
6039                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6040                 if (!ring_buffer_expanded)
6041                         expanded_size += trace_buf_size >> 10;
6042         }
6043         if (ring_buffer_expanded)
6044                 r = sprintf(buf, "%lu\n", size);
6045         else
6046                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6047         mutex_unlock(&trace_types_lock);
6048
6049         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6050 }
6051
6052 static ssize_t
6053 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6054                           size_t cnt, loff_t *ppos)
6055 {
6056         /*
6057          * There is no need to read what the user has written; this function
6058          * exists just so that "echo" into the file does not return an error.
6059          */
6060
6061         *ppos += cnt;
6062
6063         return cnt;
6064 }
6065
6066 static int
6067 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6068 {
6069         struct trace_array *tr = inode->i_private;
6070
6071         /* disable tracing? */
6072         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6073                 tracer_tracing_off(tr);
6074         /* resize the ring buffer to 0 */
6075         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6076
6077         trace_array_put(tr);
6078
6079         return 0;
6080 }
6081
6082 static ssize_t
6083 tracing_mark_write(struct file *filp, const char __user *ubuf,
6084                                         size_t cnt, loff_t *fpos)
6085 {
6086         struct trace_array *tr = filp->private_data;
6087         struct ring_buffer_event *event;
6088         enum event_trigger_type tt = ETT_NONE;
6089         struct ring_buffer *buffer;
6090         struct print_entry *entry;
6091         unsigned long irq_flags;
6092         const char faulted[] = "<faulted>";
6093         ssize_t written;
6094         int size;
6095         int len;
6096
6097 /* Used in tracing_mark_raw_write() as well */
6098 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6099
6100         if (tracing_disabled)
6101                 return -EINVAL;
6102
6103         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6104                 return -EINVAL;
6105
6106         if (cnt > TRACE_BUF_SIZE)
6107                 cnt = TRACE_BUF_SIZE;
6108
6109         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6110
6111         local_save_flags(irq_flags);
6112         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6113
6114         /* If less than "<faulted>", then make sure we can still add that */
6115         if (cnt < FAULTED_SIZE)
6116                 size += FAULTED_SIZE - cnt;
6117
6118         buffer = tr->trace_buffer.buffer;
6119         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6120                                             irq_flags, preempt_count());
6121         if (unlikely(!event))
6122                 /* Ring buffer disabled, return as if not open for write */
6123                 return -EBADF;
6124
6125         entry = ring_buffer_event_data(event);
6126         entry->ip = _THIS_IP_;
6127
6128         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6129         if (len) {
6130                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6131                 cnt = FAULTED_SIZE;
6132                 written = -EFAULT;
6133         } else
6134                 written = cnt;
6135         len = cnt;
6136
6137         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6138                 /* do not add \n before testing triggers, but add \0 */
6139                 entry->buf[cnt] = '\0';
6140                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6141         }
6142
6143         if (entry->buf[cnt - 1] != '\n') {
6144                 entry->buf[cnt] = '\n';
6145                 entry->buf[cnt + 1] = '\0';
6146         } else
6147                 entry->buf[cnt] = '\0';
6148
6149         __buffer_unlock_commit(buffer, event);
6150
6151         if (tt)
6152                 event_triggers_post_call(tr->trace_marker_file, tt);
6153
6154         if (written > 0)
6155                 *fpos += written;
6156
6157         return written;
6158 }
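
/*
 * Userspace view of the handler above, as a sketch: any write to the
 * trace_marker file becomes a print entry in the ring buffer, e.g.
 *
 *   # echo "hello from userspace" > /sys/kernel/tracing/trace_marker
 *
 * which then appears in the trace output as a tracing_mark_write: line.
 */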
6159
6160 /* Limit it for now to 3K (including tag) */
6161 #define RAW_DATA_MAX_SIZE (1024*3)
6162
6163 static ssize_t
6164 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6165                                         size_t cnt, loff_t *fpos)
6166 {
6167         struct trace_array *tr = filp->private_data;
6168         struct ring_buffer_event *event;
6169         struct ring_buffer *buffer;
6170         struct raw_data_entry *entry;
6171         const char faulted[] = "<faulted>";
6172         unsigned long irq_flags;
6173         ssize_t written;
6174         int size;
6175         int len;
6176
6177 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6178
6179         if (tracing_disabled)
6180                 return -EINVAL;
6181
6182         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6183                 return -EINVAL;
6184
6185         /* The marker must at least have a tag id */
6186         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6187                 return -EINVAL;
6188
6189         if (cnt > TRACE_BUF_SIZE)
6190                 cnt = TRACE_BUF_SIZE;
6191
6192         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6193
6194         local_save_flags(irq_flags);
6195         size = sizeof(*entry) + cnt;
6196         if (cnt < FAULT_SIZE_ID)
6197                 size += FAULT_SIZE_ID - cnt;
6198
6199         buffer = tr->trace_buffer.buffer;
6200         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6201                                             irq_flags, preempt_count());
6202         if (!event)
6203                 /* Ring buffer disabled, return as if not open for write */
6204                 return -EBADF;
6205
6206         entry = ring_buffer_event_data(event);
6207
6208         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6209         if (len) {
6210                 entry->id = -1;
6211                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6212                 written = -EFAULT;
6213         } else
6214                 written = cnt;
6215
6216         __buffer_unlock_commit(buffer, event);
6217
6218         if (written > 0)
6219                 *fpos += written;
6220
6221         return written;
6222 }
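
/*
 * Unlike trace_marker, trace_marker_raw (handled above) expects binary
 * data whose first sizeof(int) bytes are an application-chosen tag id,
 * followed by the payload; it is intended for tools that post-process
 * the raw buffer rather than for shell use.
 */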
6223
6224 static int tracing_clock_show(struct seq_file *m, void *v)
6225 {
6226         struct trace_array *tr = m->private;
6227         int i;
6228
6229         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6230                 seq_printf(m,
6231                         "%s%s%s%s", i ? " " : "",
6232                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6233                         i == tr->clock_id ? "]" : "");
6234         seq_putc(m, '\n');
6235
6236         return 0;
6237 }
6238
6239 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6240 {
6241         int i;
6242
6243         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6244                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6245                         break;
6246         }
6247         if (i == ARRAY_SIZE(trace_clocks))
6248                 return -EINVAL;
6249
6250         mutex_lock(&trace_types_lock);
6251
6252         tr->clock_id = i;
6253
6254         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6255
6256         /*
6257          * New clock may not be consistent with the previous clock.
6258          * Reset the buffer so that it doesn't have incomparable timestamps.
6259          */
6260         tracing_reset_online_cpus(&tr->trace_buffer);
6261
6262 #ifdef CONFIG_TRACER_MAX_TRACE
6263         if (tr->max_buffer.buffer)
6264                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6265         tracing_reset_online_cpus(&tr->max_buffer);
6266 #endif
6267
6268         mutex_unlock(&trace_types_lock);
6269
6270         return 0;
6271 }
6272
6273 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6274                                    size_t cnt, loff_t *fpos)
6275 {
6276         struct seq_file *m = filp->private_data;
6277         struct trace_array *tr = m->private;
6278         char buf[64];
6279         const char *clockstr;
6280         int ret;
6281
6282         if (cnt >= sizeof(buf))
6283                 return -EINVAL;
6284
6285         if (copy_from_user(buf, ubuf, cnt))
6286                 return -EFAULT;
6287
6288         buf[cnt] = 0;
6289
6290         clockstr = strstrip(buf);
6291
6292         ret = tracing_set_clock(tr, clockstr);
6293         if (ret)
6294                 return ret;
6295
6296         *fpos += cnt;
6297
6298         return cnt;
6299 }
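
/*
 * The trace_clock file is driven by the show/write pair above: reading
 * lists the available clocks with the active one in brackets, and writing
 * a name switches to it (and resets the buffer, as noted in
 * tracing_set_clock()). Sketch; the exact clock list depends on the
 * kernel:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo mono > /sys/kernel/tracing/trace_clock
 */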
6300
6301 static int tracing_clock_open(struct inode *inode, struct file *file)
6302 {
6303         struct trace_array *tr = inode->i_private;
6304         int ret;
6305
6306         if (tracing_disabled)
6307                 return -ENODEV;
6308
6309         if (trace_array_get(tr))
6310                 return -ENODEV;
6311
6312         ret = single_open(file, tracing_clock_show, inode->i_private);
6313         if (ret < 0)
6314                 trace_array_put(tr);
6315
6316         return ret;
6317 }
6318
6319 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6320 {
6321         struct trace_array *tr = m->private;
6322
6323         mutex_lock(&trace_types_lock);
6324
6325         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6326                 seq_puts(m, "delta [absolute]\n");
6327         else
6328                 seq_puts(m, "[delta] absolute\n");
6329
6330         mutex_unlock(&trace_types_lock);
6331
6332         return 0;
6333 }
6334
6335 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6336 {
6337         struct trace_array *tr = inode->i_private;
6338         int ret;
6339
6340         if (tracing_disabled)
6341                 return -ENODEV;
6342
6343         if (trace_array_get(tr))
6344                 return -ENODEV;
6345
6346         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6347         if (ret < 0)
6348                 trace_array_put(tr);
6349
6350         return ret;
6351 }
6352
6353 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6354 {
6355         int ret = 0;
6356
6357         mutex_lock(&trace_types_lock);
6358
6359         if (abs && tr->time_stamp_abs_ref++)
6360                 goto out;
6361
6362         if (!abs) {
6363                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6364                         ret = -EINVAL;
6365                         goto out;
6366                 }
6367
6368                 if (--tr->time_stamp_abs_ref)
6369                         goto out;
6370         }
6371
6372         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6373
6374 #ifdef CONFIG_TRACER_MAX_TRACE
6375         if (tr->max_buffer.buffer)
6376                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6377 #endif
6378  out:
6379         mutex_unlock(&trace_types_lock);
6380
6381         return ret;
6382 }
6383
6384 struct ftrace_buffer_info {
6385         struct trace_iterator   iter;
6386         void                    *spare;
6387         unsigned int            spare_cpu;
6388         unsigned int            read;
6389 };
6390
6391 #ifdef CONFIG_TRACER_SNAPSHOT
6392 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6393 {
6394         struct trace_array *tr = inode->i_private;
6395         struct trace_iterator *iter;
6396         struct seq_file *m;
6397         int ret = 0;
6398
6399         if (trace_array_get(tr) < 0)
6400                 return -ENODEV;
6401
6402         if (file->f_mode & FMODE_READ) {
6403                 iter = __tracing_open(inode, file, true);
6404                 if (IS_ERR(iter))
6405                         ret = PTR_ERR(iter);
6406         } else {
6407                 /* Writes still need the seq_file to hold the private data */
6408                 ret = -ENOMEM;
6409                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6410                 if (!m)
6411                         goto out;
6412                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6413                 if (!iter) {
6414                         kfree(m);
6415                         goto out;
6416                 }
6417                 ret = 0;
6418
6419                 iter->tr = tr;
6420                 iter->trace_buffer = &tr->max_buffer;
6421                 iter->cpu_file = tracing_get_cpu(inode);
6422                 m->private = iter;
6423                 file->private_data = m;
6424         }
6425 out:
6426         if (ret < 0)
6427                 trace_array_put(tr);
6428
6429         return ret;
6430 }
6431
6432 static ssize_t
6433 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6434                        loff_t *ppos)
6435 {
6436         struct seq_file *m = filp->private_data;
6437         struct trace_iterator *iter = m->private;
6438         struct trace_array *tr = iter->tr;
6439         unsigned long val;
6440         int ret;
6441
6442         ret = tracing_update_buffers();
6443         if (ret < 0)
6444                 return ret;
6445
6446         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6447         if (ret)
6448                 return ret;
6449
6450         mutex_lock(&trace_types_lock);
6451
6452         if (tr->current_trace->use_max_tr) {
6453                 ret = -EBUSY;
6454                 goto out;
6455         }
6456
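        /*
         * Writes follow the documented snapshot-file semantics: 0 frees
         * the snapshot buffer, 1 allocates it (if needed) and takes a
         * snapshot, and any other value clears the snapshot buffer's
         * contents without freeing it.
         */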
6457         switch (val) {
6458         case 0:
6459                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6460                         ret = -EINVAL;
6461                         break;
6462                 }
6463                 if (tr->allocated_snapshot)
6464                         free_snapshot(tr);
6465                 break;
6466         case 1:
6467 /* Only allow per-cpu swap if the ring buffer supports it */
6468 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6469                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6470                         ret = -EINVAL;
6471                         break;
6472                 }
6473 #endif
6474                 if (!tr->allocated_snapshot) {
6475                         ret = tracing_alloc_snapshot_instance(tr);
6476                         if (ret < 0)
6477                                 break;
6478                 }
6479                 local_irq_disable();
6480                 /* Now, we're going to swap */
6481                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6482                         update_max_tr(tr, current, smp_processor_id());
6483                 else
6484                         update_max_tr_single(tr, current, iter->cpu_file);
6485                 local_irq_enable();
6486                 break;
6487         default:
6488                 if (tr->allocated_snapshot) {
6489                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6490                                 tracing_reset_online_cpus(&tr->max_buffer);
6491                         else
6492                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6493                 }
6494                 break;
6495         }
6496
6497         if (ret >= 0) {
6498                 *ppos += cnt;
6499                 ret = cnt;
6500         }
6501 out:
6502         mutex_unlock(&trace_types_lock);
6503         return ret;
6504 }
6505
6506 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6507 {
6508         struct seq_file *m = file->private_data;
6509         int ret;
6510
6511         ret = tracing_release(inode, file);
6512
6513         if (file->f_mode & FMODE_READ)
6514                 return ret;
6515
6516         /* If write only, the seq_file is just a stub */
6517         if (m)
6518                 kfree(m->private);
6519         kfree(m);
6520
6521         return 0;
6522 }
6523
6524 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6525 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6526                                     size_t count, loff_t *ppos);
6527 static int tracing_buffers_release(struct inode *inode, struct file *file);
6528 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6529                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6530
6531 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6532 {
6533         struct ftrace_buffer_info *info;
6534         int ret;
6535
6536         ret = tracing_buffers_open(inode, filp);
6537         if (ret < 0)
6538                 return ret;
6539
6540         info = filp->private_data;
6541
6542         if (info->iter.trace->use_max_tr) {
6543                 tracing_buffers_release(inode, filp);
6544                 return -EBUSY;
6545         }
6546
6547         info->iter.snapshot = true;
6548         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6549
6550         return ret;
6551 }
6552
6553 #endif /* CONFIG_TRACER_SNAPSHOT */
6554
6555
6556 static const struct file_operations tracing_thresh_fops = {
6557         .open           = tracing_open_generic,
6558         .read           = tracing_thresh_read,
6559         .write          = tracing_thresh_write,
6560         .llseek         = generic_file_llseek,
6561 };
6562
6563 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6564 static const struct file_operations tracing_max_lat_fops = {
6565         .open           = tracing_open_generic,
6566         .read           = tracing_max_lat_read,
6567         .write          = tracing_max_lat_write,
6568         .llseek         = generic_file_llseek,
6569 };
6570 #endif
6571
6572 static const struct file_operations set_tracer_fops = {
6573         .open           = tracing_open_generic,
6574         .read           = tracing_set_trace_read,
6575         .write          = tracing_set_trace_write,
6576         .llseek         = generic_file_llseek,
6577 };
6578
6579 static const struct file_operations tracing_pipe_fops = {
6580         .open           = tracing_open_pipe,
6581         .poll           = tracing_poll_pipe,
6582         .read           = tracing_read_pipe,
6583         .splice_read    = tracing_splice_read_pipe,
6584         .release        = tracing_release_pipe,
6585         .llseek         = no_llseek,
6586 };
6587
6588 static const struct file_operations tracing_entries_fops = {
6589         .open           = tracing_open_generic_tr,
6590         .read           = tracing_entries_read,
6591         .write          = tracing_entries_write,
6592         .llseek         = generic_file_llseek,
6593         .release        = tracing_release_generic_tr,
6594 };
6595
6596 static const struct file_operations tracing_total_entries_fops = {
6597         .open           = tracing_open_generic_tr,
6598         .read           = tracing_total_entries_read,
6599         .llseek         = generic_file_llseek,
6600         .release        = tracing_release_generic_tr,
6601 };
6602
6603 static const struct file_operations tracing_free_buffer_fops = {
6604         .open           = tracing_open_generic_tr,
6605         .write          = tracing_free_buffer_write,
6606         .release        = tracing_free_buffer_release,
6607 };
6608
6609 static const struct file_operations tracing_mark_fops = {
6610         .open           = tracing_open_generic_tr,
6611         .write          = tracing_mark_write,
6612         .llseek         = generic_file_llseek,
6613         .release        = tracing_release_generic_tr,
6614 };
6615
6616 static const struct file_operations tracing_mark_raw_fops = {
6617         .open           = tracing_open_generic_tr,
6618         .write          = tracing_mark_raw_write,
6619         .llseek         = generic_file_llseek,
6620         .release        = tracing_release_generic_tr,
6621 };
6622
6623 static const struct file_operations trace_clock_fops = {
6624         .open           = tracing_clock_open,
6625         .read           = seq_read,
6626         .llseek         = seq_lseek,
6627         .release        = tracing_single_release_tr,
6628         .write          = tracing_clock_write,
6629 };
6630
6631 static const struct file_operations trace_time_stamp_mode_fops = {
6632         .open           = tracing_time_stamp_mode_open,
6633         .read           = seq_read,
6634         .llseek         = seq_lseek,
6635         .release        = tracing_single_release_tr,
6636 };
6637
6638 #ifdef CONFIG_TRACER_SNAPSHOT
6639 static const struct file_operations snapshot_fops = {
6640         .open           = tracing_snapshot_open,
6641         .read           = seq_read,
6642         .write          = tracing_snapshot_write,
6643         .llseek         = tracing_lseek,
6644         .release        = tracing_snapshot_release,
6645 };
6646
6647 static const struct file_operations snapshot_raw_fops = {
6648         .open           = snapshot_raw_open,
6649         .read           = tracing_buffers_read,
6650         .release        = tracing_buffers_release,
6651         .splice_read    = tracing_buffers_splice_read,
6652         .llseek         = no_llseek,
6653 };
6654
6655 #endif /* CONFIG_TRACER_SNAPSHOT */
6656
6657 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6658 {
6659         struct trace_array *tr = inode->i_private;
6660         struct ftrace_buffer_info *info;
6661         int ret;
6662
6663         if (tracing_disabled)
6664                 return -ENODEV;
6665
6666         if (trace_array_get(tr) < 0)
6667                 return -ENODEV;
6668
6669         info = kzalloc(sizeof(*info), GFP_KERNEL);
6670         if (!info) {
6671                 trace_array_put(tr);
6672                 return -ENOMEM;
6673         }
6674
6675         mutex_lock(&trace_types_lock);
6676
6677         info->iter.tr           = tr;
6678         info->iter.cpu_file     = tracing_get_cpu(inode);
6679         info->iter.trace        = tr->current_trace;
6680         info->iter.trace_buffer = &tr->trace_buffer;
6681         info->spare             = NULL;
6682         /* Force reading the ring buffer on the first read: read >= PAGE_SIZE makes tracing_buffers_read() skip its leftover-data path */
6683         info->read              = (unsigned int)-1;
6684
6685         filp->private_data = info;
6686
6687         tr->current_trace->ref++;
6688
6689         mutex_unlock(&trace_types_lock);
6690
6691         ret = nonseekable_open(inode, filp);
6692         if (ret < 0)
6693                 trace_array_put(tr);
6694
6695         return ret;
6696 }
6697
6698 static __poll_t
6699 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6700 {
6701         struct ftrace_buffer_info *info = filp->private_data;
6702         struct trace_iterator *iter = &info->iter;
6703
6704         return trace_poll(iter, filp, poll_table);
6705 }
6706
6707 static ssize_t
6708 tracing_buffers_read(struct file *filp, char __user *ubuf,
6709                      size_t count, loff_t *ppos)
6710 {
6711         struct ftrace_buffer_info *info = filp->private_data;
6712         struct trace_iterator *iter = &info->iter;
6713         ssize_t ret = 0;
6714         ssize_t size;
6715
6716         if (!count)
6717                 return 0;
6718
6719 #ifdef CONFIG_TRACER_MAX_TRACE
6720         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6721                 return -EBUSY;
6722 #endif
6723
6724         if (!info->spare) {
6725                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6726                                                           iter->cpu_file);
6727                 if (IS_ERR(info->spare)) {
6728                         ret = PTR_ERR(info->spare);
6729                         info->spare = NULL;
6730                 } else {
6731                         info->spare_cpu = iter->cpu_file;
6732                 }
6733         }
6734         if (!info->spare)
6735                 return ret;
6736
6737         /* Do we have previous read data to read? */
6738         if (info->read < PAGE_SIZE)
6739                 goto read;
6740
6741  again:
6742         trace_access_lock(iter->cpu_file);
6743         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6744                                     &info->spare,
6745                                     count,
6746                                     iter->cpu_file, 0);
6747         trace_access_unlock(iter->cpu_file);
6748
6749         if (ret < 0) {
6750                 if (trace_empty(iter)) {
6751                         if ((filp->f_flags & O_NONBLOCK))
6752                                 return -EAGAIN;
6753
6754                         ret = wait_on_pipe(iter, false);
6755                         if (ret)
6756                                 return ret;
6757
6758                         goto again;
6759                 }
6760                 return 0;
6761         }
6762
6763         info->read = 0;
6764  read:
6765         size = PAGE_SIZE - info->read;
6766         if (size > count)
6767                 size = count;
6768
6769         ret = copy_to_user(ubuf, info->spare + info->read, size);
6770         if (ret == size)
6771                 return -EFAULT;
6772
6773         size -= ret;
6774
6775         *ppos += size;
6776         info->read += size;
6777
6778         return size;
6779 }
6780
6781 static int tracing_buffers_release(struct inode *inode, struct file *file)
6782 {
6783         struct ftrace_buffer_info *info = file->private_data;
6784         struct trace_iterator *iter = &info->iter;
6785
6786         mutex_lock(&trace_types_lock);
6787
6788         iter->tr->current_trace->ref--;
6789
6790         __trace_array_put(iter->tr);
6791
6792         if (info->spare)
6793                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6794                                            info->spare_cpu, info->spare);
6795         kfree(info);
6796
6797         mutex_unlock(&trace_types_lock);
6798
6799         return 0;
6800 }
6801
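/*
 * Each ring-buffer page handed to splice is wrapped in a buffer_ref.
 * The pipe code takes and drops references through buffer_pipe_buf_get()
 * and buffer_pipe_buf_release() below; the page goes back to the ring
 * buffer only when the last reference is dropped.
 */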
6802 struct buffer_ref {
6803         struct ring_buffer      *buffer;
6804         void                    *page;
6805         int                     cpu;
6806         refcount_t              refcount;
6807 };
6808
6809 static void buffer_ref_release(struct buffer_ref *ref)
6810 {
6811         if (!refcount_dec_and_test(&ref->refcount))
6812                 return;
6813         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6814         kfree(ref);
6815 }
6816
6817 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6818                                     struct pipe_buffer *buf)
6819 {
6820         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6821
6822         buffer_ref_release(ref);
6823         buf->private = 0;
6824 }
6825
6826 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6827                                 struct pipe_buffer *buf)
6828 {
6829         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6830
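        /*
         * Refuse further references once the count gets anywhere near
         * overflowing; the caller treats a false return as a failed get,
         * which keeps repeated pipe-buffer gets from wrapping the count.
         */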
6831         if (refcount_read(&ref->refcount) > INT_MAX/2)
6832                 return false;
6833
6834         refcount_inc(&ref->refcount);
6835         return true;
6836 }
6837
6838 /* Pipe buffer operations for a buffer. */
6839 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6840         .can_merge              = 0,
6841         .confirm                = generic_pipe_buf_confirm,
6842         .release                = buffer_pipe_buf_release,
6843         .steal                  = generic_pipe_buf_nosteal,
6844         .get                    = buffer_pipe_buf_get,
6845 };
6846
6847 /*
6848  * Callback from splice_to_pipe(); releases any pages left in the spd
6849  * if we errored out while filling the pipe.
6850  */
6851 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6852 {
6853         struct buffer_ref *ref =
6854                 (struct buffer_ref *)spd->partial[i].private;
6855
6856         buffer_ref_release(ref);
6857         spd->partial[i].private = 0;
6858 }
6859
6860 static ssize_t
6861 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6862                             struct pipe_inode_info *pipe, size_t len,
6863                             unsigned int flags)
6864 {
6865         struct ftrace_buffer_info *info = file->private_data;
6866         struct trace_iterator *iter = &info->iter;
6867         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6868         struct page *pages_def[PIPE_DEF_BUFFERS];
6869         struct splice_pipe_desc spd = {
6870                 .pages          = pages_def,
6871                 .partial        = partial_def,
6872                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6873                 .ops            = &buffer_pipe_buf_ops,
6874                 .spd_release    = buffer_spd_release,
6875         };
6876         struct buffer_ref *ref;
6877         int entries, i;
6878         ssize_t ret = 0;
6879
6880 #ifdef CONFIG_TRACER_MAX_TRACE
6881         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6882                 return -EBUSY;
6883 #endif
6884
6885         if (*ppos & (PAGE_SIZE - 1))
6886                 return -EINVAL;
6887
6888         if (len & (PAGE_SIZE - 1)) {
6889                 if (len < PAGE_SIZE)
6890                         return -EINVAL;
6891                 len &= PAGE_MASK;
6892         }
6893
6894         if (splice_grow_spd(pipe, &spd))
6895                 return -ENOMEM;
6896
6897  again:
6898         trace_access_lock(iter->cpu_file);
6899         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6900
6901         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6902                 struct page *page;
6903                 int r;
6904
6905                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6906                 if (!ref) {
6907                         ret = -ENOMEM;
6908                         break;
6909                 }
6910
6911                 refcount_set(&ref->refcount, 1);
6912                 ref->buffer = iter->trace_buffer->buffer;
6913                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6914                 if (IS_ERR(ref->page)) {
6915                         ret = PTR_ERR(ref->page);
6916                         ref->page = NULL;
6917                         kfree(ref);
6918                         break;
6919                 }
6920                 ref->cpu = iter->cpu_file;
6921
6922                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6923                                           len, iter->cpu_file, 1);
6924                 if (r < 0) {
6925                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6926                                                    ref->page);
6927                         kfree(ref);
6928                         break;
6929                 }
6930
6931                 page = virt_to_page(ref->page);
6932
6933                 spd.pages[i] = page;
6934                 spd.partial[i].len = PAGE_SIZE;
6935                 spd.partial[i].offset = 0;
6936                 spd.partial[i].private = (unsigned long)ref;
6937                 spd.nr_pages++;
6938                 *ppos += PAGE_SIZE;
6939
6940                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6941         }
6942
6943         trace_access_unlock(iter->cpu_file);
6944         spd.nr_pages = i;
6945
6946         /* did we read anything? */
6947         if (!spd.nr_pages) {
6948                 if (ret)
6949                         goto out;
6950
6951                 ret = -EAGAIN;
6952                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6953                         goto out;
6954
6955                 ret = wait_on_pipe(iter, true);
6956                 if (ret)
6957                         goto out;
6958
6959                 goto again;
6960         }
6961
6962         ret = splice_to_pipe(pipe, &spd);
6963 out:
6964         splice_shrink_spd(&spd);
6965
6966         return ret;
6967 }
6968
6969 static const struct file_operations tracing_buffers_fops = {
6970         .open           = tracing_buffers_open,
6971         .read           = tracing_buffers_read,
6972         .poll           = tracing_buffers_poll,
6973         .release        = tracing_buffers_release,
6974         .splice_read    = tracing_buffers_splice_read,
6975         .llseek         = no_llseek,
6976 };
6977
6978 static ssize_t
6979 tracing_stats_read(struct file *filp, char __user *ubuf,
6980                    size_t count, loff_t *ppos)
6981 {
6982         struct inode *inode = file_inode(filp);
6983         struct trace_array *tr = inode->i_private;
6984         struct trace_buffer *trace_buf = &tr->trace_buffer;
6985         int cpu = tracing_get_cpu(inode);
6986         struct trace_seq *s;
6987         unsigned long cnt;
6988         unsigned long long t;
6989         unsigned long usec_rem;
6990
6991         s = kmalloc(sizeof(*s), GFP_KERNEL);
6992         if (!s)
6993                 return -ENOMEM;
6994
6995         trace_seq_init(s);
6996
6997         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6998         trace_seq_printf(s, "entries: %ld\n", cnt);
6999
7000         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7001         trace_seq_printf(s, "overrun: %ld\n", cnt);
7002
7003         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7004         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7005
7006         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7007         trace_seq_printf(s, "bytes: %ld\n", cnt);
7008
7009         if (trace_clocks[tr->clock_id].in_ns) {
7010                 /* local or global for trace_clock */
7011                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7012                 usec_rem = do_div(t, USEC_PER_SEC);
7013                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7014                                                                 t, usec_rem);
7015
7016                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7017                 usec_rem = do_div(t, USEC_PER_SEC);
7018                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7019         } else {
7020                 /* counter or tsc mode for trace_clock */
7021                 trace_seq_printf(s, "oldest event ts: %llu\n",
7022                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7023
7024                 trace_seq_printf(s, "now ts: %llu\n",
7025                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7026         }
7027
7028         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7029         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7030
7031         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7032         trace_seq_printf(s, "read events: %ld\n", cnt);
7033
7034         count = simple_read_from_buffer(ubuf, count, ppos,
7035                                         s->buffer, trace_seq_used(s));
7036
7037         kfree(s);
7038
7039         return count;
7040 }
7041
7042 static const struct file_operations tracing_stats_fops = {
7043         .open           = tracing_open_generic_tr,
7044         .read           = tracing_stats_read,
7045         .llseek         = generic_file_llseek,
7046         .release        = tracing_release_generic_tr,
7047 };
7048
7049 #ifdef CONFIG_DYNAMIC_FTRACE
7050
7051 static ssize_t
7052 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7053                   size_t cnt, loff_t *ppos)
7054 {
7055         unsigned long *p = filp->private_data;
7056         char buf[64]; /* Not too big for a shallow stack */
7057         int r;
7058
7059         r = scnprintf(buf, 63, "%ld", *p);
7060         buf[r++] = '\n';
7061
7062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7063 }
7064
7065 static const struct file_operations tracing_dyn_info_fops = {
7066         .open           = tracing_open_generic,
7067         .read           = tracing_read_dyn_info,
7068         .llseek         = generic_file_llseek,
7069 };
7070 #endif /* CONFIG_DYNAMIC_FTRACE */
7071
7072 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7073 static void
7074 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7075                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7076                 void *data)
7077 {
7078         tracing_snapshot_instance(tr);
7079 }
7080
7081 static void
7082 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7083                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7084                       void *data)
7085 {
7086         struct ftrace_func_mapper *mapper = data;
7087         long *count = NULL;
7088
7089         if (mapper)
7090                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7091
7092         if (count) {
7093
7094                 if (*count <= 0)
7095                         return;
7096
7097                 (*count)--;
7098         }
7099
7100         tracing_snapshot_instance(tr);
7101 }
7102
7103 static int
7104 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7105                       struct ftrace_probe_ops *ops, void *data)
7106 {
7107         struct ftrace_func_mapper *mapper = data;
7108         long *count = NULL;
7109
7110         seq_printf(m, "%ps:", (void *)ip);
7111
7112         seq_puts(m, "snapshot");
7113
7114         if (mapper)
7115                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7116
7117         if (count)
7118                 seq_printf(m, ":count=%ld\n", *count);
7119         else
7120                 seq_puts(m, ":unlimited\n");
7121
7122         return 0;
7123 }
7124
7125 static int
7126 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7127                      unsigned long ip, void *init_data, void **data)
7128 {
7129         struct ftrace_func_mapper *mapper = *data;
7130
7131         if (!mapper) {
7132                 mapper = allocate_ftrace_func_mapper();
7133                 if (!mapper)
7134                         return -ENOMEM;
7135                 *data = mapper;
7136         }
7137
7138         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7139 }
7140
7141 static void
7142 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7143                      unsigned long ip, void *data)
7144 {
7145         struct ftrace_func_mapper *mapper = data;
7146
7147         if (!ip) {
7148                 if (!mapper)
7149                         return;
7150                 free_ftrace_func_mapper(mapper, NULL);
7151                 return;
7152         }
7153
7154         ftrace_func_mapper_remove_ip(mapper, ip);
7155 }
7156
7157 static struct ftrace_probe_ops snapshot_probe_ops = {
7158         .func                   = ftrace_snapshot,
7159         .print                  = ftrace_snapshot_print,
7160 };
7161
7162 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7163         .func                   = ftrace_count_snapshot,
7164         .print                  = ftrace_snapshot_print,
7165         .init                   = ftrace_snapshot_init,
7166         .free                   = ftrace_snapshot_free,
7167 };
7168
7169 static int
7170 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7171                                char *glob, char *cmd, char *param, int enable)
7172 {
7173         struct ftrace_probe_ops *ops;
7174         void *count = (void *)-1;
7175         char *number;
7176         int ret;
7177
7178         if (!tr)
7179                 return -ENODEV;
7180
7181         /* hash funcs only work with set_ftrace_filter */
7182         if (!enable)
7183                 return -EINVAL;
7184
7185         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7186
7187         if (glob[0] == '!')
7188                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7189
7190         if (!param)
7191                 goto out_reg;
7192
7193         number = strsep(&param, ":");
7194
7195         if (!strlen(number))
7196                 goto out_reg;
7197
7198         /*
7199          * We use the callback data field (which is a pointer)
7200          * as our counter.
7201          */
7202         ret = kstrtoul(number, 0, (unsigned long *)&count);
7203         if (ret)
7204                 return ret;
7205
7206  out_reg:
7207         ret = tracing_alloc_snapshot_instance(tr);
7208         if (ret < 0)
7209                 goto out;
7210
7211         ret = register_ftrace_function_probe(glob, tr, ops, count);
7212
7213  out:
7214         return ret < 0 ? ret : 0;
7215 }
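/*
 * The callback above backs the "snapshot" ftrace command registered
 * below.  As an illustration, writing "schedule:snapshot" to
 * set_ftrace_filter arms an unlimited snapshot probe on schedule(),
 * while "schedule:snapshot:3" stops after three snapshots.
 */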
7216
7217 static struct ftrace_func_command ftrace_snapshot_cmd = {
7218         .name                   = "snapshot",
7219         .func                   = ftrace_trace_snapshot_callback,
7220 };
7221
7222 static __init int register_snapshot_cmd(void)
7223 {
7224         return register_ftrace_command(&ftrace_snapshot_cmd);
7225 }
7226 #else
7227 static inline __init int register_snapshot_cmd(void) { return 0; }
7228 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7229
7230 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7231 {
7232         if (WARN_ON(!tr->dir))
7233                 return ERR_PTR(-ENODEV);
7234
7235         /* Top directory uses NULL as the parent */
7236         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7237                 return NULL;
7238
7239         /* All sub buffers have a descriptor */
7240         return tr->dir;
7241 }
7242
7243 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7244 {
7245         struct dentry *d_tracer;
7246
7247         if (tr->percpu_dir)
7248                 return tr->percpu_dir;
7249
7250         d_tracer = tracing_get_dentry(tr);
7251         if (IS_ERR(d_tracer))
7252                 return NULL;
7253
7254         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7255
7256         WARN_ONCE(!tr->percpu_dir,
7257                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7258
7259         return tr->percpu_dir;
7260 }
7261
7262 static struct dentry *
7263 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7264                       void *data, long cpu, const struct file_operations *fops)
7265 {
7266         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7267
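        /*
         * tracing_get_cpu() decodes this as (i_cdev - 1), so storing
         * cpu + 1 keeps a NULL i_cdev meaning RING_BUFFER_ALL_CPUS.
         */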
7268         if (ret) /* See tracing_get_cpu() */
7269                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7270         return ret;
7271 }
7272
7273 static void
7274 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7275 {
7276         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7277         struct dentry *d_cpu;
7278         char cpu_dir[30]; /* 30 characters should be more than enough */
7279
7280         if (!d_percpu)
7281                 return;
7282
7283         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7284         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7285         if (!d_cpu) {
7286                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7287                 return;
7288         }
7289
7290         /* per cpu trace_pipe */
7291         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7292                                 tr, cpu, &tracing_pipe_fops);
7293
7294         /* per cpu trace */
7295         trace_create_cpu_file("trace", 0644, d_cpu,
7296                                 tr, cpu, &tracing_fops);
7297
7298         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7299                                 tr, cpu, &tracing_buffers_fops);
7300
7301         trace_create_cpu_file("stats", 0444, d_cpu,
7302                                 tr, cpu, &tracing_stats_fops);
7303
7304         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7305                                 tr, cpu, &tracing_entries_fops);
7306
7307 #ifdef CONFIG_TRACER_SNAPSHOT
7308         trace_create_cpu_file("snapshot", 0644, d_cpu,
7309                                 tr, cpu, &snapshot_fops);
7310
7311         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7312                                 tr, cpu, &snapshot_raw_fops);
7313 #endif
7314 }
7315
7316 #ifdef CONFIG_FTRACE_SELFTEST
7317 /* Let selftest have access to static functions in this file */
7318 #include "trace_selftest.c"
7319 #endif
7320
7321 static ssize_t
7322 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7323                         loff_t *ppos)
7324 {
7325         struct trace_option_dentry *topt = filp->private_data;
7326         char *buf;
7327
7328         if (topt->flags->val & topt->opt->bit)
7329                 buf = "1\n";
7330         else
7331                 buf = "0\n";
7332
7333         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7334 }
7335
7336 static ssize_t
7337 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7338                          loff_t *ppos)
7339 {
7340         struct trace_option_dentry *topt = filp->private_data;
7341         unsigned long val;
7342         int ret;
7343
7344         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7345         if (ret)
7346                 return ret;
7347
7348         if (val != 0 && val != 1)
7349                 return -EINVAL;
7350
7351         if (!!(topt->flags->val & topt->opt->bit) != val) {
7352                 mutex_lock(&trace_types_lock);
7353                 ret = __set_tracer_option(topt->tr, topt->flags,
7354                                           topt->opt, !val);
7355                 mutex_unlock(&trace_types_lock);
7356                 if (ret)
7357                         return ret;
7358         }
7359
7360         *ppos += cnt;
7361
7362         return cnt;
7363 }
7364
7365
7366 static const struct file_operations trace_options_fops = {
7367         .open = tracing_open_generic,
7368         .read = trace_options_read,
7369         .write = trace_options_write,
7370         .llseek = generic_file_llseek,
7371 };
7372
7373 /*
7374  * In order to pass in both the trace_array descriptor as well as the index
7375  * to the flag that the trace option file represents, the trace_array
7376  * has a character array of trace_flags_index[], which holds the index
7377  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7378  * The address of this character array is passed to the flag option file
7379  * read/write callbacks.
7380  *
7381  * In order to extract both the index and the trace_array descriptor,
7382  * get_tr_index() uses the following algorithm.
7383  *
7384  *   idx = *ptr;
7385  *
7386  * As the pointer itself contains the address of the index (remember
7387  * index[1] == 1).
7388  *
7389  * Then to get the trace_array descriptor, by subtracting that index
7390  * from the ptr, we get to the start of the index itself.
7391  *
7392  *   ptr - idx == &index[0]
7393  *
7394  * Then a simple container_of() from that pointer gets us to the
7395  * trace_array descriptor.
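 *
 * For example, if data points at trace_flags_index[3], then idx == 3,
 * data - idx == &trace_flags_index[0], and container_of() hands back
 * the enclosing trace_array.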
7396  */
7397 static void get_tr_index(void *data, struct trace_array **ptr,
7398                          unsigned int *pindex)
7399 {
7400         *pindex = *(unsigned char *)data;
7401
7402         *ptr = container_of(data - *pindex, struct trace_array,
7403                             trace_flags_index);
7404 }
7405
7406 static ssize_t
7407 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7408                         loff_t *ppos)
7409 {
7410         void *tr_index = filp->private_data;
7411         struct trace_array *tr;
7412         unsigned int index;
7413         char *buf;
7414
7415         get_tr_index(tr_index, &tr, &index);
7416
7417         if (tr->trace_flags & (1 << index))
7418                 buf = "1\n";
7419         else
7420                 buf = "0\n";
7421
7422         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7423 }
7424
7425 static ssize_t
7426 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7427                          loff_t *ppos)
7428 {
7429         void *tr_index = filp->private_data;
7430         struct trace_array *tr;
7431         unsigned int index;
7432         unsigned long val;
7433         int ret;
7434
7435         get_tr_index(tr_index, &tr, &index);
7436
7437         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7438         if (ret)
7439                 return ret;
7440
7441         if (val != 0 && val != 1)
7442                 return -EINVAL;
7443
7444         mutex_lock(&trace_types_lock);
7445         ret = set_tracer_flag(tr, 1 << index, val);
7446         mutex_unlock(&trace_types_lock);
7447
7448         if (ret < 0)
7449                 return ret;
7450
7451         *ppos += cnt;
7452
7453         return cnt;
7454 }
7455
7456 static const struct file_operations trace_options_core_fops = {
7457         .open = tracing_open_generic,
7458         .read = trace_options_core_read,
7459         .write = trace_options_core_write,
7460         .llseek = generic_file_llseek,
7461 };
7462
7463 struct dentry *trace_create_file(const char *name,
7464                                  umode_t mode,
7465                                  struct dentry *parent,
7466                                  void *data,
7467                                  const struct file_operations *fops)
7468 {
7469         struct dentry *ret;
7470
7471         ret = tracefs_create_file(name, mode, parent, data, fops);
7472         if (!ret)
7473                 pr_warn("Could not create tracefs '%s' entry\n", name);
7474
7475         return ret;
7476 }
7477
7478
7479 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7480 {
7481         struct dentry *d_tracer;
7482
7483         if (tr->options)
7484                 return tr->options;
7485
7486         d_tracer = tracing_get_dentry(tr);
7487         if (IS_ERR(d_tracer))
7488                 return NULL;
7489
7490         tr->options = tracefs_create_dir("options", d_tracer);
7491         if (!tr->options) {
7492                 pr_warn("Could not create tracefs directory 'options'\n");
7493                 return NULL;
7494         }
7495
7496         return tr->options;
7497 }
7498
7499 static void
7500 create_trace_option_file(struct trace_array *tr,
7501                          struct trace_option_dentry *topt,
7502                          struct tracer_flags *flags,
7503                          struct tracer_opt *opt)
7504 {
7505         struct dentry *t_options;
7506
7507         t_options = trace_options_init_dentry(tr);
7508         if (!t_options)
7509                 return;
7510
7511         topt->flags = flags;
7512         topt->opt = opt;
7513         topt->tr = tr;
7514
7515         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7516                                     &trace_options_fops);
7517
7518 }
7519
7520 static void
7521 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7522 {
7523         struct trace_option_dentry *topts;
7524         struct trace_options *tr_topts;
7525         struct tracer_flags *flags;
7526         struct tracer_opt *opts;
7527         int cnt;
7528         int i;
7529
7530         if (!tracer)
7531                 return;
7532
7533         flags = tracer->flags;
7534
7535         if (!flags || !flags->opts)
7536                 return;
7537
7538         /*
7539          * If this is an instance, only create flags for tracers
7540          * the instance may have.
7541          */
7542         if (!trace_ok_for_array(tracer, tr))
7543                 return;
7544
7545         for (i = 0; i < tr->nr_topts; i++) {
7546                 /* Make sure there are no duplicate flags. */
7547                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7548                         return;
7549         }
7550
7551         opts = flags->opts;
7552
7553         for (cnt = 0; opts[cnt].name; cnt++)
7554                 ;
7555
7556         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7557         if (!topts)
7558                 return;
7559
7560         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7561                             GFP_KERNEL);
7562         if (!tr_topts) {
7563                 kfree(topts);
7564                 return;
7565         }
7566
7567         tr->topts = tr_topts;
7568         tr->topts[tr->nr_topts].tracer = tracer;
7569         tr->topts[tr->nr_topts].topts = topts;
7570         tr->nr_topts++;
7571
7572         for (cnt = 0; opts[cnt].name; cnt++) {
7573                 create_trace_option_file(tr, &topts[cnt], flags,
7574                                          &opts[cnt]);
7575                 WARN_ONCE(topts[cnt].entry == NULL,
7576                           "Failed to create trace option: %s",
7577                           opts[cnt].name);
7578         }
7579 }
7580
7581 static struct dentry *
7582 create_trace_option_core_file(struct trace_array *tr,
7583                               const char *option, long index)
7584 {
7585         struct dentry *t_options;
7586
7587         t_options = trace_options_init_dentry(tr);
7588         if (!t_options)
7589                 return NULL;
7590
7591         return trace_create_file(option, 0644, t_options,
7592                                  (void *)&tr->trace_flags_index[index],
7593                                  &trace_options_core_fops);
7594 }
7595
7596 static void create_trace_options_dir(struct trace_array *tr)
7597 {
7598         struct dentry *t_options;
7599         bool top_level = tr == &global_trace;
7600         int i;
7601
7602         t_options = trace_options_init_dentry(tr);
7603         if (!t_options)
7604                 return;
7605
7606         for (i = 0; trace_options[i]; i++) {
7607                 if (top_level ||
7608                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7609                         create_trace_option_core_file(tr, trace_options[i], i);
7610         }
7611 }
7612
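/*
 * rb_simple_read()/rb_simple_write() back the per-instance "tracing_on"
 * file: writing 0 stops recording (calling the tracer's ->stop hook),
 * writing 1 restarts it, and a read reports the current state.
 */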
7613 static ssize_t
7614 rb_simple_read(struct file *filp, char __user *ubuf,
7615                size_t cnt, loff_t *ppos)
7616 {
7617         struct trace_array *tr = filp->private_data;
7618         char buf[64];
7619         int r;
7620
7621         r = tracer_tracing_is_on(tr);
7622         r = sprintf(buf, "%d\n", r);
7623
7624         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7625 }
7626
7627 static ssize_t
7628 rb_simple_write(struct file *filp, const char __user *ubuf,
7629                 size_t cnt, loff_t *ppos)
7630 {
7631         struct trace_array *tr = filp->private_data;
7632         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7633         unsigned long val;
7634         int ret;
7635
7636         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7637         if (ret)
7638                 return ret;
7639
7640         if (buffer) {
7641                 mutex_lock(&trace_types_lock);
7642                 if (!!val == tracer_tracing_is_on(tr)) {
7643                         val = 0; /* do nothing */
7644                 } else if (val) {
7645                         tracer_tracing_on(tr);
7646                         if (tr->current_trace->start)
7647                                 tr->current_trace->start(tr);
7648                 } else {
7649                         tracer_tracing_off(tr);
7650                         if (tr->current_trace->stop)
7651                                 tr->current_trace->stop(tr);
7652                 }
7653                 mutex_unlock(&trace_types_lock);
7654         }
7655
7656         (*ppos)++;
7657
7658         return cnt;
7659 }
7660
7661 static const struct file_operations rb_simple_fops = {
7662         .open           = tracing_open_generic_tr,
7663         .read           = rb_simple_read,
7664         .write          = rb_simple_write,
7665         .release        = tracing_release_generic_tr,
7666         .llseek         = default_llseek,
7667 };
7668
7669 struct dentry *trace_instance_dir;
7670
7671 static void
7672 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7673
7674 static int
7675 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7676 {
7677         enum ring_buffer_flags rb_flags;
7678
7679         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7680
7681         buf->tr = tr;
7682
7683         buf->buffer = ring_buffer_alloc(size, rb_flags);
7684         if (!buf->buffer)
7685                 return -ENOMEM;
7686
7687         buf->data = alloc_percpu(struct trace_array_cpu);
7688         if (!buf->data) {
7689                 ring_buffer_free(buf->buffer);
7690                 buf->buffer = NULL;
7691                 return -ENOMEM;
7692         }
7693
7694         /* Allocate the first page for all buffers */
7695         set_buffer_entries(&tr->trace_buffer,
7696                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7697
7698         return 0;
7699 }
7700
7701 static int allocate_trace_buffers(struct trace_array *tr, int size)
7702 {
7703         int ret;
7704
7705         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7706         if (ret)
7707                 return ret;
7708
7709 #ifdef CONFIG_TRACER_MAX_TRACE
7710         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7711                                     allocate_snapshot ? size : 1);
7712         if (WARN_ON(ret)) {
7713                 ring_buffer_free(tr->trace_buffer.buffer);
7714                 tr->trace_buffer.buffer = NULL;
7715                 free_percpu(tr->trace_buffer.data);
7716                 tr->trace_buffer.data = NULL;
7717                 return -ENOMEM;
7718         }
7719         tr->allocated_snapshot = allocate_snapshot;
7720
7721         /*
7722          * Only the top level trace array gets its snapshot allocated
7723          * from the kernel command line.
7724          */
7725         allocate_snapshot = false;
7726 #endif
7727         return 0;
7728 }
7729
7730 static void free_trace_buffer(struct trace_buffer *buf)
7731 {
7732         if (buf->buffer) {
7733                 ring_buffer_free(buf->buffer);
7734                 buf->buffer = NULL;
7735                 free_percpu(buf->data);
7736                 buf->data = NULL;
7737         }
7738 }
7739
7740 static void free_trace_buffers(struct trace_array *tr)
7741 {
7742         if (!tr)
7743                 return;
7744
7745         free_trace_buffer(&tr->trace_buffer);
7746
7747 #ifdef CONFIG_TRACER_MAX_TRACE
7748         free_trace_buffer(&tr->max_buffer);
7749 #endif
7750 }
7751
7752 static void init_trace_flags_index(struct trace_array *tr)
7753 {
7754         int i;
7755
7756         /* Used by the trace options files */
7757         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7758                 tr->trace_flags_index[i] = i;
7759 }
7760
7761 static void __update_tracer_options(struct trace_array *tr)
7762 {
7763         struct tracer *t;
7764
7765         for (t = trace_types; t; t = t->next)
7766                 add_tracer_options(tr, t);
7767 }
7768
7769 static void update_tracer_options(struct trace_array *tr)
7770 {
7771         mutex_lock(&trace_types_lock);
7772         __update_tracer_options(tr);
7773         mutex_unlock(&trace_types_lock);
7774 }
7775
7776 static int instance_mkdir(const char *name)
7777 {
7778         struct trace_array *tr;
7779         int ret;
7780
7781         mutex_lock(&event_mutex);
7782         mutex_lock(&trace_types_lock);
7783
7784         ret = -EEXIST;
7785         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7786                 if (tr->name && strcmp(tr->name, name) == 0)
7787                         goto out_unlock;
7788         }
7789
7790         ret = -ENOMEM;
7791         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7792         if (!tr)
7793                 goto out_unlock;
7794
7795         tr->name = kstrdup(name, GFP_KERNEL);
7796         if (!tr->name)
7797                 goto out_free_tr;
7798
7799         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7800                 goto out_free_tr;
7801
7802         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7803
7804         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7805
7806         raw_spin_lock_init(&tr->start_lock);
7807
7808         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7809
7810         tr->current_trace = &nop_trace;
7811
7812         INIT_LIST_HEAD(&tr->systems);
7813         INIT_LIST_HEAD(&tr->events);
7814         INIT_LIST_HEAD(&tr->hist_vars);
7815
7816         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7817                 goto out_free_tr;
7818
7819         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7820         if (!tr->dir)
7821                 goto out_free_tr;
7822
7823         ret = event_trace_add_tracer(tr->dir, tr);
7824         if (ret) {
7825                 tracefs_remove_recursive(tr->dir);
7826                 goto out_free_tr;
7827         }
7828
7829         ftrace_init_trace_array(tr);
7830
7831         init_tracer_tracefs(tr, tr->dir);
7832         init_trace_flags_index(tr);
7833         __update_tracer_options(tr);
7834
7835         list_add(&tr->list, &ftrace_trace_arrays);
7836
7837         mutex_unlock(&trace_types_lock);
7838         mutex_unlock(&event_mutex);
7839
7840         return 0;
7841
7842  out_free_tr:
7843         free_trace_buffers(tr);
7844         free_cpumask_var(tr->tracing_cpumask);
7845         kfree(tr->name);
7846         kfree(tr);
7847
7848  out_unlock:
7849         mutex_unlock(&trace_types_lock);
7850         mutex_unlock(&event_mutex);
7851
7852         return ret;
7853
7854 }
7855
7856 static int instance_rmdir(const char *name)
7857 {
7858         struct trace_array *tr;
7859         int found = 0;
7860         int ret;
7861         int i;
7862
7863         mutex_lock(&event_mutex);
7864         mutex_lock(&trace_types_lock);
7865
7866         ret = -ENODEV;
7867         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7868                 if (tr->name && strcmp(tr->name, name) == 0) {
7869                         found = 1;
7870                         break;
7871                 }
7872         }
7873         if (!found)
7874                 goto out_unlock;
7875
7876         ret = -EBUSY;
7877         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7878                 goto out_unlock;
7879
7880         list_del(&tr->list);
7881
7882         /* Disable all the flags that were enabled coming in */
7883         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7884                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7885                         set_tracer_flag(tr, 1 << i, 0);
7886         }
7887
7888         tracing_set_nop(tr);
7889         clear_ftrace_function_probes(tr);
7890         event_trace_del_tracer(tr);
7891         ftrace_clear_pids(tr);
7892         ftrace_destroy_function_files(tr);
7893         tracefs_remove_recursive(tr->dir);
7894         free_trace_buffers(tr);
7895
7896         for (i = 0; i < tr->nr_topts; i++) {
7897                 kfree(tr->topts[i].topts);
7898         }
7899         kfree(tr->topts);
7900
7901         free_cpumask_var(tr->tracing_cpumask);
7902         kfree(tr->name);
7903         kfree(tr);
7904
7905         ret = 0;
7906
7907  out_unlock:
7908         mutex_unlock(&trace_types_lock);
7909         mutex_unlock(&event_mutex);
7910
7911         return ret;
7912 }
7913
7914 static __init void create_trace_instances(struct dentry *d_tracer)
7915 {
7916         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7917                                                          instance_mkdir,
7918                                                          instance_rmdir);
7919         if (WARN_ON(!trace_instance_dir))
7920                 return;
7921 }
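/*
 * In practice the directory above is driven from user space, e.g.
 * "mkdir /sys/kernel/tracing/instances/foo" lands in instance_mkdir()
 * and creates a new trace_array named "foo"; rmdir of the same
 * directory tears it down through instance_rmdir().
 */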
7922
7923 static void
7924 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7925 {
7926         struct trace_event_file *file;
7927         int cpu;
7928
7929         trace_create_file("available_tracers", 0444, d_tracer,
7930                         tr, &show_traces_fops);
7931
7932         trace_create_file("current_tracer", 0644, d_tracer,
7933                         tr, &set_tracer_fops);
7934
7935         trace_create_file("tracing_cpumask", 0644, d_tracer,
7936                           tr, &tracing_cpumask_fops);
7937
7938         trace_create_file("trace_options", 0644, d_tracer,
7939                           tr, &tracing_iter_fops);
7940
7941         trace_create_file("trace", 0644, d_tracer,
7942                           tr, &tracing_fops);
7943
7944         trace_create_file("trace_pipe", 0444, d_tracer,
7945                           tr, &tracing_pipe_fops);
7946
7947         trace_create_file("buffer_size_kb", 0644, d_tracer,
7948                           tr, &tracing_entries_fops);
7949
7950         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7951                           tr, &tracing_total_entries_fops);
7952
7953         trace_create_file("free_buffer", 0200, d_tracer,
7954                           tr, &tracing_free_buffer_fops);
7955
7956         trace_create_file("trace_marker", 0220, d_tracer,
7957                           tr, &tracing_mark_fops);
7958
7959         file = __find_event_file(tr, "ftrace", "print");
7960         if (file && file->dir)
7961                 trace_create_file("trigger", 0644, file->dir, file,
7962                                   &event_trigger_fops);
7963         tr->trace_marker_file = file;
7964
7965         trace_create_file("trace_marker_raw", 0220, d_tracer,
7966                           tr, &tracing_mark_raw_fops);
7967
7968         trace_create_file("trace_clock", 0644, d_tracer, tr,
7969                           &trace_clock_fops);
7970
7971         trace_create_file("tracing_on", 0644, d_tracer,
7972                           tr, &rb_simple_fops);
7973
7974         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7975                           &trace_time_stamp_mode_fops);
7976
7977         create_trace_options_dir(tr);
7978
7979 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7980         trace_create_file("tracing_max_latency", 0644, d_tracer,
7981                         &tr->max_latency, &tracing_max_lat_fops);
7982 #endif
7983
7984         if (ftrace_create_function_files(tr, d_tracer))
7985                 WARN(1, "Could not allocate function filter files");
7986
7987 #ifdef CONFIG_TRACER_SNAPSHOT
7988         trace_create_file("snapshot", 0644, d_tracer,
7989                           tr, &snapshot_fops);
7990 #endif
7991
7992         for_each_tracing_cpu(cpu)
7993                 tracing_init_tracefs_percpu(tr, cpu);
7994
7995         ftrace_init_tracefs(tr, d_tracer);
7996 }
7997
7998 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7999 {
8000         struct vfsmount *mnt;
8001         struct file_system_type *type;
8002
8003         /*
8004          * To maintain backward compatibility for tools that mount
8005          * debugfs to get to the tracing facility, tracefs is automatically
8006          * mounted to the debugfs/tracing directory.
8007          */
8008         type = get_fs_type("tracefs");
8009         if (!type)
8010                 return NULL;
8011         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8012         put_filesystem(type);
8013         if (IS_ERR(mnt))
8014                 return NULL;
8015         mntget(mnt);
8016
8017         return mnt;
8018 }
8019
8020 /**
8021  * tracing_init_dentry - initialize top level trace array
8022  *
8023  * This is called when creating files or directories in the tracing
8024  * directory. It is called via fs_initcall() by any of the boot up code
8025  * and expects to return the dentry of the top level tracing directory.
8026  */
8027 struct dentry *tracing_init_dentry(void)
8028 {
8029         struct trace_array *tr = &global_trace;
8030
8031         /* The top level trace array uses NULL as parent */
8032         if (tr->dir)
8033                 return NULL;
8034
8035         if (WARN_ON(!tracefs_initialized()) ||
8036                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8037                  WARN_ON(!debugfs_initialized())))
8038                 return ERR_PTR(-ENODEV);
8039
8040         /*
8041          * As there may still be users that expect the tracing
8042          * files to exist in debugfs/tracing, we must automount
8043          * the tracefs file system there, so older tools still
8044          * work with the newer kernel.
8045          */
8046         tr->dir = debugfs_create_automount("tracing", NULL,
8047                                            trace_automount, NULL);
8048         if (!tr->dir) {
8049                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8050                 return ERR_PTR(-ENOMEM);
8051         }
8052
8053         return NULL;
8054 }
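
     /*
      * With the automount set up above, the same files are reachable both
      * at the canonical tracefs mount point and through the debugfs
      * compatibility path, which is mounted on first access:
      *
      *   mount -t tracefs nodev /sys/kernel/tracing
      *   ls /sys/kernel/debug/tracing      # triggers trace_automount()
      */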
8055
8056 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8057 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8058
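     /*
      * These symbols are generated by the linker script and bound the
      * section holding the eval maps emitted by TRACE_DEFINE_ENUM() and
      * TRACE_DEFINE_SIZEOF() in the core kernel; trace_eval_init()
      * registers them all at boot.
      */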
8059 static void __init trace_eval_init(void)
8060 {
8061         int len;
8062
8063         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8064         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8065 }
8066
8067 #ifdef CONFIG_MODULES
8068 static void trace_module_add_evals(struct module *mod)
8069 {
8070         if (!mod->num_trace_evals)
8071                 return;
8072
8073         /*
8074          * Modules with bad taint do not have events created;
8075          * do not bother with their eval maps either.
8076          */
8077         if (trace_module_has_bad_taint(mod))
8078                 return;
8079
8080         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8081 }
8082
8083 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8084 static void trace_module_remove_evals(struct module *mod)
8085 {
8086         union trace_eval_map_item *map;
8087         union trace_eval_map_item **last = &trace_eval_maps;
8088
8089         if (!mod->num_trace_evals)
8090                 return;
8091
8092         mutex_lock(&trace_eval_mutex);
8093
8094         map = trace_eval_maps;
8095
8096         while (map) {
8097                 if (map->head.mod == mod)
8098                         break;
8099                 map = trace_eval_jmp_to_tail(map);
8100                 last = &map->tail.next;
8101                 map = map->tail.next;
8102         }
8103         if (!map)
8104                 goto out;
8105
8106         *last = trace_eval_jmp_to_tail(map)->tail.next;
8107         kfree(map);
8108  out:
8109         mutex_unlock(&trace_eval_mutex);
8110 }
8111 #else
8112 static inline void trace_module_remove_evals(struct module *mod) { }
8113 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8114
8115 static int trace_module_notify(struct notifier_block *self,
8116                                unsigned long val, void *data)
8117 {
8118         struct module *mod = data;
8119
8120         switch (val) {
8121         case MODULE_STATE_COMING:
8122                 trace_module_add_evals(mod);
8123                 break;
8124         case MODULE_STATE_GOING:
8125                 trace_module_remove_evals(mod);
8126                 break;
8127         }
8128
8129         return 0;
8130 }
8131
8132 static struct notifier_block trace_module_nb = {
8133         .notifier_call = trace_module_notify,
8134         .priority = 0,
8135 };
8136 #endif /* CONFIG_MODULES */
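
     /*
      * A module's mod->trace_evals array is populated when its trace event
      * headers use TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF(); the notifier
      * above inserts those maps on MODULE_STATE_COMING and, when
      * CONFIG_TRACE_EVAL_MAP_FILE is enabled, removes them again on
      * MODULE_STATE_GOING so the tracefs eval_map file never references
      * memory of an unloaded module.
      */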
8137
8138 static __init int tracer_init_tracefs(void)
8139 {
8140         struct dentry *d_tracer;
8141
8142         trace_access_lock_init();
8143
8144         d_tracer = tracing_init_dentry();
8145         if (IS_ERR(d_tracer))
8146                 return 0;
8147
8148         event_trace_init();
8149
8150         init_tracer_tracefs(&global_trace, d_tracer);
8151         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8152
8153         trace_create_file("tracing_thresh", 0644, d_tracer,
8154                         &global_trace, &tracing_thresh_fops);
8155
8156         trace_create_file("README", 0444, d_tracer,
8157                         NULL, &tracing_readme_fops);
8158
8159         trace_create_file("saved_cmdlines", 0444, d_tracer,
8160                         NULL, &tracing_saved_cmdlines_fops);
8161
8162         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8163                           NULL, &tracing_saved_cmdlines_size_fops);
8164
8165         trace_create_file("saved_tgids", 0444, d_tracer,
8166                         NULL, &tracing_saved_tgids_fops);
8167
8168         trace_eval_init();
8169
8170         trace_create_eval_file(d_tracer);
8171
8172 #ifdef CONFIG_MODULES
8173         register_module_notifier(&trace_module_nb);
8174 #endif
8175
8176 #ifdef CONFIG_DYNAMIC_FTRACE
8177         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8178                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8179 #endif
8180
8181         create_trace_instances(d_tracer);
8182
8183         update_tracer_options(&global_trace);
8184
8185         return 0;
8186 }
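
     /*
      * The top level files created here can be exercised from a shell once
      * tracefs is mounted, for example:
      *
      *   cat /sys/kernel/tracing/README               # built-in usage help
      *   cat /sys/kernel/tracing/saved_cmdlines       # cached pid -> comm map
      *   echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
      *   mkdir /sys/kernel/tracing/instances/foo      # "foo" is an example name
      */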
8187
8188 static int trace_panic_handler(struct notifier_block *this,
8189                                unsigned long event, void *unused)
8190 {
8191         if (ftrace_dump_on_oops)
8192                 ftrace_dump(ftrace_dump_on_oops);
8193         return NOTIFY_OK;
8194 }
8195
8196 static struct notifier_block trace_panic_notifier = {
8197         .notifier_call  = trace_panic_handler,
8198         .next           = NULL,
8199         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8200 };
8201
8202 static int trace_die_handler(struct notifier_block *self,
8203                              unsigned long val,
8204                              void *data)
8205 {
8206         switch (val) {
8207         case DIE_OOPS:
8208                 if (ftrace_dump_on_oops)
8209                         ftrace_dump(ftrace_dump_on_oops);
8210                 break;
8211         default:
8212                 break;
8213         }
8214         return NOTIFY_OK;
8215 }
8216
8217 static struct notifier_block trace_die_notifier = {
8218         .notifier_call = trace_die_handler,
8219         .priority = 200
8220 };
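
     /*
      * Both notifiers honor ftrace_dump_on_oops, which can be set on the
      * kernel command line or at run time:
      *
      *   ftrace_dump_on_oops             (boot parameter, dump all CPUs)
      *   ftrace_dump_on_oops=orig_cpu    (dump only the CPU that oopsed)
      *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
      */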
8221
8222 /*
8223  * printk is limited to a maximum of 1024 characters; we really don't
8224  * need it that big. Nothing should be printing 1000 characters anyway.
8225  */
8226 #define TRACE_MAX_PRINT         1000
8227
8228 /*
8229  * Define here KERN_TRACE so that we have one place to modify
8230  * it if we decide to change what log level the ftrace dump
8231  * should be at.
8232  */
8233 #define KERN_TRACE              KERN_EMERG
8234
8235 void
8236 trace_printk_seq(struct trace_seq *s)
8237 {
8238         /* Probably should print a warning here. */
8239         if (s->seq.len >= TRACE_MAX_PRINT)
8240                 s->seq.len = TRACE_MAX_PRINT;
8241
8242         /*
8243          * More paranoid code. Although the buffer size is set to
8244          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8245          * an extra layer of protection.
8246          */
8247         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8248                 s->seq.len = s->seq.size - 1;
8249
8250         /* should be zero ended, but we are paranoid. */
8251         s->buffer[s->seq.len] = 0;
8252
8253         printk(KERN_TRACE "%s", s->buffer);
8254
8255         trace_seq_init(s);
8256 }
8257
8258 void trace_init_global_iter(struct trace_iterator *iter)
8259 {
8260         iter->tr = &global_trace;
8261         iter->trace = iter->tr->current_trace;
8262         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8263         iter->trace_buffer = &global_trace.trace_buffer;
8264
8265         if (iter->trace && iter->trace->open)
8266                 iter->trace->open(iter);
8267
8268         /* Annotate start of buffers if we had overruns */
8269         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8270                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8271
8272         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8273         if (trace_clocks[iter->tr->clock_id].in_ns)
8274                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8275 }
8276
8277 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8278 {
8279         /* use static because iter can be a bit big for the stack */
8280         static struct trace_iterator iter;
8281         static atomic_t dump_running;
8282         struct trace_array *tr = &global_trace;
8283         unsigned int old_userobj;
8284         unsigned long flags;
8285         int cnt = 0, cpu;
8286
8287         /* Only allow one dump user at a time. */
8288         if (atomic_inc_return(&dump_running) != 1) {
8289                 atomic_dec(&dump_running);
8290                 return;
8291         }
8292
8293         /*
8294          * Always turn off tracing when we dump.
8295          * We don't need to show trace output of what happens
8296          * between multiple crashes.
8297          *
8298          * If the user does a sysrq-z, then they can re-enable
8299          * tracing with echo 1 > tracing_on.
8300          */
8301         tracing_off();
8302
8303         local_irq_save(flags);
8304         printk_nmi_direct_enter();
8305
8306         /* Simulate the iterator */
8307         trace_init_global_iter(&iter);
8308
8309         for_each_tracing_cpu(cpu) {
8310                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8311         }
8312
8313         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8314
8315         /* don't look at user memory in panic mode */
8316         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8317
8318         switch (oops_dump_mode) {
8319         case DUMP_ALL:
8320                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8321                 break;
8322         case DUMP_ORIG:
8323                 iter.cpu_file = raw_smp_processor_id();
8324                 break;
8325         case DUMP_NONE:
8326                 goto out_enable;
8327         default:
8328                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8329                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8330         }
8331
8332         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8333
8334         /* Did function tracer already get disabled? */
8335         if (ftrace_is_dead()) {
8336                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8337                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8338         }
8339
8340         /*
8341          * We need to stop all tracing on all CPUs to read the
8342          * next buffer. This is a bit expensive, but it is
8343          * not done often. We fill in all that we can read,
8344          * and then release the locks again.
8345          */
8346
8347         while (!trace_empty(&iter)) {
8348
8349                 if (!cnt)
8350                         printk(KERN_TRACE "---------------------------------\n");
8351
8352                 cnt++;
8353
8354                 /* reset all but tr, trace, and overruns */
8355                 memset(&iter.seq, 0,
8356                        sizeof(struct trace_iterator) -
8357                        offsetof(struct trace_iterator, seq));
8358                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8359                 iter.pos = -1;
8360
8361                 if (trace_find_next_entry_inc(&iter) != NULL) {
8362                         int ret;
8363
8364                         ret = print_trace_line(&iter);
8365                         if (ret != TRACE_TYPE_NO_CONSUME)
8366                                 trace_consume(&iter);
8367                 }
8368                 touch_nmi_watchdog();
8369
8370                 trace_printk_seq(&iter.seq);
8371         }
8372
8373         if (!cnt)
8374                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8375         else
8376                 printk(KERN_TRACE "---------------------------------\n");
8377
8378  out_enable:
8379         tr->trace_flags |= old_userobj;
8380
8381         for_each_tracing_cpu(cpu) {
8382                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8383         }
8384         atomic_dec(&dump_running);
8385         printk_nmi_direct_exit();
8386         local_irq_restore(flags);
8387 }
8388 EXPORT_SYMBOL_GPL(ftrace_dump);
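
     /*
      * Besides the panic and die notifiers above, ftrace_dump() can be
      * triggered by hand through magic SysRq 'z' (with CONFIG_MAGIC_SYSRQ):
      *
      *   echo z > /proc/sysrq-trigger
      */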
8389
8390 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8391 {
8392         char **argv;
8393         int argc, ret;
8394
8395         argc = 0;
8396         ret = 0;
8397         argv = argv_split(GFP_KERNEL, buf, &argc);
8398         if (!argv)
8399                 return -ENOMEM;
8400
8401         if (argc)
8402                 ret = createfn(argc, argv);
8403
8404         argv_free(argv);
8405
8406         return ret;
8407 }
8408
8409 #define WRITE_BUFSIZE  4096
8410
8411 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8412                                 size_t count, loff_t *ppos,
8413                                 int (*createfn)(int, char **))
8414 {
8415         char *kbuf, *buf, *tmp;
8416         int ret = 0;
8417         size_t done = 0;
8418         size_t size;
8419
8420         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8421         if (!kbuf)
8422                 return -ENOMEM;
8423
8424         while (done < count) {
8425                 size = count - done;
8426
8427                 if (size >= WRITE_BUFSIZE)
8428                         size = WRITE_BUFSIZE - 1;
8429
8430                 if (copy_from_user(kbuf, buffer + done, size)) {
8431                         ret = -EFAULT;
8432                         goto out;
8433                 }
8434                 kbuf[size] = '\0';
8435                 buf = kbuf;
8436                 do {
8437                         tmp = strchr(buf, '\n');
8438                         if (tmp) {
8439                                 *tmp = '\0';
8440                                 size = tmp - buf + 1;
8441                         } else {
8442                                 size = strlen(buf);
8443                                 if (done + size < count) {
8444                                         if (buf != kbuf)
8445                                                 break;
8446                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8447                                         pr_warn("Line length is too long: Should be less than %d\n",
8448                                                 WRITE_BUFSIZE - 2);
8449                                         ret = -EINVAL;
8450                                         goto out;
8451                                 }
8452                         }
8453                         done += size;
8454
8455                         /* Remove comments */
8456                         tmp = strchr(buf, '#');
8457
8458                         if (tmp)
8459                                 *tmp = '\0';
8460
8461                         ret = trace_run_command(buf, createfn);
8462                         if (ret)
8463                                 goto out;
8464                         buf += size;
8465
8466                 } while (done < count);
8467         }
8468         ret = done;
8469
8470 out:
8471         kfree(kbuf);
8472
8473         return ret;
8474 }
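
     /*
      * trace_parse_run_command() backs the write() path of command style
      * files such as kprobe_events and uprobe_events: the user buffer is
      * copied in chunks, split on newlines, stripped of '#' comments and
      * handed line by line to createfn. Typical use from a shell:
      *
      *   echo 'p:myopen do_sys_open' >> /sys/kernel/tracing/kprobe_events
      *   echo 1 > /sys/kernel/tracing/events/kprobes/myopen/enable
      */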
8475
8476 __init static int tracer_alloc_buffers(void)
8477 {
8478         int ring_buf_size;
8479         int ret = -ENOMEM;
8480
8481         /*
8482          * Make sure we don't accidentally add more trace options
8483          * than we have bits for.
8484          */
8485         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8486
8487         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8488                 goto out;
8489
8490         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8491                 goto out_free_buffer_mask;
8492
8493         /* Only allocate trace_printk buffers if a trace_printk exists */
8494         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8495                 /* Must be called before global_trace.buffer is allocated */
8496                 trace_printk_init_buffers();
8497
8498         /* To save memory, keep the ring buffer size to its minimum */
8499         if (ring_buffer_expanded)
8500                 ring_buf_size = trace_buf_size;
8501         else
8502                 ring_buf_size = 1;
8503
8504         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8505         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8506
8507         raw_spin_lock_init(&global_trace.start_lock);
8508
8509         /*
8510          * The prepare callback allocates some memory for the ring buffer. We
8511          * don't free the buffer if the CPU goes down. If we were to free
8512          * the buffer, then the user would lose any trace that was in the
8513          * buffer. The memory will be removed once the "instance" is removed.
8514          */
8515         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8516                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8517                                       NULL);
8518         if (ret < 0)
8519                 goto out_free_cpumask;
8520         /* Used for event triggers */
8521         ret = -ENOMEM;
8522         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8523         if (!temp_buffer)
8524                 goto out_rm_hp_state;
8525
8526         if (trace_create_savedcmd() < 0)
8527                 goto out_free_temp_buffer;
8528
8529         /* TODO: make the number of buffers hot pluggable with CPUs */
8530         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8531                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8532                 WARN_ON(1);
8533                 goto out_free_savedcmd;
8534         }
8535
8536         if (global_trace.buffer_disabled)
8537                 tracing_off();
8538
8539         if (trace_boot_clock) {
8540                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8541                 if (ret < 0)
8542                         pr_warn("Trace clock %s not defined, going back to default\n",
8543                                 trace_boot_clock);
8544         }
8545
8546         /*
8547          * register_tracer() might reference current_trace, so it
8548          * needs to be set before we register anything. This is
8549          * just a bootstrap of current_trace anyway.
8550          */
8551         global_trace.current_trace = &nop_trace;
8552
8553         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8554
8555         ftrace_init_global_array_ops(&global_trace);
8556
8557         init_trace_flags_index(&global_trace);
8558
8559         register_tracer(&nop_trace);
8560
8561         /* Function tracing may start here (via kernel command line) */
8562         init_function_trace();
8563
8564         /* All seems OK, enable tracing */
8565         tracing_disabled = 0;
8566
8567         atomic_notifier_chain_register(&panic_notifier_list,
8568                                        &trace_panic_notifier);
8569
8570         register_die_notifier(&trace_die_notifier);
8571
8572         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8573
8574         INIT_LIST_HEAD(&global_trace.systems);
8575         INIT_LIST_HEAD(&global_trace.events);
8576         INIT_LIST_HEAD(&global_trace.hist_vars);
8577         list_add(&global_trace.list, &ftrace_trace_arrays);
8578
8579         apply_trace_boot_options();
8580
8581         register_snapshot_cmd();
8582
8583         return 0;
8584
8585 out_free_savedcmd:
8586         free_saved_cmdlines_buffer(savedcmd);
8587 out_free_temp_buffer:
8588         ring_buffer_free(temp_buffer);
8589 out_rm_hp_state:
8590         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8591 out_free_cpumask:
8592         free_cpumask_var(global_trace.tracing_cpumask);
8593 out_free_buffer_mask:
8594         free_cpumask_var(tracing_buffer_mask);
8595 out:
8596         return ret;
8597 }
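
     /*
      * Note on ring_buf_size above: until a tracer is enabled or
      * buffer_size_kb is written, the per-cpu buffers stay at their minimal
      * size and buffer_size_kb reports something like "7 (expanded: 1408)",
      * i.e. the current size and the size it would grow to when expanded.
      */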
8598
8599 void __init early_trace_init(void)
8600 {
8601         if (tracepoint_printk) {
8602                 tracepoint_print_iter =
8603                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8604                 if (WARN_ON(!tracepoint_print_iter))
8605                         tracepoint_printk = 0;
8606                 else
8607                         static_key_enable(&tracepoint_printk_key.key);
8608         }
8609         tracer_alloc_buffers();
8610 }
8611
8612 void __init trace_init(void)
8613 {
8614         trace_event_init();
8615 }
8616
8617 __init static int clear_boot_tracer(void)
8618 {
8619         /*
8620          * The default bootup tracer name is stored in an init
8621          * section that is freed after boot. This function runs at
8622          * late_initcall time; if the boot tracer was never
8623          * registered by then, clear the pointer so a later
8624          * registration cannot access memory that is about to be freed.
8625          */
8626         if (!default_bootup_tracer)
8627                 return 0;
8628
8629         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8630                default_bootup_tracer);
8631         default_bootup_tracer = NULL;
8632
8633         return 0;
8634 }
8635
8636 fs_initcall(tracer_init_tracefs);
8637 late_initcall_sync(clear_boot_tracer);
8638
8639 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8640 __init static int tracing_set_default_clock(void)
8641 {
8642         /* sched_clock_stable() is determined in late_initcall */
8643         if (!trace_boot_clock && !sched_clock_stable()) {
8644                 printk(KERN_WARNING
8645                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8646                        "If you want to keep using the local clock, then add:\n"
8647                        "  \"trace_clock=local\"\n"
8648                        "on the kernel command line\n");
8649                 tracing_set_clock(&global_trace, "global");
8650         }
8651
8652         return 0;
8653 }
8654 late_initcall_sync(tracing_set_default_clock);
8655 #endif
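
     /*
      * The default clock can also be chosen explicitly, either at boot via
      * the trace_clock= parameter (which sets trace_boot_clock and skips the
      * switch above) or at run time through the trace_clock file:
      *
      *   trace_clock=global                          (kernel command line)
      *   echo global > /sys/kernel/tracing/trace_clock
      */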