kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring buffer, such as trace_printk(), could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set 1 if you want to dump buffers of all CPUs
113  * Set 2 if you want to dump the buffer of the CPU that triggered oops
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
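
/*
 * Illustrative sketch only (not part of this file): one way the chained
 * arrays described above could be walked. It assumes "start" points at the
 * head item of the first saved array and that struct trace_enum_map has the
 * usual enum_string/enum_value fields.
 */
#if 0
static void example_walk_enum_maps(union trace_enum_map_item *start)
{
	union trace_enum_map_item *ptr = start;

	while (ptr) {
		unsigned long i, len = ptr->head.length;

		/* the saved maps sit between the head item and the tail item */
		for (i = 1; i <= len; i++)
			pr_info("%s = %lu\n", ptr[i].map.enum_string,
				ptr[i].map.enum_value);

		/* the tail item chains to the next saved array, if any */
		ptr = ptr[len + 1].tail.next;
	}
}
#endif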
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
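
/*
 * For example (illustrative): ns2usecs(1499) == 1 and ns2usecs(1500) == 2,
 * i.e. adding 500 before the division rounds to the nearest microsecond.
 */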
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a link list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the link list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
369
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * If adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
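
/*
 * Illustrative sketch only (not part of this file): how hypothetical sched
 * fork/exit handlers could keep a pid list current with the helper above.
 * The handler names and the example_fork_exit_pids pointer are assumptions.
 */
#if 0
static struct trace_pid_list *example_fork_exit_pids;

static void example_sched_process_fork(void *data,
				       struct task_struct *self,
				       struct task_struct *task)
{
	/* the child is added only if the forking task is already listed */
	trace_filter_add_remove_task(example_fork_exit_pids, self, task);
}

static void example_sched_process_exit(void *data, struct task_struct *task)
{
	/* self == NULL means "remove @task from the list" */
	trace_filter_add_remove_task(example_fork_exit_pids, NULL, task);
}
#endif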
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (the actual pid plus one, so that zero can be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid+1 as we want to display pid of zero, but NULL would
416  * stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid already is +1 of the actual previous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid+1 as we want to display pid of zero, but NULL would
443  * stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
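
/*
 * Illustrative sketch only (not part of this file): wiring the three
 * helpers above into seq_file operations for a hypothetical pid-list file.
 * The example_seq_pids pointer is an assumption.
 */
#if 0
static struct trace_pid_list *example_seq_pids;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_seq_pids, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_seq_pids, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};
#endif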
476
477 /* 128 should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always create a new array. The write is an all-or-nothing
498          * operation: a new array is built whenever the user adds pids,
499          * and if the operation fails, the current list is left
500          * unmodified.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list)
504                 return -ENOMEM;
505
506         pid_list->pid_max = READ_ONCE(pid_max);
507
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 kfree(pid_list);
515                 return -ENOMEM;
516         }
517
518         if (filtered_pids) {
519                 /* copy the current bits to the new max */
520                 for_each_set_bit(pid, filtered_pids->pids,
521                                  filtered_pids->pid_max) {
522                         set_bit(pid, pid_list->pids);
523                         nr_pids++;
524                 }
525         }
526
527         while (cnt > 0) {
528
529                 pos = 0;
530
531                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
532                 if (ret < 0 || !trace_parser_loaded(&parser))
533                         break;
534
535                 read += ret;
536                 ubuf += ret;
537                 cnt -= ret;
538
539                 parser.buffer[parser.idx] = 0;
540
541                 ret = -EINVAL;
542                 if (kstrtoul(parser.buffer, 0, &val))
543                         break;
544                 if (val >= pid_list->pid_max)
545                         break;
546
547                 pid = (pid_t)val;
548
549                 set_bit(pid, pid_list->pids);
550                 nr_pids++;
551
552                 trace_parser_clear(&parser);
553                 ret = 0;
554         }
555         trace_parser_put(&parser);
556
557         if (ret < 0) {
558                 trace_free_pid_list(pid_list);
559                 return ret;
560         }
561
562         if (!nr_pids) {
563                 /* Cleared the list of pids */
564                 trace_free_pid_list(pid_list);
565                 read = ret;
566                 pid_list = NULL;
567         }
568
569         *new_pid_list = pid_list;
570
571         return read;
572 }
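
/*
 * Illustrative sketch only (not part of this file): a write handler for a
 * hypothetical pid-filter file feeding user input to trace_pid_write().
 * The example_write_pids pointer is an assumption, and the plain pointer
 * assignment stands in for whatever locking/RCU scheme a real caller uses
 * to publish the new list.
 */
#if 0
static struct trace_pid_list *example_write_pids;

static ssize_t example_pid_file_write(struct file *filp,
				      const char __user *ubuf,
				      size_t cnt, loff_t *ppos)
{
	struct trace_pid_list *new_list = NULL;
	struct trace_pid_list *old_list = example_write_pids;
	ssize_t ret;

	ret = trace_pid_write(old_list, &new_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	example_write_pids = new_list;	/* publish (synchronization elided) */
	if (old_list)
		trace_free_pid_list(old_list);

	if (ret > 0)
		*ppos += ret;
	return ret;
}
#endif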
573
574 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
575 {
576         u64 ts;
577
578         /* Early boot up does not have a buffer yet */
579         if (!buf->buffer)
580                 return trace_clock_local();
581
582         ts = ring_buffer_time_stamp(buf->buffer, cpu);
583         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
584
585         return ts;
586 }
587
588 cycle_t ftrace_now(int cpu)
589 {
590         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
591 }
592
593 /**
594  * tracing_is_enabled - Show if global_trace has been enabled
595  *
596  * Shows if the global trace has been enabled or not. It uses the
597  * mirror flag "buffer_disabled" to be used in fast paths such as for
598  * the irqsoff tracer. But it may be inaccurate due to races. If you
599  * need to know the accurate state, use tracing_is_on() which is a little
600  * slower, but accurate.
601  */
602 int tracing_is_enabled(void)
603 {
604         /*
605          * For quick access (irqsoff uses this in fast path), just
606          * return the mirror variable of the state of the ring buffer.
607          * It's a little racy, but we don't really care.
608          */
609         smp_rmb();
610         return !global_trace.buffer_disabled;
611 }
612
613 /*
614  * trace_buf_size is the size in bytes that is allocated
615  * for a buffer. Note, the number of bytes is always rounded
616  * to page size.
617  *
618  * This number is purposely set to a low number of 16384.
619  * If a dump on oops happens, it is much appreciated not to have to
620  * wait for all that output. In any case, this can be configured at
621  * both boot time and run time.
622  */
623 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
624
625 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
626
627 /* trace_types holds a linked list of available tracers. */
628 static struct tracer            *trace_types __read_mostly;
629
630 /*
631  * trace_types_lock is used to protect the trace_types list.
632  */
633 DEFINE_MUTEX(trace_types_lock);
634
635 /*
636  * serialize the access of the ring buffer
637  *
638  * The ring buffer serializes readers, but that is only low level protection.
639  * The validity of the events (as returned by ring_buffer_peek(), etc.)
640  * is not protected by the ring buffer.
641  *
642  * The content of events may become garbage if we allow other processes
643  * to consume these events concurrently:
644  *   A) the page of the consumed events may become a normal page
645  *      (not a reader page) in the ring buffer, and this page will be
646  *      rewritten by the event producer.
647  *   B) the page of the consumed events may become a page for splice_read,
648  *      and this page will be returned to the system.
649  *
650  * These primitives allow multiple processes to access different CPU
651  * ring buffers concurrently.
652  *
653  * These primitives don't distinguish read-only and read-consume access.
654  * Multiple read-only accesses are also serialized.
655  */
656
657 #ifdef CONFIG_SMP
658 static DECLARE_RWSEM(all_cpu_access_lock);
659 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
660
661 static inline void trace_access_lock(int cpu)
662 {
663         if (cpu == RING_BUFFER_ALL_CPUS) {
664                 /* gain it for accessing the whole ring buffer. */
665                 down_write(&all_cpu_access_lock);
666         } else {
667                 /* gain it for accessing a cpu ring buffer. */
668
669                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
670                 down_read(&all_cpu_access_lock);
671
672                 /* Secondly block other access to this @cpu ring buffer. */
673                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
674         }
675 }
676
677 static inline void trace_access_unlock(int cpu)
678 {
679         if (cpu == RING_BUFFER_ALL_CPUS) {
680                 up_write(&all_cpu_access_lock);
681         } else {
682                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
683                 up_read(&all_cpu_access_lock);
684         }
685 }
686
687 static inline void trace_access_lock_init(void)
688 {
689         int cpu;
690
691         for_each_possible_cpu(cpu)
692                 mutex_init(&per_cpu(cpu_access_lock, cpu));
693 }
694
695 #else
696
697 static DEFINE_MUTEX(access_lock);
698
699 static inline void trace_access_lock(int cpu)
700 {
701         (void)cpu;
702         mutex_lock(&access_lock);
703 }
704
705 static inline void trace_access_unlock(int cpu)
706 {
707         (void)cpu;
708         mutex_unlock(&access_lock);
709 }
710
711 static inline void trace_access_lock_init(void)
712 {
713 }
714
715 #endif
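
/*
 * Illustrative sketch only (not part of this file): the consumer-side
 * pattern the primitives above are meant for, reading one CPU's buffer
 * while holding the per-cpu access lock so its pages cannot be recycled
 * underneath the reader.
 */
#if 0
static void example_consume_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	trace_access_lock(cpu);
	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
		;	/* process the event here */
	trace_access_unlock(cpu);
}
#endif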
716
717 #ifdef CONFIG_STACKTRACE
718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
719                                  unsigned long flags,
720                                  int skip, int pc, struct pt_regs *regs);
721 static inline void ftrace_trace_stack(struct trace_array *tr,
722                                       struct ring_buffer *buffer,
723                                       unsigned long flags,
724                                       int skip, int pc, struct pt_regs *regs);
725
726 #else
727 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
728                                         unsigned long flags,
729                                         int skip, int pc, struct pt_regs *regs)
730 {
731 }
732 static inline void ftrace_trace_stack(struct trace_array *tr,
733                                       struct ring_buffer *buffer,
734                                       unsigned long flags,
735                                       int skip, int pc, struct pt_regs *regs)
736 {
737 }
738
739 #endif
740
741 static void tracer_tracing_on(struct trace_array *tr)
742 {
743         if (tr->trace_buffer.buffer)
744                 ring_buffer_record_on(tr->trace_buffer.buffer);
745         /*
746          * This flag is looked at when buffers haven't been allocated
747          * yet, or by some tracers (like irqsoff), that just want to
748          * know if the ring buffer has been disabled, but it can handle
749          * races where it gets disabled but we still do a record.
750          * As the check is in the fast path of the tracers, it is more
751          * important to be fast than accurate.
752          */
753         tr->buffer_disabled = 0;
754         /* Make the flag seen by readers */
755         smp_wmb();
756 }
757
758 /**
759  * tracing_on - enable tracing buffers
760  *
761  * This function enables tracing buffers that may have been
762  * disabled with tracing_off.
763  */
764 void tracing_on(void)
765 {
766         tracer_tracing_on(&global_trace);
767 }
768 EXPORT_SYMBOL_GPL(tracing_on);
769
770 /**
771  * __trace_puts - write a constant string into the trace buffer.
772  * @ip:    The address of the caller
773  * @str:   The constant string to write
774  * @size:  The size of the string.
775  */
776 int __trace_puts(unsigned long ip, const char *str, int size)
777 {
778         struct ring_buffer_event *event;
779         struct ring_buffer *buffer;
780         struct print_entry *entry;
781         unsigned long irq_flags;
782         int alloc;
783         int pc;
784
785         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
786                 return 0;
787
788         pc = preempt_count();
789
790         if (unlikely(tracing_selftest_running || tracing_disabled))
791                 return 0;
792
793         alloc = sizeof(*entry) + size + 2; /* possible \n added */
794
795         local_save_flags(irq_flags);
796         buffer = global_trace.trace_buffer.buffer;
797         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
798                                           irq_flags, pc);
799         if (!event)
800                 return 0;
801
802         entry = ring_buffer_event_data(event);
803         entry->ip = ip;
804
805         memcpy(&entry->buf, str, size);
806
807         /* Add a newline if necessary */
808         if (entry->buf[size - 1] != '\n') {
809                 entry->buf[size] = '\n';
810                 entry->buf[size + 1] = '\0';
811         } else
812                 entry->buf[size] = '\0';
813
814         __buffer_unlock_commit(buffer, event);
815         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
816
817         return size;
818 }
819 EXPORT_SYMBOL_GPL(__trace_puts);
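
/*
 * Illustrative note (not part of this file): callers normally go through
 * the trace_puts() macro in the kernel headers, which picks __trace_bputs()
 * for constant strings and __trace_puts() otherwise. A minimal sketch of a
 * direct call, with a hypothetical caller:
 */
#if 0
static void example_mark_point(void)
{
	const char msg[] = "hit the slow path\n";

	__trace_puts(_THIS_IP_, msg, strlen(msg));
}
#endif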
820
821 /**
822  * __trace_bputs - write the pointer to a constant string into trace buffer
823  * @ip:    The address of the caller
824  * @str:   The constant string to write to the buffer to
825  */
826 int __trace_bputs(unsigned long ip, const char *str)
827 {
828         struct ring_buffer_event *event;
829         struct ring_buffer *buffer;
830         struct bputs_entry *entry;
831         unsigned long irq_flags;
832         int size = sizeof(struct bputs_entry);
833         int pc;
834
835         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
836                 return 0;
837
838         pc = preempt_count();
839
840         if (unlikely(tracing_selftest_running || tracing_disabled))
841                 return 0;
842
843         local_save_flags(irq_flags);
844         buffer = global_trace.trace_buffer.buffer;
845         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
846                                           irq_flags, pc);
847         if (!event)
848                 return 0;
849
850         entry = ring_buffer_event_data(event);
851         entry->ip                       = ip;
852         entry->str                      = str;
853
854         __buffer_unlock_commit(buffer, event);
855         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
856
857         return 1;
858 }
859 EXPORT_SYMBOL_GPL(__trace_bputs);
860
861 #ifdef CONFIG_TRACER_SNAPSHOT
862 /**
863  * tracing_snapshot - take a snapshot of the current buffer.
864  *
865  * This causes a swap between the snapshot buffer and the current live
866  * tracing buffer. You can use this to take snapshots of the live
867  * trace when some condition is triggered, but continue to trace.
868  *
869  * Note, make sure to allocate the snapshot with either
870  * tracing_snapshot_alloc(), or by doing it manually
871  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
872  *
873  * If the snapshot buffer is not allocated, it will stop tracing.
874  * Basically making a permanent snapshot.
875  */
876 void tracing_snapshot(void)
877 {
878         struct trace_array *tr = &global_trace;
879         struct tracer *tracer = tr->current_trace;
880         unsigned long flags;
881
882         if (in_nmi()) {
883                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
884                 internal_trace_puts("*** snapshot is being ignored        ***\n");
885                 return;
886         }
887
888         if (!tr->allocated_snapshot) {
889                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
890                 internal_trace_puts("*** stopping trace here!   ***\n");
891                 tracing_off();
892                 return;
893         }
894
895         /* Note, snapshot can not be used when the tracer uses it */
896         if (tracer->use_max_tr) {
897                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
898                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
899                 return;
900         }
901
902         local_irq_save(flags);
903         update_max_tr(tr, current, smp_processor_id());
904         local_irq_restore(flags);
905 }
906 EXPORT_SYMBOL_GPL(tracing_snapshot);
907
908 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
909                                         struct trace_buffer *size_buf, int cpu_id);
910 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
911
912 static int alloc_snapshot(struct trace_array *tr)
913 {
914         int ret;
915
916         if (!tr->allocated_snapshot) {
917
918                 /* allocate spare buffer */
919                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
920                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
921                 if (ret < 0)
922                         return ret;
923
924                 tr->allocated_snapshot = true;
925         }
926
927         return 0;
928 }
929
930 static void free_snapshot(struct trace_array *tr)
931 {
932         /*
933  * We don't free the ring buffer; instead, we resize it because
934  * the max_tr ring buffer has some state (e.g. ring->clock) and
935  * we want to preserve it.
936          */
937         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
938         set_buffer_entries(&tr->max_buffer, 1);
939         tracing_reset_online_cpus(&tr->max_buffer);
940         tr->allocated_snapshot = false;
941 }
942
943 /**
944  * tracing_alloc_snapshot - allocate snapshot buffer.
945  *
946  * This only allocates the snapshot buffer if it isn't already
947  * allocated - it doesn't also take a snapshot.
948  *
949  * This is meant to be used in cases where the snapshot buffer needs
950  * to be set up for events that can't sleep but need to be able to
951  * trigger a snapshot.
952  */
953 int tracing_alloc_snapshot(void)
954 {
955         struct trace_array *tr = &global_trace;
956         int ret;
957
958         ret = alloc_snapshot(tr);
959         WARN_ON(ret < 0);
960
961         return ret;
962 }
963 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
964
965 /**
966  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
967  *
968  * This is similar to tracing_snapshot(), but it will allocate the
969  * snapshot buffer if it isn't already allocated. Use this only
970  * where it is safe to sleep, as the allocation may sleep.
971  *
972  * This causes a swap between the snapshot buffer and the current live
973  * tracing buffer. You can use this to take snapshots of the live
974  * trace when some condition is triggered, but continue to trace.
975  */
976 void tracing_snapshot_alloc(void)
977 {
978         int ret;
979
980         ret = tracing_alloc_snapshot();
981         if (ret < 0)
982                 return;
983
984         tracing_snapshot();
985 }
986 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
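
/*
 * Illustrative sketch only (not part of this file): a hypothetical debug
 * helper that allocates the snapshot buffer once, where sleeping is allowed,
 * and later grabs a snapshot when some condition of interest fires.
 */
#if 0
static int __init example_snapshot_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep */
}

static void example_condition_hit(void)
{
	/* safe even in atomic context once the buffer is allocated */
	tracing_snapshot();
}
#endif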
987 #else
988 void tracing_snapshot(void)
989 {
990         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
991 }
992 EXPORT_SYMBOL_GPL(tracing_snapshot);
993 int tracing_alloc_snapshot(void)
994 {
995         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
996         return -ENODEV;
997 }
998 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
999 void tracing_snapshot_alloc(void)
1000 {
1001         /* Give warning */
1002         tracing_snapshot();
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1005 #endif /* CONFIG_TRACER_SNAPSHOT */
1006
1007 static void tracer_tracing_off(struct trace_array *tr)
1008 {
1009         if (tr->trace_buffer.buffer)
1010                 ring_buffer_record_off(tr->trace_buffer.buffer);
1011         /*
1012          * This flag is looked at when buffers haven't been allocated
1013          * yet, or by some tracers (like irqsoff), that just want to
1014          * know if the ring buffer has been disabled, but it can handle
1015          * races where it gets disabled but we still do a record.
1016          * As the check is in the fast path of the tracers, it is more
1017          * important to be fast than accurate.
1018          */
1019         tr->buffer_disabled = 1;
1020         /* Make the flag seen by readers */
1021         smp_wmb();
1022 }
1023
1024 /**
1025  * tracing_off - turn off tracing buffers
1026  *
1027  * This function stops the tracing buffers from recording data.
1028  * It does not disable any overhead the tracers themselves may
1029  * be causing. This function simply causes all recording to
1030  * the ring buffers to fail.
1031  */
1032 void tracing_off(void)
1033 {
1034         tracer_tracing_off(&global_trace);
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_off);
1037
1038 void disable_trace_on_warning(void)
1039 {
1040         if (__disable_trace_on_warning)
1041                 tracing_off();
1042 }
1043
1044 /**
1045  * tracer_tracing_is_on - show the real state of the ring buffer
1046  * @tr: the trace array to check
1047  *
1048  * Shows the real state of the ring buffer: whether it is enabled or not.
1049  */
1050 int tracer_tracing_is_on(struct trace_array *tr)
1051 {
1052         if (tr->trace_buffer.buffer)
1053                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1054         return !tr->buffer_disabled;
1055 }
1056
1057 /**
1058  * tracing_is_on - show state of ring buffers enabled
1059  */
1060 int tracing_is_on(void)
1061 {
1062         return tracer_tracing_is_on(&global_trace);
1063 }
1064 EXPORT_SYMBOL_GPL(tracing_is_on);
1065
1066 static int __init set_buf_size(char *str)
1067 {
1068         unsigned long buf_size;
1069
1070         if (!str)
1071                 return 0;
1072         buf_size = memparse(str, &str);
1073         /* nr_entries can not be zero */
1074         if (buf_size == 0)
1075                 return 0;
1076         trace_buf_size = buf_size;
1077         return 1;
1078 }
1079 __setup("trace_buf_size=", set_buf_size);
1080
1081 static int __init set_tracing_thresh(char *str)
1082 {
1083         unsigned long threshold;
1084         int ret;
1085
1086         if (!str)
1087                 return 0;
1088         ret = kstrtoul(str, 0, &threshold);
1089         if (ret < 0)
1090                 return 0;
1091         tracing_thresh = threshold * 1000;
1092         return 1;
1093 }
1094 __setup("tracing_thresh=", set_tracing_thresh);
1095
1096 unsigned long nsecs_to_usecs(unsigned long nsecs)
1097 {
1098         return nsecs / 1000;
1099 }
1100
1101 /*
1102  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1103  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1104  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1105  * of strings in the order that the enums were defined.
1106  */
1107 #undef C
1108 #define C(a, b) b
1109
1110 /* These must match the bit positions in trace_iterator_flags */
1111 static const char *trace_options[] = {
1112         TRACE_FLAGS
1113         NULL
1114 };
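
/*
 * Illustrative sketch only (not part of this file) of the "X macro" trick
 * used above, with made-up flag names: the same C()-based list expands once
 * to enum bit names and once to the matching string table, keeping the two
 * in the same order automatically.
 */
#if 0
#define EXAMPLE_FLAGS				\
	C(PRINT_PARENT,	"print-parent"),	\
	C(SYM_OFFSET,	"sym-offset"),

/* in a header: expand each entry to an enum bit name */
#undef C
#define C(a, b) EXAMPLE_ITER_##a##_BIT
enum example_bits { EXAMPLE_FLAGS EXAMPLE_ITER_LAST_BIT };

/* here: expand each entry to its string, in the same order */
#undef C
#define C(a, b) b
static const char *example_options[] = { EXAMPLE_FLAGS NULL };
#endif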
1115
1116 static struct {
1117         u64 (*func)(void);
1118         const char *name;
1119         int in_ns;              /* is this clock in nanoseconds? */
1120 } trace_clocks[] = {
1121         { trace_clock_local,            "local",        1 },
1122         { trace_clock_global,           "global",       1 },
1123         { trace_clock_counter,          "counter",      0 },
1124         { trace_clock_jiffies,          "uptime",       0 },
1125         { trace_clock,                  "perf",         1 },
1126         { ktime_get_mono_fast_ns,       "mono",         1 },
1127         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1128         ARCH_TRACE_CLOCKS
1129 };
1130
1131 /*
1132  * trace_parser_get_init - gets the buffer for trace parser
1133  */
1134 int trace_parser_get_init(struct trace_parser *parser, int size)
1135 {
1136         memset(parser, 0, sizeof(*parser));
1137
1138         parser->buffer = kmalloc(size, GFP_KERNEL);
1139         if (!parser->buffer)
1140                 return 1;
1141
1142         parser->size = size;
1143         return 0;
1144 }
1145
1146 /*
1147  * trace_parser_put - frees the buffer for trace parser
1148  */
1149 void trace_parser_put(struct trace_parser *parser)
1150 {
1151         kfree(parser->buffer);
1152 }
1153
1154 /*
1155  * trace_get_user - reads the user input string separated by space
1156  * (matched by isspace(ch))
1157  *
1158  * For each string found the 'struct trace_parser' is updated,
1159  * and the function returns.
1160  *
1161  * Returns number of bytes read.
1162  *
1163  * See kernel/trace/trace.h for 'struct trace_parser' details.
1164  */
1165 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1166         size_t cnt, loff_t *ppos)
1167 {
1168         char ch;
1169         size_t read = 0;
1170         ssize_t ret;
1171
1172         if (!*ppos)
1173                 trace_parser_clear(parser);
1174
1175         ret = get_user(ch, ubuf++);
1176         if (ret)
1177                 goto out;
1178
1179         read++;
1180         cnt--;
1181
1182         /*
1183          * The parser is not finished with the last write,
1184          * continue reading the user input without skipping spaces.
1185          */
1186         if (!parser->cont) {
1187                 /* skip white space */
1188                 while (cnt && isspace(ch)) {
1189                         ret = get_user(ch, ubuf++);
1190                         if (ret)
1191                                 goto out;
1192                         read++;
1193                         cnt--;
1194                 }
1195
1196                 /* only spaces were written */
1197                 if (isspace(ch)) {
1198                         *ppos += read;
1199                         ret = read;
1200                         goto out;
1201                 }
1202
1203                 parser->idx = 0;
1204         }
1205
1206         /* read the non-space input */
1207         while (cnt && !isspace(ch)) {
1208                 if (parser->idx < parser->size - 1)
1209                         parser->buffer[parser->idx++] = ch;
1210                 else {
1211                         ret = -EINVAL;
1212                         goto out;
1213                 }
1214                 ret = get_user(ch, ubuf++);
1215                 if (ret)
1216                         goto out;
1217                 read++;
1218                 cnt--;
1219         }
1220
1221         /* We either got finished input or we have to wait for another call. */
1222         if (isspace(ch)) {
1223                 parser->buffer[parser->idx] = 0;
1224                 parser->cont = false;
1225         } else if (parser->idx < parser->size - 1) {
1226                 parser->cont = true;
1227                 parser->buffer[parser->idx++] = ch;
1228         } else {
1229                 ret = -EINVAL;
1230                 goto out;
1231         }
1232
1233         *ppos += read;
1234         ret = read;
1235
1236 out:
1237         return ret;
1238 }
1239
1240 /* TODO add a seq_buf_to_buffer() */
1241 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1242 {
1243         int len;
1244
1245         if (trace_seq_used(s) <= s->seq.readpos)
1246                 return -EBUSY;
1247
1248         len = trace_seq_used(s) - s->seq.readpos;
1249         if (cnt > len)
1250                 cnt = len;
1251         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1252
1253         s->seq.readpos += cnt;
1254         return cnt;
1255 }
1256
1257 unsigned long __read_mostly     tracing_thresh;
1258
1259 #ifdef CONFIG_TRACER_MAX_TRACE
1260 /*
1261  * Copy the new maximum trace into the separate maximum-trace
1262  * structure. (this way the maximum trace is permanently saved,
1263  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1264  */
1265 static void
1266 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1267 {
1268         struct trace_buffer *trace_buf = &tr->trace_buffer;
1269         struct trace_buffer *max_buf = &tr->max_buffer;
1270         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1271         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1272
1273         max_buf->cpu = cpu;
1274         max_buf->time_start = data->preempt_timestamp;
1275
1276         max_data->saved_latency = tr->max_latency;
1277         max_data->critical_start = data->critical_start;
1278         max_data->critical_end = data->critical_end;
1279
1280         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1281         max_data->pid = tsk->pid;
1282         /*
1283          * If tsk == current, then use current_uid(), as that does not use
1284          * RCU. The irq tracer can be called out of RCU scope.
1285          */
1286         if (tsk == current)
1287                 max_data->uid = current_uid();
1288         else
1289                 max_data->uid = task_uid(tsk);
1290
1291         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1292         max_data->policy = tsk->policy;
1293         max_data->rt_priority = tsk->rt_priority;
1294
1295         /* record this task's comm */
1296         tracing_record_cmdline(tsk);
1297 }
1298
1299 /**
1300  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1301  * @tr: tracer
1302  * @tsk: the task with the latency
1303  * @cpu: The cpu that initiated the trace.
1304  *
1305  * Flip the buffers between the @tr and the max_tr and record information
1306  * about which task was the cause of this latency.
1307  */
1308 void
1309 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct ring_buffer *buf;
1312
1313         if (tr->stop_count)
1314                 return;
1315
1316         WARN_ON_ONCE(!irqs_disabled());
1317
1318         if (!tr->allocated_snapshot) {
1319                 /* Only the nop tracer should hit this when disabling */
1320                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1321                 return;
1322         }
1323
1324         arch_spin_lock(&tr->max_lock);
1325
1326         buf = tr->trace_buffer.buffer;
1327         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1328         tr->max_buffer.buffer = buf;
1329
1330         __update_max_tr(tr, tsk, cpu);
1331         arch_spin_unlock(&tr->max_lock);
1332 }
1333
1334 /**
1335  * update_max_tr_single - only copy one trace over, and reset the rest
1336  * @tr: tracer
1337  * @tsk: task with the latency
1338  * @cpu: the cpu of the buffer to copy.
1339  *
1340  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1341  */
1342 void
1343 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1344 {
1345         int ret;
1346
1347         if (tr->stop_count)
1348                 return;
1349
1350         WARN_ON_ONCE(!irqs_disabled());
1351         if (!tr->allocated_snapshot) {
1352                 /* Only the nop tracer should hit this when disabling */
1353                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1354                 return;
1355         }
1356
1357         arch_spin_lock(&tr->max_lock);
1358
1359         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1360
1361         if (ret == -EBUSY) {
1362                 /*
1363                  * We failed to swap the buffer due to a commit taking
1364                  * place on this CPU. We fail to record, but we reset
1365                  * the max trace buffer (no one writes directly to it)
1366                  * and flag that it failed.
1367                  */
1368                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1369                         "Failed to swap buffers due to commit in progress\n");
1370         }
1371
1372         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1373
1374         __update_max_tr(tr, tsk, cpu);
1375         arch_spin_unlock(&tr->max_lock);
1376 }
1377 #endif /* CONFIG_TRACER_MAX_TRACE */
1378
1379 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1380 {
1381         /* Iterators are static, they should be filled or empty */
1382         if (trace_buffer_iter(iter, iter->cpu_file))
1383                 return 0;
1384
1385         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1386                                 full);
1387 }
1388
1389 #ifdef CONFIG_FTRACE_STARTUP_TEST
1390 static int run_tracer_selftest(struct tracer *type)
1391 {
1392         struct trace_array *tr = &global_trace;
1393         struct tracer *saved_tracer = tr->current_trace;
1394         int ret;
1395
1396         if (!type->selftest || tracing_selftest_disabled)
1397                 return 0;
1398
1399         /*
1400          * Run a selftest on this tracer.
1401          * Here we reset the trace buffer, and set the current
1402          * tracer to be this tracer. The tracer can then run some
1403          * internal tracing to verify that everything is in order.
1404          * If we fail, we do not register this tracer.
1405          */
1406         tracing_reset_online_cpus(&tr->trace_buffer);
1407
1408         tr->current_trace = type;
1409
1410 #ifdef CONFIG_TRACER_MAX_TRACE
1411         if (type->use_max_tr) {
1412                 /* If we expanded the buffers, make sure the max is expanded too */
1413                 if (ring_buffer_expanded)
1414                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1415                                            RING_BUFFER_ALL_CPUS);
1416                 tr->allocated_snapshot = true;
1417         }
1418 #endif
1419
1420         /* the test is responsible for initializing and enabling */
1421         pr_info("Testing tracer %s: ", type->name);
1422         ret = type->selftest(type, tr);
1423         /* the test is responsible for resetting too */
1424         tr->current_trace = saved_tracer;
1425         if (ret) {
1426                 printk(KERN_CONT "FAILED!\n");
1427                 /* Add the warning after printing 'FAILED' */
1428                 WARN_ON(1);
1429                 return -1;
1430         }
1431         /* Only reset on passing, to avoid touching corrupted buffers */
1432         tracing_reset_online_cpus(&tr->trace_buffer);
1433
1434 #ifdef CONFIG_TRACER_MAX_TRACE
1435         if (type->use_max_tr) {
1436                 tr->allocated_snapshot = false;
1437
1438                 /* Shrink the max buffer again */
1439                 if (ring_buffer_expanded)
1440                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1441                                            RING_BUFFER_ALL_CPUS);
1442         }
1443 #endif
1444
1445         printk(KERN_CONT "PASSED\n");
1446         return 0;
1447 }
1448 #else
1449 static inline int run_tracer_selftest(struct tracer *type)
1450 {
1451         return 0;
1452 }
1453 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1454
1455 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1456
1457 static void __init apply_trace_boot_options(void);
1458
1459 /**
1460  * register_tracer - register a tracer with the ftrace system.
1461  * @type - the plugin for the tracer
1462  *
1463  * Register a new plugin tracer.
1464  */
1465 int __init register_tracer(struct tracer *type)
1466 {
1467         struct tracer *t;
1468         int ret = 0;
1469
1470         if (!type->name) {
1471                 pr_info("Tracer must have a name\n");
1472                 return -1;
1473         }
1474
1475         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1476                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1477                 return -1;
1478         }
1479
1480         mutex_lock(&trace_types_lock);
1481
1482         tracing_selftest_running = true;
1483
1484         for (t = trace_types; t; t = t->next) {
1485                 if (strcmp(type->name, t->name) == 0) {
1486                         /* already found */
1487                         pr_info("Tracer %s already registered\n",
1488                                 type->name);
1489                         ret = -1;
1490                         goto out;
1491                 }
1492         }
1493
1494         if (!type->set_flag)
1495                 type->set_flag = &dummy_set_flag;
1496         if (!type->flags) {
1497                 /* allocate a dummy tracer_flags */
1498                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1499                 if (!type->flags) {
1500                         ret = -ENOMEM;
1501                         goto out;
1502                 }
1503                 type->flags->val = 0;
1504                 type->flags->opts = dummy_tracer_opt;
1505         } else
1506                 if (!type->flags->opts)
1507                         type->flags->opts = dummy_tracer_opt;
1508
1509         /* store the tracer for __set_tracer_option */
1510         type->flags->trace = type;
1511
1512         ret = run_tracer_selftest(type);
1513         if (ret < 0)
1514                 goto out;
1515
1516         type->next = trace_types;
1517         trace_types = type;
1518         add_tracer_options(&global_trace, type);
1519
1520  out:
1521         tracing_selftest_running = false;
1522         mutex_unlock(&trace_types_lock);
1523
1524         if (ret || !default_bootup_tracer)
1525                 goto out_unlock;
1526
1527         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1528                 goto out_unlock;
1529
1530         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1531         /* Do we want this tracer to start on bootup? */
1532         tracing_set_tracer(&global_trace, type->name);
1533         default_bootup_tracer = NULL;
1534
1535         apply_trace_boot_options();
1536
1537         /* disable other selftests, since this will break them. */
1538         tracing_selftest_disabled = true;
1539 #ifdef CONFIG_FTRACE_STARTUP_TEST
1540         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1541                type->name);
1542 #endif
1543
1544  out_unlock:
1545         return ret;
1546 }
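
/*
 * Illustrative sketch only (not part of this file): the minimal shape of a
 * plugin tracer registered through register_tracer(). The tracer name and
 * the stub callbacks are hypothetical.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif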
1547
1548 void tracing_reset(struct trace_buffer *buf, int cpu)
1549 {
1550         struct ring_buffer *buffer = buf->buffer;
1551
1552         if (!buffer)
1553                 return;
1554
1555         ring_buffer_record_disable(buffer);
1556
1557         /* Make sure all commits have finished */
1558         synchronize_sched();
1559         ring_buffer_reset_cpu(buffer, cpu);
1560
1561         ring_buffer_record_enable(buffer);
1562 }
1563
1564 void tracing_reset_online_cpus(struct trace_buffer *buf)
1565 {
1566         struct ring_buffer *buffer = buf->buffer;
1567         int cpu;
1568
1569         if (!buffer)
1570                 return;
1571
1572         ring_buffer_record_disable(buffer);
1573
1574         /* Make sure all commits have finished */
1575         synchronize_sched();
1576
1577         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1578
1579         for_each_online_cpu(cpu)
1580                 ring_buffer_reset_cpu(buffer, cpu);
1581
1582         ring_buffer_record_enable(buffer);
1583 }
1584
1585 /* Must have trace_types_lock held */
1586 void tracing_reset_all_online_cpus(void)
1587 {
1588         struct trace_array *tr;
1589
1590         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1591                 tracing_reset_online_cpus(&tr->trace_buffer);
1592 #ifdef CONFIG_TRACER_MAX_TRACE
1593                 tracing_reset_online_cpus(&tr->max_buffer);
1594 #endif
1595         }
1596 }
1597
1598 #define SAVED_CMDLINES_DEFAULT 128
1599 #define NO_CMDLINE_MAP UINT_MAX
1600 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1601 struct saved_cmdlines_buffer {
1602         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1603         unsigned *map_cmdline_to_pid;
1604         unsigned cmdline_num;
1605         int cmdline_idx;
1606         char *saved_cmdlines;
1607 };
1608 static struct saved_cmdlines_buffer *savedcmd;
1609
1610 /* temporarily disable recording */
1611 static atomic_t trace_record_cmdline_disabled __read_mostly;
1612
1613 static inline char *get_saved_cmdlines(int idx)
1614 {
1615         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1616 }
1617
1618 static inline void set_cmdline(int idx, const char *cmdline)
1619 {
1620         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1621 }
1622
1623 static int allocate_cmdlines_buffer(unsigned int val,
1624                                     struct saved_cmdlines_buffer *s)
1625 {
1626         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1627                                         GFP_KERNEL);
1628         if (!s->map_cmdline_to_pid)
1629                 return -ENOMEM;
1630
1631         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1632         if (!s->saved_cmdlines) {
1633                 kfree(s->map_cmdline_to_pid);
1634                 return -ENOMEM;
1635         }
1636
1637         s->cmdline_idx = 0;
1638         s->cmdline_num = val;
1639         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1640                sizeof(s->map_pid_to_cmdline));
1641         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1642                val * sizeof(*s->map_cmdline_to_pid));
1643
1644         return 0;
1645 }
1646
1647 static int trace_create_savedcmd(void)
1648 {
1649         int ret;
1650
1651         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1652         if (!savedcmd)
1653                 return -ENOMEM;
1654
1655         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1656         if (ret < 0) {
1657                 kfree(savedcmd);
1658                 savedcmd = NULL;
1659                 return -ENOMEM;
1660         }
1661
1662         return 0;
1663 }
1664
1665 int is_tracing_stopped(void)
1666 {
1667         return global_trace.stop_count;
1668 }
1669
1670 /**
1671  * tracing_start - quick start of the tracer
1672  *
1673  * If tracing is enabled but was stopped by tracing_stop,
1674  * this will start the tracer back up.
1675  */
1676 void tracing_start(void)
1677 {
1678         struct ring_buffer *buffer;
1679         unsigned long flags;
1680
1681         if (tracing_disabled)
1682                 return;
1683
1684         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1685         if (--global_trace.stop_count) {
1686                 if (global_trace.stop_count < 0) {
1687                         /* Someone screwed up their debugging */
1688                         WARN_ON_ONCE(1);
1689                         global_trace.stop_count = 0;
1690                 }
1691                 goto out;
1692         }
1693
1694         /* Prevent the buffers from switching */
1695         arch_spin_lock(&global_trace.max_lock);
1696
1697         buffer = global_trace.trace_buffer.buffer;
1698         if (buffer)
1699                 ring_buffer_record_enable(buffer);
1700
1701 #ifdef CONFIG_TRACER_MAX_TRACE
1702         buffer = global_trace.max_buffer.buffer;
1703         if (buffer)
1704                 ring_buffer_record_enable(buffer);
1705 #endif
1706
1707         arch_spin_unlock(&global_trace.max_lock);
1708
1709  out:
1710         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1711 }
1712
1713 static void tracing_start_tr(struct trace_array *tr)
1714 {
1715         struct ring_buffer *buffer;
1716         unsigned long flags;
1717
1718         if (tracing_disabled)
1719                 return;
1720
1721         /* If global, we need to also start the max tracer */
1722         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1723                 return tracing_start();
1724
1725         raw_spin_lock_irqsave(&tr->start_lock, flags);
1726
1727         if (--tr->stop_count) {
1728                 if (tr->stop_count < 0) {
1729                         /* Someone screwed up their debugging */
1730                         WARN_ON_ONCE(1);
1731                         tr->stop_count = 0;
1732                 }
1733                 goto out;
1734         }
1735
1736         buffer = tr->trace_buffer.buffer;
1737         if (buffer)
1738                 ring_buffer_record_enable(buffer);
1739
1740  out:
1741         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1742 }
1743
1744 /**
1745  * tracing_stop - quick stop of the tracer
1746  *
1747  * Light weight way to stop tracing. Use in conjunction with
1748  * tracing_start.
1749  */
1750 void tracing_stop(void)
1751 {
1752         struct ring_buffer *buffer;
1753         unsigned long flags;
1754
1755         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1756         if (global_trace.stop_count++)
1757                 goto out;
1758
1759         /* Prevent the buffers from switching */
1760         arch_spin_lock(&global_trace.max_lock);
1761
1762         buffer = global_trace.trace_buffer.buffer;
1763         if (buffer)
1764                 ring_buffer_record_disable(buffer);
1765
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767         buffer = global_trace.max_buffer.buffer;
1768         if (buffer)
1769                 ring_buffer_record_disable(buffer);
1770 #endif
1771
1772         arch_spin_unlock(&global_trace.max_lock);
1773
1774  out:
1775         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1776 }
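/*
 * The stop_count above makes tracing_stop()/tracing_start() nest; a
 * rough usage sketch (illustrative only):
 *
 *	tracing_stop();		stop_count 0 -> 1, recording disabled
 *	tracing_stop();		stop_count 1 -> 2, stays disabled
 *	tracing_start();	stop_count 2 -> 1, stays disabled
 *	tracing_start();	stop_count 1 -> 0, recording re-enabled
 *
 * Only the outermost stop/start pair actually touches the ring buffers.
 */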
1777
1778 static void tracing_stop_tr(struct trace_array *tr)
1779 {
1780         struct ring_buffer *buffer;
1781         unsigned long flags;
1782
1783         /* If global, we need to also stop the max tracer */
1784         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1785                 return tracing_stop();
1786
1787         raw_spin_lock_irqsave(&tr->start_lock, flags);
1788         if (tr->stop_count++)
1789                 goto out;
1790
1791         buffer = tr->trace_buffer.buffer;
1792         if (buffer)
1793                 ring_buffer_record_disable(buffer);
1794
1795  out:
1796         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1797 }
1798
1799 void trace_stop_cmdline_recording(void);
1800
1801 static int trace_save_cmdline(struct task_struct *tsk)
1802 {
1803         unsigned pid, idx;
1804
1805         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1806                 return 0;
1807
1808         /*
1809          * It's not the end of the world if we don't get
1810          * the lock, but we also don't want to spin
1811          * nor do we want to disable interrupts,
1812          * so if we miss here, then better luck next time.
1813          */
1814         if (!arch_spin_trylock(&trace_cmdline_lock))
1815                 return 0;
1816
1817         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1818         if (idx == NO_CMDLINE_MAP) {
1819                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1820
1821                 /*
1822                  * Check whether the cmdline buffer at idx has a pid
1823                  * mapped. We are going to overwrite that entry so we
1824                  * need to clear the map_pid_to_cmdline. Otherwise we
1825                  * would read the new comm for the old pid.
1826                  */
1827                 pid = savedcmd->map_cmdline_to_pid[idx];
1828                 if (pid != NO_CMDLINE_MAP)
1829                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1830
1831                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1832                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1833
1834                 savedcmd->cmdline_idx = idx;
1835         }
1836
1837         set_cmdline(idx, tsk->comm);
1838
1839         arch_spin_unlock(&trace_cmdline_lock);
1840
1841         return 1;
1842 }
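/*
 * Rough walk-through of the eviction in trace_save_cmdline() (values are
 * hypothetical): if pid 1234 has no slot yet and cmdline_idx is 6, slot 7
 * is claimed next.  Whatever pid previously owned slot 7 (say 99) gets
 * map_pid_to_cmdline[99] reset to NO_CMDLINE_MAP, so a later lookup for
 * pid 99 falls back to "<...>" instead of showing pid 1234's comm.
 */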
1843
1844 static void __trace_find_cmdline(int pid, char comm[])
1845 {
1846         unsigned map;
1847
1848         if (!pid) {
1849                 strcpy(comm, "<idle>");
1850                 return;
1851         }
1852
1853         if (WARN_ON_ONCE(pid < 0)) {
1854                 strcpy(comm, "<XXX>");
1855                 return;
1856         }
1857
1858         if (pid > PID_MAX_DEFAULT) {
1859                 strcpy(comm, "<...>");
1860                 return;
1861         }
1862
1863         map = savedcmd->map_pid_to_cmdline[pid];
1864         if (map != NO_CMDLINE_MAP)
1865                 strcpy(comm, get_saved_cmdlines(map));
1866         else
1867                 strcpy(comm, "<...>");
1868 }
1869
1870 void trace_find_cmdline(int pid, char comm[])
1871 {
1872         preempt_disable();
1873         arch_spin_lock(&trace_cmdline_lock);
1874
1875         __trace_find_cmdline(pid, comm);
1876
1877         arch_spin_unlock(&trace_cmdline_lock);
1878         preempt_enable();
1879 }
1880
1881 void tracing_record_cmdline(struct task_struct *tsk)
1882 {
1883         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1884                 return;
1885
1886         if (!__this_cpu_read(trace_cmdline_save))
1887                 return;
1888
1889         if (trace_save_cmdline(tsk))
1890                 __this_cpu_write(trace_cmdline_save, false);
1891 }
1892
1893 void
1894 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1895                              int pc)
1896 {
1897         struct task_struct *tsk = current;
1898
1899         entry->preempt_count            = pc & 0xff;
1900         entry->pid                      = (tsk) ? tsk->pid : 0;
1901         entry->flags =
1902 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1903                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1904 #else
1905                 TRACE_FLAG_IRQS_NOSUPPORT |
1906 #endif
1907                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1908                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1909                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1910                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1911                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1912 }
1913 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
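/*
 * Example of the packing done by tracing_generic_entry_update() (assuming
 * CONFIG_TRACE_IRQFLAGS_SUPPORT and no pending reschedule): an event
 * recorded from softirq context with interrupts disabled and one level of
 * preempt_disable() ends up roughly as
 *
 *	entry->preempt_count == 1
 *	entry->flags == TRACE_FLAG_IRQS_OFF | TRACE_FLAG_SOFTIRQ
 *
 * i.e. only the low byte of the preemption count is kept and the context
 * is condensed into the flags bitmask.
 */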
1914
1915 static __always_inline void
1916 trace_event_setup(struct ring_buffer_event *event,
1917                   int type, unsigned long flags, int pc)
1918 {
1919         struct trace_entry *ent = ring_buffer_event_data(event);
1920
1921         tracing_generic_entry_update(ent, flags, pc);
1922         ent->type = type;
1923 }
1924
1925 struct ring_buffer_event *
1926 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1927                           int type,
1928                           unsigned long len,
1929                           unsigned long flags, int pc)
1930 {
1931         struct ring_buffer_event *event;
1932
1933         event = ring_buffer_lock_reserve(buffer, len);
1934         if (event != NULL)
1935                 trace_event_setup(event, type, flags, pc);
1936
1937         return event;
1938 }
1939
1940 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1941 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1942 static int trace_buffered_event_ref;
1943
1944 /**
1945  * trace_buffered_event_enable - enable buffering events
1946  *
1947  * When events are being filtered, it is quicker to use a temporary
1948  * buffer to write the event data into if there's a likely chance
1949  * that it will not be committed. The discard of the ring buffer
1950  * is not as fast as committing, and is much slower than copying
1951  * a commit.
1952  *
1953  * When an event is to be filtered, allocate per cpu buffers to
1954  * write the event data into. If the event is filtered and discarded,
1955  * it is simply dropped; otherwise, the entire data is committed
1956  * in one shot.
1957  */
1958 void trace_buffered_event_enable(void)
1959 {
1960         struct ring_buffer_event *event;
1961         struct page *page;
1962         int cpu;
1963
1964         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1965
1966         if (trace_buffered_event_ref++)
1967                 return;
1968
1969         for_each_tracing_cpu(cpu) {
1970                 page = alloc_pages_node(cpu_to_node(cpu),
1971                                         GFP_KERNEL | __GFP_NORETRY, 0);
1972                 if (!page)
1973                         goto failed;
1974
1975                 event = page_address(page);
1976                 memset(event, 0, sizeof(*event));
1977
1978                 per_cpu(trace_buffered_event, cpu) = event;
1979
1980                 preempt_disable();
1981                 if (cpu == smp_processor_id() &&
1982                     this_cpu_read(trace_buffered_event) !=
1983                     per_cpu(trace_buffered_event, cpu))
1984                         WARN_ON_ONCE(1);
1985                 preempt_enable();
1986         }
1987
1988         return;
1989  failed:
1990         trace_buffered_event_disable();
1991 }
1992
1993 static void enable_trace_buffered_event(void *data)
1994 {
1995         /* Probably not needed, but do it anyway */
1996         smp_rmb();
1997         this_cpu_dec(trace_buffered_event_cnt);
1998 }
1999
2000 static void disable_trace_buffered_event(void *data)
2001 {
2002         this_cpu_inc(trace_buffered_event_cnt);
2003 }
2004
2005 /**
2006  * trace_buffered_event_disable - disable buffering events
2007  *
2008  * When a filter is removed, it is faster to not use the buffered
2009  * events, and to commit directly into the ring buffer. Free up
2010  * the temp buffers when there are no more users. This requires
2011  * special synchronization with current events.
2012  */
2013 void trace_buffered_event_disable(void)
2014 {
2015         int cpu;
2016
2017         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2018
2019         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2020                 return;
2021
2022         if (--trace_buffered_event_ref)
2023                 return;
2024
2025         preempt_disable();
2026         /* For each CPU, set the buffer as used. */
2027         smp_call_function_many(tracing_buffer_mask,
2028                                disable_trace_buffered_event, NULL, 1);
2029         preempt_enable();
2030
2031         /* Wait for all current users to finish */
2032         synchronize_sched();
2033
2034         for_each_tracing_cpu(cpu) {
2035                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2036                 per_cpu(trace_buffered_event, cpu) = NULL;
2037         }
2038         /*
2039          * Make sure trace_buffered_event is NULL before clearing
2040          * trace_buffered_event_cnt.
2041          */
2042         smp_wmb();
2043
2044         preempt_disable();
2045         /* Do the work on each cpu */
2046         smp_call_function_many(tracing_buffer_mask,
2047                                enable_trace_buffered_event, NULL, 1);
2048         preempt_enable();
2049 }
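/*
 * Minimal usage sketch for the enable/disable pair above (the event_mutex
 * requirement comes from the WARN_ON_ONCE() checks; the filter step is
 * only illustrative):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install or update an event filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 *
 * The reference count lets several filters share the per-cpu pages.
 */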
2050
2051 void
2052 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2053 {
2054         __this_cpu_write(trace_cmdline_save, true);
2055
2056         /* If this is the temp buffer, we need to commit fully */
2057         if (this_cpu_read(trace_buffered_event) == event) {
2058                 /* Length is in event->array[0] */
2059                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2060                 /* Release the temp buffer */
2061                 this_cpu_dec(trace_buffered_event_cnt);
2062         } else
2063                 ring_buffer_unlock_commit(buffer, event);
2064 }
2065
2066 static struct ring_buffer *temp_buffer;
2067
2068 struct ring_buffer_event *
2069 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2070                           struct trace_event_file *trace_file,
2071                           int type, unsigned long len,
2072                           unsigned long flags, int pc)
2073 {
2074         struct ring_buffer_event *entry;
2075         int val;
2076
2077         *current_rb = trace_file->tr->trace_buffer.buffer;
2078
2079         if ((trace_file->flags &
2080              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2081             (entry = this_cpu_read(trace_buffered_event))) {
2082                 /* Try to use the per cpu buffer first */
2083                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2084                 if (val == 1) {
2085                         trace_event_setup(entry, type, flags, pc);
2086                         entry->array[0] = len;
2087                         return entry;
2088                 }
2089                 this_cpu_dec(trace_buffered_event_cnt);
2090         }
2091
2092         entry = trace_buffer_lock_reserve(*current_rb,
2093                                          type, len, flags, pc);
2094         /*
2095          * If tracing is off, but we have triggers enabled
2096          * we still need to look at the event data. Use the temp_buffer
2097          * to store the trace event for the trigger to use. It's recursion
2098          * safe and will not be recorded anywhere.
2099          */
2100         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2101                 *current_rb = temp_buffer;
2102                 entry = trace_buffer_lock_reserve(*current_rb,
2103                                                   type, len, flags, pc);
2104         }
2105         return entry;
2106 }
2107 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
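/*
 * Rough shape of the fast path above: when a filter or soft disable is
 * active and this cpu's buffered event page is free
 * (trace_buffered_event_cnt goes 0 -> 1), the event is staged in that
 * page instead of the ring buffer.  If it survives filtering,
 * __buffer_unlock_commit() copies it into the real buffer with
 * ring_buffer_write(); a discarded event never touches the ring buffer
 * at all.  Nested events on the same cpu fall back to a normal
 * ring buffer reservation.
 */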
2108
2109 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2110                                      struct ring_buffer *buffer,
2111                                      struct ring_buffer_event *event,
2112                                      unsigned long flags, int pc,
2113                                      struct pt_regs *regs)
2114 {
2115         __buffer_unlock_commit(buffer, event);
2116
2117         /*
2118          * If regs is not set, then skip the following callers:
2119          *   trace_buffer_unlock_commit_regs
2120          *   event_trigger_unlock_commit
2121          *   trace_event_buffer_commit
2122          *   trace_event_raw_event_sched_switch
2123          * Note, we can still get here via blktrace, wakeup tracer
2124          * and mmiotrace, but that's ok if they lose a function or
2125          * two. They are not that meaningful.
2126          */
2127         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2128         ftrace_trace_userstack(buffer, flags, pc);
2129 }
2130
2131 void
2132 trace_function(struct trace_array *tr,
2133                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2134                int pc)
2135 {
2136         struct trace_event_call *call = &event_function;
2137         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2138         struct ring_buffer_event *event;
2139         struct ftrace_entry *entry;
2140
2141         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2142                                           flags, pc);
2143         if (!event)
2144                 return;
2145         entry   = ring_buffer_event_data(event);
2146         entry->ip                       = ip;
2147         entry->parent_ip                = parent_ip;
2148
2149         if (!call_filter_check_discard(call, entry, buffer, event))
2150                 __buffer_unlock_commit(buffer, event);
2151 }
2152
2153 #ifdef CONFIG_STACKTRACE
2154
2155 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2156 struct ftrace_stack {
2157         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2158 };
2159
2160 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2161 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2162
2163 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2164                                  unsigned long flags,
2165                                  int skip, int pc, struct pt_regs *regs)
2166 {
2167         struct trace_event_call *call = &event_kernel_stack;
2168         struct ring_buffer_event *event;
2169         struct stack_entry *entry;
2170         struct stack_trace trace;
2171         int use_stack;
2172         int size = FTRACE_STACK_ENTRIES;
2173
2174         trace.nr_entries        = 0;
2175         trace.skip              = skip;
2176
2177         /*
2178          * Add two, for this function and the call to save_stack_trace().
2179          * If regs is set, then these functions will not be in the way.
2180          */
2181         if (!regs)
2182                 trace.skip += 2;
2183
2184         /*
2185          * Since events can happen in NMIs there's no safe way to
2186          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2187          * or NMI comes in, it will just have to use the default
2188          * FTRACE_STACK_ENTRIES.
2189          */
2190         preempt_disable_notrace();
2191
2192         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2193         /*
2194          * We don't need any atomic variables, just a barrier.
2195          * If an interrupt comes in, we don't care, because it would
2196          * have exited and put the counter back to what we want.
2197          * We just need a barrier to keep gcc from moving things
2198          * around.
2199          */
2200         barrier();
2201         if (use_stack == 1) {
2202                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2203                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2204
2205                 if (regs)
2206                         save_stack_trace_regs(regs, &trace);
2207                 else
2208                         save_stack_trace(&trace);
2209
2210                 if (trace.nr_entries > size)
2211                         size = trace.nr_entries;
2212         } else
2213                 /* From now on, use_stack is a boolean */
2214                 use_stack = 0;
2215
2216         size *= sizeof(unsigned long);
2217
2218         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2219                                           sizeof(*entry) + size, flags, pc);
2220         if (!event)
2221                 goto out;
2222         entry = ring_buffer_event_data(event);
2223
2224         memset(&entry->caller, 0, size);
2225
2226         if (use_stack)
2227                 memcpy(&entry->caller, trace.entries,
2228                        trace.nr_entries * sizeof(unsigned long));
2229         else {
2230                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2231                 trace.entries           = entry->caller;
2232                 if (regs)
2233                         save_stack_trace_regs(regs, &trace);
2234                 else
2235                         save_stack_trace(&trace);
2236         }
2237
2238         entry->size = trace.nr_entries;
2239
2240         if (!call_filter_check_discard(call, entry, buffer, event))
2241                 __buffer_unlock_commit(buffer, event);
2242
2243  out:
2244         /* Again, don't let gcc optimize things here */
2245         barrier();
2246         __this_cpu_dec(ftrace_stack_reserve);
2247         preempt_enable_notrace();
2248
2249 }
2250
2251 static inline void ftrace_trace_stack(struct trace_array *tr,
2252                                       struct ring_buffer *buffer,
2253                                       unsigned long flags,
2254                                       int skip, int pc, struct pt_regs *regs)
2255 {
2256         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2257                 return;
2258
2259         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2260 }
2261
2262 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2263                    int pc)
2264 {
2265         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2266 }
2267
2268 /**
2269  * trace_dump_stack - record a stack back trace in the trace buffer
2270  * @skip: Number of functions to skip (helper handlers)
2271  */
2272 void trace_dump_stack(int skip)
2273 {
2274         unsigned long flags;
2275
2276         if (tracing_disabled || tracing_selftest_running)
2277                 return;
2278
2279         local_save_flags(flags);
2280
2281         /*
2282          * Skip 3 more, which seems to get us to the caller of
2283          * this function.
2284          */
2285         skip += 3;
2286         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2287                              flags, skip, preempt_count(), NULL);
2288 }
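/*
 * Example (illustrative): dropping
 *
 *	trace_dump_stack(0);
 *
 * into a code path under investigation records the caller's backtrace in
 * the global trace buffer, where it appears inline with the other events
 * in the trace output.
 */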
2289
2290 static DEFINE_PER_CPU(int, user_stack_count);
2291
2292 void
2293 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2294 {
2295         struct trace_event_call *call = &event_user_stack;
2296         struct ring_buffer_event *event;
2297         struct userstack_entry *entry;
2298         struct stack_trace trace;
2299
2300         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2301                 return;
2302
2303         /*
2304          * NMIs can not handle page faults, even with fix ups.
2305          * Saving the user stack can (and often does) fault.
2306          */
2307         if (unlikely(in_nmi()))
2308                 return;
2309
2310         /*
2311          * prevent recursion, since the user stack tracing may
2312          * trigger other kernel events.
2313          */
2314         preempt_disable();
2315         if (__this_cpu_read(user_stack_count))
2316                 goto out;
2317
2318         __this_cpu_inc(user_stack_count);
2319
2320         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2321                                           sizeof(*entry), flags, pc);
2322         if (!event)
2323                 goto out_drop_count;
2324         entry   = ring_buffer_event_data(event);
2325
2326         entry->tgid             = current->tgid;
2327         memset(&entry->caller, 0, sizeof(entry->caller));
2328
2329         trace.nr_entries        = 0;
2330         trace.max_entries       = FTRACE_STACK_ENTRIES;
2331         trace.skip              = 0;
2332         trace.entries           = entry->caller;
2333
2334         save_stack_trace_user(&trace);
2335         if (!call_filter_check_discard(call, entry, buffer, event))
2336                 __buffer_unlock_commit(buffer, event);
2337
2338  out_drop_count:
2339         __this_cpu_dec(user_stack_count);
2340  out:
2341         preempt_enable();
2342 }
2343
2344 #ifdef UNUSED
2345 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2346 {
2347         ftrace_trace_userstack(tr, flags, preempt_count());
2348 }
2349 #endif /* UNUSED */
2350
2351 #endif /* CONFIG_STACKTRACE */
2352
2353 /* created for use with alloc_percpu */
2354 struct trace_buffer_struct {
2355         int nesting;
2356         char buffer[4][TRACE_BUF_SIZE];
2357 };
2358
2359 static struct trace_buffer_struct *trace_percpu_buffer;
2360
2361 /*
2362  * This allows for lockless recording.  If we're nested too deeply, then
2363  * this returns NULL.
2364  */
2365 static char *get_trace_buf(void)
2366 {
2367         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2368
2369         if (!buffer || buffer->nesting >= 4)
2370                 return NULL;
2371
2372         return &buffer->buffer[buffer->nesting++][0];
2373 }
2374
2375 static void put_trace_buf(void)
2376 {
2377         this_cpu_dec(trace_percpu_buffer->nesting);
2378 }
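/*
 * Sketch of the nesting handled above (contexts are illustrative): a
 * trace_printk() in process context may be interrupted by one in softirq,
 * then hardirq, then NMI.  Each level does
 *
 *	buf = get_trace_buf();		nesting 0..3 selects its own slot
 *	... format into buf ...
 *	put_trace_buf();		releases the slot
 *
 * and a fifth nested user simply gets NULL and drops its message.
 */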
2379
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382         struct trace_buffer_struct *buffers;
2383
2384         buffers = alloc_percpu(struct trace_buffer_struct);
2385         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2386                 return -ENOMEM;
2387
2388         trace_percpu_buffer = buffers;
2389         return 0;
2390 }
2391
2392 static int buffers_allocated;
2393
2394 void trace_printk_init_buffers(void)
2395 {
2396         if (buffers_allocated)
2397                 return;
2398
2399         if (alloc_percpu_trace_buffer())
2400                 return;
2401
2402         /* trace_printk() is for debug use only. Don't use it in production. */
2403
2404         pr_warn("\n");
2405         pr_warn("**********************************************************\n");
2406         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2407         pr_warn("**                                                      **\n");
2408         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2409         pr_warn("**                                                      **\n");
2410         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2411         pr_warn("** unsafe for production use.                           **\n");
2412         pr_warn("**                                                      **\n");
2413         pr_warn("** If you see this message and you are not debugging    **\n");
2414         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2415         pr_warn("**                                                      **\n");
2416         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2417         pr_warn("**********************************************************\n");
2418
2419         /* Expand the buffers to set size */
2420         tracing_update_buffers();
2421
2422         buffers_allocated = 1;
2423
2424         /*
2425          * trace_printk_init_buffers() can be called by modules.
2426          * If that happens, then we need to start cmdline recording
2427          * directly here. If the global_trace.buffer is already
2428          * allocated here, then this was called by module code.
2429          */
2430         if (global_trace.trace_buffer.buffer)
2431                 tracing_start_cmdline_record();
2432 }
2433
2434 void trace_printk_start_comm(void)
2435 {
2436         /* Start tracing comms if trace printk is set */
2437         if (!buffers_allocated)
2438                 return;
2439         tracing_start_cmdline_record();
2440 }
2441
2442 static void trace_printk_start_stop_comm(int enabled)
2443 {
2444         if (!buffers_allocated)
2445                 return;
2446
2447         if (enabled)
2448                 tracing_start_cmdline_record();
2449         else
2450                 tracing_stop_cmdline_record();
2451 }
2452
2453 /**
2454  * trace_vbprintk - write binary msg to tracing buffer
2455  * @ip: caller address, @fmt: binary printf format, @args: va_list of arguments for @fmt
2456  */
2457 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2458 {
2459         struct trace_event_call *call = &event_bprint;
2460         struct ring_buffer_event *event;
2461         struct ring_buffer *buffer;
2462         struct trace_array *tr = &global_trace;
2463         struct bprint_entry *entry;
2464         unsigned long flags;
2465         char *tbuffer;
2466         int len = 0, size, pc;
2467
2468         if (unlikely(tracing_selftest_running || tracing_disabled))
2469                 return 0;
2470
2471         /* Don't pollute graph traces with trace_vprintk internals */
2472         pause_graph_tracing();
2473
2474         pc = preempt_count();
2475         preempt_disable_notrace();
2476
2477         tbuffer = get_trace_buf();
2478         if (!tbuffer) {
2479                 len = 0;
2480                 goto out_nobuffer;
2481         }
2482
2483         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2484
2485         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2486                 goto out;
2487
2488         local_save_flags(flags);
2489         size = sizeof(*entry) + sizeof(u32) * len;
2490         buffer = tr->trace_buffer.buffer;
2491         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2492                                           flags, pc);
2493         if (!event)
2494                 goto out;
2495         entry = ring_buffer_event_data(event);
2496         entry->ip                       = ip;
2497         entry->fmt                      = fmt;
2498
2499         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2500         if (!call_filter_check_discard(call, entry, buffer, event)) {
2501                 __buffer_unlock_commit(buffer, event);
2502                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2503         }
2504
2505 out:
2506         put_trace_buf();
2507
2508 out_nobuffer:
2509         preempt_enable_notrace();
2510         unpause_graph_tracing();
2511
2512         return len;
2513 }
2514 EXPORT_SYMBOL_GPL(trace_vbprintk);
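/*
 * trace_vbprintk() is normally reached through trace_printk(), e.g.
 * (illustrative):
 *
 *	trace_printk("page %p refcount %d\n", page, refcount);
 *
 * Only the binary-encoded argument values are copied into the ring
 * buffer here; the format string pointer is stored and expanded when the
 * trace is read.
 */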
2515
2516 static int
2517 __trace_array_vprintk(struct ring_buffer *buffer,
2518                       unsigned long ip, const char *fmt, va_list args)
2519 {
2520         struct trace_event_call *call = &event_print;
2521         struct ring_buffer_event *event;
2522         int len = 0, size, pc;
2523         struct print_entry *entry;
2524         unsigned long flags;
2525         char *tbuffer;
2526
2527         if (tracing_disabled || tracing_selftest_running)
2528                 return 0;
2529
2530         /* Don't pollute graph traces with trace_vprintk internals */
2531         pause_graph_tracing();
2532
2533         pc = preempt_count();
2534         preempt_disable_notrace();
2535
2537         tbuffer = get_trace_buf();
2538         if (!tbuffer) {
2539                 len = 0;
2540                 goto out_nobuffer;
2541         }
2542
2543         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2544
2545         local_save_flags(flags);
2546         size = sizeof(*entry) + len + 1;
2547         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2548                                           flags, pc);
2549         if (!event)
2550                 goto out;
2551         entry = ring_buffer_event_data(event);
2552         entry->ip = ip;
2553
2554         memcpy(&entry->buf, tbuffer, len + 1);
2555         if (!call_filter_check_discard(call, entry, buffer, event)) {
2556                 __buffer_unlock_commit(buffer, event);
2557                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2558         }
2559
2560 out:
2561         put_trace_buf();
2562
2563 out_nobuffer:
2564         preempt_enable_notrace();
2565         unpause_graph_tracing();
2566
2567         return len;
2568 }
2569
2570 int trace_array_vprintk(struct trace_array *tr,
2571                         unsigned long ip, const char *fmt, va_list args)
2572 {
2573         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2574 }
2575
2576 int trace_array_printk(struct trace_array *tr,
2577                        unsigned long ip, const char *fmt, ...)
2578 {
2579         int ret;
2580         va_list ap;
2581
2582         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2583                 return 0;
2584
2585         va_start(ap, fmt);
2586         ret = trace_array_vprintk(tr, ip, fmt, ap);
2587         va_end(ap);
2588         return ret;
2589 }
2590
2591 int trace_array_printk_buf(struct ring_buffer *buffer,
2592                            unsigned long ip, const char *fmt, ...)
2593 {
2594         int ret;
2595         va_list ap;
2596
2597         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2598                 return 0;
2599
2600         va_start(ap, fmt);
2601         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2602         va_end(ap);
2603         return ret;
2604 }
2605
2606 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2607 {
2608         return trace_array_vprintk(&global_trace, ip, fmt, args);
2609 }
2610 EXPORT_SYMBOL_GPL(trace_vprintk);
2611
2612 static void trace_iterator_increment(struct trace_iterator *iter)
2613 {
2614         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2615
2616         iter->idx++;
2617         if (buf_iter)
2618                 ring_buffer_read(buf_iter, NULL);
2619 }
2620
2621 static struct trace_entry *
2622 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2623                 unsigned long *lost_events)
2624 {
2625         struct ring_buffer_event *event;
2626         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2627
2628         if (buf_iter)
2629                 event = ring_buffer_iter_peek(buf_iter, ts);
2630         else
2631                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2632                                          lost_events);
2633
2634         if (event) {
2635                 iter->ent_size = ring_buffer_event_length(event);
2636                 return ring_buffer_event_data(event);
2637         }
2638         iter->ent_size = 0;
2639         return NULL;
2640 }
2641
2642 static struct trace_entry *
2643 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2644                   unsigned long *missing_events, u64 *ent_ts)
2645 {
2646         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2647         struct trace_entry *ent, *next = NULL;
2648         unsigned long lost_events = 0, next_lost = 0;
2649         int cpu_file = iter->cpu_file;
2650         u64 next_ts = 0, ts;
2651         int next_cpu = -1;
2652         int next_size = 0;
2653         int cpu;
2654
2655         /*
2656          * If we are in a per_cpu trace file, don't bother iterating over
2657          * all cpus; just peek at that cpu directly.
2658          */
2659         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2660                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2661                         return NULL;
2662                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2663                 if (ent_cpu)
2664                         *ent_cpu = cpu_file;
2665
2666                 return ent;
2667         }
2668
2669         for_each_tracing_cpu(cpu) {
2670
2671                 if (ring_buffer_empty_cpu(buffer, cpu))
2672                         continue;
2673
2674                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2675
2676                 /*
2677                  * Pick the entry with the smallest timestamp:
2678                  */
2679                 if (ent && (!next || ts < next_ts)) {
2680                         next = ent;
2681                         next_cpu = cpu;
2682                         next_ts = ts;
2683                         next_lost = lost_events;
2684                         next_size = iter->ent_size;
2685                 }
2686         }
2687
2688         iter->ent_size = next_size;
2689
2690         if (ent_cpu)
2691                 *ent_cpu = next_cpu;
2692
2693         if (ent_ts)
2694                 *ent_ts = next_ts;
2695
2696         if (missing_events)
2697                 *missing_events = next_lost;
2698
2699         return next;
2700 }
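/*
 * Worked example of the merge above (timestamps invented): with entries
 * pending at ts 100 on cpu0, 90 on cpu1 and 120 on cpu2, successive calls
 * return the cpu1, cpu0 and then cpu2 entries, so the per-cpu ring
 * buffers are presented to readers as one stream ordered by timestamp.
 */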
2701
2702 /* Find the next real entry, without updating the iterator itself */
2703 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2704                                           int *ent_cpu, u64 *ent_ts)
2705 {
2706         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2707 }
2708
2709 /* Find the next real entry, and increment the iterator to the next entry */
2710 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2711 {
2712         iter->ent = __find_next_entry(iter, &iter->cpu,
2713                                       &iter->lost_events, &iter->ts);
2714
2715         if (iter->ent)
2716                 trace_iterator_increment(iter);
2717
2718         return iter->ent ? iter : NULL;
2719 }
2720
2721 static void trace_consume(struct trace_iterator *iter)
2722 {
2723         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2724                             &iter->lost_events);
2725 }
2726
2727 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2728 {
2729         struct trace_iterator *iter = m->private;
2730         int i = (int)*pos;
2731         void *ent;
2732
2733         WARN_ON_ONCE(iter->leftover);
2734
2735         (*pos)++;
2736
2737         /* can't go backwards */
2738         if (iter->idx > i)
2739                 return NULL;
2740
2741         if (iter->idx < 0)
2742                 ent = trace_find_next_entry_inc(iter);
2743         else
2744                 ent = iter;
2745
2746         while (ent && iter->idx < i)
2747                 ent = trace_find_next_entry_inc(iter);
2748
2749         iter->pos = *pos;
2750
2751         return ent;
2752 }
2753
2754 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2755 {
2756         struct ring_buffer_event *event;
2757         struct ring_buffer_iter *buf_iter;
2758         unsigned long entries = 0;
2759         u64 ts;
2760
2761         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2762
2763         buf_iter = trace_buffer_iter(iter, cpu);
2764         if (!buf_iter)
2765                 return;
2766
2767         ring_buffer_iter_reset(buf_iter);
2768
2769         /*
2770          * We could have the case with the max latency tracers
2771          * that a reset never took place on a cpu. This is evident
2772          * by the timestamp being before the start of the buffer.
2773          */
2774         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2775                 if (ts >= iter->trace_buffer->time_start)
2776                         break;
2777                 entries++;
2778                 ring_buffer_read(buf_iter, NULL);
2779         }
2780
2781         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2782 }
2783
2784 /*
2785  * The current tracer is copied to avoid global locking
2786  * all around.
2787  */
2788 static void *s_start(struct seq_file *m, loff_t *pos)
2789 {
2790         struct trace_iterator *iter = m->private;
2791         struct trace_array *tr = iter->tr;
2792         int cpu_file = iter->cpu_file;
2793         void *p = NULL;
2794         loff_t l = 0;
2795         int cpu;
2796
2797         /*
2798          * copy the tracer to avoid using a global lock all around.
2799          * iter->trace is a copy of current_trace, the pointer to the
2800          * iter->trace is a copy of current_trace; the pointer to the
2801          * will point to the same string as current_trace->name.
2802          */
2803         mutex_lock(&trace_types_lock);
2804         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2805                 *iter->trace = *tr->current_trace;
2806         mutex_unlock(&trace_types_lock);
2807
2808 #ifdef CONFIG_TRACER_MAX_TRACE
2809         if (iter->snapshot && iter->trace->use_max_tr)
2810                 return ERR_PTR(-EBUSY);
2811 #endif
2812
2813         if (!iter->snapshot)
2814                 atomic_inc(&trace_record_cmdline_disabled);
2815
2816         if (*pos != iter->pos) {
2817                 iter->ent = NULL;
2818                 iter->cpu = 0;
2819                 iter->idx = -1;
2820
2821                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2822                         for_each_tracing_cpu(cpu)
2823                                 tracing_iter_reset(iter, cpu);
2824                 } else
2825                         tracing_iter_reset(iter, cpu_file);
2826
2827                 iter->leftover = 0;
2828                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2829                         ;
2830
2831         } else {
2832                 /*
2833                  * If we overflowed the seq_file before, then we want
2834                  * to just reuse the trace_seq buffer.
2835                  */
2836                 if (iter->leftover)
2837                         p = iter;
2838                 else {
2839                         l = *pos - 1;
2840                         p = s_next(m, p, &l);
2841                 }
2842         }
2843
2844         trace_event_read_lock();
2845         trace_access_lock(cpu_file);
2846         return p;
2847 }
2848
2849 static void s_stop(struct seq_file *m, void *p)
2850 {
2851         struct trace_iterator *iter = m->private;
2852
2853 #ifdef CONFIG_TRACER_MAX_TRACE
2854         if (iter->snapshot && iter->trace->use_max_tr)
2855                 return;
2856 #endif
2857
2858         if (!iter->snapshot)
2859                 atomic_dec(&trace_record_cmdline_disabled);
2860
2861         trace_access_unlock(iter->cpu_file);
2862         trace_event_read_unlock();
2863 }
2864
2865 static void
2866 get_total_entries(struct trace_buffer *buf,
2867                   unsigned long *total, unsigned long *entries)
2868 {
2869         unsigned long count;
2870         int cpu;
2871
2872         *total = 0;
2873         *entries = 0;
2874
2875         for_each_tracing_cpu(cpu) {
2876                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2877                 /*
2878                  * If this buffer has skipped entries, then we hold all
2879                  * entries for the trace and we need to ignore the
2880                  * ones before the time stamp.
2881                  */
2882                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2883                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2884                         /* total is the same as the entries */
2885                         *total += count;
2886                 } else
2887                         *total += count +
2888                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2889                 *entries += count;
2890         }
2891 }
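/*
 * Small worked example for the accounting above (numbers invented): a
 * cpu buffer holding 1000 entries that has overwritten 200 older ones
 * contributes entries += 1000 and total += 1200, while a latency-tracer
 * cpu with 50 skipped (pre-reset) entries out of 1000 contributes
 * entries += 950 and total += 950.
 */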
2892
2893 static void print_lat_help_header(struct seq_file *m)
2894 {
2895         seq_puts(m, "#                  _------=> CPU#            \n"
2896                     "#                 / _-----=> irqs-off        \n"
2897                     "#                | / _----=> need-resched    \n"
2898                     "#                || / _---=> hardirq/softirq \n"
2899                     "#                ||| / _--=> preempt-depth   \n"
2900                     "#                |||| /     delay            \n"
2901                     "#  cmd     pid   ||||| time  |   caller      \n"
2902                     "#     \\   /      |||||  \\    |   /         \n");
2903 }
2904
2905 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2906 {
2907         unsigned long total;
2908         unsigned long entries;
2909
2910         get_total_entries(buf, &total, &entries);
2911         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2912                    entries, total, num_online_cpus());
2913         seq_puts(m, "#\n");
2914 }
2915
2916 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2917 {
2918         print_event_info(buf, m);
2919         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2920                     "#              | |       |          |         |\n");
2921 }
2922
2923 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2924 {
2925         print_event_info(buf, m);
2926         seq_puts(m, "#                              _-----=> irqs-off\n"
2927                     "#                             / _----=> need-resched\n"
2928                     "#                            | / _---=> hardirq/softirq\n"
2929                     "#                            || / _--=> preempt-depth\n"
2930                     "#                            ||| /     delay\n"
2931                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2932                     "#              | |       |   ||||       |         |\n");
2933 }
2934
2935 void
2936 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2937 {
2938         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2939         struct trace_buffer *buf = iter->trace_buffer;
2940         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2941         struct tracer *type = iter->trace;
2942         unsigned long entries;
2943         unsigned long total;
2944         const char *name = "preemption";
2945
2946         name = type->name;
2947
2948         get_total_entries(buf, &total, &entries);
2949
2950         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2951                    name, UTS_RELEASE);
2952         seq_puts(m, "# -----------------------------------"
2953                  "---------------------------------\n");
2954         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2955                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2956                    nsecs_to_usecs(data->saved_latency),
2957                    entries,
2958                    total,
2959                    buf->cpu,
2960 #if defined(CONFIG_PREEMPT_NONE)
2961                    "server",
2962 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2963                    "desktop",
2964 #elif defined(CONFIG_PREEMPT)
2965                    "preempt",
2966 #else
2967                    "unknown",
2968 #endif
2969                    /* These are reserved for later use */
2970                    0, 0, 0, 0);
2971 #ifdef CONFIG_SMP
2972         seq_printf(m, " #P:%d)\n", num_online_cpus());
2973 #else
2974         seq_puts(m, ")\n");
2975 #endif
2976         seq_puts(m, "#    -----------------\n");
2977         seq_printf(m, "#    | task: %.16s-%d "
2978                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2979                    data->comm, data->pid,
2980                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2981                    data->policy, data->rt_priority);
2982         seq_puts(m, "#    -----------------\n");
2983
2984         if (data->critical_start) {
2985                 seq_puts(m, "#  => started at: ");
2986                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2987                 trace_print_seq(m, &iter->seq);
2988                 seq_puts(m, "\n#  => ended at:   ");
2989                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2990                 trace_print_seq(m, &iter->seq);
2991                 seq_puts(m, "\n#\n");
2992         }
2993
2994         seq_puts(m, "#\n");
2995 }
2996
2997 static void test_cpu_buff_start(struct trace_iterator *iter)
2998 {
2999         struct trace_seq *s = &iter->seq;
3000         struct trace_array *tr = iter->tr;
3001
3002         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3003                 return;
3004
3005         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3006                 return;
3007
3008         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3009                 return;
3010
3011         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3012                 return;
3013
3014         if (iter->started)
3015                 cpumask_set_cpu(iter->cpu, iter->started);
3016
3017         /* Don't print started cpu buffer for the first entry of the trace */
3018         if (iter->idx > 1)
3019                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3020                                 iter->cpu);
3021 }
3022
3023 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3024 {
3025         struct trace_array *tr = iter->tr;
3026         struct trace_seq *s = &iter->seq;
3027         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3028         struct trace_entry *entry;
3029         struct trace_event *event;
3030
3031         entry = iter->ent;
3032
3033         test_cpu_buff_start(iter);
3034
3035         event = ftrace_find_event(entry->type);
3036
3037         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3038                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3039                         trace_print_lat_context(iter);
3040                 else
3041                         trace_print_context(iter);
3042         }
3043
3044         if (trace_seq_has_overflowed(s))
3045                 return TRACE_TYPE_PARTIAL_LINE;
3046
3047         if (event)
3048                 return event->funcs->trace(iter, sym_flags, event);
3049
3050         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3051
3052         return trace_handle_return(s);
3053 }
3054
3055 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3056 {
3057         struct trace_array *tr = iter->tr;
3058         struct trace_seq *s = &iter->seq;
3059         struct trace_entry *entry;
3060         struct trace_event *event;
3061
3062         entry = iter->ent;
3063
3064         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3065                 trace_seq_printf(s, "%d %d %llu ",
3066                                  entry->pid, iter->cpu, iter->ts);
3067
3068         if (trace_seq_has_overflowed(s))
3069                 return TRACE_TYPE_PARTIAL_LINE;
3070
3071         event = ftrace_find_event(entry->type);
3072         if (event)
3073                 return event->funcs->raw(iter, 0, event);
3074
3075         trace_seq_printf(s, "%d ?\n", entry->type);
3076
3077         return trace_handle_return(s);
3078 }
3079
3080 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3081 {
3082         struct trace_array *tr = iter->tr;
3083         struct trace_seq *s = &iter->seq;
3084         unsigned char newline = '\n';
3085         struct trace_entry *entry;
3086         struct trace_event *event;
3087
3088         entry = iter->ent;
3089
3090         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3091                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3092                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3093                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3094                 if (trace_seq_has_overflowed(s))
3095                         return TRACE_TYPE_PARTIAL_LINE;
3096         }
3097
3098         event = ftrace_find_event(entry->type);
3099         if (event) {
3100                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3101                 if (ret != TRACE_TYPE_HANDLED)
3102                         return ret;
3103         }
3104
3105         SEQ_PUT_FIELD(s, newline);
3106
3107         return trace_handle_return(s);
3108 }
3109
3110 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3111 {
3112         struct trace_array *tr = iter->tr;
3113         struct trace_seq *s = &iter->seq;
3114         struct trace_entry *entry;
3115         struct trace_event *event;
3116
3117         entry = iter->ent;
3118
3119         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3120                 SEQ_PUT_FIELD(s, entry->pid);
3121                 SEQ_PUT_FIELD(s, iter->cpu);
3122                 SEQ_PUT_FIELD(s, iter->ts);
3123                 if (trace_seq_has_overflowed(s))
3124                         return TRACE_TYPE_PARTIAL_LINE;
3125         }
3126
3127         event = ftrace_find_event(entry->type);
3128         return event ? event->funcs->binary(iter, 0, event) :
3129                 TRACE_TYPE_HANDLED;
3130 }
3131
3132 int trace_empty(struct trace_iterator *iter)
3133 {
3134         struct ring_buffer_iter *buf_iter;
3135         int cpu;
3136
3137         /* If we are looking at one CPU buffer, only check that one */
3138         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3139                 cpu = iter->cpu_file;
3140                 buf_iter = trace_buffer_iter(iter, cpu);
3141                 if (buf_iter) {
3142                         if (!ring_buffer_iter_empty(buf_iter))
3143                                 return 0;
3144                 } else {
3145                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3146                                 return 0;
3147                 }
3148                 return 1;
3149         }
3150
3151         for_each_tracing_cpu(cpu) {
3152                 buf_iter = trace_buffer_iter(iter, cpu);
3153                 if (buf_iter) {
3154                         if (!ring_buffer_iter_empty(buf_iter))
3155                                 return 0;
3156                 } else {
3157                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3158                                 return 0;
3159                 }
3160         }
3161
3162         return 1;
3163 }
3164
3165 /*  Called with trace_event_read_lock() held. */
3166 enum print_line_t print_trace_line(struct trace_iterator *iter)
3167 {
3168         struct trace_array *tr = iter->tr;
3169         unsigned long trace_flags = tr->trace_flags;
3170         enum print_line_t ret;
3171
3172         if (iter->lost_events) {
3173                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3174                                  iter->cpu, iter->lost_events);
3175                 if (trace_seq_has_overflowed(&iter->seq))
3176                         return TRACE_TYPE_PARTIAL_LINE;
3177         }
3178
3179         if (iter->trace && iter->trace->print_line) {
3180                 ret = iter->trace->print_line(iter);
3181                 if (ret != TRACE_TYPE_UNHANDLED)
3182                         return ret;
3183         }
3184
3185         if (iter->ent->type == TRACE_BPUTS &&
3186                         trace_flags & TRACE_ITER_PRINTK &&
3187                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3188                 return trace_print_bputs_msg_only(iter);
3189
3190         if (iter->ent->type == TRACE_BPRINT &&
3191                         trace_flags & TRACE_ITER_PRINTK &&
3192                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3193                 return trace_print_bprintk_msg_only(iter);
3194
3195         if (iter->ent->type == TRACE_PRINT &&
3196                         trace_flags & TRACE_ITER_PRINTK &&
3197                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3198                 return trace_print_printk_msg_only(iter);
3199
3200         if (trace_flags & TRACE_ITER_BIN)
3201                 return print_bin_fmt(iter);
3202
3203         if (trace_flags & TRACE_ITER_HEX)
3204                 return print_hex_fmt(iter);
3205
3206         if (trace_flags & TRACE_ITER_RAW)
3207                 return print_raw_fmt(iter);
3208
3209         return print_trace_fmt(iter);
3210 }
3211
3212 void trace_latency_header(struct seq_file *m)
3213 {
3214         struct trace_iterator *iter = m->private;
3215         struct trace_array *tr = iter->tr;
3216
3217         /* print nothing if the buffers are empty */
3218         if (trace_empty(iter))
3219                 return;
3220
3221         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3222                 print_trace_header(m, iter);
3223
3224         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3225                 print_lat_help_header(m);
3226 }
3227
3228 void trace_default_header(struct seq_file *m)
3229 {
3230         struct trace_iterator *iter = m->private;
3231         struct trace_array *tr = iter->tr;
3232         unsigned long trace_flags = tr->trace_flags;
3233
3234         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3235                 return;
3236
3237         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3238                 /* print nothing if the buffers are empty */
3239                 if (trace_empty(iter))
3240                         return;
3241                 print_trace_header(m, iter);
3242                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3243                         print_lat_help_header(m);
3244         } else {
3245                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3246                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3247                                 print_func_help_header_irq(iter->trace_buffer, m);
3248                         else
3249                                 print_func_help_header(iter->trace_buffer, m);
3250                 }
3251         }
3252 }
3253
3254 static void test_ftrace_alive(struct seq_file *m)
3255 {
3256         if (!ftrace_is_dead())
3257                 return;
3258                 seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3259                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3260 }
3261
3262 #ifdef CONFIG_TRACER_MAX_TRACE
3263 static void show_snapshot_main_help(struct seq_file *m)
3264 {
3265         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3266                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3267                     "#                      Takes a snapshot of the main buffer.\n"
3268                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3269                     "#                      (Doesn't have to be '2'; works with any number that\n"
3270                     "#                       is not a '0' or '1')\n");
3271 }
3272
3273 static void show_snapshot_percpu_help(struct seq_file *m)
3274 {
3275         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3276 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3277         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3278                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3279 #else
3280         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3281                     "#                     Must use main snapshot file to allocate.\n");
3282 #endif
3283         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3284                     "#                      (Doesn't have to be '2'; works with any number that\n"
3285                     "#                       is not a '0' or '1')\n");
3286 }
3287
3288 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3289 {
3290         if (iter->tr->allocated_snapshot)
3291                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3292         else
3293                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3294
3295         seq_puts(m, "# Snapshot commands:\n");
3296         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3297                 show_snapshot_main_help(m);
3298         else
3299                 show_snapshot_percpu_help(m);
3300 }
3301 #else
3302 /* Should never be called */
3303 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3304 #endif
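/*
 * Illustrative userspace sketch (guarded out, not part of this file):
 * exercising the snapshot interface described by the help text above.
 * It assumes tracefs is mounted at /sys/kernel/debug/tracing; adjust
 * TRACE_DIR if your system mounts it at /sys/kernel/tracing instead.
 */
#if 0
#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing"

int main(void)
{
	char line[256];
	FILE *f;

	/* "echo 1 > snapshot": allocate the snapshot buffer and take a snapshot */
	f = fopen(TRACE_DIR "/snapshot", "w");
	if (!f)
		return 1;
	fputs("1\n", f);
	fclose(f);

	/* Read back the snapshot, which now holds a copy of the main buffer */
	f = fopen(TRACE_DIR "/snapshot", "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* "echo 0 > snapshot": clear and free the snapshot buffer again */
	f = fopen(TRACE_DIR "/snapshot", "w");
	if (f) {
		fputs("0\n", f);
		fclose(f);
	}
	return 0;
}
#endif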
3305
3306 static int s_show(struct seq_file *m, void *v)
3307 {
3308         struct trace_iterator *iter = v;
3309         int ret;
3310
3311         if (iter->ent == NULL) {
3312                 if (iter->tr) {
3313                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3314                         seq_puts(m, "#\n");
3315                         test_ftrace_alive(m);
3316                 }
3317                 if (iter->snapshot && trace_empty(iter))
3318                         print_snapshot_help(m, iter);
3319                 else if (iter->trace && iter->trace->print_header)
3320                         iter->trace->print_header(m);
3321                 else
3322                         trace_default_header(m);
3323
3324         } else if (iter->leftover) {
3325                 /*
3326                  * If we filled the seq_file buffer earlier, we
3327                  * want to just show it now.
3328                  */
3329                 ret = trace_print_seq(m, &iter->seq);
3330
3331                 /* ret should this time be zero, but you never know */
3332                 iter->leftover = ret;
3333
3334         } else {
3335                 print_trace_line(iter);
3336                 ret = trace_print_seq(m, &iter->seq);
3337                 /*
3338                  * If we overflow the seq_file buffer, then it will
3339                  * ask us for this data again at start up.
3340                  * Use that instead.
3341                  *  ret is 0 if seq_file write succeeded.
3342                  *        -1 otherwise.
3343                  */
3344                 iter->leftover = ret;
3345         }
3346
3347         return 0;
3348 }
3349
3350 /*
3351  * Should be used after trace_array_get(), trace_types_lock
3352  * ensures that i_cdev was already initialized.
3353  */
3354 static inline int tracing_get_cpu(struct inode *inode)
3355 {
3356         if (inode->i_cdev) /* See trace_create_cpu_file() */
3357                 return (long)inode->i_cdev - 1;
3358         return RING_BUFFER_ALL_CPUS;
3359 }
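/*
 * Minimal sketch of the encoding tracing_get_cpu() relies on (guarded out,
 * illustration only): when a per-CPU trace file is created, the CPU number
 * is stored in i_cdev biased by one, so that a NULL (zero) i_cdev can keep
 * meaning RING_BUFFER_ALL_CPUS.  The real setup lives in
 * trace_create_cpu_file(); the helper name below is hypothetical.
 */
#if 0
static void example_tag_inode_with_cpu(struct inode *inode, long cpu)
{
	/* 0 is reserved for "all CPUs", so store cpu + 1 */
	inode->i_cdev = (void *)(cpu + 1);	/* see tracing_get_cpu() */
}
#endif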
3360
3361 static const struct seq_operations tracer_seq_ops = {
3362         .start          = s_start,
3363         .next           = s_next,
3364         .stop           = s_stop,
3365         .show           = s_show,
3366 };
3367
3368 static struct trace_iterator *
3369 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3370 {
3371         struct trace_array *tr = inode->i_private;
3372         struct trace_iterator *iter;
3373         int cpu;
3374
3375         if (tracing_disabled)
3376                 return ERR_PTR(-ENODEV);
3377
3378         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3379         if (!iter)
3380                 return ERR_PTR(-ENOMEM);
3381
3382         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3383                                     GFP_KERNEL);
3384         if (!iter->buffer_iter)
3385                 goto release;
3386
3387         /*
3388          * We make a copy of the current tracer to avoid concurrent
3389          * changes on it while we are reading.
3390          */
3391         mutex_lock(&trace_types_lock);
3392         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3393         if (!iter->trace)
3394                 goto fail;
3395
3396         *iter->trace = *tr->current_trace;
3397
3398         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3399                 goto fail;
3400
3401         iter->tr = tr;
3402
3403 #ifdef CONFIG_TRACER_MAX_TRACE
3404         /* Currently only the top directory has a snapshot */
3405         if (tr->current_trace->print_max || snapshot)
3406                 iter->trace_buffer = &tr->max_buffer;
3407         else
3408 #endif
3409                 iter->trace_buffer = &tr->trace_buffer;
3410         iter->snapshot = snapshot;
3411         iter->pos = -1;
3412         iter->cpu_file = tracing_get_cpu(inode);
3413         mutex_init(&iter->mutex);
3414
3415         /* Notify the tracer early; before we stop tracing. */
3416         if (iter->trace && iter->trace->open)
3417                 iter->trace->open(iter);
3418
3419         /* Annotate start of buffers if we had overruns */
3420         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3421                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3422
3423         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3424         if (trace_clocks[tr->clock_id].in_ns)
3425                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3426
3427         /* stop the trace while dumping if we are not opening "snapshot" */
3428         if (!iter->snapshot)
3429                 tracing_stop_tr(tr);
3430
3431         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3432                 for_each_tracing_cpu(cpu) {
3433                         iter->buffer_iter[cpu] =
3434                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3435                 }
3436                 ring_buffer_read_prepare_sync();
3437                 for_each_tracing_cpu(cpu) {
3438                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3439                         tracing_iter_reset(iter, cpu);
3440                 }
3441         } else {
3442                 cpu = iter->cpu_file;
3443                 iter->buffer_iter[cpu] =
3444                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3445                 ring_buffer_read_prepare_sync();
3446                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3447                 tracing_iter_reset(iter, cpu);
3448         }
3449
3450         mutex_unlock(&trace_types_lock);
3451
3452         return iter;
3453
3454  fail:
3455         mutex_unlock(&trace_types_lock);
3456         kfree(iter->trace);
3457         kfree(iter->buffer_iter);
3458 release:
3459         seq_release_private(inode, file);
3460         return ERR_PTR(-ENOMEM);
3461 }
3462
3463 int tracing_open_generic(struct inode *inode, struct file *filp)
3464 {
3465         if (tracing_disabled)
3466                 return -ENODEV;
3467
3468         filp->private_data = inode->i_private;
3469         return 0;
3470 }
3471
3472 bool tracing_is_disabled(void)
3473 {
3474         return (tracing_disabled) ? true : false;
3475 }
3476
3477 /*
3478  * Open and update trace_array ref count.
3479  * Must have the current trace_array passed to it.
3480  */
3481 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3482 {
3483         struct trace_array *tr = inode->i_private;
3484
3485         if (tracing_disabled)
3486                 return -ENODEV;
3487
3488         if (trace_array_get(tr) < 0)
3489                 return -ENODEV;
3490
3491         filp->private_data = inode->i_private;
3492
3493         return 0;
3494 }
3495
3496 static int tracing_release(struct inode *inode, struct file *file)
3497 {
3498         struct trace_array *tr = inode->i_private;
3499         struct seq_file *m = file->private_data;
3500         struct trace_iterator *iter;
3501         int cpu;
3502
3503         if (!(file->f_mode & FMODE_READ)) {
3504                 trace_array_put(tr);
3505                 return 0;
3506         }
3507
3508         /* Writes do not use seq_file */
3509         iter = m->private;
3510         mutex_lock(&trace_types_lock);
3511
3512         for_each_tracing_cpu(cpu) {
3513                 if (iter->buffer_iter[cpu])
3514                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3515         }
3516
3517         if (iter->trace && iter->trace->close)
3518                 iter->trace->close(iter);
3519
3520         if (!iter->snapshot)
3521                 /* reenable tracing if it was previously enabled */
3522                 tracing_start_tr(tr);
3523
3524         __trace_array_put(tr);
3525
3526         mutex_unlock(&trace_types_lock);
3527
3528         mutex_destroy(&iter->mutex);
3529         free_cpumask_var(iter->started);
3530         kfree(iter->trace);
3531         kfree(iter->buffer_iter);
3532         seq_release_private(inode, file);
3533
3534         return 0;
3535 }
3536
3537 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3538 {
3539         struct trace_array *tr = inode->i_private;
3540
3541         trace_array_put(tr);
3542         return 0;
3543 }
3544
3545 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3546 {
3547         struct trace_array *tr = inode->i_private;
3548
3549         trace_array_put(tr);
3550
3551         return single_release(inode, file);
3552 }
3553
3554 static int tracing_open(struct inode *inode, struct file *file)
3555 {
3556         struct trace_array *tr = inode->i_private;
3557         struct trace_iterator *iter;
3558         int ret = 0;
3559
3560         if (trace_array_get(tr) < 0)
3561                 return -ENODEV;
3562
3563         /* If this file was open for write, then erase contents */
3564         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3565                 int cpu = tracing_get_cpu(inode);
3566
3567                 if (cpu == RING_BUFFER_ALL_CPUS)
3568                         tracing_reset_online_cpus(&tr->trace_buffer);
3569                 else
3570                         tracing_reset(&tr->trace_buffer, cpu);
3571         }
3572
3573         if (file->f_mode & FMODE_READ) {
3574                 iter = __tracing_open(inode, file, false);
3575                 if (IS_ERR(iter))
3576                         ret = PTR_ERR(iter);
3577                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3578                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3579         }
3580
3581         if (ret < 0)
3582                 trace_array_put(tr);
3583
3584         return ret;
3585 }
3586
3587 /*
3588  * Some tracers are not suitable for instance buffers.
3589  * A tracer is always available for the global array (toplevel)
3590  * or if it explicitly states that it is.
3591  */
3592 static bool
3593 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3594 {
3595         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3596 }
3597
3598 /* Find the next tracer that this trace array may use */
3599 static struct tracer *
3600 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3601 {
3602         while (t && !trace_ok_for_array(t, tr))
3603                 t = t->next;
3604
3605         return t;
3606 }
3607
3608 static void *
3609 t_next(struct seq_file *m, void *v, loff_t *pos)
3610 {
3611         struct trace_array *tr = m->private;
3612         struct tracer *t = v;
3613
3614         (*pos)++;
3615
3616         if (t)
3617                 t = get_tracer_for_array(tr, t->next);
3618
3619         return t;
3620 }
3621
3622 static void *t_start(struct seq_file *m, loff_t *pos)
3623 {
3624         struct trace_array *tr = m->private;
3625         struct tracer *t;
3626         loff_t l = 0;
3627
3628         mutex_lock(&trace_types_lock);
3629
3630         t = get_tracer_for_array(tr, trace_types);
3631         for (; t && l < *pos; t = t_next(m, t, &l))
3632                         ;
3633
3634         return t;
3635 }
3636
3637 static void t_stop(struct seq_file *m, void *p)
3638 {
3639         mutex_unlock(&trace_types_lock);
3640 }
3641
3642 static int t_show(struct seq_file *m, void *v)
3643 {
3644         struct tracer *t = v;
3645
3646         if (!t)
3647                 return 0;
3648
3649         seq_puts(m, t->name);
3650         if (t->next)
3651                 seq_putc(m, ' ');
3652         else
3653                 seq_putc(m, '\n');
3654
3655         return 0;
3656 }
3657
3658 static const struct seq_operations show_traces_seq_ops = {
3659         .start          = t_start,
3660         .next           = t_next,
3661         .stop           = t_stop,
3662         .show           = t_show,
3663 };
3664
3665 static int show_traces_open(struct inode *inode, struct file *file)
3666 {
3667         struct trace_array *tr = inode->i_private;
3668         struct seq_file *m;
3669         int ret;
3670
3671         if (tracing_disabled)
3672                 return -ENODEV;
3673
3674         ret = seq_open(file, &show_traces_seq_ops);
3675         if (ret)
3676                 return ret;
3677
3678         m = file->private_data;
3679         m->private = tr;
3680
3681         return 0;
3682 }
3683
3684 static ssize_t
3685 tracing_write_stub(struct file *filp, const char __user *ubuf,
3686                    size_t count, loff_t *ppos)
3687 {
3688         return count;
3689 }
3690
3691 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3692 {
3693         int ret;
3694
3695         if (file->f_mode & FMODE_READ)
3696                 ret = seq_lseek(file, offset, whence);
3697         else
3698                 file->f_pos = ret = 0;
3699
3700         return ret;
3701 }
3702
3703 static const struct file_operations tracing_fops = {
3704         .open           = tracing_open,
3705         .read           = seq_read,
3706         .write          = tracing_write_stub,
3707         .llseek         = tracing_lseek,
3708         .release        = tracing_release,
3709 };
3710
3711 static const struct file_operations show_traces_fops = {
3712         .open           = show_traces_open,
3713         .read           = seq_read,
3714         .release        = seq_release,
3715         .llseek         = seq_lseek,
3716 };
3717
3718 /*
3719  * The tracer itself will not take this lock, but still we want
3720  * to provide a consistent cpumask to user-space:
3721  */
3722 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3723
3724 /*
3725  * Temporary storage for the character representation of the
3726  * CPU bitmask (and one more byte for the newline):
3727  */
3728 static char mask_str[NR_CPUS + 1];
3729
3730 static ssize_t
3731 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3732                      size_t count, loff_t *ppos)
3733 {
3734         struct trace_array *tr = file_inode(filp)->i_private;
3735         int len;
3736
3737         mutex_lock(&tracing_cpumask_update_lock);
3738
3739         len = snprintf(mask_str, count, "%*pb\n",
3740                        cpumask_pr_args(tr->tracing_cpumask));
3741         if (len >= count) {
3742                 count = -EINVAL;
3743                 goto out_err;
3744         }
3745         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3746
3747 out_err:
3748         mutex_unlock(&tracing_cpumask_update_lock);
3749
3750         return count;
3751 }
3752
3753 static ssize_t
3754 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3755                       size_t count, loff_t *ppos)
3756 {
3757         struct trace_array *tr = file_inode(filp)->i_private;
3758         cpumask_var_t tracing_cpumask_new;
3759         int err, cpu;
3760
3761         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3762                 return -ENOMEM;
3763
3764         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3765         if (err)
3766                 goto err_unlock;
3767
3768         mutex_lock(&tracing_cpumask_update_lock);
3769
3770         local_irq_disable();
3771         arch_spin_lock(&tr->max_lock);
3772         for_each_tracing_cpu(cpu) {
3773                 /*
3774                  * Increase/decrease the disabled counter if we are
3775                  * about to flip a bit in the cpumask:
3776                  */
3777                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3778                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3779                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3780                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3781                 }
3782                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3783                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3784                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3785                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3786                 }
3787         }
3788         arch_spin_unlock(&tr->max_lock);
3789         local_irq_enable();
3790
3791         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3792
3793         mutex_unlock(&tracing_cpumask_update_lock);
3794         free_cpumask_var(tracing_cpumask_new);
3795
3796         return count;
3797
3798 err_unlock:
3799         free_cpumask_var(tracing_cpumask_new);
3800
3801         return err;
3802 }
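/*
 * Illustrative userspace sketch (guarded out): restricting tracing to CPUs
 * 0-3 by writing a hex bitmask to tracing_cpumask, which is parsed above by
 * cpumask_parse_user().  Assumes tracefs at /sys/kernel/debug/tracing.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/tracing_cpumask", "w");

	if (!f)
		return 1;
	/* bits 0-3 set: trace only CPUs 0, 1, 2 and 3 */
	fputs("f\n", f);
	fclose(f);
	return 0;
}
#endif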
3803
3804 static const struct file_operations tracing_cpumask_fops = {
3805         .open           = tracing_open_generic_tr,
3806         .read           = tracing_cpumask_read,
3807         .write          = tracing_cpumask_write,
3808         .release        = tracing_release_generic_tr,
3809         .llseek         = generic_file_llseek,
3810 };
3811
3812 static int tracing_trace_options_show(struct seq_file *m, void *v)
3813 {
3814         struct tracer_opt *trace_opts;
3815         struct trace_array *tr = m->private;
3816         u32 tracer_flags;
3817         int i;
3818
3819         mutex_lock(&trace_types_lock);
3820         tracer_flags = tr->current_trace->flags->val;
3821         trace_opts = tr->current_trace->flags->opts;
3822
3823         for (i = 0; trace_options[i]; i++) {
3824                 if (tr->trace_flags & (1 << i))
3825                         seq_printf(m, "%s\n", trace_options[i]);
3826                 else
3827                         seq_printf(m, "no%s\n", trace_options[i]);
3828         }
3829
3830         for (i = 0; trace_opts[i].name; i++) {
3831                 if (tracer_flags & trace_opts[i].bit)
3832                         seq_printf(m, "%s\n", trace_opts[i].name);
3833                 else
3834                         seq_printf(m, "no%s\n", trace_opts[i].name);
3835         }
3836         mutex_unlock(&trace_types_lock);
3837
3838         return 0;
3839 }
3840
3841 static int __set_tracer_option(struct trace_array *tr,
3842                                struct tracer_flags *tracer_flags,
3843                                struct tracer_opt *opts, int neg)
3844 {
3845         struct tracer *trace = tracer_flags->trace;
3846         int ret;
3847
3848         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3849         if (ret)
3850                 return ret;
3851
3852         if (neg)
3853                 tracer_flags->val &= ~opts->bit;
3854         else
3855                 tracer_flags->val |= opts->bit;
3856         return 0;
3857 }
3858
3859 /* Try to assign a tracer specific option */
3860 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3861 {
3862         struct tracer *trace = tr->current_trace;
3863         struct tracer_flags *tracer_flags = trace->flags;
3864         struct tracer_opt *opts = NULL;
3865         int i;
3866
3867         for (i = 0; tracer_flags->opts[i].name; i++) {
3868                 opts = &tracer_flags->opts[i];
3869
3870                 if (strcmp(cmp, opts->name) == 0)
3871                         return __set_tracer_option(tr, trace->flags, opts, neg);
3872         }
3873
3874         return -EINVAL;
3875 }
3876
3877 /* Some tracers require overwrite to stay enabled */
3878 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3879 {
3880         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3881                 return -1;
3882
3883         return 0;
3884 }
3885
3886 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3887 {
3888         /* do nothing if flag is already set */
3889         if (!!(tr->trace_flags & mask) == !!enabled)
3890                 return 0;
3891
3892         /* Give the tracer a chance to approve the change */
3893         if (tr->current_trace->flag_changed)
3894                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3895                         return -EINVAL;
3896
3897         if (enabled)
3898                 tr->trace_flags |= mask;
3899         else
3900                 tr->trace_flags &= ~mask;
3901
3902         if (mask == TRACE_ITER_RECORD_CMD)
3903                 trace_event_enable_cmd_record(enabled);
3904
3905         if (mask == TRACE_ITER_EVENT_FORK)
3906                 trace_event_follow_fork(tr, enabled);
3907
3908         if (mask == TRACE_ITER_OVERWRITE) {
3909                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3910 #ifdef CONFIG_TRACER_MAX_TRACE
3911                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3912 #endif
3913         }
3914
3915         if (mask == TRACE_ITER_PRINTK) {
3916                 trace_printk_start_stop_comm(enabled);
3917                 trace_printk_control(enabled);
3918         }
3919
3920         return 0;
3921 }
3922
3923 static int trace_set_options(struct trace_array *tr, char *option)
3924 {
3925         char *cmp;
3926         int neg = 0;
3927         int ret = -ENODEV;
3928         int i;
3929         size_t orig_len = strlen(option);
3930
3931         cmp = strstrip(option);
3932
3933         if (strncmp(cmp, "no", 2) == 0) {
3934                 neg = 1;
3935                 cmp += 2;
3936         }
3937
3938         mutex_lock(&trace_types_lock);
3939
3940         for (i = 0; trace_options[i]; i++) {
3941                 if (strcmp(cmp, trace_options[i]) == 0) {
3942                         ret = set_tracer_flag(tr, 1 << i, !neg);
3943                         break;
3944                 }
3945         }
3946
3947         /* If no option could be set, test the specific tracer options */
3948         if (!trace_options[i])
3949                 ret = set_tracer_option(tr, cmp, neg);
3950
3951         mutex_unlock(&trace_types_lock);
3952
3953         /*
3954          * If the first trailing whitespace is replaced with '\0' by strstrip,
3955          * turn it back into a space.
3956          */
3957         if (orig_len > strlen(option))
3958                 option[strlen(option)] = ' ';
3959
3960         return ret;
3961 }
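/*
 * Illustrative userspace sketch (guarded out): toggling flags through the
 * trace_options file, which ends up in trace_set_options() above.  Writing
 * the bare option name sets it; prefixing it with "no" clears it.  Assumes
 * tracefs at /sys/kernel/debug/tracing.
 */
#if 0
#include <stdio.h>

static int write_trace_option(const char *opt)
{
	FILE *f = fopen("/sys/kernel/debug/tracing/trace_options", "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", opt);
	fclose(f);
	return 0;
}

int main(void)
{
	write_trace_option("noprint-parent");	/* clear the print-parent flag */
	write_trace_option("overwrite");	/* set the overwrite flag */
	return 0;
}
#endif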
3962
3963 static void __init apply_trace_boot_options(void)
3964 {
3965         char *buf = trace_boot_options_buf;
3966         char *option;
3967
3968         while (true) {
3969                 option = strsep(&buf, ",");
3970
3971                 if (!option)
3972                         break;
3973
3974                 if (*option)
3975                         trace_set_options(&global_trace, option);
3976
3977                 /* Put back the comma to allow this to be called again */
3978                 if (buf)
3979                         *(buf - 1) = ',';
3980         }
3981 }
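/*
 * Standalone sketch of the strsep()-and-restore idiom used above (guarded
 * out, userspace): strsep() writes '\0' over each separator it finds, so
 * the comma is put back afterwards to leave the buffer intact for another
 * pass.  The option strings here are made up for the example.
 */
#if 0
#define _DEFAULT_SOURCE		/* for strsep() in glibc */
#include <stdio.h>
#include <string.h>

static void walk_options(char *buf)
{
	char *option;

	while (1) {
		option = strsep(&buf, ",");
		if (!option)
			break;
		if (*option)
			printf("option: %s\n", option);
		/* Put back the comma so the buffer can be parsed again */
		if (buf)
			*(buf - 1) = ',';
	}
}

int main(void)
{
	char opts[] = "noprint-parent,sym-offset,trace_printk";

	walk_options(opts);	/* first pass */
	walk_options(opts);	/* still works: the commas were restored */
	return 0;
}
#endif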
3982
3983 static ssize_t
3984 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3985                         size_t cnt, loff_t *ppos)
3986 {
3987         struct seq_file *m = filp->private_data;
3988         struct trace_array *tr = m->private;
3989         char buf[64];
3990         int ret;
3991
3992         if (cnt >= sizeof(buf))
3993                 return -EINVAL;
3994
3995         if (copy_from_user(buf, ubuf, cnt))
3996                 return -EFAULT;
3997
3998         buf[cnt] = 0;
3999
4000         ret = trace_set_options(tr, buf);
4001         if (ret < 0)
4002                 return ret;
4003
4004         *ppos += cnt;
4005
4006         return cnt;
4007 }
4008
4009 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4010 {
4011         struct trace_array *tr = inode->i_private;
4012         int ret;
4013
4014         if (tracing_disabled)
4015                 return -ENODEV;
4016
4017         if (trace_array_get(tr) < 0)
4018                 return -ENODEV;
4019
4020         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4021         if (ret < 0)
4022                 trace_array_put(tr);
4023
4024         return ret;
4025 }
4026
4027 static const struct file_operations tracing_iter_fops = {
4028         .open           = tracing_trace_options_open,
4029         .read           = seq_read,
4030         .llseek         = seq_lseek,
4031         .release        = tracing_single_release_tr,
4032         .write          = tracing_trace_options_write,
4033 };
4034
4035 static const char readme_msg[] =
4036         "tracing mini-HOWTO:\n\n"
4037         "# echo 0 > tracing_on : quick way to disable tracing\n"
4038         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4039         " Important files:\n"
4040         "  trace\t\t\t- The static contents of the buffer\n"
4041         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4042         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4043         "  current_tracer\t- function and latency tracers\n"
4044         "  available_tracers\t- list of configured tracers for current_tracer\n"
4045         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4046         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4047         "  trace_clock\t\t- change the clock used to order events\n"
4048         "       local:   Per cpu clock but may not be synced across CPUs\n"
4049         "      global:   Synced across CPUs but slows tracing down.\n"
4050         "     counter:   Not a clock, but just an increment\n"
4051         "      uptime:   Jiffy counter from time of boot\n"
4052         "        perf:   Same clock that perf events use\n"
4053 #ifdef CONFIG_X86_64
4054         "     x86-tsc:   TSC cycle counter\n"
4055 #endif
4056         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4057         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4058         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4059         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4060         "\t\t\t  Remove sub-buffer with rmdir\n"
4061         "  trace_options\t\t- Set format or modify how tracing happens\n"
4062         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4063         "\t\t\t  option name\n"
4064         "  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
4065 #ifdef CONFIG_DYNAMIC_FTRACE
4066         "\n  available_filter_functions - list of functions that can be filtered on\n"
4067         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4068         "\t\t\t  functions\n"
4069         "\t     accepts: func_full_name or glob-matching-pattern\n"
4070         "\t     modules: Can select a group via module\n"
4071         "\t      Format: :mod:<module-name>\n"
4072         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4073         "\t    triggers: a command to perform when function is hit\n"
4074         "\t      Format: <function>:<trigger>[:count]\n"
4075         "\t     trigger: traceon, traceoff\n"
4076         "\t\t      enable_event:<system>:<event>\n"
4077         "\t\t      disable_event:<system>:<event>\n"
4078 #ifdef CONFIG_STACKTRACE
4079         "\t\t      stacktrace\n"
4080 #endif
4081 #ifdef CONFIG_TRACER_SNAPSHOT
4082         "\t\t      snapshot\n"
4083 #endif
4084         "\t\t      dump\n"
4085         "\t\t      cpudump\n"
4086         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4087         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4088         "\t     The first one will disable tracing every time do_fault is hit\n"
4089         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4090         "\t       The first time do_trap is hit and it disables tracing, the\n"
4091         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4092         "\t       the counter will not decrement. It only decrements when the\n"
4093         "\t       trigger did work\n"
4094         "\t     To remove trigger without count:\n"
4095         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4096         "\t     To remove trigger with a count:\n"
4097         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4098         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4099         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4100         "\t    modules: Can select a group via module command :mod:\n"
4101         "\t    Does not accept triggers\n"
4102 #endif /* CONFIG_DYNAMIC_FTRACE */
4103 #ifdef CONFIG_FUNCTION_TRACER
4104         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4105         "\t\t    (function)\n"
4106 #endif
4107 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4108         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4109         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4110         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4111 #endif
4112 #ifdef CONFIG_TRACER_SNAPSHOT
4113         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4114         "\t\t\t  snapshot buffer. Read the contents for more\n"
4115         "\t\t\t  information\n"
4116 #endif
4117 #ifdef CONFIG_STACK_TRACER
4118         "  stack_trace\t\t- Shows the max stack trace when active\n"
4119         "  stack_max_size\t- Shows current max stack size that was traced\n"
4120         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4121         "\t\t\t  new trace)\n"
4122 #ifdef CONFIG_DYNAMIC_FTRACE
4123         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4124         "\t\t\t  traces\n"
4125 #endif
4126 #endif /* CONFIG_STACK_TRACER */
4127 #ifdef CONFIG_KPROBE_EVENT
4128         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4129         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4130 #endif
4131 #ifdef CONFIG_UPROBE_EVENT
4132         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4133         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4134 #endif
4135 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4136         "\t  accepts: event-definitions (one definition per line)\n"
4137         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4138         "\t           -:[<group>/]<event>\n"
4139 #ifdef CONFIG_KPROBE_EVENT
4140         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4141 #endif
4142 #ifdef CONFIG_UPROBE_EVENT
4143         "\t    place: <path>:<offset>\n"
4144 #endif
4145         "\t     args: <name>=fetcharg[:type]\n"
4146         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4147         "\t           $stack<index>, $stack, $retval, $comm\n"
4148         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4149         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4150 #endif
4151         "  events/\t\t- Directory containing all trace event subsystems:\n"
4152         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4153         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4154         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4155         "\t\t\t  events\n"
4156         "      filter\t\t- If set, only events passing filter are traced\n"
4157         "  events/<system>/<event>/\t- Directory containing control files for\n"
4158         "\t\t\t  <event>:\n"
4159         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4160         "      filter\t\t- If set, only events passing filter are traced\n"
4161         "      trigger\t\t- If set, a command to perform when event is hit\n"
4162         "\t    Format: <trigger>[:count][if <filter>]\n"
4163         "\t   trigger: traceon, traceoff\n"
4164         "\t            enable_event:<system>:<event>\n"
4165         "\t            disable_event:<system>:<event>\n"
4166 #ifdef CONFIG_HIST_TRIGGERS
4167         "\t            enable_hist:<system>:<event>\n"
4168         "\t            disable_hist:<system>:<event>\n"
4169 #endif
4170 #ifdef CONFIG_STACKTRACE
4171         "\t\t    stacktrace\n"
4172 #endif
4173 #ifdef CONFIG_TRACER_SNAPSHOT
4174         "\t\t    snapshot\n"
4175 #endif
4176 #ifdef CONFIG_HIST_TRIGGERS
4177         "\t\t    hist (see below)\n"
4178 #endif
4179         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4180         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4181         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4182         "\t                  events/block/block_unplug/trigger\n"
4183         "\t   The first disables tracing every time block_unplug is hit.\n"
4184         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4185         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4186         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4187         "\t   Like function triggers, the counter is only decremented if it\n"
4188         "\t    enabled or disabled tracing.\n"
4189         "\t   To remove a trigger without a count:\n"
4190         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4191         "\t   To remove a trigger with a count:\n"
4192         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4193         "\t   Filters can be ignored when removing a trigger.\n"
4194 #ifdef CONFIG_HIST_TRIGGERS
4195         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4196         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4197         "\t            [:values=<field1[,field2,...]>]\n"
4198         "\t            [:sort=<field1[,field2,...]>]\n"
4199         "\t            [:size=#entries]\n"
4200         "\t            [:pause][:continue][:clear]\n"
4201         "\t            [:name=histname1]\n"
4202         "\t            [if <filter>]\n\n"
4203         "\t    When a matching event is hit, an entry is added to a hash\n"
4204         "\t    table using the key(s) and value(s) named, and the value of a\n"
4205         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4206         "\t    correspond to fields in the event's format description.  Keys\n"
4207         "\t    can be any field, or the special string 'stacktrace'.\n"
4208         "\t    Compound keys consisting of up to two fields can be specified\n"
4209         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4210         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4211         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4212         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4213         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4214         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4215         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4216         "\t    its histogram data will be shared with other triggers of the\n"
4217         "\t    same name, and trigger hits will update this common data.\n\n"
4218         "\t    Reading the 'hist' file for the event will dump the hash\n"
4219         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4220         "\t    triggers attached to an event, there will be a table for each\n"
4221         "\t    trigger in the output.  The table displayed for a named\n"
4222         "\t    trigger will be the same as any other instance having the\n"
4223         "\t    same name.  The default format used to display a given field\n"
4224         "\t    can be modified by appending any of the following modifiers\n"
4225         "\t    to the field name, as applicable:\n\n"
4226         "\t            .hex        display a number as a hex value\n"
4227         "\t            .sym        display an address as a symbol\n"
4228         "\t            .sym-offset display an address as a symbol and offset\n"
4229         "\t            .execname   display a common_pid as a program name\n"
4230         "\t            .syscall    display a syscall id as a syscall name\n"
4231         "\t            .log2       display log2 value rather than raw number\n\n"
4232         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4233         "\t    trigger or to start a hist trigger but not log any events\n"
4234         "\t    until told to do so.  'continue' can be used to start or\n"
4235         "\t    restart a paused hist trigger.\n\n"
4236         "\t    The 'clear' parameter will clear the contents of a running\n"
4237         "\t    hist trigger and leave its current paused/active state\n"
4238         "\t    unchanged.\n\n"
4239         "\t    The enable_hist and disable_hist triggers can be used to\n"
4240         "\t    have one event conditionally start and stop another event's\n"
4241         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4242         "\t    the enable_event and disable_event triggers.\n"
4243 #endif
4244 ;
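/*
 * Illustrative userspace sketch of the mini-HOWTO above (guarded out):
 * enable tracing, drop a marker into the ring buffer, then read a few
 * lines of the trace file.  Assumes tracefs at /sys/kernel/debug/tracing;
 * adjust TRACE_DIR if it is mounted at /sys/kernel/tracing instead.
 */
#if 0
#include <stdio.h>

#define TRACE_DIR "/sys/kernel/debug/tracing"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *f;
	int i;

	write_str(TRACE_DIR "/tracing_on", "1\n");	/* re-enable tracing */
	write_str(TRACE_DIR "/trace_marker", "hello from userspace\n");

	f = fopen(TRACE_DIR "/trace", "r");
	if (!f)
		return 1;
	for (i = 0; i < 20 && fgets(line, sizeof(line), f); i++)
		fputs(line, stdout);
	fclose(f);
	return 0;
}
#endif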
4245
4246 static ssize_t
4247 tracing_readme_read(struct file *filp, char __user *ubuf,
4248                        size_t cnt, loff_t *ppos)
4249 {
4250         return simple_read_from_buffer(ubuf, cnt, ppos,
4251                                         readme_msg, strlen(readme_msg));
4252 }
4253
4254 static const struct file_operations tracing_readme_fops = {
4255         .open           = tracing_open_generic,
4256         .read           = tracing_readme_read,
4257         .llseek         = generic_file_llseek,
4258 };
4259
4260 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4261 {
4262         unsigned int *ptr = v;
4263
4264         if (*pos || m->count)
4265                 ptr++;
4266
4267         (*pos)++;
4268
4269         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4270              ptr++) {
4271                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4272                         continue;
4273
4274                 return ptr;
4275         }
4276
4277         return NULL;
4278 }
4279
4280 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4281 {
4282         void *v;
4283         loff_t l = 0;
4284
4285         preempt_disable();
4286         arch_spin_lock(&trace_cmdline_lock);
4287
4288         v = &savedcmd->map_cmdline_to_pid[0];
4289         while (l <= *pos) {
4290                 v = saved_cmdlines_next(m, v, &l);
4291                 if (!v)
4292                         return NULL;
4293         }
4294
4295         return v;
4296 }
4297
4298 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4299 {
4300         arch_spin_unlock(&trace_cmdline_lock);
4301         preempt_enable();
4302 }
4303
4304 static int saved_cmdlines_show(struct seq_file *m, void *v)
4305 {
4306         char buf[TASK_COMM_LEN];
4307         unsigned int *pid = v;
4308
4309         __trace_find_cmdline(*pid, buf);
4310         seq_printf(m, "%d %s\n", *pid, buf);
4311         return 0;
4312 }
4313
4314 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4315         .start          = saved_cmdlines_start,
4316         .next           = saved_cmdlines_next,
4317         .stop           = saved_cmdlines_stop,
4318         .show           = saved_cmdlines_show,
4319 };
4320
4321 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4322 {
4323         if (tracing_disabled)
4324                 return -ENODEV;
4325
4326         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4327 }
4328
4329 static const struct file_operations tracing_saved_cmdlines_fops = {
4330         .open           = tracing_saved_cmdlines_open,
4331         .read           = seq_read,
4332         .llseek         = seq_lseek,
4333         .release        = seq_release,
4334 };
4335
4336 static ssize_t
4337 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4338                                  size_t cnt, loff_t *ppos)
4339 {
4340         char buf[64];
4341         int r;
4342
4343         arch_spin_lock(&trace_cmdline_lock);
4344         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4345         arch_spin_unlock(&trace_cmdline_lock);
4346
4347         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4348 }
4349
4350 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4351 {
4352         kfree(s->saved_cmdlines);
4353         kfree(s->map_cmdline_to_pid);
4354         kfree(s);
4355 }
4356
4357 static int tracing_resize_saved_cmdlines(unsigned int val)
4358 {
4359         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4360
4361         s = kmalloc(sizeof(*s), GFP_KERNEL);
4362         if (!s)
4363                 return -ENOMEM;
4364
4365         if (allocate_cmdlines_buffer(val, s) < 0) {
4366                 kfree(s);
4367                 return -ENOMEM;
4368         }
4369
4370         arch_spin_lock(&trace_cmdline_lock);
4371         savedcmd_temp = savedcmd;
4372         savedcmd = s;
4373         arch_spin_unlock(&trace_cmdline_lock);
4374         free_saved_cmdlines_buffer(savedcmd_temp);
4375
4376         return 0;
4377 }
4378
4379 static ssize_t
4380 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4381                                   size_t cnt, loff_t *ppos)
4382 {
4383         unsigned long val;
4384         int ret;
4385
4386         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4387         if (ret)
4388                 return ret;
4389
4390         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4391         if (!val || val > PID_MAX_DEFAULT)
4392                 return -EINVAL;
4393
4394         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4395         if (ret < 0)
4396                 return ret;
4397
4398         *ppos += cnt;
4399
4400         return cnt;
4401 }
4402
4403 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4404         .open           = tracing_open_generic,
4405         .read           = tracing_saved_cmdlines_size_read,
4406         .write          = tracing_saved_cmdlines_size_write,
4407 };
4408
4409 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4410 static union trace_enum_map_item *
4411 update_enum_map(union trace_enum_map_item *ptr)
4412 {
4413         if (!ptr->map.enum_string) {
4414                 if (ptr->tail.next) {
4415                         ptr = ptr->tail.next;
4416                         /* Set ptr to the next real item (skip head) */
4417                         ptr++;
4418                 } else
4419                         return NULL;
4420         }
4421         return ptr;
4422 }
4423
4424 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4425 {
4426         union trace_enum_map_item *ptr = v;
4427
4428         /*
4429          * Paranoid! If ptr points to end, we don't want to increment past it.
4430          * This really should never happen.
4431          */
4432         ptr = update_enum_map(ptr);
4433         if (WARN_ON_ONCE(!ptr))
4434                 return NULL;
4435
4436         ptr++;
4437
4438         (*pos)++;
4439
4440         ptr = update_enum_map(ptr);
4441
4442         return ptr;
4443 }
4444
4445 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4446 {
4447         union trace_enum_map_item *v;
4448         loff_t l = 0;
4449
4450         mutex_lock(&trace_enum_mutex);
4451
4452         v = trace_enum_maps;
4453         if (v)
4454                 v++;
4455
4456         while (v && l < *pos) {
4457                 v = enum_map_next(m, v, &l);
4458         }
4459
4460         return v;
4461 }
4462
4463 static void enum_map_stop(struct seq_file *m, void *v)
4464 {
4465         mutex_unlock(&trace_enum_mutex);
4466 }
4467
4468 static int enum_map_show(struct seq_file *m, void *v)
4469 {
4470         union trace_enum_map_item *ptr = v;
4471
4472         seq_printf(m, "%s %ld (%s)\n",
4473                    ptr->map.enum_string, ptr->map.enum_value,
4474                    ptr->map.system);
4475
4476         return 0;
4477 }
4478
4479 static const struct seq_operations tracing_enum_map_seq_ops = {
4480         .start          = enum_map_start,
4481         .next           = enum_map_next,
4482         .stop           = enum_map_stop,
4483         .show           = enum_map_show,
4484 };
4485
4486 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4487 {
4488         if (tracing_disabled)
4489                 return -ENODEV;
4490
4491         return seq_open(filp, &tracing_enum_map_seq_ops);
4492 }
4493
4494 static const struct file_operations tracing_enum_map_fops = {
4495         .open           = tracing_enum_map_open,
4496         .read           = seq_read,
4497         .llseek         = seq_lseek,
4498         .release        = seq_release,
4499 };
4500
4501 static inline union trace_enum_map_item *
4502 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4503 {
4504         /* Return tail of array given the head */
4505         return ptr + ptr->head.length + 1;
4506 }
4507
4508 static void
4509 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4510                            int len)
4511 {
4512         struct trace_enum_map **stop;
4513         struct trace_enum_map **map;
4514         union trace_enum_map_item *map_array;
4515         union trace_enum_map_item *ptr;
4516
4517         stop = start + len;
4518
4519         /*
4520          * The trace_enum_maps contains the map plus a head and tail item,
4521          * where the head holds the module and length of array, and the
4522          * tail holds a pointer to the next list.
4523          */
4524         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4525         if (!map_array) {
4526                 pr_warn("Unable to allocate trace enum mapping\n");
4527                 return;
4528         }
4529
4530         mutex_lock(&trace_enum_mutex);
4531
4532         if (!trace_enum_maps)
4533                 trace_enum_maps = map_array;
4534         else {
4535                 ptr = trace_enum_maps;
4536                 for (;;) {
4537                         ptr = trace_enum_jmp_to_tail(ptr);
4538                         if (!ptr->tail.next)
4539                                 break;
4540                         ptr = ptr->tail.next;
4541
4542                 }
4543                 ptr->tail.next = map_array;
4544         }
4545         map_array->head.mod = mod;
4546         map_array->head.length = len;
4547         map_array++;
4548
4549         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4550                 map_array->map = **map;
4551                 map_array++;
4552         }
4553         memset(map_array, 0, sizeof(*map_array));
4554
4555         mutex_unlock(&trace_enum_mutex);
4556 }
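/*
 * Simplified userspace model of the array layout built above (guarded out):
 * one "head" slot holding the owning module and map count, @len map slots,
 * and a zeroed "tail" slot whose NULL next pointer terminates the chain.
 * The types below are stand-ins for union trace_enum_map_item, just to
 * show the head/maps/tail arrangement.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct model_map { const char *enum_string; long enum_value; };

union model_item {
	struct model_map map;
	struct { void *mod; long length; } head;
	struct { union model_item *next; } tail;
};

int main(void)
{
	struct model_map maps[] = { { "RED", 1 }, { "BLUE", 2 } };
	long len = 2, i;
	union model_item *arr;

	/* head + len maps + tail, as in trace_insert_enum_map_file() */
	arr = calloc(len + 2, sizeof(*arr));
	if (!arr)
		return 1;
	arr[0].head.mod = NULL;
	arr[0].head.length = len;
	for (i = 0; i < len; i++)
		arr[i + 1].map = maps[i];
	/* arr[len + 1] stays zeroed: the tail, with a NULL next pointer */

	for (i = 0; i < arr[0].head.length; i++)
		printf("%s = %ld\n", arr[i + 1].map.enum_string,
		       arr[i + 1].map.enum_value);
	free(arr);
	return 0;
}
#endif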
4557
4558 static void trace_create_enum_file(struct dentry *d_tracer)
4559 {
4560         trace_create_file("enum_map", 0444, d_tracer,
4561                           NULL, &tracing_enum_map_fops);
4562 }
4563
4564 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4565 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4566 static inline void trace_insert_enum_map_file(struct module *mod,
4567                               struct trace_enum_map **start, int len) { }
4568 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4569
4570 static void trace_insert_enum_map(struct module *mod,
4571                                   struct trace_enum_map **start, int len)
4572 {
4573         struct trace_enum_map **map;
4574
4575         if (len <= 0)
4576                 return;
4577
4578         map = start;
4579
4580         trace_event_enum_update(map, len);
4581
4582         trace_insert_enum_map_file(mod, start, len);
4583 }
4584
4585 static ssize_t
4586 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4587                        size_t cnt, loff_t *ppos)
4588 {
4589         struct trace_array *tr = filp->private_data;
4590         char buf[MAX_TRACER_SIZE+2];
4591         int r;
4592
4593         mutex_lock(&trace_types_lock);
4594         r = sprintf(buf, "%s\n", tr->current_trace->name);
4595         mutex_unlock(&trace_types_lock);
4596
4597         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4598 }
4599
4600 int tracer_init(struct tracer *t, struct trace_array *tr)
4601 {
4602         tracing_reset_online_cpus(&tr->trace_buffer);
4603         return t->init(tr);
4604 }
4605
4606 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4607 {
4608         int cpu;
4609
4610         for_each_tracing_cpu(cpu)
4611                 per_cpu_ptr(buf->data, cpu)->entries = val;
4612 }
4613
4614 #ifdef CONFIG_TRACER_MAX_TRACE
4615 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4616 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4617                                         struct trace_buffer *size_buf, int cpu_id)
4618 {
4619         int cpu, ret = 0;
4620
4621         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4622                 for_each_tracing_cpu(cpu) {
4623                         ret = ring_buffer_resize(trace_buf->buffer,
4624                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4625                         if (ret < 0)
4626                                 break;
4627                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4628                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4629                 }
4630         } else {
4631                 ret = ring_buffer_resize(trace_buf->buffer,
4632                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4633                 if (ret == 0)
4634                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4635                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4636         }
4637
4638         return ret;
4639 }
4640 #endif /* CONFIG_TRACER_MAX_TRACE */
4641
4642 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4643                                         unsigned long size, int cpu)
4644 {
4645         int ret;
4646
4647         /*
4648          * If kernel or user changes the size of the ring buffer
4649          * we use the size that was given, and we can forget about
4650          * expanding it later.
4651          */
4652         ring_buffer_expanded = true;
4653
4654         /* May be called before buffers are initialized */
4655         if (!tr->trace_buffer.buffer)
4656                 return 0;
4657
4658         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4659         if (ret < 0)
4660                 return ret;
4661
4662 #ifdef CONFIG_TRACER_MAX_TRACE
4663         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4664             !tr->current_trace->use_max_tr)
4665                 goto out;
4666
4667         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4668         if (ret < 0) {
4669                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4670                                                      &tr->trace_buffer, cpu);
4671                 if (r < 0) {
4672                         /*
4673                          * AARGH! We are left with different
4674                          * size max buffer!!!!
4675                          * The max buffer is our "snapshot" buffer.
4676                          * When a tracer needs a snapshot (one of the
4677                          * latency tracers), it swaps the max buffer
4678                          * with the saved snap shot. We succeeded to
4679                          * update the size of the main buffer, but failed to
4680                          * update the size of the max buffer. But when we tried
4681                          * to reset the main buffer to the original size, we
4682                          * failed there too. This is very unlikely to
4683                          * happen, but if it does, warn and kill all
4684                          * tracing.
4685                          */
4686                         WARN_ON(1);
4687                         tracing_disabled = 1;
4688                 }
4689                 return ret;
4690         }
4691
4692         if (cpu == RING_BUFFER_ALL_CPUS)
4693                 set_buffer_entries(&tr->max_buffer, size);
4694         else
4695                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4696
4697  out:
4698 #endif /* CONFIG_TRACER_MAX_TRACE */
4699
4700         if (cpu == RING_BUFFER_ALL_CPUS)
4701                 set_buffer_entries(&tr->trace_buffer, size);
4702         else
4703                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4704
4705         return ret;
4706 }
4707
4708 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4709                                           unsigned long size, int cpu_id)
4710 {
4711         int ret = size;
4712
4713         mutex_lock(&trace_types_lock);
4714
4715         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4716                 /* make sure this cpu is enabled in the mask */
4717                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4718                         ret = -EINVAL;
4719                         goto out;
4720                 }
4721         }
4722
4723         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4724         if (ret < 0)
4725                 ret = -ENOMEM;
4726
4727 out:
4728         mutex_unlock(&trace_types_lock);
4729
4730         return ret;
4731 }
4732
4733
4734 /**
4735  * tracing_update_buffers - used by tracing facility to expand ring buffers
4736  *
4737  * To save memory when tracing is never used on a system that has it
4738  * configured in, the ring buffers are set to a minimum size. Once
4739  * a user starts to use the tracing facility, they need to grow
4740  * to their default size.
4741  *
4742  * This function is to be called when a tracer is about to be used.
4743  */
4744 int tracing_update_buffers(void)
4745 {
4746         int ret = 0;
4747
4748         mutex_lock(&trace_types_lock);
4749         if (!ring_buffer_expanded)
4750                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4751                                                 RING_BUFFER_ALL_CPUS);
4752         mutex_unlock(&trace_types_lock);
4753
4754         return ret;
4755 }
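
/*
 * Illustrative usage sketch (not taken from this file): in-kernel code
 * that is about to start generating trace data typically expands the
 * buffers first and only proceeds on success.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */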
4756
4757 struct trace_option_dentry;
4758
4759 static void
4760 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4761
4762 /*
4763  * Used to clear out the tracer before deletion of an instance.
4764  * Must have trace_types_lock held.
4765  */
4766 static void tracing_set_nop(struct trace_array *tr)
4767 {
4768         if (tr->current_trace == &nop_trace)
4769                 return;
4770
4771         tr->current_trace->enabled--;
4772
4773         if (tr->current_trace->reset)
4774                 tr->current_trace->reset(tr);
4775
4776         tr->current_trace = &nop_trace;
4777 }
4778
4779 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4780 {
4781         /* Only enable if the directory has been created already. */
4782         if (!tr->dir)
4783                 return;
4784
4785         create_trace_option_files(tr, t);
4786 }
4787
4788 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4789 {
4790         struct tracer *t;
4791 #ifdef CONFIG_TRACER_MAX_TRACE
4792         bool had_max_tr;
4793 #endif
4794         int ret = 0;
4795
4796         mutex_lock(&trace_types_lock);
4797
4798         if (!ring_buffer_expanded) {
4799                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4800                                                 RING_BUFFER_ALL_CPUS);
4801                 if (ret < 0)
4802                         goto out;
4803                 ret = 0;
4804         }
4805
4806         for (t = trace_types; t; t = t->next) {
4807                 if (strcmp(t->name, buf) == 0)
4808                         break;
4809         }
4810         if (!t) {
4811                 ret = -EINVAL;
4812                 goto out;
4813         }
4814         if (t == tr->current_trace)
4815                 goto out;
4816
4817         /* Some tracers are only allowed for the top level buffer */
4818         if (!trace_ok_for_array(t, tr)) {
4819                 ret = -EINVAL;
4820                 goto out;
4821         }
4822
4823         /* If trace pipe files are being read, we can't change the tracer */
4824         if (tr->current_trace->ref) {
4825                 ret = -EBUSY;
4826                 goto out;
4827         }
4828
4829         trace_branch_disable();
4830
4831         tr->current_trace->enabled--;
4832
4833         if (tr->current_trace->reset)
4834                 tr->current_trace->reset(tr);
4835
4836         /* Current trace needs to be nop_trace before synchronize_sched */
4837         tr->current_trace = &nop_trace;
4838
4839 #ifdef CONFIG_TRACER_MAX_TRACE
4840         had_max_tr = tr->allocated_snapshot;
4841
4842         if (had_max_tr && !t->use_max_tr) {
4843                 /*
4844                  * We need to make sure that the update_max_tr sees that
4845                  * current_trace changed to nop_trace to keep it from
4846                  * swapping the buffers after we resize it.
4847                  * The update_max_tr is called with interrupts disabled,
4848                  * so a synchronize_sched() is sufficient.
4849                  */
4850                 synchronize_sched();
4851                 free_snapshot(tr);
4852         }
4853 #endif
4854
4855 #ifdef CONFIG_TRACER_MAX_TRACE
4856         if (t->use_max_tr && !had_max_tr) {
4857                 ret = alloc_snapshot(tr);
4858                 if (ret < 0)
4859                         goto out;
4860         }
4861 #endif
4862
4863         if (t->init) {
4864                 ret = tracer_init(t, tr);
4865                 if (ret)
4866                         goto out;
4867         }
4868
4869         tr->current_trace = t;
4870         tr->current_trace->enabled++;
4871         trace_branch_enable(tr);
4872  out:
4873         mutex_unlock(&trace_types_lock);
4874
4875         return ret;
4876 }
4877
4878 static ssize_t
4879 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4880                         size_t cnt, loff_t *ppos)
4881 {
4882         struct trace_array *tr = filp->private_data;
4883         char buf[MAX_TRACER_SIZE+1];
4884         int i;
4885         size_t ret;
4886         int err;
4887
4888         ret = cnt;
4889
4890         if (cnt > MAX_TRACER_SIZE)
4891                 cnt = MAX_TRACER_SIZE;
4892
4893         if (copy_from_user(buf, ubuf, cnt))
4894                 return -EFAULT;
4895
4896         buf[cnt] = 0;
4897
4898         /* strip trailing whitespace. */
4899         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4900                 buf[i] = 0;
4901
4902         err = tracing_set_tracer(tr, buf);
4903         if (err)
4904                 return err;
4905
4906         *ppos += ret;
4907
4908         return ret;
4909 }
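
/*
 * Illustrative user-space sketch (not part of this file): selecting a
 * tracer by writing its name to current_tracer. Assumes tracefs is
 * mounted at /sys/kernel/tracing (the path and the available tracers
 * vary by system).
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_tracer(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name)) < 0 ? -1 : 0;
 *		close(fd);
 *		return ret;
 *	}
 *
 * e.g. set_tracer("nop") selects the no-op tracer.
 */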
4910
4911 static ssize_t
4912 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4913                    size_t cnt, loff_t *ppos)
4914 {
4915         char buf[64];
4916         int r;
4917
4918         r = snprintf(buf, sizeof(buf), "%ld\n",
4919                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4920         if (r > sizeof(buf))
4921                 r = sizeof(buf);
4922         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4923 }
4924
4925 static ssize_t
4926 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4927                     size_t cnt, loff_t *ppos)
4928 {
4929         unsigned long val;
4930         int ret;
4931
4932         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4933         if (ret)
4934                 return ret;
4935
4936         *ptr = val * 1000;
4937
4938         return cnt;
4939 }
4940
4941 static ssize_t
4942 tracing_thresh_read(struct file *filp, char __user *ubuf,
4943                     size_t cnt, loff_t *ppos)
4944 {
4945         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4946 }
4947
4948 static ssize_t
4949 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4950                      size_t cnt, loff_t *ppos)
4951 {
4952         struct trace_array *tr = filp->private_data;
4953         int ret;
4954
4955         mutex_lock(&trace_types_lock);
4956         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4957         if (ret < 0)
4958                 goto out;
4959
4960         if (tr->current_trace->update_thresh) {
4961                 ret = tr->current_trace->update_thresh(tr);
4962                 if (ret < 0)
4963                         goto out;
4964         }
4965
4966         ret = cnt;
4967 out:
4968         mutex_unlock(&trace_types_lock);
4969
4970         return ret;
4971 }
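
/*
 * Worked example of the unit conversion above (illustrative): the
 * tracing_thresh file is read and written in microseconds, while the
 * variable itself holds nanoseconds. Writing "100" stores
 * 100 * 1000 = 100000 ns, and a subsequent read reports "100" again;
 * writing "0" disables the threshold.
 */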
4972
4973 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
4974
4975 static ssize_t
4976 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4977                      size_t cnt, loff_t *ppos)
4978 {
4979         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4980 }
4981
4982 static ssize_t
4983 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4984                       size_t cnt, loff_t *ppos)
4985 {
4986         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4987 }
4988
4989 #endif
4990
4991 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4992 {
4993         struct trace_array *tr = inode->i_private;
4994         struct trace_iterator *iter;
4995         int ret = 0;
4996
4997         if (tracing_disabled)
4998                 return -ENODEV;
4999
5000         if (trace_array_get(tr) < 0)
5001                 return -ENODEV;
5002
5003         mutex_lock(&trace_types_lock);
5004
5005         /* create a buffer to store the information to pass to userspace */
5006         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5007         if (!iter) {
5008                 ret = -ENOMEM;
5009                 __trace_array_put(tr);
5010                 goto out;
5011         }
5012
5013         trace_seq_init(&iter->seq);
5014         iter->trace = tr->current_trace;
5015
5016         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5017                 ret = -ENOMEM;
5018                 goto fail;
5019         }
5020
5021         /* trace pipe does not show start of buffer */
5022         cpumask_setall(iter->started);
5023
5024         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5025                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5026
5027         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5028         if (trace_clocks[tr->clock_id].in_ns)
5029                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5030
5031         iter->tr = tr;
5032         iter->trace_buffer = &tr->trace_buffer;
5033         iter->cpu_file = tracing_get_cpu(inode);
5034         mutex_init(&iter->mutex);
5035         filp->private_data = iter;
5036
5037         if (iter->trace->pipe_open)
5038                 iter->trace->pipe_open(iter);
5039
5040         nonseekable_open(inode, filp);
5041
5042         tr->current_trace->ref++;
5043 out:
5044         mutex_unlock(&trace_types_lock);
5045         return ret;
5046
5047 fail:
5048         kfree(iter->trace);
5049         kfree(iter);
5050         __trace_array_put(tr);
5051         mutex_unlock(&trace_types_lock);
5052         return ret;
5053 }
5054
5055 static int tracing_release_pipe(struct inode *inode, struct file *file)
5056 {
5057         struct trace_iterator *iter = file->private_data;
5058         struct trace_array *tr = inode->i_private;
5059
5060         mutex_lock(&trace_types_lock);
5061
5062         tr->current_trace->ref--;
5063
5064         if (iter->trace->pipe_close)
5065                 iter->trace->pipe_close(iter);
5066
5067         mutex_unlock(&trace_types_lock);
5068
5069         free_cpumask_var(iter->started);
5070         mutex_destroy(&iter->mutex);
5071         kfree(iter);
5072
5073         trace_array_put(tr);
5074
5075         return 0;
5076 }
5077
5078 static unsigned int
5079 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5080 {
5081         struct trace_array *tr = iter->tr;
5082
5083         /* Iterators are static, they should be filled or empty */
5084         if (trace_buffer_iter(iter, iter->cpu_file))
5085                 return POLLIN | POLLRDNORM;
5086
5087         if (tr->trace_flags & TRACE_ITER_BLOCK)
5088                 /*
5089                  * Always select as readable when in blocking mode
5090                  */
5091                 return POLLIN | POLLRDNORM;
5092         else
5093                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5094                                              filp, poll_table);
5095 }
5096
5097 static unsigned int
5098 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5099 {
5100         struct trace_iterator *iter = filp->private_data;
5101
5102         return trace_poll(iter, filp, poll_table);
5103 }
5104
5105 /* Must be called with iter->mutex held. */
5106 static int tracing_wait_pipe(struct file *filp)
5107 {
5108         struct trace_iterator *iter = filp->private_data;
5109         int ret;
5110
5111         while (trace_empty(iter)) {
5112
5113                 if ((filp->f_flags & O_NONBLOCK)) {
5114                         return -EAGAIN;
5115                 }
5116
5117                 /*
5118                  * We normally block while the buffer is empty.
5119                  * We still block if tracing is disabled, but we have never
5120                  * read anything. This allows a user to cat this file, and
5121                  * then enable tracing. But after we have read something,
5122                  * we give an EOF when tracing is again disabled.
5123                  *
5124                  * iter->pos will be 0 if we haven't read anything.
5125                  */
5126                 if (!tracing_is_on() && iter->pos)
5127                         break;
5128
5129                 mutex_unlock(&iter->mutex);
5130
5131                 ret = wait_on_pipe(iter, false);
5132
5133                 mutex_lock(&iter->mutex);
5134
5135                 if (ret)
5136                         return ret;
5137         }
5138
5139         return 1;
5140 }
5141
5142 /*
5143  * Consumer reader.
5144  */
5145 static ssize_t
5146 tracing_read_pipe(struct file *filp, char __user *ubuf,
5147                   size_t cnt, loff_t *ppos)
5148 {
5149         struct trace_iterator *iter = filp->private_data;
5150         ssize_t sret;
5151
5152         /*
5153          * Avoid more than one consumer on a single file descriptor.
5154          * This is just a matter of trace coherency; the ring buffer itself
5155          * is protected.
5156          */
5157         mutex_lock(&iter->mutex);
5158
5159         /* return any leftover data */
5160         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5161         if (sret != -EBUSY)
5162                 goto out;
5163
5164         trace_seq_init(&iter->seq);
5165
5166         if (iter->trace->read) {
5167                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5168                 if (sret)
5169                         goto out;
5170         }
5171
5172 waitagain:
5173         sret = tracing_wait_pipe(filp);
5174         if (sret <= 0)
5175                 goto out;
5176
5177         /* stop when tracing is finished */
5178         if (trace_empty(iter)) {
5179                 sret = 0;
5180                 goto out;
5181         }
5182
5183         if (cnt >= PAGE_SIZE)
5184                 cnt = PAGE_SIZE - 1;
5185
5186         /* reset all but tr, trace, and overruns */
5187         memset(&iter->seq, 0,
5188                sizeof(struct trace_iterator) -
5189                offsetof(struct trace_iterator, seq));
5190         cpumask_clear(iter->started);
5191         iter->pos = -1;
5192
5193         trace_event_read_lock();
5194         trace_access_lock(iter->cpu_file);
5195         while (trace_find_next_entry_inc(iter) != NULL) {
5196                 enum print_line_t ret;
5197                 int save_len = iter->seq.seq.len;
5198
5199                 ret = print_trace_line(iter);
5200                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5201                         /* don't print partial lines */
5202                         iter->seq.seq.len = save_len;
5203                         break;
5204                 }
5205                 if (ret != TRACE_TYPE_NO_CONSUME)
5206                         trace_consume(iter);
5207
5208                 if (trace_seq_used(&iter->seq) >= cnt)
5209                         break;
5210
5211                 /*
5212                  * Setting the full flag means we reached the trace_seq buffer
5213                  * size and we should leave by partial output condition above.
5214                  * One of the trace_seq_* functions is not used properly.
5215                  */
5216                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5217                           iter->ent->type);
5218         }
5219         trace_access_unlock(iter->cpu_file);
5220         trace_event_read_unlock();
5221
5222         /* Now copy what we have to the user */
5223         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5224         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5225                 trace_seq_init(&iter->seq);
5226
5227         /*
5228          * If there was nothing to send to user, in spite of consuming trace
5229          * entries, go back to wait for more entries.
5230          */
5231         if (sret == -EBUSY)
5232                 goto waitagain;
5233
5234 out:
5235         mutex_unlock(&iter->mutex);
5236
5237         return sret;
5238 }
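
/*
 * Illustrative user-space sketch (not part of this file): trace_pipe is
 * a consuming reader, so every event returned by read() is removed from
 * the ring buffer, and with O_NONBLOCK an empty buffer returns -EAGAIN
 * instead of blocking. Assumes tracefs at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char buf[4096];
 *	ssize_t n;
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		write(STDOUT_FILENO, buf, n);
 *	close(fd);
 */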
5239
5240 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5241                                      unsigned int idx)
5242 {
5243         __free_page(spd->pages[idx]);
5244 }
5245
5246 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5247         .can_merge              = 0,
5248         .confirm                = generic_pipe_buf_confirm,
5249         .release                = generic_pipe_buf_release,
5250         .steal                  = generic_pipe_buf_steal,
5251         .get                    = generic_pipe_buf_get,
5252 };
5253
5254 static size_t
5255 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5256 {
5257         size_t count;
5258         int save_len;
5259         int ret;
5260
5261         /* Seq buffer is page-sized, exactly what we need. */
5262         for (;;) {
5263                 save_len = iter->seq.seq.len;
5264                 ret = print_trace_line(iter);
5265
5266                 if (trace_seq_has_overflowed(&iter->seq)) {
5267                         iter->seq.seq.len = save_len;
5268                         break;
5269                 }
5270
5271                 /*
5272                  * This should not be hit, because it should only
5273                  * be set if the iter->seq overflowed. But check it
5274                  * anyway to be safe.
5275                  */
5276                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5277                         iter->seq.seq.len = save_len;
5278                         break;
5279                 }
5280
5281                 count = trace_seq_used(&iter->seq) - save_len;
5282                 if (rem < count) {
5283                         rem = 0;
5284                         iter->seq.seq.len = save_len;
5285                         break;
5286                 }
5287
5288                 if (ret != TRACE_TYPE_NO_CONSUME)
5289                         trace_consume(iter);
5290                 rem -= count;
5291                 if (!trace_find_next_entry_inc(iter))   {
5292                         rem = 0;
5293                         iter->ent = NULL;
5294                         break;
5295                 }
5296         }
5297
5298         return rem;
5299 }
5300
5301 static ssize_t tracing_splice_read_pipe(struct file *filp,
5302                                         loff_t *ppos,
5303                                         struct pipe_inode_info *pipe,
5304                                         size_t len,
5305                                         unsigned int flags)
5306 {
5307         struct page *pages_def[PIPE_DEF_BUFFERS];
5308         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5309         struct trace_iterator *iter = filp->private_data;
5310         struct splice_pipe_desc spd = {
5311                 .pages          = pages_def,
5312                 .partial        = partial_def,
5313                 .nr_pages       = 0, /* This gets updated below. */
5314                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5315                 .flags          = flags,
5316                 .ops            = &tracing_pipe_buf_ops,
5317                 .spd_release    = tracing_spd_release_pipe,
5318         };
5319         ssize_t ret;
5320         size_t rem;
5321         unsigned int i;
5322
5323         if (splice_grow_spd(pipe, &spd))
5324                 return -ENOMEM;
5325
5326         mutex_lock(&iter->mutex);
5327
5328         if (iter->trace->splice_read) {
5329                 ret = iter->trace->splice_read(iter, filp,
5330                                                ppos, pipe, len, flags);
5331                 if (ret)
5332                         goto out_err;
5333         }
5334
5335         ret = tracing_wait_pipe(filp);
5336         if (ret <= 0)
5337                 goto out_err;
5338
5339         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5340                 ret = -EFAULT;
5341                 goto out_err;
5342         }
5343
5344         trace_event_read_lock();
5345         trace_access_lock(iter->cpu_file);
5346
5347         /* Fill as many pages as possible. */
5348         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5349                 spd.pages[i] = alloc_page(GFP_KERNEL);
5350                 if (!spd.pages[i])
5351                         break;
5352
5353                 rem = tracing_fill_pipe_page(rem, iter);
5354
5355                 /* Copy the data into the page, so we can start over. */
5356                 ret = trace_seq_to_buffer(&iter->seq,
5357                                           page_address(spd.pages[i]),
5358                                           trace_seq_used(&iter->seq));
5359                 if (ret < 0) {
5360                         __free_page(spd.pages[i]);
5361                         break;
5362                 }
5363                 spd.partial[i].offset = 0;
5364                 spd.partial[i].len = trace_seq_used(&iter->seq);
5365
5366                 trace_seq_init(&iter->seq);
5367         }
5368
5369         trace_access_unlock(iter->cpu_file);
5370         trace_event_read_unlock();
5371         mutex_unlock(&iter->mutex);
5372
5373         spd.nr_pages = i;
5374
5375         if (i)
5376                 ret = splice_to_pipe(pipe, &spd);
5377         else
5378                 ret = 0;
5379 out:
5380         splice_shrink_spd(&spd);
5381         return ret;
5382
5383 out_err:
5384         mutex_unlock(&iter->mutex);
5385         goto out;
5386 }
5387
5388 static ssize_t
5389 tracing_entries_read(struct file *filp, char __user *ubuf,
5390                      size_t cnt, loff_t *ppos)
5391 {
5392         struct inode *inode = file_inode(filp);
5393         struct trace_array *tr = inode->i_private;
5394         int cpu = tracing_get_cpu(inode);
5395         char buf[64];
5396         int r = 0;
5397         ssize_t ret;
5398
5399         mutex_lock(&trace_types_lock);
5400
5401         if (cpu == RING_BUFFER_ALL_CPUS) {
5402                 int cpu, buf_size_same;
5403                 unsigned long size;
5404
5405                 size = 0;
5406                 buf_size_same = 1;
5407                 /* check if all cpu sizes are same */
5408                 for_each_tracing_cpu(cpu) {
5409                         /* fill in the size from first enabled cpu */
5410                         if (size == 0)
5411                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5412                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5413                                 buf_size_same = 0;
5414                                 break;
5415                         }
5416                 }
5417
5418                 if (buf_size_same) {
5419                         if (!ring_buffer_expanded)
5420                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5421                                             size >> 10,
5422                                             trace_buf_size >> 10);
5423                         else
5424                                 r = sprintf(buf, "%lu\n", size >> 10);
5425                 } else
5426                         r = sprintf(buf, "X\n");
5427         } else
5428                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5429
5430         mutex_unlock(&trace_types_lock);
5431
5432         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5433         return ret;
5434 }
5435
5436 static ssize_t
5437 tracing_entries_write(struct file *filp, const char __user *ubuf,
5438                       size_t cnt, loff_t *ppos)
5439 {
5440         struct inode *inode = file_inode(filp);
5441         struct trace_array *tr = inode->i_private;
5442         unsigned long val;
5443         int ret;
5444
5445         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5446         if (ret)
5447                 return ret;
5448
5449         /* must have at least 1 entry */
5450         if (!val)
5451                 return -EINVAL;
5452
5453         /* value is in KB */
5454         val <<= 10;
5455         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5456         if (ret < 0)
5457                 return ret;
5458
5459         *ppos += cnt;
5460
5461         return cnt;
5462 }
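
/*
 * Illustrative user-space sketch (not part of this file): buffer_size_kb
 * takes a per-CPU size in kilobytes; the top-level file resizes every
 * CPU, while per_cpu/cpuN/buffer_size_kb resizes a single CPU. Assumes
 * tracefs at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "4096", 4);
 *		close(fd);
 *	}
 */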
5463
5464 static ssize_t
5465 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5466                                 size_t cnt, loff_t *ppos)
5467 {
5468         struct trace_array *tr = filp->private_data;
5469         char buf[64];
5470         int r, cpu;
5471         unsigned long size = 0, expanded_size = 0;
5472
5473         mutex_lock(&trace_types_lock);
5474         for_each_tracing_cpu(cpu) {
5475                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5476                 if (!ring_buffer_expanded)
5477                         expanded_size += trace_buf_size >> 10;
5478         }
5479         if (ring_buffer_expanded)
5480                 r = sprintf(buf, "%lu\n", size);
5481         else
5482                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5483         mutex_unlock(&trace_types_lock);
5484
5485         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5486 }
5487
5488 static ssize_t
5489 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5490                           size_t cnt, loff_t *ppos)
5491 {
5492         /*
5493          * There is no need to read what the user has written; this function
5494          * exists just to make sure that there is no error when "echo" is used.
5495          */
5496
5497         *ppos += cnt;
5498
5499         return cnt;
5500 }
5501
5502 static int
5503 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5504 {
5505         struct trace_array *tr = inode->i_private;
5506
5507         /* disable tracing ? */
5508         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5509                 tracer_tracing_off(tr);
5510         /* resize the ring buffer to 0 */
5511         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5512
5513         trace_array_put(tr);
5514
5515         return 0;
5516 }
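
/*
 * Illustrative user-space sketch (not part of this file): the
 * free_buffer file releases the ring buffer memory on the final close
 * (and, with the stop-on-free trace option set, also stops tracing), so
 * simply opening and closing it is enough. Path assumed to be
 * /sys/kernel/tracing/free_buffer.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);
 *
 *	if (fd >= 0)
 *		close(fd);
 */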
5517
5518 static inline int lock_user_pages(const char __user *ubuf, size_t cnt,
5519                                   struct page **pages, void **map_page,
5520                                   int *offset)
5521 {
5522         unsigned long addr = (unsigned long)ubuf;
5523         int nr_pages = 1;
5524         int ret;
5525         int i;
5526
5527         /*
5528          * Userspace is injecting traces into the kernel trace buffer.
5529          * We want to be as non-intrusive as possible.
5530          * To do so, we do not want to allocate any special buffers
5531          * or take any locks, but instead write the userspace data
5532          * straight into the ring buffer.
5533          *
5534          * First we need to pin the userspace buffer into memory.
5535          * It most likely already is, because the caller just referenced
5536          * it, but there is no guarantee. By using get_user_pages_fast()
5537          * and kmap_atomic/kunmap_atomic() we can get access to the
5538          * pages directly. We then write the data directly into the
5539          * ring buffer.
5540          */
5541
5542         /* check if we cross pages */
5543         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5544                 nr_pages = 2;
5545
5546         *offset = addr & (PAGE_SIZE - 1);
5547         addr &= PAGE_MASK;
5548
5549         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5550         if (ret < nr_pages) {
5551                 while (--ret >= 0)
5552                         put_page(pages[ret]);
5553                 return -EFAULT;
5554         }
5555
5556         for (i = 0; i < nr_pages; i++)
5557                 map_page[i] = kmap_atomic(pages[i]);
5558
5559         return nr_pages;
5560 }
5561
5562 static inline void unlock_user_pages(struct page **pages,
5563                                      void **map_page, int nr_pages)
5564 {
5565         int i;
5566
5567         for (i = nr_pages - 1; i >= 0; i--) {
5568                 kunmap_atomic(map_page[i]);
5569                 put_page(pages[i]);
5570         }
5571 }
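
/*
 * Worked example of the page-crossing check above (illustrative,
 * assuming PAGE_SIZE == 4096): for ubuf == 0x1ff8 and cnt == 16,
 * (addr & PAGE_MASK) is 0x1000 while ((addr + cnt) & PAGE_MASK) is
 * 0x2000, so nr_pages becomes 2 and offset is 0xff8. The writer then
 * copies 8 bytes from the end of the first page and the remaining
 * 8 bytes from the start of the second.
 */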
5572
5573 static ssize_t
5574 tracing_mark_write(struct file *filp, const char __user *ubuf,
5575                                         size_t cnt, loff_t *fpos)
5576 {
5577         struct trace_array *tr = filp->private_data;
5578         struct ring_buffer_event *event;
5579         struct ring_buffer *buffer;
5580         struct print_entry *entry;
5581         unsigned long irq_flags;
5582         struct page *pages[2];
5583         void *map_page[2];
5584         int nr_pages = 1;
5585         ssize_t written;
5586         int offset;
5587         int size;
5588         int len;
5589
5590         if (tracing_disabled)
5591                 return -EINVAL;
5592
5593         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5594                 return -EINVAL;
5595
5596         if (cnt > TRACE_BUF_SIZE)
5597                 cnt = TRACE_BUF_SIZE;
5598
5599         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5600
5601         nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
5602         if (nr_pages < 0)
5603                 return nr_pages;
5604
5605         local_save_flags(irq_flags);
5606         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5607         buffer = tr->trace_buffer.buffer;
5608         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5609                                           irq_flags, preempt_count());
5610         if (!event) {
5611                 /* Ring buffer disabled, return as if not open for write */
5612                 written = -EBADF;
5613                 goto out_unlock;
5614         }
5615
5616         entry = ring_buffer_event_data(event);
5617         entry->ip = _THIS_IP_;
5618
5619         if (nr_pages == 2) {
5620                 len = PAGE_SIZE - offset;
5621                 memcpy(&entry->buf, map_page[0] + offset, len);
5622                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5623         } else
5624                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5625
5626         if (entry->buf[cnt - 1] != '\n') {
5627                 entry->buf[cnt] = '\n';
5628                 entry->buf[cnt + 1] = '\0';
5629         } else
5630                 entry->buf[cnt] = '\0';
5631
5632         __buffer_unlock_commit(buffer, event);
5633
5634         written = cnt;
5635
5636         *fpos += written;
5637
5638  out_unlock:
5639         unlock_user_pages(pages, map_page, nr_pages);
5640
5641         return written;
5642 }
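
/*
 * Illustrative user-space sketch (not part of this file): writing to
 * trace_marker injects the string into the ring buffer as a TRACE_PRINT
 * event (a newline is appended above if the data lacks one). Assumes
 * tracefs at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "hello from user space", 21);
 *		close(fd);
 *	}
 */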
5643
5644 /* Limit it for now to 3K (including tag) */
5645 #define RAW_DATA_MAX_SIZE (1024*3)
5646
5647 static ssize_t
5648 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5649                                         size_t cnt, loff_t *fpos)
5650 {
5651         struct trace_array *tr = filp->private_data;
5652         struct ring_buffer_event *event;
5653         struct ring_buffer *buffer;
5654         struct raw_data_entry *entry;
5655         unsigned long irq_flags;
5656         struct page *pages[2];
5657         void *map_page[2];
5658         int nr_pages = 1;
5659         ssize_t written;
5660         int offset;
5661         int size;
5662         int len;
5663
5664         if (tracing_disabled)
5665                 return -EINVAL;
5666
5667         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5668                 return -EINVAL;
5669
5670         /* The marker must at least have a tag id */
5671         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5672                 return -EINVAL;
5673
5674         if (cnt > TRACE_BUF_SIZE)
5675                 cnt = TRACE_BUF_SIZE;
5676
5677         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5678
5679         nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset);
5680         if (nr_pages < 0)
5681                 return nr_pages;
5682
5683         local_save_flags(irq_flags);
5684         size = sizeof(*entry) + cnt;
5685         buffer = tr->trace_buffer.buffer;
5686         event = trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5687                                           irq_flags, preempt_count());
5688         if (!event) {
5689                 /* Ring buffer disabled, return as if not open for write */
5690                 written = -EBADF;
5691                 goto out_unlock;
5692         }
5693
5694         entry = ring_buffer_event_data(event);
5695
5696         if (nr_pages == 2) {
5697                 len = PAGE_SIZE - offset;
5698                 memcpy(&entry->id, map_page[0] + offset, len);
5699                 memcpy(((char *)&entry->id) + len, map_page[1], cnt - len);
5700         } else
5701                 memcpy(&entry->id, map_page[0] + offset, cnt);
5702
5703         __buffer_unlock_commit(buffer, event);
5704
5705         written = cnt;
5706
5707         *fpos += written;
5708
5709  out_unlock:
5710         unlock_user_pages(pages, map_page, nr_pages);
5711
5712         return written;
5713 }
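
/*
 * Illustrative user-space sketch (not part of this file):
 * trace_marker_raw expects the payload to start with an unsigned int
 * tag id (see the size check above), followed by arbitrary binary
 * data. The struct layout below is only an example.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	struct {
 *		unsigned int id;
 *		char payload[8];
 *	} rec = { .id = 1234, .payload = "rawdata" };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */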
5714
5715 static int tracing_clock_show(struct seq_file *m, void *v)
5716 {
5717         struct trace_array *tr = m->private;
5718         int i;
5719
5720         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5721                 seq_printf(m,
5722                         "%s%s%s%s", i ? " " : "",
5723                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5724                         i == tr->clock_id ? "]" : "");
5725         seq_putc(m, '\n');
5726
5727         return 0;
5728 }
5729
5730 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5731 {
5732         int i;
5733
5734         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5735                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5736                         break;
5737         }
5738         if (i == ARRAY_SIZE(trace_clocks))
5739                 return -EINVAL;
5740
5741         mutex_lock(&trace_types_lock);
5742
5743         tr->clock_id = i;
5744
5745         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5746
5747         /*
5748          * New clock may not be consistent with the previous clock.
5749          * Reset the buffer so that it doesn't have incomparable timestamps.
5750          */
5751         tracing_reset_online_cpus(&tr->trace_buffer);
5752
5753 #ifdef CONFIG_TRACER_MAX_TRACE
5754         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5755                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5756         tracing_reset_online_cpus(&tr->max_buffer);
5757 #endif
5758
5759         mutex_unlock(&trace_types_lock);
5760
5761         return 0;
5762 }
5763
5764 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5765                                    size_t cnt, loff_t *fpos)
5766 {
5767         struct seq_file *m = filp->private_data;
5768         struct trace_array *tr = m->private;
5769         char buf[64];
5770         const char *clockstr;
5771         int ret;
5772
5773         if (cnt >= sizeof(buf))
5774                 return -EINVAL;
5775
5776         if (copy_from_user(buf, ubuf, cnt))
5777                 return -EFAULT;
5778
5779         buf[cnt] = 0;
5780
5781         clockstr = strstrip(buf);
5782
5783         ret = tracing_set_clock(tr, clockstr);
5784         if (ret)
5785                 return ret;
5786
5787         *fpos += cnt;
5788
5789         return cnt;
5790 }
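
/*
 * Illustrative user-space sketch (not part of this file): reading
 * trace_clock lists the available clocks with the current one in
 * brackets; writing a clock name switches to it and, as noted above,
 * resets the ring buffer. Assumes tracefs at /sys/kernel/tracing and
 * that "mono" is among the listed clocks.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "mono", 4);
 *		close(fd);
 *	}
 */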
5791
5792 static int tracing_clock_open(struct inode *inode, struct file *file)
5793 {
5794         struct trace_array *tr = inode->i_private;
5795         int ret;
5796
5797         if (tracing_disabled)
5798                 return -ENODEV;
5799
5800         if (trace_array_get(tr))
5801                 return -ENODEV;
5802
5803         ret = single_open(file, tracing_clock_show, inode->i_private);
5804         if (ret < 0)
5805                 trace_array_put(tr);
5806
5807         return ret;
5808 }
5809
5810 struct ftrace_buffer_info {
5811         struct trace_iterator   iter;
5812         void                    *spare;
5813         unsigned int            read;
5814 };
5815
5816 #ifdef CONFIG_TRACER_SNAPSHOT
5817 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5818 {
5819         struct trace_array *tr = inode->i_private;
5820         struct trace_iterator *iter;
5821         struct seq_file *m;
5822         int ret = 0;
5823
5824         if (trace_array_get(tr) < 0)
5825                 return -ENODEV;
5826
5827         if (file->f_mode & FMODE_READ) {
5828                 iter = __tracing_open(inode, file, true);
5829                 if (IS_ERR(iter))
5830                         ret = PTR_ERR(iter);
5831         } else {
5832                 /* Writes still need the seq_file to hold the private data */
5833                 ret = -ENOMEM;
5834                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5835                 if (!m)
5836                         goto out;
5837                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5838                 if (!iter) {
5839                         kfree(m);
5840                         goto out;
5841                 }
5842                 ret = 0;
5843
5844                 iter->tr = tr;
5845                 iter->trace_buffer = &tr->max_buffer;
5846                 iter->cpu_file = tracing_get_cpu(inode);
5847                 m->private = iter;
5848                 file->private_data = m;
5849         }
5850 out:
5851         if (ret < 0)
5852                 trace_array_put(tr);
5853
5854         return ret;
5855 }
5856
5857 static ssize_t
5858 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5859                        loff_t *ppos)
5860 {
5861         struct seq_file *m = filp->private_data;
5862         struct trace_iterator *iter = m->private;
5863         struct trace_array *tr = iter->tr;
5864         unsigned long val;
5865         int ret;
5866
5867         ret = tracing_update_buffers();
5868         if (ret < 0)
5869                 return ret;
5870
5871         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5872         if (ret)
5873                 return ret;
5874
5875         mutex_lock(&trace_types_lock);
5876
5877         if (tr->current_trace->use_max_tr) {
5878                 ret = -EBUSY;
5879                 goto out;
5880         }
5881
5882         switch (val) {
5883         case 0:
5884                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5885                         ret = -EINVAL;
5886                         break;
5887                 }
5888                 if (tr->allocated_snapshot)
5889                         free_snapshot(tr);
5890                 break;
5891         case 1:
5892 /* Only allow per-cpu swap if the ring buffer supports it */
5893 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5894                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5895                         ret = -EINVAL;
5896                         break;
5897                 }
5898 #endif
5899                 if (!tr->allocated_snapshot) {
5900                         ret = alloc_snapshot(tr);
5901                         if (ret < 0)
5902                                 break;
5903                 }
5904                 local_irq_disable();
5905                 /* Now, we're going to swap */
5906                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5907                         update_max_tr(tr, current, smp_processor_id());
5908                 else
5909                         update_max_tr_single(tr, current, iter->cpu_file);
5910                 local_irq_enable();
5911                 break;
5912         default:
5913                 if (tr->allocated_snapshot) {
5914                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5915                                 tracing_reset_online_cpus(&tr->max_buffer);
5916                         else
5917                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5918                 }
5919                 break;
5920         }
5921
5922         if (ret >= 0) {
5923                 *ppos += cnt;
5924                 ret = cnt;
5925         }
5926 out:
5927         mutex_unlock(&trace_types_lock);
5928         return ret;
5929 }
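
/*
 * Illustrative user-space sketch (not part of this file), summarizing
 * the switch above: writing 0 frees the snapshot buffer, writing 1
 * allocates it if needed and takes a snapshot, and any other value
 * clears the snapshot contents. Assumes tracefs at /sys/kernel/tracing.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */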
5930
5931 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5932 {
5933         struct seq_file *m = file->private_data;
5934         int ret;
5935
5936         ret = tracing_release(inode, file);
5937
5938         if (file->f_mode & FMODE_READ)
5939                 return ret;
5940
5941         /* If write only, the seq_file is just a stub */
5942         if (m)
5943                 kfree(m->private);
5944         kfree(m);
5945
5946         return 0;
5947 }
5948
5949 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5950 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5951                                     size_t count, loff_t *ppos);
5952 static int tracing_buffers_release(struct inode *inode, struct file *file);
5953 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5954                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5955
5956 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5957 {
5958         struct ftrace_buffer_info *info;
5959         int ret;
5960
5961         ret = tracing_buffers_open(inode, filp);
5962         if (ret < 0)
5963                 return ret;
5964
5965         info = filp->private_data;
5966
5967         if (info->iter.trace->use_max_tr) {
5968                 tracing_buffers_release(inode, filp);
5969                 return -EBUSY;
5970         }
5971
5972         info->iter.snapshot = true;
5973         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5974
5975         return ret;
5976 }
5977
5978 #endif /* CONFIG_TRACER_SNAPSHOT */
5979
5980
5981 static const struct file_operations tracing_thresh_fops = {
5982         .open           = tracing_open_generic,
5983         .read           = tracing_thresh_read,
5984         .write          = tracing_thresh_write,
5985         .llseek         = generic_file_llseek,
5986 };
5987
5988 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5989 static const struct file_operations tracing_max_lat_fops = {
5990         .open           = tracing_open_generic,
5991         .read           = tracing_max_lat_read,
5992         .write          = tracing_max_lat_write,
5993         .llseek         = generic_file_llseek,
5994 };
5995 #endif
5996
5997 static const struct file_operations set_tracer_fops = {
5998         .open           = tracing_open_generic,
5999         .read           = tracing_set_trace_read,
6000         .write          = tracing_set_trace_write,
6001         .llseek         = generic_file_llseek,
6002 };
6003
6004 static const struct file_operations tracing_pipe_fops = {
6005         .open           = tracing_open_pipe,
6006         .poll           = tracing_poll_pipe,
6007         .read           = tracing_read_pipe,
6008         .splice_read    = tracing_splice_read_pipe,
6009         .release        = tracing_release_pipe,
6010         .llseek         = no_llseek,
6011 };
6012
6013 static const struct file_operations tracing_entries_fops = {
6014         .open           = tracing_open_generic_tr,
6015         .read           = tracing_entries_read,
6016         .write          = tracing_entries_write,
6017         .llseek         = generic_file_llseek,
6018         .release        = tracing_release_generic_tr,
6019 };
6020
6021 static const struct file_operations tracing_total_entries_fops = {
6022         .open           = tracing_open_generic_tr,
6023         .read           = tracing_total_entries_read,
6024         .llseek         = generic_file_llseek,
6025         .release        = tracing_release_generic_tr,
6026 };
6027
6028 static const struct file_operations tracing_free_buffer_fops = {
6029         .open           = tracing_open_generic_tr,
6030         .write          = tracing_free_buffer_write,
6031         .release        = tracing_free_buffer_release,
6032 };
6033
6034 static const struct file_operations tracing_mark_fops = {
6035         .open           = tracing_open_generic_tr,
6036         .write          = tracing_mark_write,
6037         .llseek         = generic_file_llseek,
6038         .release        = tracing_release_generic_tr,
6039 };
6040
6041 static const struct file_operations tracing_mark_raw_fops = {
6042         .open           = tracing_open_generic_tr,
6043         .write          = tracing_mark_raw_write,
6044         .llseek         = generic_file_llseek,
6045         .release        = tracing_release_generic_tr,
6046 };
6047
6048 static const struct file_operations trace_clock_fops = {
6049         .open           = tracing_clock_open,
6050         .read           = seq_read,
6051         .llseek         = seq_lseek,
6052         .release        = tracing_single_release_tr,
6053         .write          = tracing_clock_write,
6054 };
6055
6056 #ifdef CONFIG_TRACER_SNAPSHOT
6057 static const struct file_operations snapshot_fops = {
6058         .open           = tracing_snapshot_open,
6059         .read           = seq_read,
6060         .write          = tracing_snapshot_write,
6061         .llseek         = tracing_lseek,
6062         .release        = tracing_snapshot_release,
6063 };
6064
6065 static const struct file_operations snapshot_raw_fops = {
6066         .open           = snapshot_raw_open,
6067         .read           = tracing_buffers_read,
6068         .release        = tracing_buffers_release,
6069         .splice_read    = tracing_buffers_splice_read,
6070         .llseek         = no_llseek,
6071 };
6072
6073 #endif /* CONFIG_TRACER_SNAPSHOT */
6074
6075 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6076 {
6077         struct trace_array *tr = inode->i_private;
6078         struct ftrace_buffer_info *info;
6079         int ret;
6080
6081         if (tracing_disabled)
6082                 return -ENODEV;
6083
6084         if (trace_array_get(tr) < 0)
6085                 return -ENODEV;
6086
6087         info = kzalloc(sizeof(*info), GFP_KERNEL);
6088         if (!info) {
6089                 trace_array_put(tr);
6090                 return -ENOMEM;
6091         }
6092
6093         mutex_lock(&trace_types_lock);
6094
6095         info->iter.tr           = tr;
6096         info->iter.cpu_file     = tracing_get_cpu(inode);
6097         info->iter.trace        = tr->current_trace;
6098         info->iter.trace_buffer = &tr->trace_buffer;
6099         info->spare             = NULL;
6100         /* Force reading ring buffer for first read */
6101         info->read              = (unsigned int)-1;
6102
6103         filp->private_data = info;
6104
6105         tr->current_trace->ref++;
6106
6107         mutex_unlock(&trace_types_lock);
6108
6109         ret = nonseekable_open(inode, filp);
6110         if (ret < 0)
6111                 trace_array_put(tr);
6112
6113         return ret;
6114 }
6115
6116 static unsigned int
6117 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6118 {
6119         struct ftrace_buffer_info *info = filp->private_data;
6120         struct trace_iterator *iter = &info->iter;
6121
6122         return trace_poll(iter, filp, poll_table);
6123 }
6124
6125 static ssize_t
6126 tracing_buffers_read(struct file *filp, char __user *ubuf,
6127                      size_t count, loff_t *ppos)
6128 {
6129         struct ftrace_buffer_info *info = filp->private_data;
6130         struct trace_iterator *iter = &info->iter;
6131         ssize_t ret;
6132         ssize_t size;
6133
6134         if (!count)
6135                 return 0;
6136
6137 #ifdef CONFIG_TRACER_MAX_TRACE
6138         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6139                 return -EBUSY;
6140 #endif
6141
6142         if (!info->spare)
6143                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6144                                                           iter->cpu_file);
6145         if (!info->spare)
6146                 return -ENOMEM;
6147
6148         /* Do we have previous read data to read? */
6149         if (info->read < PAGE_SIZE)
6150                 goto read;
6151
6152  again:
6153         trace_access_lock(iter->cpu_file);
6154         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6155                                     &info->spare,
6156                                     count,
6157                                     iter->cpu_file, 0);
6158         trace_access_unlock(iter->cpu_file);
6159
6160         if (ret < 0) {
6161                 if (trace_empty(iter)) {
6162                         if ((filp->f_flags & O_NONBLOCK))
6163                                 return -EAGAIN;
6164
6165                         ret = wait_on_pipe(iter, false);
6166                         if (ret)
6167                                 return ret;
6168
6169                         goto again;
6170                 }
6171                 return 0;
6172         }
6173
6174         info->read = 0;
6175  read:
6176         size = PAGE_SIZE - info->read;
6177         if (size > count)
6178                 size = count;
6179
6180         ret = copy_to_user(ubuf, info->spare + info->read, size);
6181         if (ret == size)
6182                 return -EFAULT;
6183
6184         size -= ret;
6185
6186         *ppos += size;
6187         info->read += size;
6188
6189         return size;
6190 }
6191
6192 static int tracing_buffers_release(struct inode *inode, struct file *file)
6193 {
6194         struct ftrace_buffer_info *info = file->private_data;
6195         struct trace_iterator *iter = &info->iter;
6196
6197         mutex_lock(&trace_types_lock);
6198
6199         iter->tr->current_trace->ref--;
6200
6201         __trace_array_put(iter->tr);
6202
6203         if (info->spare)
6204                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6205         kfree(info);
6206
6207         mutex_unlock(&trace_types_lock);
6208
6209         return 0;
6210 }
6211
6212 struct buffer_ref {
6213         struct ring_buffer      *buffer;
6214         void                    *page;
6215         int                     ref;
6216 };
6217
6218 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6219                                     struct pipe_buffer *buf)
6220 {
6221         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6222
6223         if (--ref->ref)
6224                 return;
6225
6226         ring_buffer_free_read_page(ref->buffer, ref->page);
6227         kfree(ref);
6228         buf->private = 0;
6229 }
6230
6231 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6232                                 struct pipe_buffer *buf)
6233 {
6234         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6235
6236         ref->ref++;
6237 }
6238
6239 /* Pipe buffer operations for a buffer. */
6240 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6241         .can_merge              = 0,
6242         .confirm                = generic_pipe_buf_confirm,
6243         .release                = buffer_pipe_buf_release,
6244         .steal                  = generic_pipe_buf_steal,
6245         .get                    = buffer_pipe_buf_get,
6246 };
6247
6248 /*
6249  * Callback from splice_to_pipe(), if we need to release some pages
6250  * at the end of the spd in case we error'ed out in filling the pipe.
6251  * at the end of the spd in case we errored out while filling the pipe.
6252 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6253 {
6254         struct buffer_ref *ref =
6255                 (struct buffer_ref *)spd->partial[i].private;
6256
6257         if (--ref->ref)
6258                 return;
6259
6260         ring_buffer_free_read_page(ref->buffer, ref->page);
6261         kfree(ref);
6262         spd->partial[i].private = 0;
6263 }
6264
6265 static ssize_t
6266 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6267                             struct pipe_inode_info *pipe, size_t len,
6268                             unsigned int flags)
6269 {
6270         struct ftrace_buffer_info *info = file->private_data;
6271         struct trace_iterator *iter = &info->iter;
6272         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6273         struct page *pages_def[PIPE_DEF_BUFFERS];
6274         struct splice_pipe_desc spd = {
6275                 .pages          = pages_def,
6276                 .partial        = partial_def,
6277                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6278                 .flags          = flags,
6279                 .ops            = &buffer_pipe_buf_ops,
6280                 .spd_release    = buffer_spd_release,
6281         };
6282         struct buffer_ref *ref;
6283         int entries, size, i;
6284         ssize_t ret = 0;
6285
6286 #ifdef CONFIG_TRACER_MAX_TRACE
6287         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6288                 return -EBUSY;
6289 #endif
6290
6291         if (*ppos & (PAGE_SIZE - 1))
6292                 return -EINVAL;
6293
6294         if (len & (PAGE_SIZE - 1)) {
6295                 if (len < PAGE_SIZE)
6296                         return -EINVAL;
6297                 len &= PAGE_MASK;
6298         }
6299
6300         if (splice_grow_spd(pipe, &spd))
6301                 return -ENOMEM;
6302
6303  again:
6304         trace_access_lock(iter->cpu_file);
6305         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6306
6307         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6308                 struct page *page;
6309                 int r;
6310
6311                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6312                 if (!ref) {
6313                         ret = -ENOMEM;
6314                         break;
6315                 }
6316
6317                 ref->ref = 1;
6318                 ref->buffer = iter->trace_buffer->buffer;
6319                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6320                 if (!ref->page) {
6321                         ret = -ENOMEM;
6322                         kfree(ref);
6323                         break;
6324                 }
6325
6326                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6327                                           len, iter->cpu_file, 1);
6328                 if (r < 0) {
6329                         ring_buffer_free_read_page(ref->buffer, ref->page);
6330                         kfree(ref);
6331                         break;
6332                 }
6333
6334                 /*
6335                  * Zero out any leftover data, as this is going to
6336                  * user space.
6337                  */
6338                 size = ring_buffer_page_len(ref->page);
6339                 if (size < PAGE_SIZE)
6340                         memset(ref->page + size, 0, PAGE_SIZE - size);
6341
6342                 page = virt_to_page(ref->page);
6343
6344                 spd.pages[i] = page;
6345                 spd.partial[i].len = PAGE_SIZE;
6346                 spd.partial[i].offset = 0;
6347                 spd.partial[i].private = (unsigned long)ref;
6348                 spd.nr_pages++;
6349                 *ppos += PAGE_SIZE;
6350
6351                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6352         }
6353
6354         trace_access_unlock(iter->cpu_file);
6355         spd.nr_pages = i;
6356
6357         /* did we read anything? */
6358         if (!spd.nr_pages) {
6359                 if (ret)
6360                         goto out;
6361
6362                 ret = -EAGAIN;
6363                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6364                         goto out;
6365
6366                 ret = wait_on_pipe(iter, true);
6367                 if (ret)
6368                         goto out;
6369
6370                 goto again;
6371         }
6372
6373         ret = splice_to_pipe(pipe, &spd);
6374 out:
6375         splice_shrink_spd(&spd);
6376
6377         return ret;
6378 }
6379
6380 static const struct file_operations tracing_buffers_fops = {
6381         .open           = tracing_buffers_open,
6382         .read           = tracing_buffers_read,
6383         .poll           = tracing_buffers_poll,
6384         .release        = tracing_buffers_release,
6385         .splice_read    = tracing_buffers_splice_read,
6386         .llseek         = no_llseek,
6387 };
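
/*
 * Userspace sketch (illustrative only, not part of the kernel build):
 * tracing_buffers_splice_read() backs the per-cpu "trace_pipe_raw" file,
 * so raw ring-buffer pages can be moved into a pipe without copying.
 * A minimal consumer, assuming tracefs is mounted at /sys/kernel/tracing,
 * 4K pages, and an arbitrary destination descriptor out_fd (error
 * handling omitted for brevity):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	int pfd[2];
 *
 *	pipe(pfd);
 *	// Length must be at least one page and is rounded down to a page
 *	// multiple (see the PAGE_SIZE checks above).  With
 *	// SPLICE_F_NONBLOCK an empty buffer returns -EAGAIN.
 *	splice(fd, NULL, pfd[1], NULL, 4 * 4096, SPLICE_F_NONBLOCK);
 *	splice(pfd[0], NULL, out_fd, NULL, 4 * 4096, 0);
 */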
6388
6389 static ssize_t
6390 tracing_stats_read(struct file *filp, char __user *ubuf,
6391                    size_t count, loff_t *ppos)
6392 {
6393         struct inode *inode = file_inode(filp);
6394         struct trace_array *tr = inode->i_private;
6395         struct trace_buffer *trace_buf = &tr->trace_buffer;
6396         int cpu = tracing_get_cpu(inode);
6397         struct trace_seq *s;
6398         unsigned long cnt;
6399         unsigned long long t;
6400         unsigned long usec_rem;
6401
6402         s = kmalloc(sizeof(*s), GFP_KERNEL);
6403         if (!s)
6404                 return -ENOMEM;
6405
6406         trace_seq_init(s);
6407
6408         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6409         trace_seq_printf(s, "entries: %ld\n", cnt);
6410
6411         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6412         trace_seq_printf(s, "overrun: %ld\n", cnt);
6413
6414         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6415         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6416
6417         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6418         trace_seq_printf(s, "bytes: %ld\n", cnt);
6419
6420         if (trace_clocks[tr->clock_id].in_ns) {
6421                 /* local or global for trace_clock */
6422                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6423                 usec_rem = do_div(t, USEC_PER_SEC);
6424                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6425                                                                 t, usec_rem);
6426
6427                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6428                 usec_rem = do_div(t, USEC_PER_SEC);
6429                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6430         } else {
6431                 /* counter or tsc mode for trace_clock */
6432                 trace_seq_printf(s, "oldest event ts: %llu\n",
6433                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6434
6435                 trace_seq_printf(s, "now ts: %llu\n",
6436                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6437         }
6438
6439         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6440         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6441
6442         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6443         trace_seq_printf(s, "read events: %ld\n", cnt);
6444
6445         count = simple_read_from_buffer(ubuf, count, ppos,
6446                                         s->buffer, trace_seq_used(s));
6447
6448         kfree(s);
6449
6450         return count;
6451 }
6452
6453 static const struct file_operations tracing_stats_fops = {
6454         .open           = tracing_open_generic_tr,
6455         .read           = tracing_stats_read,
6456         .llseek         = generic_file_llseek,
6457         .release        = tracing_release_generic_tr,
6458 };
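
/*
 * Usage sketch (illustrative): tracing_stats_read() backs the per-cpu
 * "stats" file, e.g.
 *
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/stats
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7668
 *	oldest event ts: 2634.283644
 *	now ts: 2651.984925
 *	dropped events: 0
 *	read events: 107
 *
 * The numbers above are made up.  "oldest event ts" and "now ts" switch
 * to raw counter values when a non-nanosecond trace clock is selected
 * (see the in_ns branch above).
 */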
6459
6460 #ifdef CONFIG_DYNAMIC_FTRACE
6461
6462 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6463 {
6464         return 0;
6465 }
6466
6467 static ssize_t
6468 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6469                   size_t cnt, loff_t *ppos)
6470 {
6471         static char ftrace_dyn_info_buffer[1024];
6472         static DEFINE_MUTEX(dyn_info_mutex);
6473         unsigned long *p = filp->private_data;
6474         char *buf = ftrace_dyn_info_buffer;
6475         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6476         int r;
6477
6478         mutex_lock(&dyn_info_mutex);
6479         r = sprintf(buf, "%ld ", *p);
6480
6481         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6482         buf[r++] = '\n';
6483
6484         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6485
6486         mutex_unlock(&dyn_info_mutex);
6487
6488         return r;
6489 }
6490
6491 static const struct file_operations tracing_dyn_info_fops = {
6492         .open           = tracing_open_generic,
6493         .read           = tracing_read_dyn_info,
6494         .llseek         = generic_file_llseek,
6495 };
6496 #endif /* CONFIG_DYNAMIC_FTRACE */
6497
6498 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6499 static void
6500 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6501 {
6502         tracing_snapshot();
6503 }
6504
6505 static void
6506 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6507 {
6508         unsigned long *count = (unsigned long *)data;
6509
6510         if (!*count)
6511                 return;
6512
6513         if (*count != -1)
6514                 (*count)--;
6515
6516         tracing_snapshot();
6517 }
6518
6519 static int
6520 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6521                       struct ftrace_probe_ops *ops, void *data)
6522 {
6523         long count = (long)data;
6524
6525         seq_printf(m, "%ps:", (void *)ip);
6526
6527         seq_puts(m, "snapshot");
6528
6529         if (count == -1)
6530                 seq_puts(m, ":unlimited\n");
6531         else
6532                 seq_printf(m, ":count=%ld\n", count);
6533
6534         return 0;
6535 }
6536
6537 static struct ftrace_probe_ops snapshot_probe_ops = {
6538         .func                   = ftrace_snapshot,
6539         .print                  = ftrace_snapshot_print,
6540 };
6541
6542 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6543         .func                   = ftrace_count_snapshot,
6544         .print                  = ftrace_snapshot_print,
6545 };
6546
6547 static int
6548 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6549                                char *glob, char *cmd, char *param, int enable)
6550 {
6551         struct ftrace_probe_ops *ops;
6552         void *count = (void *)-1;
6553         char *number;
6554         int ret;
6555
6556         /* hash funcs only work with set_ftrace_filter */
6557         if (!enable)
6558                 return -EINVAL;
6559
6560         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6561
6562         if (glob[0] == '!') {
6563                 unregister_ftrace_function_probe_func(glob+1, ops);
6564                 return 0;
6565         }
6566
6567         if (!param)
6568                 goto out_reg;
6569
6570         number = strsep(&param, ":");
6571
6572         if (!strlen(number))
6573                 goto out_reg;
6574
6575         /*
6576          * We use the callback data field (which is a pointer)
6577          * as our counter.
6578          */
6579         ret = kstrtoul(number, 0, (unsigned long *)&count);
6580         if (ret)
6581                 return ret;
6582
6583  out_reg:
6584         ret = register_ftrace_function_probe(glob, ops, count);
6585
6586         if (ret >= 0)
6587                 alloc_snapshot(&global_trace);
6588
6589         return ret < 0 ? ret : 0;
6590 }
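
/*
 * Usage sketch (illustrative, assuming tracefs is mounted at
 * /sys/kernel/tracing): the "snapshot" command registered below is parsed
 * by ftrace_trace_snapshot_callback() when written to set_ftrace_filter:
 *
 *	# take a snapshot every time schedule() is hit:
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *
 *	# only snapshot the first 5 hits (the ":5" becomes "count" above):
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 *	# the '!' prefix unregisters the probe again:
 *	echo '!schedule:snapshot' > set_ftrace_filter
 */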
6591
6592 static struct ftrace_func_command ftrace_snapshot_cmd = {
6593         .name                   = "snapshot",
6594         .func                   = ftrace_trace_snapshot_callback,
6595 };
6596
6597 static __init int register_snapshot_cmd(void)
6598 {
6599         return register_ftrace_command(&ftrace_snapshot_cmd);
6600 }
6601 #else
6602 static inline __init int register_snapshot_cmd(void) { return 0; }
6603 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6604
6605 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6606 {
6607         if (WARN_ON(!tr->dir))
6608                 return ERR_PTR(-ENODEV);
6609
6610         /* Top directory uses NULL as the parent */
6611         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6612                 return NULL;
6613
6614         /* All sub buffers have a descriptor */
6615         return tr->dir;
6616 }
6617
6618 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6619 {
6620         struct dentry *d_tracer;
6621
6622         if (tr->percpu_dir)
6623                 return tr->percpu_dir;
6624
6625         d_tracer = tracing_get_dentry(tr);
6626         if (IS_ERR(d_tracer))
6627                 return NULL;
6628
6629         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6630
6631         WARN_ONCE(!tr->percpu_dir,
6632                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6633
6634         return tr->percpu_dir;
6635 }
6636
6637 static struct dentry *
6638 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6639                       void *data, long cpu, const struct file_operations *fops)
6640 {
6641         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6642
6643         if (ret) /* See tracing_get_cpu() */
6644                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6645         return ret;
6646 }
6647
6648 static void
6649 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6650 {
6651         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6652         struct dentry *d_cpu;
6653         char cpu_dir[30]; /* 30 characters should be more than enough */
6654
6655         if (!d_percpu)
6656                 return;
6657
6658         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6659         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6660         if (!d_cpu) {
6661                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6662                 return;
6663         }
6664
6665         /* per cpu trace_pipe */
6666         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6667                                 tr, cpu, &tracing_pipe_fops);
6668
6669         /* per cpu trace */
6670         trace_create_cpu_file("trace", 0644, d_cpu,
6671                                 tr, cpu, &tracing_fops);
6672
6673         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6674                                 tr, cpu, &tracing_buffers_fops);
6675
6676         trace_create_cpu_file("stats", 0444, d_cpu,
6677                                 tr, cpu, &tracing_stats_fops);
6678
6679         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6680                                 tr, cpu, &tracing_entries_fops);
6681
6682 #ifdef CONFIG_TRACER_SNAPSHOT
6683         trace_create_cpu_file("snapshot", 0644, d_cpu,
6684                                 tr, cpu, &snapshot_fops);
6685
6686         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6687                                 tr, cpu, &snapshot_raw_fops);
6688 #endif
6689 }
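
/*
 * Resulting layout (illustrative): for each tracing CPU the calls above
 * create, e.g. for cpu0,
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 *
 * mirroring the top-level files but restricted to that CPU's buffer.
 */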
6690
6691 #ifdef CONFIG_FTRACE_SELFTEST
6692 /* Let selftest have access to static functions in this file */
6693 #include "trace_selftest.c"
6694 #endif
6695
6696 static ssize_t
6697 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6698                         loff_t *ppos)
6699 {
6700         struct trace_option_dentry *topt = filp->private_data;
6701         char *buf;
6702
6703         if (topt->flags->val & topt->opt->bit)
6704                 buf = "1\n";
6705         else
6706                 buf = "0\n";
6707
6708         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6709 }
6710
6711 static ssize_t
6712 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6713                          loff_t *ppos)
6714 {
6715         struct trace_option_dentry *topt = filp->private_data;
6716         unsigned long val;
6717         int ret;
6718
6719         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6720         if (ret)
6721                 return ret;
6722
6723         if (val != 0 && val != 1)
6724                 return -EINVAL;
6725
6726         if (!!(topt->flags->val & topt->opt->bit) != val) {
6727                 mutex_lock(&trace_types_lock);
6728                 ret = __set_tracer_option(topt->tr, topt->flags,
6729                                           topt->opt, !val);
6730                 mutex_unlock(&trace_types_lock);
6731                 if (ret)
6732                         return ret;
6733         }
6734
6735         *ppos += cnt;
6736
6737         return cnt;
6738 }
6739
6740
6741 static const struct file_operations trace_options_fops = {
6742         .open = tracing_open_generic,
6743         .read = trace_options_read,
6744         .write = trace_options_write,
6745         .llseek = generic_file_llseek,
6746 };
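
/*
 * Usage sketch (illustrative): tracer-specific options registered through
 * create_trace_option_file() below show up as boolean files under
 * <tracefs>/options/.  For example, with the function tracer active:
 *
 *	# cat  options/func_stack_trace
 *	0
 *	# echo 1 > options/func_stack_trace
 *
 * Anything other than 0 or 1 is rejected with -EINVAL by
 * trace_options_write() above.
 */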
6747
6748 /*
6749  * In order to pass in both the trace_array descriptor as well as the index
6750  * to the flag that the trace option file represents, the trace_array
6751  * has a character array of trace_flags_index[], which holds the index
6752  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6753  * The address of this character array is passed to the flag option file
6754  * read/write callbacks.
6755  *
6756  * In order to extract both the index and the trace_array descriptor,
6757  * get_tr_index() uses the following algorithm.
6758  *
6759  *   idx = *ptr;
6760  *
6761  * As the pointer holds the address of an entry in the index array,
6762  * and each entry stores its own index (remember index[1] == 1).
6763  *
6764  * Then, to get the trace_array descriptor, subtract that index
6765  * from the ptr to reach the start of the index array itself.
6766  *
6767  *   ptr - idx == &index[0]
6768  *
6769  * Then a simple container_of() from that pointer gets us to the
6770  * trace_array descriptor.
6771  */
6772 static void get_tr_index(void *data, struct trace_array **ptr,
6773                          unsigned int *pindex)
6774 {
6775         *pindex = *(unsigned char *)data;
6776
6777         *ptr = container_of(data - *pindex, struct trace_array,
6778                             trace_flags_index);
6779 }
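
/*
 * Worked example (illustrative only): suppose the option file was created
 * for bit 5, so it was handed data == &tr->trace_flags_index[5] and
 * trace_flags_index[5] == 5.  Then get_tr_index() computes
 *
 *	*pindex = *(unsigned char *)data;	// == 5
 *	*ptr    = container_of(data - 5,	// == &trace_flags_index[0]
 *			       struct trace_array, trace_flags_index);
 *
 * recovering both the bit index and the owning trace_array from the one
 * pointer stored in file->private_data.
 */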
6780
6781 static ssize_t
6782 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6783                         loff_t *ppos)
6784 {
6785         void *tr_index = filp->private_data;
6786         struct trace_array *tr;
6787         unsigned int index;
6788         char *buf;
6789
6790         get_tr_index(tr_index, &tr, &index);
6791
6792         if (tr->trace_flags & (1 << index))
6793                 buf = "1\n";
6794         else
6795                 buf = "0\n";
6796
6797         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6798 }
6799
6800 static ssize_t
6801 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6802                          loff_t *ppos)
6803 {
6804         void *tr_index = filp->private_data;
6805         struct trace_array *tr;
6806         unsigned int index;
6807         unsigned long val;
6808         int ret;
6809
6810         get_tr_index(tr_index, &tr, &index);
6811
6812         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6813         if (ret)
6814                 return ret;
6815
6816         if (val != 0 && val != 1)
6817                 return -EINVAL;
6818
6819         mutex_lock(&trace_types_lock);
6820         ret = set_tracer_flag(tr, 1 << index, val);
6821         mutex_unlock(&trace_types_lock);
6822
6823         if (ret < 0)
6824                 return ret;
6825
6826         *ppos += cnt;
6827
6828         return cnt;
6829 }
6830
6831 static const struct file_operations trace_options_core_fops = {
6832         .open = tracing_open_generic,
6833         .read = trace_options_core_read,
6834         .write = trace_options_core_write,
6835         .llseek = generic_file_llseek,
6836 };
6837
6838 struct dentry *trace_create_file(const char *name,
6839                                  umode_t mode,
6840                                  struct dentry *parent,
6841                                  void *data,
6842                                  const struct file_operations *fops)
6843 {
6844         struct dentry *ret;
6845
6846         ret = tracefs_create_file(name, mode, parent, data, fops);
6847         if (!ret)
6848                 pr_warn("Could not create tracefs '%s' entry\n", name);
6849
6850         return ret;
6851 }
6852
6853
6854 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6855 {
6856         struct dentry *d_tracer;
6857
6858         if (tr->options)
6859                 return tr->options;
6860
6861         d_tracer = tracing_get_dentry(tr);
6862         if (IS_ERR(d_tracer))
6863                 return NULL;
6864
6865         tr->options = tracefs_create_dir("options", d_tracer);
6866         if (!tr->options) {
6867                 pr_warn("Could not create tracefs directory 'options'\n");
6868                 return NULL;
6869         }
6870
6871         return tr->options;
6872 }
6873
6874 static void
6875 create_trace_option_file(struct trace_array *tr,
6876                          struct trace_option_dentry *topt,
6877                          struct tracer_flags *flags,
6878                          struct tracer_opt *opt)
6879 {
6880         struct dentry *t_options;
6881
6882         t_options = trace_options_init_dentry(tr);
6883         if (!t_options)
6884                 return;
6885
6886         topt->flags = flags;
6887         topt->opt = opt;
6888         topt->tr = tr;
6889
6890         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6891                                     &trace_options_fops);
6892
6893 }
6894
6895 static void
6896 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6897 {
6898         struct trace_option_dentry *topts;
6899         struct trace_options *tr_topts;
6900         struct tracer_flags *flags;
6901         struct tracer_opt *opts;
6902         int cnt;
6903         int i;
6904
6905         if (!tracer)
6906                 return;
6907
6908         flags = tracer->flags;
6909
6910         if (!flags || !flags->opts)
6911                 return;
6912
6913         /*
6914          * If this is an instance, only create flags for tracers
6915          * the instance may have.
6916          */
6917         if (!trace_ok_for_array(tracer, tr))
6918                 return;
6919
6920         for (i = 0; i < tr->nr_topts; i++) {
6921                 /* Make sure there are no duplicate flags. */
6922                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6923                         return;
6924         }
6925
6926         opts = flags->opts;
6927
6928         for (cnt = 0; opts[cnt].name; cnt++)
6929                 ;
6930
6931         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6932         if (!topts)
6933                 return;
6934
6935         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6936                             GFP_KERNEL);
6937         if (!tr_topts) {
6938                 kfree(topts);
6939                 return;
6940         }
6941
6942         tr->topts = tr_topts;
6943         tr->topts[tr->nr_topts].tracer = tracer;
6944         tr->topts[tr->nr_topts].topts = topts;
6945         tr->nr_topts++;
6946
6947         for (cnt = 0; opts[cnt].name; cnt++) {
6948                 create_trace_option_file(tr, &topts[cnt], flags,
6949                                          &opts[cnt]);
6950                 WARN_ONCE(topts[cnt].entry == NULL,
6951                           "Failed to create trace option: %s",
6952                           opts[cnt].name);
6953         }
6954 }
6955
6956 static struct dentry *
6957 create_trace_option_core_file(struct trace_array *tr,
6958                               const char *option, long index)
6959 {
6960         struct dentry *t_options;
6961
6962         t_options = trace_options_init_dentry(tr);
6963         if (!t_options)
6964                 return NULL;
6965
6966         return trace_create_file(option, 0644, t_options,
6967                                  (void *)&tr->trace_flags_index[index],
6968                                  &trace_options_core_fops);
6969 }
6970
6971 static void create_trace_options_dir(struct trace_array *tr)
6972 {
6973         struct dentry *t_options;
6974         bool top_level = tr == &global_trace;
6975         int i;
6976
6977         t_options = trace_options_init_dentry(tr);
6978         if (!t_options)
6979                 return;
6980
6981         for (i = 0; trace_options[i]; i++) {
6982                 if (top_level ||
6983                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6984                         create_trace_option_core_file(tr, trace_options[i], i);
6985         }
6986 }
6987
6988 static ssize_t
6989 rb_simple_read(struct file *filp, char __user *ubuf,
6990                size_t cnt, loff_t *ppos)
6991 {
6992         struct trace_array *tr = filp->private_data;
6993         char buf[64];
6994         int r;
6995
6996         r = tracer_tracing_is_on(tr);
6997         r = sprintf(buf, "%d\n", r);
6998
6999         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7000 }
7001
7002 static ssize_t
7003 rb_simple_write(struct file *filp, const char __user *ubuf,
7004                 size_t cnt, loff_t *ppos)
7005 {
7006         struct trace_array *tr = filp->private_data;
7007         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7008         unsigned long val;
7009         int ret;
7010
7011         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7012         if (ret)
7013                 return ret;
7014
7015         if (buffer) {
7016                 mutex_lock(&trace_types_lock);
7017                 if (val) {
7018                         tracer_tracing_on(tr);
7019                         if (tr->current_trace->start)
7020                                 tr->current_trace->start(tr);
7021                 } else {
7022                         tracer_tracing_off(tr);
7023                         if (tr->current_trace->stop)
7024                                 tr->current_trace->stop(tr);
7025                 }
7026                 mutex_unlock(&trace_types_lock);
7027         }
7028
7029         (*ppos)++;
7030
7031         return cnt;
7032 }
7033
7034 static const struct file_operations rb_simple_fops = {
7035         .open           = tracing_open_generic_tr,
7036         .read           = rb_simple_read,
7037         .write          = rb_simple_write,
7038         .release        = tracing_release_generic_tr,
7039         .llseek         = default_llseek,
7040 };
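
/*
 * Usage sketch (illustrative): rb_simple_read()/rb_simple_write() back the
 * "tracing_on" file created in init_tracer_tracefs() below:
 *
 *	# echo 0 > /sys/kernel/tracing/tracing_on	# stop recording
 *	# cat  /sys/kernel/tracing/tracing_on		# -> 0
 *	# echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 *
 * Note that this only gates writes to the ring buffer; the current tracer
 * stays registered, and its start()/stop() callbacks are invoked as above.
 */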
7041
7042 struct dentry *trace_instance_dir;
7043
7044 static void
7045 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7046
7047 static int
7048 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7049 {
7050         enum ring_buffer_flags rb_flags;
7051
7052         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7053
7054         buf->tr = tr;
7055
7056         buf->buffer = ring_buffer_alloc(size, rb_flags);
7057         if (!buf->buffer)
7058                 return -ENOMEM;
7059
7060         buf->data = alloc_percpu(struct trace_array_cpu);
7061         if (!buf->data) {
7062                 ring_buffer_free(buf->buffer);
7063                 return -ENOMEM;
7064         }
7065
7066         /* Allocate the first page for all buffers */
7067         set_buffer_entries(&tr->trace_buffer,
7068                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7069
7070         return 0;
7071 }
7072
7073 static int allocate_trace_buffers(struct trace_array *tr, int size)
7074 {
7075         int ret;
7076
7077         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7078         if (ret)
7079                 return ret;
7080
7081 #ifdef CONFIG_TRACER_MAX_TRACE
7082         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7083                                     allocate_snapshot ? size : 1);
7084         if (WARN_ON(ret)) {
7085                 ring_buffer_free(tr->trace_buffer.buffer);
7086                 free_percpu(tr->trace_buffer.data);
7087                 return -ENOMEM;
7088         }
7089         tr->allocated_snapshot = allocate_snapshot;
7090
7091         /*
7092          * Only the top level trace array gets its snapshot allocated
7093          * from the kernel command line.
7094          */
7095         allocate_snapshot = false;
7096 #endif
7097         return 0;
7098 }
7099
7100 static void free_trace_buffer(struct trace_buffer *buf)
7101 {
7102         if (buf->buffer) {
7103                 ring_buffer_free(buf->buffer);
7104                 buf->buffer = NULL;
7105                 free_percpu(buf->data);
7106                 buf->data = NULL;
7107         }
7108 }
7109
7110 static void free_trace_buffers(struct trace_array *tr)
7111 {
7112         if (!tr)
7113                 return;
7114
7115         free_trace_buffer(&tr->trace_buffer);
7116
7117 #ifdef CONFIG_TRACER_MAX_TRACE
7118         free_trace_buffer(&tr->max_buffer);
7119 #endif
7120 }
7121
7122 static void init_trace_flags_index(struct trace_array *tr)
7123 {
7124         int i;
7125
7126         /* Used by the trace options files */
7127         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7128                 tr->trace_flags_index[i] = i;
7129 }
7130
7131 static void __update_tracer_options(struct trace_array *tr)
7132 {
7133         struct tracer *t;
7134
7135         for (t = trace_types; t; t = t->next)
7136                 add_tracer_options(tr, t);
7137 }
7138
7139 static void update_tracer_options(struct trace_array *tr)
7140 {
7141         mutex_lock(&trace_types_lock);
7142         __update_tracer_options(tr);
7143         mutex_unlock(&trace_types_lock);
7144 }
7145
7146 static int instance_mkdir(const char *name)
7147 {
7148         struct trace_array *tr;
7149         int ret;
7150
7151         mutex_lock(&trace_types_lock);
7152
7153         ret = -EEXIST;
7154         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7155                 if (tr->name && strcmp(tr->name, name) == 0)
7156                         goto out_unlock;
7157         }
7158
7159         ret = -ENOMEM;
7160         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7161         if (!tr)
7162                 goto out_unlock;
7163
7164         tr->name = kstrdup(name, GFP_KERNEL);
7165         if (!tr->name)
7166                 goto out_free_tr;
7167
7168         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7169                 goto out_free_tr;
7170
7171         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7172
7173         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7174
7175         raw_spin_lock_init(&tr->start_lock);
7176
7177         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7178
7179         tr->current_trace = &nop_trace;
7180
7181         INIT_LIST_HEAD(&tr->systems);
7182         INIT_LIST_HEAD(&tr->events);
7183
7184         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7185                 goto out_free_tr;
7186
7187         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7188         if (!tr->dir)
7189                 goto out_free_tr;
7190
7191         ret = event_trace_add_tracer(tr->dir, tr);
7192         if (ret) {
7193                 tracefs_remove_recursive(tr->dir);
7194                 goto out_free_tr;
7195         }
7196
7197         init_tracer_tracefs(tr, tr->dir);
7198         init_trace_flags_index(tr);
7199         __update_tracer_options(tr);
7200
7201         list_add(&tr->list, &ftrace_trace_arrays);
7202
7203         mutex_unlock(&trace_types_lock);
7204
7205         return 0;
7206
7207  out_free_tr:
7208         free_trace_buffers(tr);
7209         free_cpumask_var(tr->tracing_cpumask);
7210         kfree(tr->name);
7211         kfree(tr);
7212
7213  out_unlock:
7214         mutex_unlock(&trace_types_lock);
7215
7216         return ret;
7217
7218 }
7219
7220 static int instance_rmdir(const char *name)
7221 {
7222         struct trace_array *tr;
7223         int found = 0;
7224         int ret;
7225         int i;
7226
7227         mutex_lock(&trace_types_lock);
7228
7229         ret = -ENODEV;
7230         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7231                 if (tr->name && strcmp(tr->name, name) == 0) {
7232                         found = 1;
7233                         break;
7234                 }
7235         }
7236         if (!found)
7237                 goto out_unlock;
7238
7239         ret = -EBUSY;
7240         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7241                 goto out_unlock;
7242
7243         list_del(&tr->list);
7244
7245         /* Disable all the flags that were enabled coming in */
7246         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7247                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7248                         set_tracer_flag(tr, 1 << i, 0);
7249         }
7250
7251         tracing_set_nop(tr);
7252         event_trace_del_tracer(tr);
7253         ftrace_destroy_function_files(tr);
7254         tracefs_remove_recursive(tr->dir);
7255         free_trace_buffers(tr);
7256
7257         for (i = 0; i < tr->nr_topts; i++) {
7258                 kfree(tr->topts[i].topts);
7259         }
7260         kfree(tr->topts);
7261
7262         kfree(tr->name);
7263         kfree(tr);
7264
7265         ret = 0;
7266
7267  out_unlock:
7268         mutex_unlock(&trace_types_lock);
7269
7270         return ret;
7271 }
7272
7273 static __init void create_trace_instances(struct dentry *d_tracer)
7274 {
7275         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7276                                                          instance_mkdir,
7277                                                          instance_rmdir);
7278         if (WARN_ON(!trace_instance_dir))
7279                 return;
7280 }
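
/*
 * Usage sketch (illustrative): the "instances" directory registered above
 * routes mkdir/rmdir to instance_mkdir()/instance_rmdir(), so a separate
 * trace_array with its own buffers and files can be created from the shell:
 *
 *	# mkdir /sys/kernel/tracing/instances/foo
 *	# echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *	# rmdir /sys/kernel/tracing/instances/foo	# -EBUSY while in use
 */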
7281
7282 static void
7283 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7284 {
7285         int cpu;
7286
7287         trace_create_file("available_tracers", 0444, d_tracer,
7288                         tr, &show_traces_fops);
7289
7290         trace_create_file("current_tracer", 0644, d_tracer,
7291                         tr, &set_tracer_fops);
7292
7293         trace_create_file("tracing_cpumask", 0644, d_tracer,
7294                           tr, &tracing_cpumask_fops);
7295
7296         trace_create_file("trace_options", 0644, d_tracer,
7297                           tr, &tracing_iter_fops);
7298
7299         trace_create_file("trace", 0644, d_tracer,
7300                           tr, &tracing_fops);
7301
7302         trace_create_file("trace_pipe", 0444, d_tracer,
7303                           tr, &tracing_pipe_fops);
7304
7305         trace_create_file("buffer_size_kb", 0644, d_tracer,
7306                           tr, &tracing_entries_fops);
7307
7308         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7309                           tr, &tracing_total_entries_fops);
7310
7311         trace_create_file("free_buffer", 0200, d_tracer,
7312                           tr, &tracing_free_buffer_fops);
7313
7314         trace_create_file("trace_marker", 0220, d_tracer,
7315                           tr, &tracing_mark_fops);
7316
7317         trace_create_file("trace_marker_raw", 0220, d_tracer,
7318                           tr, &tracing_mark_raw_fops);
7319
7320         trace_create_file("trace_clock", 0644, d_tracer, tr,
7321                           &trace_clock_fops);
7322
7323         trace_create_file("tracing_on", 0644, d_tracer,
7324                           tr, &rb_simple_fops);
7325
7326         create_trace_options_dir(tr);
7327
7328 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7329         trace_create_file("tracing_max_latency", 0644, d_tracer,
7330                         &tr->max_latency, &tracing_max_lat_fops);
7331 #endif
7332
7333         if (ftrace_create_function_files(tr, d_tracer))
7334                 WARN(1, "Could not allocate function filter files");
7335
7336 #ifdef CONFIG_TRACER_SNAPSHOT
7337         trace_create_file("snapshot", 0644, d_tracer,
7338                           tr, &snapshot_fops);
7339 #endif
7340
7341         for_each_tracing_cpu(cpu)
7342                 tracing_init_tracefs_percpu(tr, cpu);
7343
7344         ftrace_init_tracefs(tr, d_tracer);
7345 }
7346
7347 static struct vfsmount *trace_automount(void *ignore)
7348 {
7349         struct vfsmount *mnt;
7350         struct file_system_type *type;
7351
7352         /*
7353          * To maintain backward compatibility for tools that mount
7354          * debugfs to get to the tracing facility, tracefs is automatically
7355          * mounted to the debugfs/tracing directory.
7356          */
7357         type = get_fs_type("tracefs");
7358         if (!type)
7359                 return NULL;
7360         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7361         put_filesystem(type);
7362         if (IS_ERR(mnt))
7363                 return NULL;
7364         mntget(mnt);
7365
7366         return mnt;
7367 }
7368
7369 /**
7370  * tracing_init_dentry - initialize top level trace array
7371  *
7372  * This is called when creating files or directories in the tracing
7373  * directory. It is called via fs_initcall() by any of the boot up code
7374  * and expects to return the dentry of the top level tracing directory.
7375  */
7376 struct dentry *tracing_init_dentry(void)
7377 {
7378         struct trace_array *tr = &global_trace;
7379
7380         /* The top level trace array uses NULL as parent */
7381         if (tr->dir)
7382                 return NULL;
7383
7384         if (WARN_ON(!tracefs_initialized()) ||
7385                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7386                  WARN_ON(!debugfs_initialized())))
7387                 return ERR_PTR(-ENODEV);
7388
7389         /*
7390          * As there may still be users that expect the tracing
7391          * files to exist in debugfs/tracing, we must automount
7392          * the tracefs file system there, so older tools still
7393          * work with the newer kernel.
7394          */
7395         tr->dir = debugfs_create_automount("tracing", NULL,
7396                                            trace_automount, NULL);
7397         if (!tr->dir) {
7398                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7399                 return ERR_PTR(-ENOMEM);
7400         }
7401
7402         return NULL;
7403 }
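
/*
 * Practical note (illustrative): with the automount above, both of these
 * paths reach the same tracefs instance,
 *
 *	# mount -t tracefs nodev /sys/kernel/tracing	# native mount
 *	# ls /sys/kernel/debug/tracing			# legacy path, automounted
 *
 * so older tools that only know about debugfs/tracing keep working.
 */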
7404
7405 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7406 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7407
7408 static void __init trace_enum_init(void)
7409 {
7410         int len;
7411
7412         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7413         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7414 }
7415
7416 #ifdef CONFIG_MODULES
7417 static void trace_module_add_enums(struct module *mod)
7418 {
7419         if (!mod->num_trace_enums)
7420                 return;
7421
7422         /*
7423          * Modules with bad taint do not have events created, do
7424          * not bother with enums either.
7425          */
7426         if (trace_module_has_bad_taint(mod))
7427                 return;
7428
7429         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7430 }
7431
7432 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7433 static void trace_module_remove_enums(struct module *mod)
7434 {
7435         union trace_enum_map_item *map;
7436         union trace_enum_map_item **last = &trace_enum_maps;
7437
7438         if (!mod->num_trace_enums)
7439                 return;
7440
7441         mutex_lock(&trace_enum_mutex);
7442
7443         map = trace_enum_maps;
7444
7445         while (map) {
7446                 if (map->head.mod == mod)
7447                         break;
7448                 map = trace_enum_jmp_to_tail(map);
7449                 last = &map->tail.next;
7450                 map = map->tail.next;
7451         }
7452         if (!map)
7453                 goto out;
7454
7455         *last = trace_enum_jmp_to_tail(map)->tail.next;
7456         kfree(map);
7457  out:
7458         mutex_unlock(&trace_enum_mutex);
7459 }
7460 #else
7461 static inline void trace_module_remove_enums(struct module *mod) { }
7462 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7463
7464 static int trace_module_notify(struct notifier_block *self,
7465                                unsigned long val, void *data)
7466 {
7467         struct module *mod = data;
7468
7469         switch (val) {
7470         case MODULE_STATE_COMING:
7471                 trace_module_add_enums(mod);
7472                 break;
7473         case MODULE_STATE_GOING:
7474                 trace_module_remove_enums(mod);
7475                 break;
7476         }
7477
7478         return 0;
7479 }
7480
7481 static struct notifier_block trace_module_nb = {
7482         .notifier_call = trace_module_notify,
7483         .priority = 0,
7484 };
7485 #endif /* CONFIG_MODULES */
7486
7487 static __init int tracer_init_tracefs(void)
7488 {
7489         struct dentry *d_tracer;
7490
7491         trace_access_lock_init();
7492
7493         d_tracer = tracing_init_dentry();
7494         if (IS_ERR(d_tracer))
7495                 return 0;
7496
7497         init_tracer_tracefs(&global_trace, d_tracer);
7498         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7499
7500         trace_create_file("tracing_thresh", 0644, d_tracer,
7501                         &global_trace, &tracing_thresh_fops);
7502
7503         trace_create_file("README", 0444, d_tracer,
7504                         NULL, &tracing_readme_fops);
7505
7506         trace_create_file("saved_cmdlines", 0444, d_tracer,
7507                         NULL, &tracing_saved_cmdlines_fops);
7508
7509         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7510                           NULL, &tracing_saved_cmdlines_size_fops);
7511
7512         trace_enum_init();
7513
7514         trace_create_enum_file(d_tracer);
7515
7516 #ifdef CONFIG_MODULES
7517         register_module_notifier(&trace_module_nb);
7518 #endif
7519
7520 #ifdef CONFIG_DYNAMIC_FTRACE
7521         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7522                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7523 #endif
7524
7525         create_trace_instances(d_tracer);
7526
7527         update_tracer_options(&global_trace);
7528
7529         return 0;
7530 }
7531
7532 static int trace_panic_handler(struct notifier_block *this,
7533                                unsigned long event, void *unused)
7534 {
7535         if (ftrace_dump_on_oops)
7536                 ftrace_dump(ftrace_dump_on_oops);
7537         return NOTIFY_OK;
7538 }
7539
7540 static struct notifier_block trace_panic_notifier = {
7541         .notifier_call  = trace_panic_handler,
7542         .next           = NULL,
7543         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7544 };
7545
7546 static int trace_die_handler(struct notifier_block *self,
7547                              unsigned long val,
7548                              void *data)
7549 {
7550         switch (val) {
7551         case DIE_OOPS:
7552                 if (ftrace_dump_on_oops)
7553                         ftrace_dump(ftrace_dump_on_oops);
7554                 break;
7555         default:
7556                 break;
7557         }
7558         return NOTIFY_OK;
7559 }
7560
7561 static struct notifier_block trace_die_notifier = {
7562         .notifier_call = trace_die_handler,
7563         .priority = 200
7564 };
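
/*
 * Usage sketch (illustrative): the panic/die notifiers above dump the ring
 * buffer to the console when ftrace_dump_on_oops is enabled, e.g.
 *
 *	# echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * or by booting with "ftrace_dump_on_oops" (dump all CPUs) or
 * "ftrace_dump_on_oops=orig_cpu" (only the oopsing CPU) on the kernel
 * command line.
 */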
7565
7566 /*
7567  * printk is set to a max of 1024; we really don't need it that big.
7568  * Nothing should be printing 1000 characters anyway.
7569  */
7570 #define TRACE_MAX_PRINT         1000
7571
7572 /*
7573  * Define here KERN_TRACE so that we have one place to modify
7574  * it if we decide to change what log level the ftrace dump
7575  * should be at.
7576  */
7577 #define KERN_TRACE              KERN_EMERG
7578
7579 void
7580 trace_printk_seq(struct trace_seq *s)
7581 {
7582         /* Probably should print a warning here. */
7583         if (s->seq.len >= TRACE_MAX_PRINT)
7584                 s->seq.len = TRACE_MAX_PRINT;
7585
7586         /*
7587          * More paranoid code. Although the buffer size is set to
7588          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7589          * an extra layer of protection.
7590          */
7591         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7592                 s->seq.len = s->seq.size - 1;
7593
7594         /* Should be NUL-terminated already, but we are paranoid. */
7595         s->buffer[s->seq.len] = 0;
7596
7597         printk(KERN_TRACE "%s", s->buffer);
7598
7599         trace_seq_init(s);
7600 }
7601
7602 void trace_init_global_iter(struct trace_iterator *iter)
7603 {
7604         iter->tr = &global_trace;
7605         iter->trace = iter->tr->current_trace;
7606         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7607         iter->trace_buffer = &global_trace.trace_buffer;
7608
7609         if (iter->trace && iter->trace->open)
7610                 iter->trace->open(iter);
7611
7612         /* Annotate start of buffers if we had overruns */
7613         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7614                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7615
7616         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7617         if (trace_clocks[iter->tr->clock_id].in_ns)
7618                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7619 }
7620
7621 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7622 {
7623         /* use static because iter can be a bit big for the stack */
7624         static struct trace_iterator iter;
7625         static atomic_t dump_running;
7626         struct trace_array *tr = &global_trace;
7627         unsigned int old_userobj;
7628         unsigned long flags;
7629         int cnt = 0, cpu;
7630
7631         /* Only allow one dump user at a time. */
7632         if (atomic_inc_return(&dump_running) != 1) {
7633                 atomic_dec(&dump_running);
7634                 return;
7635         }
7636
7637         /*
7638          * Always turn off tracing when we dump.
7639          * We don't need to show trace output of what happens
7640          * between multiple crashes.
7641          *
7642          * If the user does a sysrq-z, then they can re-enable
7643          * tracing with echo 1 > tracing_on.
7644          */
7645         tracing_off();
7646
7647         local_irq_save(flags);
7648
7649         /* Simulate the iterator */
7650         trace_init_global_iter(&iter);
7651
7652         for_each_tracing_cpu(cpu) {
7653                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7654         }
7655
7656         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7657
7658         /* don't look at user memory in panic mode */
7659         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7660
7661         switch (oops_dump_mode) {
7662         case DUMP_ALL:
7663                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7664                 break;
7665         case DUMP_ORIG:
7666                 iter.cpu_file = raw_smp_processor_id();
7667                 break;
7668         case DUMP_NONE:
7669                 goto out_enable;
7670         default:
7671                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7672                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7673         }
7674
7675         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7676
7677         /* Did function tracer already get disabled? */
7678         if (ftrace_is_dead()) {
7679                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7680                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7681         }
7682
7683         /*
7684          * We need to stop all tracing on all CPUs to read
7685          * the next buffer. This is a bit expensive, but is
7686          * not done often. We fill in all that we can read,
7687          * and then release the locks again.
7688          */
7689
7690         while (!trace_empty(&iter)) {
7691
7692                 if (!cnt)
7693                         printk(KERN_TRACE "---------------------------------\n");
7694
7695                 cnt++;
7696
7697                 /* reset all but tr, trace, and overruns */
7698                 memset(&iter.seq, 0,
7699                        sizeof(struct trace_iterator) -
7700                        offsetof(struct trace_iterator, seq));
7701                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7702                 iter.pos = -1;
7703
7704                 if (trace_find_next_entry_inc(&iter) != NULL) {
7705                         int ret;
7706
7707                         ret = print_trace_line(&iter);
7708                         if (ret != TRACE_TYPE_NO_CONSUME)
7709                                 trace_consume(&iter);
7710                 }
7711                 touch_nmi_watchdog();
7712
7713                 trace_printk_seq(&iter.seq);
7714         }
7715
7716         if (!cnt)
7717                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7718         else
7719                 printk(KERN_TRACE "---------------------------------\n");
7720
7721  out_enable:
7722         tr->trace_flags |= old_userobj;
7723
7724         for_each_tracing_cpu(cpu) {
7725                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7726         }
7727         atomic_dec(&dump_running);
7728         local_irq_restore(flags);
7729 }
7730 EXPORT_SYMBOL_GPL(ftrace_dump);
7731
7732 __init static int tracer_alloc_buffers(void)
7733 {
7734         int ring_buf_size;
7735         int ret = -ENOMEM;
7736
7737         /*
7738          * Make sure we don't accidentally add more trace options
7739          * than we have bits for.
7740          */
7741         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7742
7743         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7744                 goto out;
7745
7746         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7747                 goto out_free_buffer_mask;
7748
7749         /* Only allocate trace_printk buffers if a trace_printk exists */
7750         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7751                 /* Must be called before global_trace.buffer is allocated */
7752                 trace_printk_init_buffers();
7753
7754         /* To save memory, keep the ring buffer size to its minimum */
7755         if (ring_buffer_expanded)
7756                 ring_buf_size = trace_buf_size;
7757         else
7758                 ring_buf_size = 1;
7759
7760         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7761         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7762
7763         raw_spin_lock_init(&global_trace.start_lock);
7764
7765         /* Used for event triggers */
7766         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7767         if (!temp_buffer)
7768                 goto out_free_cpumask;
7769
7770         if (trace_create_savedcmd() < 0)
7771                 goto out_free_temp_buffer;
7772
7773         /* TODO: make the number of buffers hot pluggable with CPUs */
7774         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7775                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7776                 WARN_ON(1);
7777                 goto out_free_savedcmd;
7778         }
7779
7780         if (global_trace.buffer_disabled)
7781                 tracing_off();
7782
7783         if (trace_boot_clock) {
7784                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7785                 if (ret < 0)
7786                         pr_warn("Trace clock %s not defined, going back to default\n",
7787                                 trace_boot_clock);
7788         }
7789
7790         /*
7791          * register_tracer() might reference current_trace, so it
7792          * needs to be set before we register anything. This is
7793          * just a bootstrap of current_trace anyway.
7794          */
7795         global_trace.current_trace = &nop_trace;
7796
7797         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7798
7799         ftrace_init_global_array_ops(&global_trace);
7800
7801         init_trace_flags_index(&global_trace);
7802
7803         register_tracer(&nop_trace);
7804
7805         /* All seems OK, enable tracing */
7806         tracing_disabled = 0;
7807
7808         atomic_notifier_chain_register(&panic_notifier_list,
7809                                        &trace_panic_notifier);
7810
7811         register_die_notifier(&trace_die_notifier);
7812
7813         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7814
7815         INIT_LIST_HEAD(&global_trace.systems);
7816         INIT_LIST_HEAD(&global_trace.events);
7817         list_add(&global_trace.list, &ftrace_trace_arrays);
7818
7819         apply_trace_boot_options();
7820
7821         register_snapshot_cmd();
7822
7823         return 0;
7824
7825 out_free_savedcmd:
7826         free_saved_cmdlines_buffer(savedcmd);
7827 out_free_temp_buffer:
7828         ring_buffer_free(temp_buffer);
7829 out_free_cpumask:
7830         free_cpumask_var(global_trace.tracing_cpumask);
7831 out_free_buffer_mask:
7832         free_cpumask_var(tracing_buffer_mask);
7833 out:
7834         return ret;
7835 }
7836
7837 void __init trace_init(void)
7838 {
7839         if (tracepoint_printk) {
7840                 tracepoint_print_iter =
7841                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7842                 if (WARN_ON(!tracepoint_print_iter))
7843                         tracepoint_printk = 0;
7844         }
7845         tracer_alloc_buffers();
7846         trace_event_init();
7847 }
7848
7849 __init static int clear_boot_tracer(void)
7850 {
7851         /*
7852          * The default bootup tracer string lives in an init section.
7853          * This function is called at late_initcall time. If we did
7854          * not find the boot tracer by then, clear the pointer out to
7855          * prevent a later registration from accessing memory that is
7856          * about to be freed.
7857          */
7858         if (!default_bootup_tracer)
7859                 return 0;
7860
7861         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7862                default_bootup_tracer);
7863         default_bootup_tracer = NULL;
7864
7865         return 0;
7866 }
7867
7868 fs_initcall(tracer_init_tracefs);
7869 late_initcall(clear_boot_tracer);