tracing: Add ftrace_trace_stack into __trace_puts/__trace_bputs
[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 and is set back to zero only when the
91  * initialization of the tracer succeeds; that is the only place that
92  * ever clears it.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default; you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
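/*
 * Illustrative usage sketch (not part of this file), restating the knobs
 * described above; the boot-parameter spellings are the ones accepted by
 * set_ftrace_dump_on_oops() below:
 *
 *	ftrace_dump_on_oops              boot: dump the buffers of all CPUs
 *	ftrace_dump_on_oops=orig_cpu     boot: dump only the oops'ing CPU
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    at run time
 */
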
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptors of the pages in memory are used to hold
196  * the linked list, by linking the lru item of each page descriptor
197  * to the other pages of that CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
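/*
 * Illustrative sketch (hypothetical caller, not part of this file): an
 * open() handler for a per-instance tracing file would typically pin the
 * trace_array while the file is open and drop the reference on release:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 */
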
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show if global_trace has been disabled
287  *
288  * Shows if the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" so that it can be used in fast paths
290  * such as the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on() which is a little
292  * slower, but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to a low value of 16384:
311  * if a dump on oops happens, nobody wants to wait for all
312  * that output. In any case, this is configurable at both
313  * boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a linked list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * Serialize access to the ring buffer.
329  *
330  * The ring buffer serializes readers, but that is only low-level protection.
331  * The validity of the events (returned by ring_buffer_peek() etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow another process to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) the page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only from read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
408
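/*
 * Illustrative sketch (not part of this file): a consuming reader of a
 * single cpu buffer would bracket its ring-buffer calls with the
 * primitives above, for example:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * while a reader that touches every cpu buffer would pass
 * RING_BUFFER_ALL_CPUS instead, taking the write side of
 * all_cpu_access_lock.
 */
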
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races of where it gets disabled but we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457         int pc;
458
459         pc = preempt_count();
460
461         if (unlikely(tracing_selftest_running || tracing_disabled))
462                 return 0;
463
464         alloc = sizeof(*entry) + size + 2; /* possible \n added */
465
466         local_save_flags(irq_flags);
467         buffer = global_trace.trace_buffer.buffer;
468         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
469                                           irq_flags, pc);
470         if (!event)
471                 return 0;
472
473         entry = ring_buffer_event_data(event);
474         entry->ip = ip;
475
476         memcpy(&entry->buf, str, size);
477
478         /* Add a newline if necessary */
479         if (entry->buf[size - 1] != '\n') {
480                 entry->buf[size] = '\n';
481                 entry->buf[size + 1] = '\0';
482         } else
483                 entry->buf[size] = '\0';
484
485         __buffer_unlock_commit(buffer, event);
486         ftrace_trace_stack(buffer, irq_flags, 4, pc);
487
488         return size;
489 }
490 EXPORT_SYMBOL_GPL(__trace_puts);
491
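/*
 * Illustrative sketch (not part of this file): callers normally do not
 * invoke __trace_puts() directly but use the trace_puts() macro from
 * include/linux/kernel.h, e.g.:
 *
 *	trace_puts("reached the fast path\n");
 *
 * The macro supplies the caller address (_THIS_IP_) and picks either
 * __trace_puts() or __trace_bputs() depending on whether the string is a
 * build-time constant.
 */
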
492 /**
493  * __trace_bputs - write the pointer to a constant string into trace buffer
494  * @ip:    The address of the caller
495  * @str:   The constant string to write to the buffer to
496  */
497 int __trace_bputs(unsigned long ip, const char *str)
498 {
499         struct ring_buffer_event *event;
500         struct ring_buffer *buffer;
501         struct bputs_entry *entry;
502         unsigned long irq_flags;
503         int size = sizeof(struct bputs_entry);
504         int pc;
505
506         pc = preempt_count();
507
508         if (unlikely(tracing_selftest_running || tracing_disabled))
509                 return 0;
510
511         local_save_flags(irq_flags);
512         buffer = global_trace.trace_buffer.buffer;
513         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
514                                           irq_flags, pc);
515         if (!event)
516                 return 0;
517
518         entry = ring_buffer_event_data(event);
519         entry->ip                       = ip;
520         entry->str                      = str;
521
522         __buffer_unlock_commit(buffer, event);
523         ftrace_trace_stack(buffer, irq_flags, 4, pc);
524
525         return 1;
526 }
527 EXPORT_SYMBOL_GPL(__trace_bputs);
528
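/*
 * Illustrative note (not part of this file): __trace_bputs() records only
 * the pointer to the string, so it is meant for strings that never go
 * away, such as build-time constants. The trace_puts()/trace_printk()
 * macros select it automatically in that case, e.g.:
 *
 *	trace_printk("hit the slow path\n");
 *
 * with a constant format and no arguments ends up here instead of in the
 * heavier bprintk path.
 */
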
529 #ifdef CONFIG_TRACER_SNAPSHOT
530 /**
531  * trace_snapshot - take a snapshot of the current buffer.
532  *
533  * This causes a swap between the snapshot buffer and the current live
534  * tracing buffer. You can use this to take snapshots of the live
535  * trace when some condition is triggered, but continue to trace.
536  *
537  * Note, make sure to allocate the snapshot first, either with
538  * tracing_snapshot_alloc(), or manually
539  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
540  *
541  * If the snapshot buffer is not allocated, it will stop tracing.
542  * Basically making a permanent snapshot.
543  */
544 void tracing_snapshot(void)
545 {
546         struct trace_array *tr = &global_trace;
547         struct tracer *tracer = tr->current_trace;
548         unsigned long flags;
549
550         if (in_nmi()) {
551                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
552                 internal_trace_puts("*** snapshot is being ignored        ***\n");
553                 return;
554         }
555
556         if (!tr->allocated_snapshot) {
557                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
558                 internal_trace_puts("*** stopping trace here!   ***\n");
559                 tracing_off();
560                 return;
561         }
562
563         /* Note, snapshot can not be used when the tracer uses it */
564         if (tracer->use_max_tr) {
565                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
566                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
567                 return;
568         }
569
570         local_irq_save(flags);
571         update_max_tr(tr, current, smp_processor_id());
572         local_irq_restore(flags);
573 }
574 EXPORT_SYMBOL_GPL(tracing_snapshot);
575
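/*
 * Illustrative sketch (hypothetical caller, not part of this file): code
 * that wants to preserve the trace leading up to a rare condition could,
 * once the snapshot buffer exists (tracing_snapshot_alloc() or
 * "echo 1 > /sys/kernel/debug/tracing/snapshot"), simply do:
 *
 *	if (suspicious_condition())
 *		tracing_snapshot();
 *
 * Tracing continues in the live buffer; the swapped-out data can then be
 * read from the "snapshot" file.
 */
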
576 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
577                                         struct trace_buffer *size_buf, int cpu_id);
578 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
579
580 static int alloc_snapshot(struct trace_array *tr)
581 {
582         int ret;
583
584         if (!tr->allocated_snapshot) {
585
586                 /* allocate spare buffer */
587                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
588                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
589                 if (ret < 0)
590                         return ret;
591
592                 tr->allocated_snapshot = true;
593         }
594
595         return 0;
596 }
597
598 void free_snapshot(struct trace_array *tr)
599 {
600         /*
601          * We don't free the ring buffer; instead, we resize it because
602          * the max_tr ring buffer has some state (e.g. ring->clock) and
603          * we want to preserve it.
604          */
605         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
606         set_buffer_entries(&tr->max_buffer, 1);
607         tracing_reset_online_cpus(&tr->max_buffer);
608         tr->allocated_snapshot = false;
609 }
610
611 /**
612  * tracing_alloc_snapshot - allocate snapshot buffer.
613  *
614  * This only allocates the snapshot buffer if it isn't already
615  * allocated - it doesn't also take a snapshot.
616  *
617  * This is meant to be used in cases where the snapshot buffer needs
618  * to be set up for events that can't sleep but need to be able to
619  * trigger a snapshot.
620  */
621 int tracing_alloc_snapshot(void)
622 {
623         struct trace_array *tr = &global_trace;
624         int ret;
625
626         ret = alloc_snapshot(tr);
627         WARN_ON(ret < 0);
628
629         return ret;
630 }
631 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
632
633 /**
634  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
635  *
636  * This is similar to trace_snapshot(), but it will allocate the
637  * snapshot buffer if it isn't already allocated. Use this only
638  * where it is safe to sleep, as the allocation may sleep.
639  *
640  * This causes a swap between the snapshot buffer and the current live
641  * tracing buffer. You can use this to take snapshots of the live
642  * trace when some condition is triggered, but continue to trace.
643  */
644 void tracing_snapshot_alloc(void)
645 {
646         int ret;
647
648         ret = tracing_alloc_snapshot();
649         if (ret < 0)
650                 return;
651
652         tracing_snapshot();
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
655 #else
656 void tracing_snapshot(void)
657 {
658         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
659 }
660 EXPORT_SYMBOL_GPL(tracing_snapshot);
661 int tracing_alloc_snapshot(void)
662 {
663         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
664         return -ENODEV;
665 }
666 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
667 void tracing_snapshot_alloc(void)
668 {
669         /* Give warning */
670         tracing_snapshot();
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
673 #endif /* CONFIG_TRACER_SNAPSHOT */
674
675 static void tracer_tracing_off(struct trace_array *tr)
676 {
677         if (tr->trace_buffer.buffer)
678                 ring_buffer_record_off(tr->trace_buffer.buffer);
679         /*
680          * This flag is looked at when buffers haven't been allocated
681          * yet, or by some tracers (like irqsoff), that just want to
682          * know if the ring buffer has been disabled, but it can handle
683          * races of where it gets disabled but we still do a record.
684          * As the check is in the fast path of the tracers, it is more
685          * important to be fast than accurate.
686          */
687         tr->buffer_disabled = 1;
688         /* Make the flag seen by readers */
689         smp_wmb();
690 }
691
692 /**
693  * tracing_off - turn off tracing buffers
694  *
695  * This function stops the tracing buffers from recording data.
696  * It does not disable any overhead the tracers themselves may
697  * be causing. This function simply causes all recording to
698  * the ring buffers to fail.
699  */
700 void tracing_off(void)
701 {
702         tracer_tracing_off(&global_trace);
703 }
704 EXPORT_SYMBOL_GPL(tracing_off);
705
706 void disable_trace_on_warning(void)
707 {
708         if (__disable_trace_on_warning)
709                 tracing_off();
710 }
711
712 /**
713  * tracer_tracing_is_on - show the real state of the ring buffer
714  * @tr: the trace array whose ring buffer state is being queried
715  *
716  * Shows the real state of the ring buffer: whether it is enabled or not.
717  */
718 static int tracer_tracing_is_on(struct trace_array *tr)
719 {
720         if (tr->trace_buffer.buffer)
721                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
722         return !tr->buffer_disabled;
723 }
724
725 /**
726  * tracing_is_on - show state of ring buffers enabled
727  */
728 int tracing_is_on(void)
729 {
730         return tracer_tracing_is_on(&global_trace);
731 }
732 EXPORT_SYMBOL_GPL(tracing_is_on);
733
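/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * debugging session that only cares about the events leading up to a
 * problem can freeze the ring buffer the moment the problem is seen:
 *
 *	if (detected_bad_state())
 *		tracing_off();
 *
 *	if (!tracing_is_on())
 *		pr_info("ring buffer frozen for post-mortem analysis\n");
 *
 * The buffers keep their contents and can be re-enabled with tracing_on().
 */
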
734 static int __init set_buf_size(char *str)
735 {
736         unsigned long buf_size;
737
738         if (!str)
739                 return 0;
740         buf_size = memparse(str, &str);
741         /* nr_entries can not be zero */
742         if (buf_size == 0)
743                 return 0;
744         trace_buf_size = buf_size;
745         return 1;
746 }
747 __setup("trace_buf_size=", set_buf_size);
748
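/*
 * Illustrative usage sketch (not part of this file): since memparse() is
 * used above, the boot parameter accepts the usual size suffixes, e.g.:
 *
 *	trace_buf_size=1441792
 *	trace_buf_size=8M
 *
 * The value is the per-cpu buffer size in bytes; the ring-buffer code
 * rounds it to page size.
 */
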
749 static int __init set_tracing_thresh(char *str)
750 {
751         unsigned long threshold;
752         int ret;
753
754         if (!str)
755                 return 0;
756         ret = kstrtoul(str, 0, &threshold);
757         if (ret < 0)
758                 return 0;
759         tracing_thresh = threshold * 1000;
760         return 1;
761 }
762 __setup("tracing_thresh=", set_tracing_thresh);
763
764 unsigned long nsecs_to_usecs(unsigned long nsecs)
765 {
766         return nsecs / 1000;
767 }
768
769 /* These must match the bit positions in trace_iterator_flags */
770 static const char *trace_options[] = {
771         "print-parent",
772         "sym-offset",
773         "sym-addr",
774         "verbose",
775         "raw",
776         "hex",
777         "bin",
778         "block",
779         "stacktrace",
780         "trace_printk",
781         "ftrace_preempt",
782         "branch",
783         "annotate",
784         "userstacktrace",
785         "sym-userobj",
786         "printk-msg-only",
787         "context-info",
788         "latency-format",
789         "sleep-time",
790         "graph-time",
791         "record-cmd",
792         "overwrite",
793         "disable_on_free",
794         "irq-info",
795         "markers",
796         "function-trace",
797         NULL
798 };
799
800 static struct {
801         u64 (*func)(void);
802         const char *name;
803         int in_ns;              /* is this clock in nanoseconds? */
804 } trace_clocks[] = {
805         { trace_clock_local,    "local",        1 },
806         { trace_clock_global,   "global",       1 },
807         { trace_clock_counter,  "counter",      0 },
808         { trace_clock_jiffies,  "uptime",       1 },
809         { trace_clock,          "perf",         1 },
810         ARCH_TRACE_CLOCKS
811 };
812
813 /*
814  * trace_parser_get_init - gets the buffer for trace parser
815  */
816 int trace_parser_get_init(struct trace_parser *parser, int size)
817 {
818         memset(parser, 0, sizeof(*parser));
819
820         parser->buffer = kmalloc(size, GFP_KERNEL);
821         if (!parser->buffer)
822                 return 1;
823
824         parser->size = size;
825         return 0;
826 }
827
828 /*
829  * trace_parser_put - frees the buffer for trace parser
830  */
831 void trace_parser_put(struct trace_parser *parser)
832 {
833         kfree(parser->buffer);
834 }
835
836 /*
837  * trace_get_user - reads the user input string separated by space
838  * (matched by isspace(ch))
839  *
840  * For each string found, the 'struct trace_parser' is updated,
841  * and the function returns.
842  *
843  * Returns number of bytes read.
844  *
845  * See kernel/trace/trace.h for 'struct trace_parser' details.
846  */
847 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
848         size_t cnt, loff_t *ppos)
849 {
850         char ch;
851         size_t read = 0;
852         ssize_t ret;
853
854         if (!*ppos)
855                 trace_parser_clear(parser);
856
857         ret = get_user(ch, ubuf++);
858         if (ret)
859                 goto out;
860
861         read++;
862         cnt--;
863
864         /*
865          * The parser is not finished with the last write,
866          * continue reading the user input without skipping spaces.
867          */
868         if (!parser->cont) {
869                 /* skip white space */
870                 while (cnt && isspace(ch)) {
871                         ret = get_user(ch, ubuf++);
872                         if (ret)
873                                 goto out;
874                         read++;
875                         cnt--;
876                 }
877
878                 /* only spaces were written */
879                 if (isspace(ch)) {
880                         *ppos += read;
881                         ret = read;
882                         goto out;
883                 }
884
885                 parser->idx = 0;
886         }
887
888         /* read the non-space input */
889         while (cnt && !isspace(ch)) {
890                 if (parser->idx < parser->size - 1)
891                         parser->buffer[parser->idx++] = ch;
892                 else {
893                         ret = -EINVAL;
894                         goto out;
895                 }
896                 ret = get_user(ch, ubuf++);
897                 if (ret)
898                         goto out;
899                 read++;
900                 cnt--;
901         }
902
903         /* We either got finished input or we have to wait for another call. */
904         if (isspace(ch)) {
905                 parser->buffer[parser->idx] = 0;
906                 parser->cont = false;
907         } else if (parser->idx < parser->size - 1) {
908                 parser->cont = true;
909                 parser->buffer[parser->idx++] = ch;
910         } else {
911                 ret = -EINVAL;
912                 goto out;
913         }
914
915         *ppos += read;
916         ret = read;
917
918 out:
919         return ret;
920 }
921
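/*
 * Illustrative sketch (not part of this file) of how the parser above is
 * typically driven from a debugfs write handler; PARSER_BUF_SIZE and
 * handle_token() are schematic names:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, PARSER_BUF_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser))
 *		ret = handle_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 */
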
922 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
923 {
924         int len;
925         int ret;
926
927         if (!cnt)
928                 return 0;
929
930         if (s->len <= s->readpos)
931                 return -EBUSY;
932
933         len = s->len - s->readpos;
934         if (cnt > len)
935                 cnt = len;
936         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
937         if (ret == cnt)
938                 return -EFAULT;
939
940         cnt -= ret;
941
942         s->readpos += cnt;
943         return cnt;
944 }
945
946 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
947 {
948         int len;
949
950         if (s->len <= s->readpos)
951                 return -EBUSY;
952
953         len = s->len - s->readpos;
954         if (cnt > len)
955                 cnt = len;
956         memcpy(buf, s->buffer + s->readpos, cnt);
957
958         s->readpos += cnt;
959         return cnt;
960 }
961
962 /*
963  * ftrace_max_lock is used to protect the swapping of buffers
964  * when taking a max snapshot. The buffers themselves are
965  * protected by per_cpu spinlocks. But the action of the swap
966  * needs its own lock.
967  *
968  * This is defined as an arch_spinlock_t in order to help
969  * with performance when lockdep debugging is enabled.
970  *
971  * It is also used in other places outside of update_max_tr,
972  * so it needs to be defined outside of the
973  * CONFIG_TRACER_MAX_TRACE #ifdef.
974  */
975 static arch_spinlock_t ftrace_max_lock =
976         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
977
978 unsigned long __read_mostly     tracing_thresh;
979
980 #ifdef CONFIG_TRACER_MAX_TRACE
981 unsigned long __read_mostly     tracing_max_latency;
982
983 /*
984  * Copy the new maximum trace into the separate maximum-trace
985  * structure. (this way the maximum trace is permanently saved,
986  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
987  */
988 static void
989 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
990 {
991         struct trace_buffer *trace_buf = &tr->trace_buffer;
992         struct trace_buffer *max_buf = &tr->max_buffer;
993         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
994         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
995
996         max_buf->cpu = cpu;
997         max_buf->time_start = data->preempt_timestamp;
998
999         max_data->saved_latency = tracing_max_latency;
1000         max_data->critical_start = data->critical_start;
1001         max_data->critical_end = data->critical_end;
1002
1003         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1004         max_data->pid = tsk->pid;
1005         /*
1006          * If tsk == current, then use current_uid(), as that does not use
1007          * RCU. The irq tracer can be called out of RCU scope.
1008          */
1009         if (tsk == current)
1010                 max_data->uid = current_uid();
1011         else
1012                 max_data->uid = task_uid(tsk);
1013
1014         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1015         max_data->policy = tsk->policy;
1016         max_data->rt_priority = tsk->rt_priority;
1017
1018         /* record this task's comm */
1019         tracing_record_cmdline(tsk);
1020 }
1021
1022 /**
1023  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1024  * @tr: tracer
1025  * @tsk: the task with the latency
1026  * @cpu: The cpu that initiated the trace.
1027  *
1028  * Flip the buffers between the @tr and the max_tr and record information
1029  * about which task was the cause of this latency.
1030  */
1031 void
1032 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1033 {
1034         struct ring_buffer *buf;
1035
1036         if (tr->stop_count)
1037                 return;
1038
1039         WARN_ON_ONCE(!irqs_disabled());
1040
1041         if (!tr->allocated_snapshot) {
1042                 /* Only the nop tracer should hit this when disabling */
1043                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1044                 return;
1045         }
1046
1047         arch_spin_lock(&ftrace_max_lock);
1048
1049         buf = tr->trace_buffer.buffer;
1050         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1051         tr->max_buffer.buffer = buf;
1052
1053         __update_max_tr(tr, tsk, cpu);
1054         arch_spin_unlock(&ftrace_max_lock);
1055 }
1056
1057 /**
1058  * update_max_tr_single - only copy one trace over, and reset the rest
1059  * @tr: tracer
1060  * @tsk: task with the latency
1061  * @cpu: the cpu of the buffer to copy.
1062  *
1063  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1064  */
1065 void
1066 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1067 {
1068         int ret;
1069
1070         if (tr->stop_count)
1071                 return;
1072
1073         WARN_ON_ONCE(!irqs_disabled());
1074         if (!tr->allocated_snapshot) {
1075                 /* Only the nop tracer should hit this when disabling */
1076                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1077                 return;
1078         }
1079
1080         arch_spin_lock(&ftrace_max_lock);
1081
1082         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1083
1084         if (ret == -EBUSY) {
1085                 /*
1086                  * We failed to swap the buffer due to a commit taking
1087                  * place on this CPU. We fail to record, but we reset
1088                  * the max trace buffer (no one writes directly to it)
1089                  * and flag that it failed.
1090                  */
1091                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1092                         "Failed to swap buffers due to commit in progress\n");
1093         }
1094
1095         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1096
1097         __update_max_tr(tr, tsk, cpu);
1098         arch_spin_unlock(&ftrace_max_lock);
1099 }
1100 #endif /* CONFIG_TRACER_MAX_TRACE */
1101
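/*
 * Illustrative sketch (not part of this file): a latency tracer that has
 * just measured a new worst-case delay records it roughly like this, with
 * interrupts disabled and under its own max-check locking:
 *
 *	if (delta > tracing_max_latency) {
 *		tracing_max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id());
 *	}
 */
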
1102 static int default_wait_pipe(struct trace_iterator *iter)
1103 {
1104         /* Iterators are static, they should be filled or empty */
1105         if (trace_buffer_iter(iter, iter->cpu_file))
1106                 return 0;
1107
1108         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1109 }
1110
1111 #ifdef CONFIG_FTRACE_STARTUP_TEST
1112 static int run_tracer_selftest(struct tracer *type)
1113 {
1114         struct trace_array *tr = &global_trace;
1115         struct tracer *saved_tracer = tr->current_trace;
1116         int ret;
1117
1118         if (!type->selftest || tracing_selftest_disabled)
1119                 return 0;
1120
1121         /*
1122          * Run a selftest on this tracer.
1123          * Here we reset the trace buffer, and set the current
1124          * tracer to be this tracer. The tracer can then run some
1125          * internal tracing to verify that everything is in order.
1126          * If we fail, we do not register this tracer.
1127          */
1128         tracing_reset_online_cpus(&tr->trace_buffer);
1129
1130         tr->current_trace = type;
1131
1132 #ifdef CONFIG_TRACER_MAX_TRACE
1133         if (type->use_max_tr) {
1134                 /* If we expanded the buffers, make sure the max is expanded too */
1135                 if (ring_buffer_expanded)
1136                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1137                                            RING_BUFFER_ALL_CPUS);
1138                 tr->allocated_snapshot = true;
1139         }
1140 #endif
1141
1142         /* the test is responsible for initializing and enabling */
1143         pr_info("Testing tracer %s: ", type->name);
1144         ret = type->selftest(type, tr);
1145         /* the test is responsible for resetting too */
1146         tr->current_trace = saved_tracer;
1147         if (ret) {
1148                 printk(KERN_CONT "FAILED!\n");
1149                 /* Add the warning after printing 'FAILED' */
1150                 WARN_ON(1);
1151                 return -1;
1152         }
1153         /* Only reset on passing, to avoid touching corrupted buffers */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156 #ifdef CONFIG_TRACER_MAX_TRACE
1157         if (type->use_max_tr) {
1158                 tr->allocated_snapshot = false;
1159
1160                 /* Shrink the max buffer again */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1163                                            RING_BUFFER_ALL_CPUS);
1164         }
1165 #endif
1166
1167         printk(KERN_CONT "PASSED\n");
1168         return 0;
1169 }
1170 #else
1171 static inline int run_tracer_selftest(struct tracer *type)
1172 {
1173         return 0;
1174 }
1175 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1176
1177 /**
1178  * register_tracer - register a tracer with the ftrace system.
1179  * @type: the plugin for the tracer
1180  *
1181  * Register a new plugin tracer.
1182  */
1183 int register_tracer(struct tracer *type)
1184 {
1185         struct tracer *t;
1186         int ret = 0;
1187
1188         if (!type->name) {
1189                 pr_info("Tracer must have a name\n");
1190                 return -1;
1191         }
1192
1193         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1194                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1195                 return -1;
1196         }
1197
1198         mutex_lock(&trace_types_lock);
1199
1200         tracing_selftest_running = true;
1201
1202         for (t = trace_types; t; t = t->next) {
1203                 if (strcmp(type->name, t->name) == 0) {
1204                         /* already found */
1205                         pr_info("Tracer %s already registered\n",
1206                                 type->name);
1207                         ret = -1;
1208                         goto out;
1209                 }
1210         }
1211
1212         if (!type->set_flag)
1213                 type->set_flag = &dummy_set_flag;
1214         if (!type->flags)
1215                 type->flags = &dummy_tracer_flags;
1216         else
1217                 if (!type->flags->opts)
1218                         type->flags->opts = dummy_tracer_opt;
1219         if (!type->wait_pipe)
1220                 type->wait_pipe = default_wait_pipe;
1221
1222         ret = run_tracer_selftest(type);
1223         if (ret < 0)
1224                 goto out;
1225
1226         type->next = trace_types;
1227         trace_types = type;
1228
1229  out:
1230         tracing_selftest_running = false;
1231         mutex_unlock(&trace_types_lock);
1232
1233         if (ret || !default_bootup_tracer)
1234                 goto out_unlock;
1235
1236         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1237                 goto out_unlock;
1238
1239         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1240         /* Do we want this tracer to start on bootup? */
1241         tracing_set_tracer(type->name);
1242         default_bootup_tracer = NULL;
1243         /* disable other selftests, since this will break them. */
1244         tracing_selftest_disabled = true;
1245 #ifdef CONFIG_FTRACE_STARTUP_TEST
1246         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1247                type->name);
1248 #endif
1249
1250  out_unlock:
1251         return ret;
1252 }
1253
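/*
 * Illustrative sketch (not part of this file): a tracer plugin registers
 * itself from an initcall with a statically defined struct tracer; the
 * names are schematic:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */
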
1254 void tracing_reset(struct trace_buffer *buf, int cpu)
1255 {
1256         struct ring_buffer *buffer = buf->buffer;
1257
1258         if (!buffer)
1259                 return;
1260
1261         ring_buffer_record_disable(buffer);
1262
1263         /* Make sure all commits have finished */
1264         synchronize_sched();
1265         ring_buffer_reset_cpu(buffer, cpu);
1266
1267         ring_buffer_record_enable(buffer);
1268 }
1269
1270 void tracing_reset_online_cpus(struct trace_buffer *buf)
1271 {
1272         struct ring_buffer *buffer = buf->buffer;
1273         int cpu;
1274
1275         if (!buffer)
1276                 return;
1277
1278         ring_buffer_record_disable(buffer);
1279
1280         /* Make sure all commits have finished */
1281         synchronize_sched();
1282
1283         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1284
1285         for_each_online_cpu(cpu)
1286                 ring_buffer_reset_cpu(buffer, cpu);
1287
1288         ring_buffer_record_enable(buffer);
1289 }
1290
1291 /* Must have trace_types_lock held */
1292 void tracing_reset_all_online_cpus(void)
1293 {
1294         struct trace_array *tr;
1295
1296         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1297                 tracing_reset_online_cpus(&tr->trace_buffer);
1298 #ifdef CONFIG_TRACER_MAX_TRACE
1299                 tracing_reset_online_cpus(&tr->max_buffer);
1300 #endif
1301         }
1302 }
1303
1304 #define SAVED_CMDLINES 128
1305 #define NO_CMDLINE_MAP UINT_MAX
1306 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1307 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1308 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1309 static int cmdline_idx;
1310 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1311
1312 /* temporarily disable recording */
1313 static atomic_t trace_record_cmdline_disabled __read_mostly;
1314
1315 static void trace_init_cmdlines(void)
1316 {
1317         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1318         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1319         cmdline_idx = 0;
1320 }
1321
1322 int is_tracing_stopped(void)
1323 {
1324         return global_trace.stop_count;
1325 }
1326
1327 /**
1328  * tracing_start - quick start of the tracer
1329  *
1330  * If tracing is enabled but was stopped by tracing_stop,
1331  * this will start the tracer back up.
1332  */
1333 void tracing_start(void)
1334 {
1335         struct ring_buffer *buffer;
1336         unsigned long flags;
1337
1338         if (tracing_disabled)
1339                 return;
1340
1341         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1342         if (--global_trace.stop_count) {
1343                 if (global_trace.stop_count < 0) {
1344                         /* Someone screwed up their debugging */
1345                         WARN_ON_ONCE(1);
1346                         global_trace.stop_count = 0;
1347                 }
1348                 goto out;
1349         }
1350
1351         /* Prevent the buffers from switching */
1352         arch_spin_lock(&ftrace_max_lock);
1353
1354         buffer = global_trace.trace_buffer.buffer;
1355         if (buffer)
1356                 ring_buffer_record_enable(buffer);
1357
1358 #ifdef CONFIG_TRACER_MAX_TRACE
1359         buffer = global_trace.max_buffer.buffer;
1360         if (buffer)
1361                 ring_buffer_record_enable(buffer);
1362 #endif
1363
1364         arch_spin_unlock(&ftrace_max_lock);
1365
1366  out:
1367         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1368 }
1369
1370 static void tracing_start_tr(struct trace_array *tr)
1371 {
1372         struct ring_buffer *buffer;
1373         unsigned long flags;
1374
1375         if (tracing_disabled)
1376                 return;
1377
1378         /* If global, we need to also start the max tracer */
1379         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1380                 return tracing_start();
1381
1382         raw_spin_lock_irqsave(&tr->start_lock, flags);
1383
1384         if (--tr->stop_count) {
1385                 if (tr->stop_count < 0) {
1386                         /* Someone screwed up their debugging */
1387                         WARN_ON_ONCE(1);
1388                         tr->stop_count = 0;
1389                 }
1390                 goto out;
1391         }
1392
1393         buffer = tr->trace_buffer.buffer;
1394         if (buffer)
1395                 ring_buffer_record_enable(buffer);
1396
1397  out:
1398         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1399 }
1400
1401 /**
1402  * tracing_stop - quick stop of the tracer
1403  *
1404  * Light weight way to stop tracing. Use in conjunction with
1405  * tracing_start.
1406  */
1407 void tracing_stop(void)
1408 {
1409         struct ring_buffer *buffer;
1410         unsigned long flags;
1411
1412         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1413         if (global_trace.stop_count++)
1414                 goto out;
1415
1416         /* Prevent the buffers from switching */
1417         arch_spin_lock(&ftrace_max_lock);
1418
1419         buffer = global_trace.trace_buffer.buffer;
1420         if (buffer)
1421                 ring_buffer_record_disable(buffer);
1422
1423 #ifdef CONFIG_TRACER_MAX_TRACE
1424         buffer = global_trace.max_buffer.buffer;
1425         if (buffer)
1426                 ring_buffer_record_disable(buffer);
1427 #endif
1428
1429         arch_spin_unlock(&ftrace_max_lock);
1430
1431  out:
1432         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1433 }
1434
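/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * tracing_stop()/tracing_start() nest by way of the stop_count above, so
 * a caller can quiesce the buffers around a noisy operation:
 *
 *	tracing_stop();
 *	do_noisy_operation();
 *	tracing_start();
 */
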
1435 static void tracing_stop_tr(struct trace_array *tr)
1436 {
1437         struct ring_buffer *buffer;
1438         unsigned long flags;
1439
1440         /* If global, we need to also stop the max tracer */
1441         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1442                 return tracing_stop();
1443
1444         raw_spin_lock_irqsave(&tr->start_lock, flags);
1445         if (tr->stop_count++)
1446                 goto out;
1447
1448         buffer = tr->trace_buffer.buffer;
1449         if (buffer)
1450                 ring_buffer_record_disable(buffer);
1451
1452  out:
1453         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1454 }
1455
1456 void trace_stop_cmdline_recording(void);
1457
1458 static int trace_save_cmdline(struct task_struct *tsk)
1459 {
1460         unsigned pid, idx;
1461
1462         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1463                 return 0;
1464
1465         /*
1466          * It's not the end of the world if we don't get
1467          * the lock, but we also don't want to spin
1468          * nor do we want to disable interrupts,
1469          * so if we miss here, then better luck next time.
1470          */
1471         if (!arch_spin_trylock(&trace_cmdline_lock))
1472                 return 0;
1473
1474         idx = map_pid_to_cmdline[tsk->pid];
1475         if (idx == NO_CMDLINE_MAP) {
1476                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1477
1478                 /*
1479                  * Check whether the cmdline buffer at idx has a pid
1480                  * mapped. We are going to overwrite that entry so we
1481                  * need to clear the map_pid_to_cmdline. Otherwise we
1482                  * would read the new comm for the old pid.
1483                  */
1484                 pid = map_cmdline_to_pid[idx];
1485                 if (pid != NO_CMDLINE_MAP)
1486                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1487
1488                 map_cmdline_to_pid[idx] = tsk->pid;
1489                 map_pid_to_cmdline[tsk->pid] = idx;
1490
1491                 cmdline_idx = idx;
1492         }
1493
1494         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1495
1496         arch_spin_unlock(&trace_cmdline_lock);
1497
1498         return 1;
1499 }
1500
1501 void trace_find_cmdline(int pid, char comm[])
1502 {
1503         unsigned map;
1504
1505         if (!pid) {
1506                 strcpy(comm, "<idle>");
1507                 return;
1508         }
1509
1510         if (WARN_ON_ONCE(pid < 0)) {
1511                 strcpy(comm, "<XXX>");
1512                 return;
1513         }
1514
1515         if (pid > PID_MAX_DEFAULT) {
1516                 strcpy(comm, "<...>");
1517                 return;
1518         }
1519
1520         preempt_disable();
1521         arch_spin_lock(&trace_cmdline_lock);
1522         map = map_pid_to_cmdline[pid];
1523         if (map != NO_CMDLINE_MAP)
1524                 strcpy(comm, saved_cmdlines[map]);
1525         else
1526                 strcpy(comm, "<...>");
1527
1528         arch_spin_unlock(&trace_cmdline_lock);
1529         preempt_enable();
1530 }
1531
1532 void tracing_record_cmdline(struct task_struct *tsk)
1533 {
1534         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1535                 return;
1536
1537         if (!__this_cpu_read(trace_cmdline_save))
1538                 return;
1539
1540         if (trace_save_cmdline(tsk))
1541                 __this_cpu_write(trace_cmdline_save, false);
1542 }
1543
1544 void
1545 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1546                              int pc)
1547 {
1548         struct task_struct *tsk = current;
1549
1550         entry->preempt_count            = pc & 0xff;
1551         entry->pid                      = (tsk) ? tsk->pid : 0;
1552         entry->flags =
1553 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1554                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1555 #else
1556                 TRACE_FLAG_IRQS_NOSUPPORT |
1557 #endif
1558                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1559                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1560                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1561                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1562 }
1563 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1564
1565 struct ring_buffer_event *
1566 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1567                           int type,
1568                           unsigned long len,
1569                           unsigned long flags, int pc)
1570 {
1571         struct ring_buffer_event *event;
1572
1573         event = ring_buffer_lock_reserve(buffer, len);
1574         if (event != NULL) {
1575                 struct trace_entry *ent = ring_buffer_event_data(event);
1576
1577                 tracing_generic_entry_update(ent, flags, pc);
1578                 ent->type = type;
1579         }
1580
1581         return event;
1582 }
1583
1584 void
1585 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1586 {
1587         __this_cpu_write(trace_cmdline_save, true);
1588         ring_buffer_unlock_commit(buffer, event);
1589 }
1590
1591 static inline void
1592 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1593                              struct ring_buffer_event *event,
1594                              unsigned long flags, int pc)
1595 {
1596         __buffer_unlock_commit(buffer, event);
1597
1598         ftrace_trace_stack(buffer, flags, 6, pc);
1599         ftrace_trace_userstack(buffer, flags, pc);
1600 }
1601
1602 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1603                                 struct ring_buffer_event *event,
1604                                 unsigned long flags, int pc)
1605 {
1606         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1607 }
1608 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1609
1610 static struct ring_buffer *temp_buffer;
1611
1612 struct ring_buffer_event *
1613 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1614                           struct ftrace_event_file *ftrace_file,
1615                           int type, unsigned long len,
1616                           unsigned long flags, int pc)
1617 {
1618         struct ring_buffer_event *entry;
1619
1620         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1621         entry = trace_buffer_lock_reserve(*current_rb,
1622                                          type, len, flags, pc);
1623         /*
1624          * If tracing is off, but we have triggers enabled
1625          * we still need to look at the event data. Use the temp_buffer
1626          * to store the trace event for the trigger to use. It's recursion
1627          * safe and will not be recorded anywhere.
1628          */
1629         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1630                 *current_rb = temp_buffer;
1631                 entry = trace_buffer_lock_reserve(*current_rb,
1632                                                   type, len, flags, pc);
1633         }
1634         return entry;
1635 }
1636 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1637
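/*
 * Illustrative sketch (not part of this file): generated trace-event code
 * drives this as reserve -> fill -> filter -> commit; the field names
 * here are schematic:
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, type,
 *						sizeof(*entry), irq_flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->some_field = some_value;
 *	if (!filter_check_discard(ftrace_file, entry, buffer, event))
 *		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
 */
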
1638 struct ring_buffer_event *
1639 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1640                                   int type, unsigned long len,
1641                                   unsigned long flags, int pc)
1642 {
1643         *current_rb = global_trace.trace_buffer.buffer;
1644         return trace_buffer_lock_reserve(*current_rb,
1645                                          type, len, flags, pc);
1646 }
1647 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1648
1649 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1650                                         struct ring_buffer_event *event,
1651                                         unsigned long flags, int pc)
1652 {
1653         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1654 }
1655 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1656
1657 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1658                                      struct ring_buffer_event *event,
1659                                      unsigned long flags, int pc,
1660                                      struct pt_regs *regs)
1661 {
1662         __buffer_unlock_commit(buffer, event);
1663
1664         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1665         ftrace_trace_userstack(buffer, flags, pc);
1666 }
1667 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1668
1669 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1670                                          struct ring_buffer_event *event)
1671 {
1672         ring_buffer_discard_commit(buffer, event);
1673 }
1674 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1675
1676 void
1677 trace_function(struct trace_array *tr,
1678                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1679                int pc)
1680 {
1681         struct ftrace_event_call *call = &event_function;
1682         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1683         struct ring_buffer_event *event;
1684         struct ftrace_entry *entry;
1685
1686         /* If we are reading the ring buffer, don't trace */
1687         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1688                 return;
1689
1690         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1691                                           flags, pc);
1692         if (!event)
1693                 return;
1694         entry   = ring_buffer_event_data(event);
1695         entry->ip                       = ip;
1696         entry->parent_ip                = parent_ip;
1697
1698         if (!call_filter_check_discard(call, entry, buffer, event))
1699                 __buffer_unlock_commit(buffer, event);
1700 }
1701
1702 #ifdef CONFIG_STACKTRACE
1703
1704 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1705 struct ftrace_stack {
1706         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1707 };
1708
1709 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1710 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1711
1712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1713                                  unsigned long flags,
1714                                  int skip, int pc, struct pt_regs *regs)
1715 {
1716         struct ftrace_event_call *call = &event_kernel_stack;
1717         struct ring_buffer_event *event;
1718         struct stack_entry *entry;
1719         struct stack_trace trace;
1720         int use_stack;
1721         int size = FTRACE_STACK_ENTRIES;
1722
1723         trace.nr_entries        = 0;
1724         trace.skip              = skip;
1725
1726         /*
1727          * Since events can happen in NMIs there's no safe way to
1728          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1729          * or NMI comes in, it will just have to fall back to the
1730          * default FTRACE_STACK_ENTRIES sized entry.
1731          */
1732         preempt_disable_notrace();
1733
1734         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1735         /*
1736          * We don't need any atomic variables, just a barrier.
1737          * If an interrupt comes in, we don't care, because it would
1738          * have exited and put the counter back to what we want.
1739          * We just need a barrier to keep gcc from moving things
1740          * around.
1741          */
1742         barrier();
1743         if (use_stack == 1) {
1744                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1745                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1746
1747                 if (regs)
1748                         save_stack_trace_regs(regs, &trace);
1749                 else
1750                         save_stack_trace(&trace);
1751
1752                 if (trace.nr_entries > size)
1753                         size = trace.nr_entries;
1754         } else
1755                 /* From now on, use_stack is a boolean */
1756                 use_stack = 0;
1757
1758         size *= sizeof(unsigned long);
1759
1760         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1761                                           sizeof(*entry) + size, flags, pc);
1762         if (!event)
1763                 goto out;
1764         entry = ring_buffer_event_data(event);
1765
1766         memset(&entry->caller, 0, size);
1767
1768         if (use_stack)
1769                 memcpy(&entry->caller, trace.entries,
1770                        trace.nr_entries * sizeof(unsigned long));
1771         else {
1772                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1773                 trace.entries           = entry->caller;
1774                 if (regs)
1775                         save_stack_trace_regs(regs, &trace);
1776                 else
1777                         save_stack_trace(&trace);
1778         }
1779
1780         entry->size = trace.nr_entries;
1781
1782         if (!call_filter_check_discard(call, entry, buffer, event))
1783                 __buffer_unlock_commit(buffer, event);
1784
1785  out:
1786         /* Again, don't let gcc optimize things here */
1787         barrier();
1788         __this_cpu_dec(ftrace_stack_reserve);
1789         preempt_enable_notrace();
1790
1791 }
1792
1793 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1794                              int skip, int pc, struct pt_regs *regs)
1795 {
1796         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1797                 return;
1798
1799         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1800 }
1801
1802 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1803                         int skip, int pc)
1804 {
1805         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1806                 return;
1807
1808         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1809 }
1810
1811 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1812                    int pc)
1813 {
1814         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1815 }
1816
1817 /**
1818  * trace_dump_stack - record a stack back trace in the trace buffer
1819  * @skip: Number of functions to skip (helper handlers)
1820  */
1821 void trace_dump_stack(int skip)
1822 {
1823         unsigned long flags;
1824
1825         if (tracing_disabled || tracing_selftest_running)
1826                 return;
1827
1828         local_save_flags(flags);
1829
1830         /*
1831          * Skip 3 more frames; that seems to get us to the caller
1832          * of this function.
1833          */
1834         skip += 3;
1835         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1836                              flags, skip, preempt_count(), NULL);
1837 }
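
/*
 * A minimal usage sketch (the function below is hypothetical): any
 * kernel code can record its current call chain into the trace buffer
 * while chasing down an unexpected code path.
 */
#if 0
static void example_unexpected_path(void)
{
        trace_printk("unexpected path hit\n");
        trace_dump_stack(0);    /* 0: do not skip any additional callers */
}
#endif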
1838
1839 static DEFINE_PER_CPU(int, user_stack_count);
1840
1841 void
1842 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1843 {
1844         struct ftrace_event_call *call = &event_user_stack;
1845         struct ring_buffer_event *event;
1846         struct userstack_entry *entry;
1847         struct stack_trace trace;
1848
1849         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1850                 return;
1851
1852         /*
1853          * NMIs cannot handle page faults, even with fixups.
1854          * Saving the user stack can (and often does) fault.
1855          */
1856         if (unlikely(in_nmi()))
1857                 return;
1858
1859         /*
1860          * prevent recursion, since the user stack tracing may
1861          * trigger other kernel events.
1862          */
1863         preempt_disable();
1864         if (__this_cpu_read(user_stack_count))
1865                 goto out;
1866
1867         __this_cpu_inc(user_stack_count);
1868
1869         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1870                                           sizeof(*entry), flags, pc);
1871         if (!event)
1872                 goto out_drop_count;
1873         entry   = ring_buffer_event_data(event);
1874
1875         entry->tgid             = current->tgid;
1876         memset(&entry->caller, 0, sizeof(entry->caller));
1877
1878         trace.nr_entries        = 0;
1879         trace.max_entries       = FTRACE_STACK_ENTRIES;
1880         trace.skip              = 0;
1881         trace.entries           = entry->caller;
1882
1883         save_stack_trace_user(&trace);
1884         if (!call_filter_check_discard(call, entry, buffer, event))
1885                 __buffer_unlock_commit(buffer, event);
1886
1887  out_drop_count:
1888         __this_cpu_dec(user_stack_count);
1889  out:
1890         preempt_enable();
1891 }
1892
1893 #ifdef UNUSED
1894 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1895 {
1896         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1897 }
1898 #endif /* UNUSED */
1899
1900 #endif /* CONFIG_STACKTRACE */
1901
1902 /* created for use with alloc_percpu */
1903 struct trace_buffer_struct {
1904         char buffer[TRACE_BUF_SIZE];
1905 };
1906
1907 static struct trace_buffer_struct *trace_percpu_buffer;
1908 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1909 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1910 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1911
1912 /*
1913  * The buffer used is dependent on the context. There is a per cpu
1914  * buffer for normal context, softirq contex, hard irq context and
1915  * buffer for normal context, softirq context, hard irq context and
1916  * for NMI context. This allows for lockless recording.
1917  *
1918  * Note, if the buffers failed to be allocated, then this returns NULL.
1919 static char *get_trace_buf(void)
1920 {
1921         struct trace_buffer_struct *percpu_buffer;
1922
1923         /*
1924          * If we have allocated per cpu buffers, then we do not
1925          * need to do any locking.
1926          */
1927         if (in_nmi())
1928                 percpu_buffer = trace_percpu_nmi_buffer;
1929         else if (in_irq())
1930                 percpu_buffer = trace_percpu_irq_buffer;
1931         else if (in_softirq())
1932                 percpu_buffer = trace_percpu_sirq_buffer;
1933         else
1934                 percpu_buffer = trace_percpu_buffer;
1935
1936         if (!percpu_buffer)
1937                 return NULL;
1938
1939         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1940 }
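
/*
 * Sketch of the calling convention (the helper below is hypothetical):
 * the caller must keep preemption disabled while it uses the returned
 * buffer, so that another task on this CPU cannot be handed the same
 * per cpu storage.
 */
#if 0
static int example_format_message(const char *fmt, va_list args)
{
        char *tbuffer;
        int len = 0;

        preempt_disable_notrace();

        tbuffer = get_trace_buf();
        if (tbuffer)
                len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

        preempt_enable_notrace();

        return len;
}
#endif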
1941
1942 static int alloc_percpu_trace_buffer(void)
1943 {
1944         struct trace_buffer_struct *buffers;
1945         struct trace_buffer_struct *sirq_buffers;
1946         struct trace_buffer_struct *irq_buffers;
1947         struct trace_buffer_struct *nmi_buffers;
1948
1949         buffers = alloc_percpu(struct trace_buffer_struct);
1950         if (!buffers)
1951                 goto err_warn;
1952
1953         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1954         if (!sirq_buffers)
1955                 goto err_sirq;
1956
1957         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1958         if (!irq_buffers)
1959                 goto err_irq;
1960
1961         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1962         if (!nmi_buffers)
1963                 goto err_nmi;
1964
1965         trace_percpu_buffer = buffers;
1966         trace_percpu_sirq_buffer = sirq_buffers;
1967         trace_percpu_irq_buffer = irq_buffers;
1968         trace_percpu_nmi_buffer = nmi_buffers;
1969
1970         return 0;
1971
1972  err_nmi:
1973         free_percpu(irq_buffers);
1974  err_irq:
1975         free_percpu(sirq_buffers);
1976  err_sirq:
1977         free_percpu(buffers);
1978  err_warn:
1979         WARN(1, "Could not allocate percpu trace_printk buffer");
1980         return -ENOMEM;
1981 }
1982
1983 static int buffers_allocated;
1984
1985 void trace_printk_init_buffers(void)
1986 {
1987         if (buffers_allocated)
1988                 return;
1989
1990         if (alloc_percpu_trace_buffer())
1991                 return;
1992
1993         pr_info("ftrace: Allocated trace_printk buffers\n");
1994
1995         /* Expand the buffers to set size */
1996         tracing_update_buffers();
1997
1998         buffers_allocated = 1;
1999
2000         /*
2001          * trace_printk_init_buffers() can be called by modules.
2002          * If that happens, then we need to start cmdline recording
2003          * directly here. If the global_trace.trace_buffer.buffer is already
2004          * allocated here, then this was called by module code.
2005          */
2006         if (global_trace.trace_buffer.buffer)
2007                 tracing_start_cmdline_record();
2008 }
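
/*
 * Sketch of the module case mentioned in the comment above (the module
 * below is hypothetical): a module that uses trace_printk() ends up
 * triggering trace_printk_init_buffers() while being loaded, at which
 * point the top level buffer already exists and cmdline recording is
 * started here.
 */
#if 0
static int __init example_module_init(void)
{
        trace_printk("example module loaded\n");
        return 0;
}
module_init(example_module_init);
#endif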
2009
2010 void trace_printk_start_comm(void)
2011 {
2012         /* Start tracing comms if trace printk is set */
2013         if (!buffers_allocated)
2014                 return;
2015         tracing_start_cmdline_record();
2016 }
2017
2018 static void trace_printk_start_stop_comm(int enabled)
2019 {
2020         if (!buffers_allocated)
2021                 return;
2022
2023         if (enabled)
2024                 tracing_start_cmdline_record();
2025         else
2026                 tracing_stop_cmdline_record();
2027 }
2028
2029 /**
2030  * trace_vbprintk - write binary msg to tracing buffer
2031  * Like trace_vprintk() but the arguments are stored in binary form.
2032  */
2033 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2034 {
2035         struct ftrace_event_call *call = &event_bprint;
2036         struct ring_buffer_event *event;
2037         struct ring_buffer *buffer;
2038         struct trace_array *tr = &global_trace;
2039         struct bprint_entry *entry;
2040         unsigned long flags;
2041         char *tbuffer;
2042         int len = 0, size, pc;
2043
2044         if (unlikely(tracing_selftest_running || tracing_disabled))
2045                 return 0;
2046
2047         /* Don't pollute graph traces with trace_vprintk internals */
2048         pause_graph_tracing();
2049
2050         pc = preempt_count();
2051         preempt_disable_notrace();
2052
2053         tbuffer = get_trace_buf();
2054         if (!tbuffer) {
2055                 len = 0;
2056                 goto out;
2057         }
2058
2059         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2060
2061         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2062                 goto out;
2063
2064         local_save_flags(flags);
2065         size = sizeof(*entry) + sizeof(u32) * len;
2066         buffer = tr->trace_buffer.buffer;
2067         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2068                                           flags, pc);
2069         if (!event)
2070                 goto out;
2071         entry = ring_buffer_event_data(event);
2072         entry->ip                       = ip;
2073         entry->fmt                      = fmt;
2074
2075         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2076         if (!call_filter_check_discard(call, entry, buffer, event)) {
2077                 __buffer_unlock_commit(buffer, event);
2078                 ftrace_trace_stack(buffer, flags, 6, pc);
2079         }
2080
2081 out:
2082         preempt_enable_notrace();
2083         unpause_graph_tracing();
2084
2085         return len;
2086 }
2087 EXPORT_SYMBOL_GPL(trace_vbprintk);
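
/*
 * Sketch of a varargs front end like the one trace_printk() ends up
 * using (simplified; the real wrapper lives in trace_printk.c): only
 * the caller address and the va_list are forwarded, the binary
 * encoding of the arguments happens in trace_vbprintk() above.
 */
#if 0
static int example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}
#endif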
2088
2089 static int
2090 __trace_array_vprintk(struct ring_buffer *buffer,
2091                       unsigned long ip, const char *fmt, va_list args)
2092 {
2093         struct ftrace_event_call *call = &event_print;
2094         struct ring_buffer_event *event;
2095         int len = 0, size, pc;
2096         struct print_entry *entry;
2097         unsigned long flags;
2098         char *tbuffer;
2099
2100         if (tracing_disabled || tracing_selftest_running)
2101                 return 0;
2102
2103         /* Don't pollute graph traces with trace_vprintk internals */
2104         pause_graph_tracing();
2105
2106         pc = preempt_count();
2107         preempt_disable_notrace();
2108
2109
2110         tbuffer = get_trace_buf();
2111         if (!tbuffer) {
2112                 len = 0;
2113                 goto out;
2114         }
2115
2116         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2117         if (len > TRACE_BUF_SIZE)
2118                 goto out;
2119
2120         local_save_flags(flags);
2121         size = sizeof(*entry) + len + 1;
2122         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2123                                           flags, pc);
2124         if (!event)
2125                 goto out;
2126         entry = ring_buffer_event_data(event);
2127         entry->ip = ip;
2128
2129         memcpy(&entry->buf, tbuffer, len);
2130         entry->buf[len] = '\0';
2131         if (!call_filter_check_discard(call, entry, buffer, event)) {
2132                 __buffer_unlock_commit(buffer, event);
2133                 ftrace_trace_stack(buffer, flags, 6, pc);
2134         }
2135  out:
2136         preempt_enable_notrace();
2137         unpause_graph_tracing();
2138
2139         return len;
2140 }
2141
2142 int trace_array_vprintk(struct trace_array *tr,
2143                         unsigned long ip, const char *fmt, va_list args)
2144 {
2145         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2146 }
2147
2148 int trace_array_printk(struct trace_array *tr,
2149                        unsigned long ip, const char *fmt, ...)
2150 {
2151         int ret;
2152         va_list ap;
2153
2154         if (!(trace_flags & TRACE_ITER_PRINTK))
2155                 return 0;
2156
2157         va_start(ap, fmt);
2158         ret = trace_array_vprintk(tr, ip, fmt, ap);
2159         va_end(ap);
2160         return ret;
2161 }
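
/*
 * Usage sketch (the caller below is hypothetical): instrumented code
 * passes its own address as the ip argument so the output is attributed
 * to the call site, and tr selects which trace instance the message
 * lands in.
 */
#if 0
static void example_instrument(struct trace_array *tr, int value)
{
        trace_array_printk(tr, _THIS_IP_, "value is now %d\n", value);
}
#endif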
2162
2163 int trace_array_printk_buf(struct ring_buffer *buffer,
2164                            unsigned long ip, const char *fmt, ...)
2165 {
2166         int ret;
2167         va_list ap;
2168
2169         if (!(trace_flags & TRACE_ITER_PRINTK))
2170                 return 0;
2171
2172         va_start(ap, fmt);
2173         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2174         va_end(ap);
2175         return ret;
2176 }
2177
2178 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2179 {
2180         return trace_array_vprintk(&global_trace, ip, fmt, args);
2181 }
2182 EXPORT_SYMBOL_GPL(trace_vprintk);
2183
2184 static void trace_iterator_increment(struct trace_iterator *iter)
2185 {
2186         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2187
2188         iter->idx++;
2189         if (buf_iter)
2190                 ring_buffer_read(buf_iter, NULL);
2191 }
2192
2193 static struct trace_entry *
2194 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2195                 unsigned long *lost_events)
2196 {
2197         struct ring_buffer_event *event;
2198         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2199
2200         if (buf_iter)
2201                 event = ring_buffer_iter_peek(buf_iter, ts);
2202         else
2203                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2204                                          lost_events);
2205
2206         if (event) {
2207                 iter->ent_size = ring_buffer_event_length(event);
2208                 return ring_buffer_event_data(event);
2209         }
2210         iter->ent_size = 0;
2211         return NULL;
2212 }
2213
2214 static struct trace_entry *
2215 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2216                   unsigned long *missing_events, u64 *ent_ts)
2217 {
2218         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2219         struct trace_entry *ent, *next = NULL;
2220         unsigned long lost_events = 0, next_lost = 0;
2221         int cpu_file = iter->cpu_file;
2222         u64 next_ts = 0, ts;
2223         int next_cpu = -1;
2224         int next_size = 0;
2225         int cpu;
2226
2227         /*
2228          * If we are in a per_cpu trace file, don't bother iterating over
2229          * all cpus; just peek at that cpu directly.
2230          */
2231         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2232                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2233                         return NULL;
2234                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2235                 if (ent_cpu)
2236                         *ent_cpu = cpu_file;
2237
2238                 return ent;
2239         }
2240
2241         for_each_tracing_cpu(cpu) {
2242
2243                 if (ring_buffer_empty_cpu(buffer, cpu))
2244                         continue;
2245
2246                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2247
2248                 /*
2249                  * Pick the entry with the smallest timestamp:
2250                  */
2251                 if (ent && (!next || ts < next_ts)) {
2252                         next = ent;
2253                         next_cpu = cpu;
2254                         next_ts = ts;
2255                         next_lost = lost_events;
2256                         next_size = iter->ent_size;
2257                 }
2258         }
2259
2260         iter->ent_size = next_size;
2261
2262         if (ent_cpu)
2263                 *ent_cpu = next_cpu;
2264
2265         if (ent_ts)
2266                 *ent_ts = next_ts;
2267
2268         if (missing_events)
2269                 *missing_events = next_lost;
2270
2271         return next;
2272 }
2273
2274 /* Find the next real entry, without updating the iterator itself */
2275 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2276                                           int *ent_cpu, u64 *ent_ts)
2277 {
2278         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2279 }
2280
2281 /* Find the next real entry, and increment the iterator to the next entry */
2282 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2283 {
2284         iter->ent = __find_next_entry(iter, &iter->cpu,
2285                                       &iter->lost_events, &iter->ts);
2286
2287         if (iter->ent)
2288                 trace_iterator_increment(iter);
2289
2290         return iter->ent ? iter : NULL;
2291 }
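
/*
 * Sketch of how a reader consumes the merged, timestamp ordered view the
 * helpers above provide (simplified; the real readers also take
 * trace_event_read_lock() and deal with seq_file pagination):
 */
#if 0
static void example_drain(struct trace_iterator *iter)
{
        while (trace_find_next_entry_inc(iter))
                print_trace_line(iter);
}
#endif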
2292
2293 static void trace_consume(struct trace_iterator *iter)
2294 {
2295         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2296                             &iter->lost_events);
2297 }
2298
2299 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2300 {
2301         struct trace_iterator *iter = m->private;
2302         int i = (int)*pos;
2303         void *ent;
2304
2305         WARN_ON_ONCE(iter->leftover);
2306
2307         (*pos)++;
2308
2309         /* can't go backwards */
2310         if (iter->idx > i)
2311                 return NULL;
2312
2313         if (iter->idx < 0)
2314                 ent = trace_find_next_entry_inc(iter);
2315         else
2316                 ent = iter;
2317
2318         while (ent && iter->idx < i)
2319                 ent = trace_find_next_entry_inc(iter);
2320
2321         iter->pos = *pos;
2322
2323         return ent;
2324 }
2325
2326 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2327 {
2328         struct ring_buffer_event *event;
2329         struct ring_buffer_iter *buf_iter;
2330         unsigned long entries = 0;
2331         u64 ts;
2332
2333         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2334
2335         buf_iter = trace_buffer_iter(iter, cpu);
2336         if (!buf_iter)
2337                 return;
2338
2339         ring_buffer_iter_reset(buf_iter);
2340
2341         /*
2342          * With the max latency tracers we could have the case that
2343          * a reset never took place on a cpu. This is evident when
2344          * the timestamp is before the start of the buffer.
2345          */
2346         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2347                 if (ts >= iter->trace_buffer->time_start)
2348                         break;
2349                 entries++;
2350                 ring_buffer_read(buf_iter, NULL);
2351         }
2352
2353         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2354 }
2355
2356 /*
2357  * The current tracer is copied to avoid taking a global lock
2358  * all around.
2359  */
2360 static void *s_start(struct seq_file *m, loff_t *pos)
2361 {
2362         struct trace_iterator *iter = m->private;
2363         struct trace_array *tr = iter->tr;
2364         int cpu_file = iter->cpu_file;
2365         void *p = NULL;
2366         loff_t l = 0;
2367         int cpu;
2368
2369         /*
2370          * copy the tracer to avoid using a global lock all around.
2371          * iter->trace is a copy of current_trace, the pointer to the
2372          * name may be used instead of a strcmp(), as iter->trace->name
2373          * will point to the same string as current_trace->name.
2374          */
2375         mutex_lock(&trace_types_lock);
2376         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2377                 *iter->trace = *tr->current_trace;
2378         mutex_unlock(&trace_types_lock);
2379
2380 #ifdef CONFIG_TRACER_MAX_TRACE
2381         if (iter->snapshot && iter->trace->use_max_tr)
2382                 return ERR_PTR(-EBUSY);
2383 #endif
2384
2385         if (!iter->snapshot)
2386                 atomic_inc(&trace_record_cmdline_disabled);
2387
2388         if (*pos != iter->pos) {
2389                 iter->ent = NULL;
2390                 iter->cpu = 0;
2391                 iter->idx = -1;
2392
2393                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2394                         for_each_tracing_cpu(cpu)
2395                                 tracing_iter_reset(iter, cpu);
2396                 } else
2397                         tracing_iter_reset(iter, cpu_file);
2398
2399                 iter->leftover = 0;
2400                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2401                         ;
2402
2403         } else {
2404                 /*
2405                  * If we overflowed the seq_file before, then we want
2406                  * to just reuse the trace_seq buffer again.
2407                  */
2408                 if (iter->leftover)
2409                         p = iter;
2410                 else {
2411                         l = *pos - 1;
2412                         p = s_next(m, p, &l);
2413                 }
2414         }
2415
2416         trace_event_read_lock();
2417         trace_access_lock(cpu_file);
2418         return p;
2419 }
2420
2421 static void s_stop(struct seq_file *m, void *p)
2422 {
2423         struct trace_iterator *iter = m->private;
2424
2425 #ifdef CONFIG_TRACER_MAX_TRACE
2426         if (iter->snapshot && iter->trace->use_max_tr)
2427                 return;
2428 #endif
2429
2430         if (!iter->snapshot)
2431                 atomic_dec(&trace_record_cmdline_disabled);
2432
2433         trace_access_unlock(iter->cpu_file);
2434         trace_event_read_unlock();
2435 }
2436
2437 static void
2438 get_total_entries(struct trace_buffer *buf,
2439                   unsigned long *total, unsigned long *entries)
2440 {
2441         unsigned long count;
2442         int cpu;
2443
2444         *total = 0;
2445         *entries = 0;
2446
2447         for_each_tracing_cpu(cpu) {
2448                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2449                 /*
2450                  * If this buffer has skipped entries, then we hold all
2451                  * entries for the trace and we need to ignore the
2452                  * ones before the time stamp.
2453                  */
2454                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2455                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2456                         /* total is the same as the entries */
2457                         *total += count;
2458                 } else
2459                         *total += count +
2460                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2461                 *entries += count;
2462         }
2463 }
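
/*
 * Worked example with hypothetical numbers: if a cpu currently holds 800
 * readable entries and 200 older ones were already overwritten, the loop
 * above adds 800 to *entries and 800 + 200 = 1000 to *total, which shows
 * up below as "entries-in-buffer/entries-written: 800/1000".
 */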
2464
2465 static void print_lat_help_header(struct seq_file *m)
2466 {
2467         seq_puts(m, "#                  _------=> CPU#            \n");
2468         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2469         seq_puts(m, "#                | / _----=> need-resched    \n");
2470         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2471         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2472         seq_puts(m, "#                |||| /     delay             \n");
2473         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2474         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2475 }
2476
2477 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2478 {
2479         unsigned long total;
2480         unsigned long entries;
2481
2482         get_total_entries(buf, &total, &entries);
2483         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2484                    entries, total, num_online_cpus());
2485         seq_puts(m, "#\n");
2486 }
2487
2488 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2489 {
2490         print_event_info(buf, m);
2491         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2492         seq_puts(m, "#              | |       |          |         |\n");
2493 }
2494
2495 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2496 {
2497         print_event_info(buf, m);
2498         seq_puts(m, "#                              _-----=> irqs-off\n");
2499         seq_puts(m, "#                             / _----=> need-resched\n");
2500         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2501         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2502         seq_puts(m, "#                            ||| /     delay\n");
2503         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2504         seq_puts(m, "#              | |       |   ||||       |         |\n");
2505 }
2506
2507 void
2508 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2509 {
2510         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2511         struct trace_buffer *buf = iter->trace_buffer;
2512         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2513         struct tracer *type = iter->trace;
2514         unsigned long entries;
2515         unsigned long total;
2516         const char *name = "preemption";
2517
2518         name = type->name;
2519
2520         get_total_entries(buf, &total, &entries);
2521
2522         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2523                    name, UTS_RELEASE);
2524         seq_puts(m, "# -----------------------------------"
2525                  "---------------------------------\n");
2526         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2527                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2528                    nsecs_to_usecs(data->saved_latency),
2529                    entries,
2530                    total,
2531                    buf->cpu,
2532 #if defined(CONFIG_PREEMPT_NONE)
2533                    "server",
2534 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2535                    "desktop",
2536 #elif defined(CONFIG_PREEMPT)
2537                    "preempt",
2538 #else
2539                    "unknown",
2540 #endif
2541                    /* These are reserved for later use */
2542                    0, 0, 0, 0);
2543 #ifdef CONFIG_SMP
2544         seq_printf(m, " #P:%d)\n", num_online_cpus());
2545 #else
2546         seq_puts(m, ")\n");
2547 #endif
2548         seq_puts(m, "#    -----------------\n");
2549         seq_printf(m, "#    | task: %.16s-%d "
2550                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2551                    data->comm, data->pid,
2552                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2553                    data->policy, data->rt_priority);
2554         seq_puts(m, "#    -----------------\n");
2555
2556         if (data->critical_start) {
2557                 seq_puts(m, "#  => started at: ");
2558                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2559                 trace_print_seq(m, &iter->seq);
2560                 seq_puts(m, "\n#  => ended at:   ");
2561                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2562                 trace_print_seq(m, &iter->seq);
2563                 seq_puts(m, "\n#\n");
2564         }
2565
2566         seq_puts(m, "#\n");
2567 }
2568
2569 static void test_cpu_buff_start(struct trace_iterator *iter)
2570 {
2571         struct trace_seq *s = &iter->seq;
2572
2573         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2574                 return;
2575
2576         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2577                 return;
2578
2579         if (cpumask_test_cpu(iter->cpu, iter->started))
2580                 return;
2581
2582         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2583                 return;
2584
2585         cpumask_set_cpu(iter->cpu, iter->started);
2586
2587         /* Don't print started cpu buffer for the first entry of the trace */
2588         if (iter->idx > 1)
2589                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2590                                 iter->cpu);
2591 }
2592
2593 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2594 {
2595         struct trace_seq *s = &iter->seq;
2596         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2597         struct trace_entry *entry;
2598         struct trace_event *event;
2599
2600         entry = iter->ent;
2601
2602         test_cpu_buff_start(iter);
2603
2604         event = ftrace_find_event(entry->type);
2605
2606         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2607                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2608                         if (!trace_print_lat_context(iter))
2609                                 goto partial;
2610                 } else {
2611                         if (!trace_print_context(iter))
2612                                 goto partial;
2613                 }
2614         }
2615
2616         if (event)
2617                 return event->funcs->trace(iter, sym_flags, event);
2618
2619         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2620                 goto partial;
2621
2622         return TRACE_TYPE_HANDLED;
2623 partial:
2624         return TRACE_TYPE_PARTIAL_LINE;
2625 }
2626
2627 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2628 {
2629         struct trace_seq *s = &iter->seq;
2630         struct trace_entry *entry;
2631         struct trace_event *event;
2632
2633         entry = iter->ent;
2634
2635         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2636                 if (!trace_seq_printf(s, "%d %d %llu ",
2637                                       entry->pid, iter->cpu, iter->ts))
2638                         goto partial;
2639         }
2640
2641         event = ftrace_find_event(entry->type);
2642         if (event)
2643                 return event->funcs->raw(iter, 0, event);
2644
2645         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2646                 goto partial;
2647
2648         return TRACE_TYPE_HANDLED;
2649 partial:
2650         return TRACE_TYPE_PARTIAL_LINE;
2651 }
2652
2653 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2654 {
2655         struct trace_seq *s = &iter->seq;
2656         unsigned char newline = '\n';
2657         struct trace_entry *entry;
2658         struct trace_event *event;
2659
2660         entry = iter->ent;
2661
2662         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2663                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2664                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2665                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2666         }
2667
2668         event = ftrace_find_event(entry->type);
2669         if (event) {
2670                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2671                 if (ret != TRACE_TYPE_HANDLED)
2672                         return ret;
2673         }
2674
2675         SEQ_PUT_FIELD_RET(s, newline);
2676
2677         return TRACE_TYPE_HANDLED;
2678 }
2679
2680 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2681 {
2682         struct trace_seq *s = &iter->seq;
2683         struct trace_entry *entry;
2684         struct trace_event *event;
2685
2686         entry = iter->ent;
2687
2688         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2689                 SEQ_PUT_FIELD_RET(s, entry->pid);
2690                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2691                 SEQ_PUT_FIELD_RET(s, iter->ts);
2692         }
2693
2694         event = ftrace_find_event(entry->type);
2695         return event ? event->funcs->binary(iter, 0, event) :
2696                 TRACE_TYPE_HANDLED;
2697 }
2698
2699 int trace_empty(struct trace_iterator *iter)
2700 {
2701         struct ring_buffer_iter *buf_iter;
2702         int cpu;
2703
2704         /* If we are looking at one CPU buffer, only check that one */
2705         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2706                 cpu = iter->cpu_file;
2707                 buf_iter = trace_buffer_iter(iter, cpu);
2708                 if (buf_iter) {
2709                         if (!ring_buffer_iter_empty(buf_iter))
2710                                 return 0;
2711                 } else {
2712                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2713                                 return 0;
2714                 }
2715                 return 1;
2716         }
2717
2718         for_each_tracing_cpu(cpu) {
2719                 buf_iter = trace_buffer_iter(iter, cpu);
2720                 if (buf_iter) {
2721                         if (!ring_buffer_iter_empty(buf_iter))
2722                                 return 0;
2723                 } else {
2724                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2725                                 return 0;
2726                 }
2727         }
2728
2729         return 1;
2730 }
2731
2732 /*  Called with trace_event_read_lock() held. */
2733 enum print_line_t print_trace_line(struct trace_iterator *iter)
2734 {
2735         enum print_line_t ret;
2736
2737         if (iter->lost_events &&
2738             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2739                                  iter->cpu, iter->lost_events))
2740                 return TRACE_TYPE_PARTIAL_LINE;
2741
2742         if (iter->trace && iter->trace->print_line) {
2743                 ret = iter->trace->print_line(iter);
2744                 if (ret != TRACE_TYPE_UNHANDLED)
2745                         return ret;
2746         }
2747
2748         if (iter->ent->type == TRACE_BPUTS &&
2749                         trace_flags & TRACE_ITER_PRINTK &&
2750                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2751                 return trace_print_bputs_msg_only(iter);
2752
2753         if (iter->ent->type == TRACE_BPRINT &&
2754                         trace_flags & TRACE_ITER_PRINTK &&
2755                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2756                 return trace_print_bprintk_msg_only(iter);
2757
2758         if (iter->ent->type == TRACE_PRINT &&
2759                         trace_flags & TRACE_ITER_PRINTK &&
2760                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2761                 return trace_print_printk_msg_only(iter);
2762
2763         if (trace_flags & TRACE_ITER_BIN)
2764                 return print_bin_fmt(iter);
2765
2766         if (trace_flags & TRACE_ITER_HEX)
2767                 return print_hex_fmt(iter);
2768
2769         if (trace_flags & TRACE_ITER_RAW)
2770                 return print_raw_fmt(iter);
2771
2772         return print_trace_fmt(iter);
2773 }
2774
2775 void trace_latency_header(struct seq_file *m)
2776 {
2777         struct trace_iterator *iter = m->private;
2778
2779         /* print nothing if the buffers are empty */
2780         if (trace_empty(iter))
2781                 return;
2782
2783         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2784                 print_trace_header(m, iter);
2785
2786         if (!(trace_flags & TRACE_ITER_VERBOSE))
2787                 print_lat_help_header(m);
2788 }
2789
2790 void trace_default_header(struct seq_file *m)
2791 {
2792         struct trace_iterator *iter = m->private;
2793
2794         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2795                 return;
2796
2797         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2798                 /* print nothing if the buffers are empty */
2799                 if (trace_empty(iter))
2800                         return;
2801                 print_trace_header(m, iter);
2802                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2803                         print_lat_help_header(m);
2804         } else {
2805                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2806                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2807                                 print_func_help_header_irq(iter->trace_buffer, m);
2808                         else
2809                                 print_func_help_header(iter->trace_buffer, m);
2810                 }
2811         }
2812 }
2813
2814 static void test_ftrace_alive(struct seq_file *m)
2815 {
2816         if (!ftrace_is_dead())
2817                 return;
2818         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2819         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2820 }
2821
2822 #ifdef CONFIG_TRACER_MAX_TRACE
2823 static void show_snapshot_main_help(struct seq_file *m)
2824 {
2825         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2826         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2827         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2828         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2829         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2830         seq_printf(m, "#                       is not a '0' or '1')\n");
2831 }
2832
2833 static void show_snapshot_percpu_help(struct seq_file *m)
2834 {
2835         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2836 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2837         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2838         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2839 #else
2840         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2841         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2842 #endif
2843         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2844         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2845         seq_printf(m, "#                       is not a '0' or '1')\n");
2846 }
2847
2848 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2849 {
2850         if (iter->tr->allocated_snapshot)
2851                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2852         else
2853                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2854
2855         seq_printf(m, "# Snapshot commands:\n");
2856         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2857                 show_snapshot_main_help(m);
2858         else
2859                 show_snapshot_percpu_help(m);
2860 }
2861 #else
2862 /* Should never be called */
2863 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2864 #endif
2865
2866 static int s_show(struct seq_file *m, void *v)
2867 {
2868         struct trace_iterator *iter = v;
2869         int ret;
2870
2871         if (iter->ent == NULL) {
2872                 if (iter->tr) {
2873                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2874                         seq_puts(m, "#\n");
2875                         test_ftrace_alive(m);
2876                 }
2877                 if (iter->snapshot && trace_empty(iter))
2878                         print_snapshot_help(m, iter);
2879                 else if (iter->trace && iter->trace->print_header)
2880                         iter->trace->print_header(m);
2881                 else
2882                         trace_default_header(m);
2883
2884         } else if (iter->leftover) {
2885                 /*
2886                  * If we filled the seq_file buffer earlier, we
2887                  * want to just show it now.
2888                  */
2889                 ret = trace_print_seq(m, &iter->seq);
2890
2891                 /* ret should this time be zero, but you never know */
2892                 iter->leftover = ret;
2893
2894         } else {
2895                 print_trace_line(iter);
2896                 ret = trace_print_seq(m, &iter->seq);
2897                 /*
2898                  * If we overflow the seq_file buffer, then it will
2899                  * ask us for this data again at start up.
2900                  * Use that instead.
2901                  *  ret is 0 if seq_file write succeeded.
2902                  *        -1 otherwise.
2903                  */
2904                 iter->leftover = ret;
2905         }
2906
2907         return 0;
2908 }
2909
2910 /*
2911  * Should be used after trace_array_get(); trace_types_lock
2912  * ensures that i_cdev was already initialized.
2913  */
2914 static inline int tracing_get_cpu(struct inode *inode)
2915 {
2916         if (inode->i_cdev) /* See trace_create_cpu_file() */
2917                 return (long)inode->i_cdev - 1;
2918         return RING_BUFFER_ALL_CPUS;
2919 }
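
/*
 * Sketch of the matching encode side (see trace_create_cpu_file()): the
 * cpu number is stored biased by one in i_cdev, so that a NULL i_cdev,
 * as found on the non per-cpu files, decodes to RING_BUFFER_ALL_CPUS
 * above.  The helper below is hypothetical.
 */
#if 0
static void example_set_cpu(struct inode *inode, long cpu)
{
        inode->i_cdev = (void *)(cpu + 1);
}
#endif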
2920
2921 static const struct seq_operations tracer_seq_ops = {
2922         .start          = s_start,
2923         .next           = s_next,
2924         .stop           = s_stop,
2925         .show           = s_show,
2926 };
2927
2928 static struct trace_iterator *
2929 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2930 {
2931         struct trace_array *tr = inode->i_private;
2932         struct trace_iterator *iter;
2933         int cpu;
2934
2935         if (tracing_disabled)
2936                 return ERR_PTR(-ENODEV);
2937
2938         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2939         if (!iter)
2940                 return ERR_PTR(-ENOMEM);
2941
2942         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2943                                     GFP_KERNEL);
2944         if (!iter->buffer_iter)
2945                 goto release;
2946
2947         /*
2948          * We make a copy of the current tracer to avoid concurrent
2949          * changes on it while we are reading.
2950          */
2951         mutex_lock(&trace_types_lock);
2952         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2953         if (!iter->trace)
2954                 goto fail;
2955
2956         *iter->trace = *tr->current_trace;
2957
2958         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2959                 goto fail;
2960
2961         iter->tr = tr;
2962
2963 #ifdef CONFIG_TRACER_MAX_TRACE
2964         /* Currently only the top directory has a snapshot */
2965         if (tr->current_trace->print_max || snapshot)
2966                 iter->trace_buffer = &tr->max_buffer;
2967         else
2968 #endif
2969                 iter->trace_buffer = &tr->trace_buffer;
2970         iter->snapshot = snapshot;
2971         iter->pos = -1;
2972         iter->cpu_file = tracing_get_cpu(inode);
2973         mutex_init(&iter->mutex);
2974
2975         /* Notify the tracer early; before we stop tracing. */
2976         if (iter->trace && iter->trace->open)
2977                 iter->trace->open(iter);
2978
2979         /* Annotate start of buffers if we had overruns */
2980         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2981                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2982
2983         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2984         if (trace_clocks[tr->clock_id].in_ns)
2985                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2986
2987         /* stop the trace while dumping if we are not opening "snapshot" */
2988         if (!iter->snapshot)
2989                 tracing_stop_tr(tr);
2990
2991         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2992                 for_each_tracing_cpu(cpu) {
2993                         iter->buffer_iter[cpu] =
2994                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2995                 }
2996                 ring_buffer_read_prepare_sync();
2997                 for_each_tracing_cpu(cpu) {
2998                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2999                         tracing_iter_reset(iter, cpu);
3000                 }
3001         } else {
3002                 cpu = iter->cpu_file;
3003                 iter->buffer_iter[cpu] =
3004                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3005                 ring_buffer_read_prepare_sync();
3006                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3007                 tracing_iter_reset(iter, cpu);
3008         }
3009
3010         mutex_unlock(&trace_types_lock);
3011
3012         return iter;
3013
3014  fail:
3015         mutex_unlock(&trace_types_lock);
3016         kfree(iter->trace);
3017         kfree(iter->buffer_iter);
3018 release:
3019         seq_release_private(inode, file);
3020         return ERR_PTR(-ENOMEM);
3021 }
3022
3023 int tracing_open_generic(struct inode *inode, struct file *filp)
3024 {
3025         if (tracing_disabled)
3026                 return -ENODEV;
3027
3028         filp->private_data = inode->i_private;
3029         return 0;
3030 }
3031
3032 bool tracing_is_disabled(void)
3033 {
3034         return (tracing_disabled) ? true : false;
3035 }
3036
3037 /*
3038  * Open and update trace_array ref count.
3039  * Must have the current trace_array passed to it.
3040  */
3041 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3042 {
3043         struct trace_array *tr = inode->i_private;
3044
3045         if (tracing_disabled)
3046                 return -ENODEV;
3047
3048         if (trace_array_get(tr) < 0)
3049                 return -ENODEV;
3050
3051         filp->private_data = inode->i_private;
3052
3053         return 0;
3054 }
3055
3056 static int tracing_release(struct inode *inode, struct file *file)
3057 {
3058         struct trace_array *tr = inode->i_private;
3059         struct seq_file *m = file->private_data;
3060         struct trace_iterator *iter;
3061         int cpu;
3062
3063         if (!(file->f_mode & FMODE_READ)) {
3064                 trace_array_put(tr);
3065                 return 0;
3066         }
3067
3068         /* Writes do not use seq_file */
3069         iter = m->private;
3070         mutex_lock(&trace_types_lock);
3071
3072         for_each_tracing_cpu(cpu) {
3073                 if (iter->buffer_iter[cpu])
3074                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3075         }
3076
3077         if (iter->trace && iter->trace->close)
3078                 iter->trace->close(iter);
3079
3080         if (!iter->snapshot)
3081                 /* reenable tracing if it was previously enabled */
3082                 tracing_start_tr(tr);
3083
3084         __trace_array_put(tr);
3085
3086         mutex_unlock(&trace_types_lock);
3087
3088         mutex_destroy(&iter->mutex);
3089         free_cpumask_var(iter->started);
3090         kfree(iter->trace);
3091         kfree(iter->buffer_iter);
3092         seq_release_private(inode, file);
3093
3094         return 0;
3095 }
3096
3097 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3098 {
3099         struct trace_array *tr = inode->i_private;
3100
3101         trace_array_put(tr);
3102         return 0;
3103 }
3104
3105 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3106 {
3107         struct trace_array *tr = inode->i_private;
3108
3109         trace_array_put(tr);
3110
3111         return single_release(inode, file);
3112 }
3113
3114 static int tracing_open(struct inode *inode, struct file *file)
3115 {
3116         struct trace_array *tr = inode->i_private;
3117         struct trace_iterator *iter;
3118         int ret = 0;
3119
3120         if (trace_array_get(tr) < 0)
3121                 return -ENODEV;
3122
3123         /* If this file was open for write, then erase contents */
3124         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3125                 int cpu = tracing_get_cpu(inode);
3126
3127                 if (cpu == RING_BUFFER_ALL_CPUS)
3128                         tracing_reset_online_cpus(&tr->trace_buffer);
3129                 else
3130                         tracing_reset(&tr->trace_buffer, cpu);
3131         }
3132
3133         if (file->f_mode & FMODE_READ) {
3134                 iter = __tracing_open(inode, file, false);
3135                 if (IS_ERR(iter))
3136                         ret = PTR_ERR(iter);
3137                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3138                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3139         }
3140
3141         if (ret < 0)
3142                 trace_array_put(tr);
3143
3144         return ret;
3145 }
3146
3147 static void *
3148 t_next(struct seq_file *m, void *v, loff_t *pos)
3149 {
3150         struct tracer *t = v;
3151
3152         (*pos)++;
3153
3154         if (t)
3155                 t = t->next;
3156
3157         return t;
3158 }
3159
3160 static void *t_start(struct seq_file *m, loff_t *pos)
3161 {
3162         struct tracer *t;
3163         loff_t l = 0;
3164
3165         mutex_lock(&trace_types_lock);
3166         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3167                 ;
3168
3169         return t;
3170 }
3171
3172 static void t_stop(struct seq_file *m, void *p)
3173 {
3174         mutex_unlock(&trace_types_lock);
3175 }
3176
3177 static int t_show(struct seq_file *m, void *v)
3178 {
3179         struct tracer *t = v;
3180
3181         if (!t)
3182                 return 0;
3183
3184         seq_printf(m, "%s", t->name);
3185         if (t->next)
3186                 seq_putc(m, ' ');
3187         else
3188                 seq_putc(m, '\n');
3189
3190         return 0;
3191 }
3192
3193 static const struct seq_operations show_traces_seq_ops = {
3194         .start          = t_start,
3195         .next           = t_next,
3196         .stop           = t_stop,
3197         .show           = t_show,
3198 };
3199
3200 static int show_traces_open(struct inode *inode, struct file *file)
3201 {
3202         if (tracing_disabled)
3203                 return -ENODEV;
3204
3205         return seq_open(file, &show_traces_seq_ops);
3206 }
3207
3208 static ssize_t
3209 tracing_write_stub(struct file *filp, const char __user *ubuf,
3210                    size_t count, loff_t *ppos)
3211 {
3212         return count;
3213 }
3214
3215 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3216 {
3217         int ret;
3218
3219         if (file->f_mode & FMODE_READ)
3220                 ret = seq_lseek(file, offset, whence);
3221         else
3222                 file->f_pos = ret = 0;
3223
3224         return ret;
3225 }
3226
3227 static const struct file_operations tracing_fops = {
3228         .open           = tracing_open,
3229         .read           = seq_read,
3230         .write          = tracing_write_stub,
3231         .llseek         = tracing_lseek,
3232         .release        = tracing_release,
3233 };
3234
3235 static const struct file_operations show_traces_fops = {
3236         .open           = show_traces_open,
3237         .read           = seq_read,
3238         .release        = seq_release,
3239         .llseek         = seq_lseek,
3240 };
3241
3242 /*
3243  * The tracer itself will not take this lock, but we still want
3244  * to provide a consistent cpumask to user-space:
3245  */
3246 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3247
3248 /*
3249  * Temporary storage for the character representation of the
3250  * CPU bitmask (and one more byte for the newline):
3251  */
3252 static char mask_str[NR_CPUS + 1];
3253
3254 static ssize_t
3255 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3256                      size_t count, loff_t *ppos)
3257 {
3258         struct trace_array *tr = file_inode(filp)->i_private;
3259         int len;
3260
3261         mutex_lock(&tracing_cpumask_update_lock);
3262
3263         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3264         if (count - len < 2) {
3265                 count = -EINVAL;
3266                 goto out_err;
3267         }
3268         len += sprintf(mask_str + len, "\n");
3269         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3270
3271 out_err:
3272         mutex_unlock(&tracing_cpumask_update_lock);
3273
3274         return count;
3275 }
3276
3277 static ssize_t
3278 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3279                       size_t count, loff_t *ppos)
3280 {
3281         struct trace_array *tr = file_inode(filp)->i_private;
3282         cpumask_var_t tracing_cpumask_new;
3283         int err, cpu;
3284
3285         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3286                 return -ENOMEM;
3287
3288         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3289         if (err)
3290                 goto err_unlock;
3291
3292         mutex_lock(&tracing_cpumask_update_lock);
3293
3294         local_irq_disable();
3295         arch_spin_lock(&ftrace_max_lock);
3296         for_each_tracing_cpu(cpu) {
3297                 /*
3298                  * Increase/decrease the disabled counter if we are
3299                  * about to flip a bit in the cpumask:
3300                  */
3301                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3302                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3303                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3304                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3305                 }
3306                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3307                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3308                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3309                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3310                 }
3311         }
3312         arch_spin_unlock(&ftrace_max_lock);
3313         local_irq_enable();
3314
3315         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3316
3317         mutex_unlock(&tracing_cpumask_update_lock);
3318         free_cpumask_var(tracing_cpumask_new);
3319
3320         return count;
3321
3322 err_unlock:
3323         free_cpumask_var(tracing_cpumask_new);
3324
3325         return err;
3326 }
3327
3328 static const struct file_operations tracing_cpumask_fops = {
3329         .open           = tracing_open_generic_tr,
3330         .read           = tracing_cpumask_read,
3331         .write          = tracing_cpumask_write,
3332         .release        = tracing_release_generic_tr,
3333         .llseek         = generic_file_llseek,
3334 };
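
/*
 * tracing_cpumask_write() parses the mask with cpumask_parse_user(), so
 * user space writes a hexadecimal CPU mask. For example, to limit
 * tracing to CPUs 0 and 1:
 *
 *	# echo 3 > tracing_cpumask
 *
 * CPUs cleared from the mask have their per-cpu recording disabled via
 * ring_buffer_record_disable_cpu() as done above.
 */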
3335
3336 static int tracing_trace_options_show(struct seq_file *m, void *v)
3337 {
3338         struct tracer_opt *trace_opts;
3339         struct trace_array *tr = m->private;
3340         u32 tracer_flags;
3341         int i;
3342
3343         mutex_lock(&trace_types_lock);
3344         tracer_flags = tr->current_trace->flags->val;
3345         trace_opts = tr->current_trace->flags->opts;
3346
3347         for (i = 0; trace_options[i]; i++) {
3348                 if (trace_flags & (1 << i))
3349                         seq_printf(m, "%s\n", trace_options[i]);
3350                 else
3351                         seq_printf(m, "no%s\n", trace_options[i]);
3352         }
3353
3354         for (i = 0; trace_opts[i].name; i++) {
3355                 if (tracer_flags & trace_opts[i].bit)
3356                         seq_printf(m, "%s\n", trace_opts[i].name);
3357                 else
3358                         seq_printf(m, "no%s\n", trace_opts[i].name);
3359         }
3360         mutex_unlock(&trace_types_lock);
3361
3362         return 0;
3363 }
3364
3365 static int __set_tracer_option(struct tracer *trace,
3366                                struct tracer_flags *tracer_flags,
3367                                struct tracer_opt *opts, int neg)
3368 {
3369         int ret;
3370
3371         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3372         if (ret)
3373                 return ret;
3374
3375         if (neg)
3376                 tracer_flags->val &= ~opts->bit;
3377         else
3378                 tracer_flags->val |= opts->bit;
3379         return 0;
3380 }
3381
3382 /* Try to assign a tracer specific option */
3383 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3384 {
3385         struct tracer_flags *tracer_flags = trace->flags;
3386         struct tracer_opt *opts = NULL;
3387         int i;
3388
3389         for (i = 0; tracer_flags->opts[i].name; i++) {
3390                 opts = &tracer_flags->opts[i];
3391
3392                 if (strcmp(cmp, opts->name) == 0)
3393                         return __set_tracer_option(trace, trace->flags,
3394                                                    opts, neg);
3395         }
3396
3397         return -EINVAL;
3398 }
3399
3400 /* Some tracers require overwrite to stay enabled */
3401 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3402 {
3403         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3404                 return -1;
3405
3406         return 0;
3407 }
3408
3409 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3410 {
3411         /* do nothing if flag is already set */
3412         if (!!(trace_flags & mask) == !!enabled)
3413                 return 0;
3414
3415         /* Give the tracer a chance to approve the change */
3416         if (tr->current_trace->flag_changed)
3417                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3418                         return -EINVAL;
3419
3420         if (enabled)
3421                 trace_flags |= mask;
3422         else
3423                 trace_flags &= ~mask;
3424
3425         if (mask == TRACE_ITER_RECORD_CMD)
3426                 trace_event_enable_cmd_record(enabled);
3427
3428         if (mask == TRACE_ITER_OVERWRITE) {
3429                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3430 #ifdef CONFIG_TRACER_MAX_TRACE
3431                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3432 #endif
3433         }
3434
3435         if (mask == TRACE_ITER_PRINTK)
3436                 trace_printk_start_stop_comm(enabled);
3437
3438         return 0;
3439 }
3440
3441 static int trace_set_options(struct trace_array *tr, char *option)
3442 {
3443         char *cmp;
3444         int neg = 0;
3445         int ret = -ENODEV;
3446         int i;
3447
3448         cmp = strstrip(option);
3449
3450         if (strncmp(cmp, "no", 2) == 0) {
3451                 neg = 1;
3452                 cmp += 2;
3453         }
3454
3455         mutex_lock(&trace_types_lock);
3456
3457         for (i = 0; trace_options[i]; i++) {
3458                 if (strcmp(cmp, trace_options[i]) == 0) {
3459                         ret = set_tracer_flag(tr, 1 << i, !neg);
3460                         break;
3461                 }
3462         }
3463
3464         /* If no option could be set, test the specific tracer options */
3465         if (!trace_options[i])
3466                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3467
3468         mutex_unlock(&trace_types_lock);
3469
3470         return ret;
3471 }
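
/*
 * A sketch of how the option parsing above is used from the
 * trace_options file, assuming the core "overwrite" option (which maps
 * to TRACE_ITER_OVERWRITE, handled in set_tracer_flag() above):
 *
 *	# echo overwrite > trace_options
 *	# echo nooverwrite > trace_options
 *
 * The second form takes the "no" prefix path in trace_set_options() and
 * clears the flag instead of setting it.
 */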
3472
3473 static ssize_t
3474 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3475                         size_t cnt, loff_t *ppos)
3476 {
3477         struct seq_file *m = filp->private_data;
3478         struct trace_array *tr = m->private;
3479         char buf[64];
3480         int ret;
3481
3482         if (cnt >= sizeof(buf))
3483                 return -EINVAL;
3484
3485         if (copy_from_user(&buf, ubuf, cnt))
3486                 return -EFAULT;
3487
3488         buf[cnt] = 0;
3489
3490         ret = trace_set_options(tr, buf);
3491         if (ret < 0)
3492                 return ret;
3493
3494         *ppos += cnt;
3495
3496         return cnt;
3497 }
3498
3499 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3500 {
3501         struct trace_array *tr = inode->i_private;
3502         int ret;
3503
3504         if (tracing_disabled)
3505                 return -ENODEV;
3506
3507         if (trace_array_get(tr) < 0)
3508                 return -ENODEV;
3509
3510         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3511         if (ret < 0)
3512                 trace_array_put(tr);
3513
3514         return ret;
3515 }
3516
3517 static const struct file_operations tracing_iter_fops = {
3518         .open           = tracing_trace_options_open,
3519         .read           = seq_read,
3520         .llseek         = seq_lseek,
3521         .release        = tracing_single_release_tr,
3522         .write          = tracing_trace_options_write,
3523 };
3524
3525 static const char readme_msg[] =
3526         "tracing mini-HOWTO:\n\n"
3527         "# echo 0 > tracing_on : quick way to disable tracing\n"
3528         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3529         " Important files:\n"
3530         "  trace\t\t\t- The static contents of the buffer\n"
3531         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3532         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3533         "  current_tracer\t- function and latency tracers\n"
3534         "  available_tracers\t- list of configured tracers for current_tracer\n"
3535         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3536         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3537         "  trace_clock\t\t- change the clock used to order events\n"
3538         "       local:   Per cpu clock but may not be synced across CPUs\n"
3539         "      global:   Synced across CPUs but slows tracing down.\n"
3540         "     counter:   Not a clock, but just an increment\n"
3541         "      uptime:   Jiffy counter from time of boot\n"
3542         "        perf:   Same clock that perf events use\n"
3543 #ifdef CONFIG_X86_64
3544         "     x86-tsc:   TSC cycle counter\n"
3545 #endif
3546         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3547         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3548         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3549         "\t\t\t  Remove sub-buffer with rmdir\n"
3550         "  trace_options\t\t- Set format or modify how tracing happens\n"
3551         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3552         "\t\t\t  option name\n"
3553 #ifdef CONFIG_DYNAMIC_FTRACE
3554         "\n  available_filter_functions - list of functions that can be filtered on\n"
3555         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3556         "\t\t\t  functions\n"
3557         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3558         "\t     modules: Can select a group via module\n"
3559         "\t      Format: :mod:<module-name>\n"
3560         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3561         "\t    triggers: a command to perform when function is hit\n"
3562         "\t      Format: <function>:<trigger>[:count]\n"
3563         "\t     trigger: traceon, traceoff\n"
3564         "\t\t      enable_event:<system>:<event>\n"
3565         "\t\t      disable_event:<system>:<event>\n"
3566 #ifdef CONFIG_STACKTRACE
3567         "\t\t      stacktrace\n"
3568 #endif
3569 #ifdef CONFIG_TRACER_SNAPSHOT
3570         "\t\t      snapshot\n"
3571 #endif
3572         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3573         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3574         "\t     The first one will disable tracing every time do_fault is hit\n"
3575         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3576         "\t       The first time do_trap is hit and it disables tracing, the\n"
3577         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3578         "\t       the counter will not decrement. It only decrements when the\n"
3579         "\t       trigger did work\n"
3580         "\t     To remove a trigger without a count:\n"
3581         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3582         "\t     To remove a trigger with a count:\n"
3583         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3584         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3585         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3586         "\t    modules: Can select a group via module command :mod:\n"
3587         "\t    Does not accept triggers\n"
3588 #endif /* CONFIG_DYNAMIC_FTRACE */
3589 #ifdef CONFIG_FUNCTION_TRACER
3590         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3591         "\t\t    (function)\n"
3592 #endif
3593 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3594         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3595         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3596 #endif
3597 #ifdef CONFIG_TRACER_SNAPSHOT
3598         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3599         "\t\t\t  snapshot buffer. Read the contents for more\n"
3600         "\t\t\t  information\n"
3601 #endif
3602 #ifdef CONFIG_STACK_TRACER
3603         "  stack_trace\t\t- Shows the max stack trace when active\n"
3604         "  stack_max_size\t- Shows current max stack size that was traced\n"
3605         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3606         "\t\t\t  new trace)\n"
3607 #ifdef CONFIG_DYNAMIC_FTRACE
3608         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3609         "\t\t\t  traces\n"
3610 #endif
3611 #endif /* CONFIG_STACK_TRACER */
3612         "  events/\t\t- Directory containing all trace event subsystems:\n"
3613         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3614         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3615         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3616         "\t\t\t  events\n"
3617         "      filter\t\t- If set, only events passing filter are traced\n"
3618         "  events/<system>/<event>/\t- Directory containing control files for\n"
3619         "\t\t\t  <event>:\n"
3620         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3621         "      filter\t\t- If set, only events passing filter are traced\n"
3622         "      trigger\t\t- If set, a command to perform when event is hit\n"
3623         "\t    Format: <trigger>[:count][if <filter>]\n"
3624         "\t   trigger: traceon, traceoff\n"
3625         "\t            enable_event:<system>:<event>\n"
3626         "\t            disable_event:<system>:<event>\n"
3627 #ifdef CONFIG_STACKTRACE
3628         "\t\t    stacktrace\n"
3629 #endif
3630 #ifdef CONFIG_TRACER_SNAPSHOT
3631         "\t\t    snapshot\n"
3632 #endif
3633         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3634         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3635         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3636         "\t                  events/block/block_unplug/trigger\n"
3637         "\t   The first disables tracing every time block_unplug is hit.\n"
3638         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3639         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3640         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3641         "\t   Like function triggers, the counter is only decremented if it\n"
3642         "\t    enabled or disabled tracing.\n"
3643         "\t   To remove a trigger without a count:\n"
3644         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3645         "\t   To remove a trigger with a count:\n"
3646         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3647         "\t   Filters can be ignored when removing a trigger.\n"
3648 ;
3649
3650 static ssize_t
3651 tracing_readme_read(struct file *filp, char __user *ubuf,
3652                        size_t cnt, loff_t *ppos)
3653 {
3654         return simple_read_from_buffer(ubuf, cnt, ppos,
3655                                         readme_msg, strlen(readme_msg));
3656 }
3657
3658 static const struct file_operations tracing_readme_fops = {
3659         .open           = tracing_open_generic,
3660         .read           = tracing_readme_read,
3661         .llseek         = generic_file_llseek,
3662 };
3663
3664 static ssize_t
3665 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3666                                 size_t cnt, loff_t *ppos)
3667 {
3668         char *buf_comm;
3669         char *file_buf;
3670         char *buf;
3671         int len = 0;
3672         int pid;
3673         int i;
3674
3675         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3676         if (!file_buf)
3677                 return -ENOMEM;
3678
3679         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3680         if (!buf_comm) {
3681                 kfree(file_buf);
3682                 return -ENOMEM;
3683         }
3684
3685         buf = file_buf;
3686
3687         for (i = 0; i < SAVED_CMDLINES; i++) {
3688                 int r;
3689
3690                 pid = map_cmdline_to_pid[i];
3691                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3692                         continue;
3693
3694                 trace_find_cmdline(pid, buf_comm);
3695                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3696                 buf += r;
3697                 len += r;
3698         }
3699
3700         len = simple_read_from_buffer(ubuf, cnt, ppos,
3701                                       file_buf, len);
3702
3703         kfree(file_buf);
3704         kfree(buf_comm);
3705
3706         return len;
3707 }
3708
3709 static const struct file_operations tracing_saved_cmdlines_fops = {
3710         .open           = tracing_open_generic,
3711         .read           = tracing_saved_cmdlines_read,
3712         .llseek         = generic_file_llseek,
3713 };
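
/*
 * Each line of the saved_cmdlines file pairs a recorded pid with its
 * comm, using the "%d %s" format built above. The exact contents depend
 * on what has been traced, but the output looks like:
 *
 *	# cat saved_cmdlines
 *	1 systemd
 *	2 kthreadd
 */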
3714
3715 static ssize_t
3716 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3717                        size_t cnt, loff_t *ppos)
3718 {
3719         struct trace_array *tr = filp->private_data;
3720         char buf[MAX_TRACER_SIZE+2];
3721         int r;
3722
3723         mutex_lock(&trace_types_lock);
3724         r = sprintf(buf, "%s\n", tr->current_trace->name);
3725         mutex_unlock(&trace_types_lock);
3726
3727         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3728 }
3729
3730 int tracer_init(struct tracer *t, struct trace_array *tr)
3731 {
3732         tracing_reset_online_cpus(&tr->trace_buffer);
3733         return t->init(tr);
3734 }
3735
3736 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3737 {
3738         int cpu;
3739
3740         for_each_tracing_cpu(cpu)
3741                 per_cpu_ptr(buf->data, cpu)->entries = val;
3742 }
3743
3744 #ifdef CONFIG_TRACER_MAX_TRACE
3745 /* resize @trace_buf's entries to match the size of @size_buf's entries */
3746 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3747                                         struct trace_buffer *size_buf, int cpu_id)
3748 {
3749         int cpu, ret = 0;
3750
3751         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3752                 for_each_tracing_cpu(cpu) {
3753                         ret = ring_buffer_resize(trace_buf->buffer,
3754                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3755                         if (ret < 0)
3756                                 break;
3757                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3758                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3759                 }
3760         } else {
3761                 ret = ring_buffer_resize(trace_buf->buffer,
3762                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3763                 if (ret == 0)
3764                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3765                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3766         }
3767
3768         return ret;
3769 }
3770 #endif /* CONFIG_TRACER_MAX_TRACE */
3771
3772 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3773                                         unsigned long size, int cpu)
3774 {
3775         int ret;
3776
3777         /*
3778          * If kernel or user changes the size of the ring buffer
3779          * we use the size that was given, and we can forget about
3780          * expanding it later.
3781          */
3782         ring_buffer_expanded = true;
3783
3784         /* May be called before buffers are initialized */
3785         if (!tr->trace_buffer.buffer)
3786                 return 0;
3787
3788         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3789         if (ret < 0)
3790                 return ret;
3791
3792 #ifdef CONFIG_TRACER_MAX_TRACE
3793         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3794             !tr->current_trace->use_max_tr)
3795                 goto out;
3796
3797         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3798         if (ret < 0) {
3799                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3800                                                      &tr->trace_buffer, cpu);
3801                 if (r < 0) {
3802                         /*
3803                          * AARGH! We are left with different
3804                          * size max buffer!!!!
3805                          * The max buffer is our "snapshot" buffer.
3806                          * When a tracer needs a snapshot (one of the
3807                          * latency tracers), it swaps the max buffer
3808                          * with the saved snapshot. We succeeded in
3809                          * updating the size of the main buffer, but failed to
3810                          * update the size of the max buffer. But when we tried
3811                          * to reset the main buffer to the original size, we
3812                          * failed there too. This is very unlikely to
3813                          * happen, but if it does, warn and kill all
3814                          * tracing.
3815                          */
3816                         WARN_ON(1);
3817                         tracing_disabled = 1;
3818                 }
3819                 return ret;
3820         }
3821
3822         if (cpu == RING_BUFFER_ALL_CPUS)
3823                 set_buffer_entries(&tr->max_buffer, size);
3824         else
3825                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3826
3827  out:
3828 #endif /* CONFIG_TRACER_MAX_TRACE */
3829
3830         if (cpu == RING_BUFFER_ALL_CPUS)
3831                 set_buffer_entries(&tr->trace_buffer, size);
3832         else
3833                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3834
3835         return ret;
3836 }
3837
3838 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3839                                           unsigned long size, int cpu_id)
3840 {
3841         int ret = size;
3842
3843         mutex_lock(&trace_types_lock);
3844
3845         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3846                 /* make sure this cpu is enabled in the mask */
3847                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3848                         ret = -EINVAL;
3849                         goto out;
3850                 }
3851         }
3852
3853         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3854         if (ret < 0)
3855                 ret = -ENOMEM;
3856
3857 out:
3858         mutex_unlock(&trace_types_lock);
3859
3860         return ret;
3861 }
3862
3863
3864 /**
3865  * tracing_update_buffers - used by tracing facility to expand ring buffers
3866  *
3867  * To save memory when tracing is never used on a system that has it
3868  * configured in, the ring buffers are set to a minimum size. Once
3869  * a user starts to use the tracing facility, they need to grow
3870  * to their default size.
3871  *
3872  * This function is to be called when a tracer is about to be used.
3873  */
3874 int tracing_update_buffers(void)
3875 {
3876         int ret = 0;
3877
3878         mutex_lock(&trace_types_lock);
3879         if (!ring_buffer_expanded)
3880                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3881                                                 RING_BUFFER_ALL_CPUS);
3882         mutex_unlock(&trace_types_lock);
3883
3884         return ret;
3885 }
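
/*
 * A typical caller expands the buffers right before it is about to make
 * use of tracing, as tracing_snapshot_write() further below does:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */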
3886
3887 struct trace_option_dentry;
3888
3889 static struct trace_option_dentry *
3890 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3891
3892 static void
3893 destroy_trace_option_files(struct trace_option_dentry *topts);
3894
3895 static int tracing_set_tracer(const char *buf)
3896 {
3897         static struct trace_option_dentry *topts;
3898         struct trace_array *tr = &global_trace;
3899         struct tracer *t;
3900 #ifdef CONFIG_TRACER_MAX_TRACE
3901         bool had_max_tr;
3902 #endif
3903         int ret = 0;
3904
3905         mutex_lock(&trace_types_lock);
3906
3907         if (!ring_buffer_expanded) {
3908                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3909                                                 RING_BUFFER_ALL_CPUS);
3910                 if (ret < 0)
3911                         goto out;
3912                 ret = 0;
3913         }
3914
3915         for (t = trace_types; t; t = t->next) {
3916                 if (strcmp(t->name, buf) == 0)
3917                         break;
3918         }
3919         if (!t) {
3920                 ret = -EINVAL;
3921                 goto out;
3922         }
3923         if (t == tr->current_trace)
3924                 goto out;
3925
3926         trace_branch_disable();
3927
3928         tr->current_trace->enabled = false;
3929
3930         if (tr->current_trace->reset)
3931                 tr->current_trace->reset(tr);
3932
3933         /* Current trace needs to be nop_trace before synchronize_sched */
3934         tr->current_trace = &nop_trace;
3935
3936 #ifdef CONFIG_TRACER_MAX_TRACE
3937         had_max_tr = tr->allocated_snapshot;
3938
3939         if (had_max_tr && !t->use_max_tr) {
3940                 /*
3941                  * We need to make sure that the update_max_tr sees that
3942                  * current_trace changed to nop_trace to keep it from
3943                  * swapping the buffers after we resize it.
3944                  * The update_max_tr is called with interrupts disabled,
3945                  * so a synchronize_sched() is sufficient.
3946                  */
3947                 synchronize_sched();
3948                 free_snapshot(tr);
3949         }
3950 #endif
3951         destroy_trace_option_files(topts);
3952
3953         topts = create_trace_option_files(tr, t);
3954
3955 #ifdef CONFIG_TRACER_MAX_TRACE
3956         if (t->use_max_tr && !had_max_tr) {
3957                 ret = alloc_snapshot(tr);
3958                 if (ret < 0)
3959                         goto out;
3960         }
3961 #endif
3962
3963         if (t->init) {
3964                 ret = tracer_init(t, tr);
3965                 if (ret)
3966                         goto out;
3967         }
3968
3969         tr->current_trace = t;
3970         tr->current_trace->enabled = true;
3971         trace_branch_enable(tr);
3972  out:
3973         mutex_unlock(&trace_types_lock);
3974
3975         return ret;
3976 }
3977
3978 static ssize_t
3979 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3980                         size_t cnt, loff_t *ppos)
3981 {
3982         char buf[MAX_TRACER_SIZE+1];
3983         int i;
3984         size_t ret;
3985         int err;
3986
3987         ret = cnt;
3988
3989         if (cnt > MAX_TRACER_SIZE)
3990                 cnt = MAX_TRACER_SIZE;
3991
3992         if (copy_from_user(&buf, ubuf, cnt))
3993                 return -EFAULT;
3994
3995         buf[cnt] = 0;
3996
3997         /* strip ending whitespace. */
3998         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3999                 buf[i] = 0;
4000
4001         err = tracing_set_tracer(buf);
4002         if (err)
4003                 return err;
4004
4005         *ppos += ret;
4006
4007         return ret;
4008 }
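
/*
 * From user space, switching tracers comes down to writing a name into
 * the current_tracer file. Assuming the function tracer is configured
 * in, and using the nop tracer referenced above to turn tracing back
 * into a no-op:
 *
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 *
 * Trailing whitespace (such as the newline echo appends) is stripped by
 * tracing_set_trace_write() before the name is looked up.
 */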
4009
4010 static ssize_t
4011 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4012                      size_t cnt, loff_t *ppos)
4013 {
4014         unsigned long *ptr = filp->private_data;
4015         char buf[64];
4016         int r;
4017
4018         r = snprintf(buf, sizeof(buf), "%ld\n",
4019                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4020         if (r > sizeof(buf))
4021                 r = sizeof(buf);
4022         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4023 }
4024
4025 static ssize_t
4026 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4027                       size_t cnt, loff_t *ppos)
4028 {
4029         unsigned long *ptr = filp->private_data;
4030         unsigned long val;
4031         int ret;
4032
4033         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4034         if (ret)
4035                 return ret;
4036
4037         *ptr = val * 1000;
4038
4039         return cnt;
4040 }
4041
4042 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4043 {
4044         struct trace_array *tr = inode->i_private;
4045         struct trace_iterator *iter;
4046         int ret = 0;
4047
4048         if (tracing_disabled)
4049                 return -ENODEV;
4050
4051         if (trace_array_get(tr) < 0)
4052                 return -ENODEV;
4053
4054         mutex_lock(&trace_types_lock);
4055
4056         /* create a buffer to store the information to pass to userspace */
4057         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4058         if (!iter) {
4059                 ret = -ENOMEM;
4060                 __trace_array_put(tr);
4061                 goto out;
4062         }
4063
4064         /*
4065          * We make a copy of the current tracer to avoid concurrent
4066          * changes on it while we are reading.
4067          */
4068         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4069         if (!iter->trace) {
4070                 ret = -ENOMEM;
4071                 goto fail;
4072         }
4073         *iter->trace = *tr->current_trace;
4074
4075         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4076                 ret = -ENOMEM;
4077                 goto fail;
4078         }
4079
4080         /* trace pipe does not show start of buffer */
4081         cpumask_setall(iter->started);
4082
4083         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4084                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4085
4086         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4087         if (trace_clocks[tr->clock_id].in_ns)
4088                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4089
4090         iter->tr = tr;
4091         iter->trace_buffer = &tr->trace_buffer;
4092         iter->cpu_file = tracing_get_cpu(inode);
4093         mutex_init(&iter->mutex);
4094         filp->private_data = iter;
4095
4096         if (iter->trace->pipe_open)
4097                 iter->trace->pipe_open(iter);
4098
4099         nonseekable_open(inode, filp);
4100 out:
4101         mutex_unlock(&trace_types_lock);
4102         return ret;
4103
4104 fail:
4105         kfree(iter->trace);
4106         kfree(iter);
4107         __trace_array_put(tr);
4108         mutex_unlock(&trace_types_lock);
4109         return ret;
4110 }
4111
4112 static int tracing_release_pipe(struct inode *inode, struct file *file)
4113 {
4114         struct trace_iterator *iter = file->private_data;
4115         struct trace_array *tr = inode->i_private;
4116
4117         mutex_lock(&trace_types_lock);
4118
4119         if (iter->trace->pipe_close)
4120                 iter->trace->pipe_close(iter);
4121
4122         mutex_unlock(&trace_types_lock);
4123
4124         free_cpumask_var(iter->started);
4125         mutex_destroy(&iter->mutex);
4126         kfree(iter->trace);
4127         kfree(iter);
4128
4129         trace_array_put(tr);
4130
4131         return 0;
4132 }
4133
4134 static unsigned int
4135 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4136 {
4137         /* Iterators are static, they should be filled or empty */
4138         if (trace_buffer_iter(iter, iter->cpu_file))
4139                 return POLLIN | POLLRDNORM;
4140
4141         if (trace_flags & TRACE_ITER_BLOCK)
4142                 /*
4143                  * Always select as readable when in blocking mode
4144                  */
4145                 return POLLIN | POLLRDNORM;
4146         else
4147                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4148                                              filp, poll_table);
4149 }
4150
4151 static unsigned int
4152 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4153 {
4154         struct trace_iterator *iter = filp->private_data;
4155
4156         return trace_poll(iter, filp, poll_table);
4157 }
4158
4159 /*
4160  * This is a makeshift waitqueue.
4161  * A tracer might use this callback in some rare cases:
4162  *
4163  *  1) the current tracer might hold the runqueue lock when it wakes up
4164  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4165  *  2) the function tracers trace all functions, and we don't want
4166  *     the overhead of calling wake_up and friends
4167  *     (and tracing them too)
4168  *
4169  *     Anyway, this is a really primitive wakeup.
4170  */
4171 int poll_wait_pipe(struct trace_iterator *iter)
4172 {
4173         set_current_state(TASK_INTERRUPTIBLE);
4174         /* sleep for 100 msecs, and try again. */
4175         schedule_timeout(HZ / 10);
4176         return 0;
4177 }
4178
4179 /* Must be called with trace_types_lock mutex held. */
4180 static int tracing_wait_pipe(struct file *filp)
4181 {
4182         struct trace_iterator *iter = filp->private_data;
4183         int ret;
4184
4185         while (trace_empty(iter)) {
4186
4187                 if ((filp->f_flags & O_NONBLOCK)) {
4188                         return -EAGAIN;
4189                 }
4190
4191                 mutex_unlock(&iter->mutex);
4192
4193                 ret = iter->trace->wait_pipe(iter);
4194
4195                 mutex_lock(&iter->mutex);
4196
4197                 if (ret)
4198                         return ret;
4199
4200                 if (signal_pending(current))
4201                         return -EINTR;
4202
4203                 /*
4204                  * We only stop blocking once we have read something and
4205                  * tracing has been disabled. If tracing is disabled but we
4206                  * have never read anything, we keep blocking. This allows a
4207                  * user to cat this file, and then enable tracing. But after
4208                  * we have read something, we give an EOF once tracing is disabled.
4209                  *
4210                  * iter->pos will be 0 if we haven't read anything.
4211                  */
4212                 if (!tracing_is_on() && iter->pos)
4213                         break;
4214         }
4215
4216         return 1;
4217 }
4218
4219 /*
4220  * Consumer reader.
4221  */
4222 static ssize_t
4223 tracing_read_pipe(struct file *filp, char __user *ubuf,
4224                   size_t cnt, loff_t *ppos)
4225 {
4226         struct trace_iterator *iter = filp->private_data;
4227         struct trace_array *tr = iter->tr;
4228         ssize_t sret;
4229
4230         /* return any leftover data */
4231         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4232         if (sret != -EBUSY)
4233                 return sret;
4234
4235         trace_seq_init(&iter->seq);
4236
4237         /* copy the tracer to avoid using a global lock all around */
4238         mutex_lock(&trace_types_lock);
4239         if (unlikely(iter->trace->name != tr->current_trace->name))
4240                 *iter->trace = *tr->current_trace;
4241         mutex_unlock(&trace_types_lock);
4242
4243         /*
4244          * Avoid more than one consumer on a single file descriptor.
4245          * This is just a matter of trace coherency; the ring buffer itself
4246          * is protected.
4247          */
4248         mutex_lock(&iter->mutex);
4249         if (iter->trace->read) {
4250                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4251                 if (sret)
4252                         goto out;
4253         }
4254
4255 waitagain:
4256         sret = tracing_wait_pipe(filp);
4257         if (sret <= 0)
4258                 goto out;
4259
4260         /* stop when tracing is finished */
4261         if (trace_empty(iter)) {
4262                 sret = 0;
4263                 goto out;
4264         }
4265
4266         if (cnt >= PAGE_SIZE)
4267                 cnt = PAGE_SIZE - 1;
4268
4269         /* reset all but tr, trace, and overruns */
4270         memset(&iter->seq, 0,
4271                sizeof(struct trace_iterator) -
4272                offsetof(struct trace_iterator, seq));
4273         cpumask_clear(iter->started);
4274         iter->pos = -1;
4275
4276         trace_event_read_lock();
4277         trace_access_lock(iter->cpu_file);
4278         while (trace_find_next_entry_inc(iter) != NULL) {
4279                 enum print_line_t ret;
4280                 int len = iter->seq.len;
4281
4282                 ret = print_trace_line(iter);
4283                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4284                         /* don't print partial lines */
4285                         iter->seq.len = len;
4286                         break;
4287                 }
4288                 if (ret != TRACE_TYPE_NO_CONSUME)
4289                         trace_consume(iter);
4290
4291                 if (iter->seq.len >= cnt)
4292                         break;
4293
4294                 /*
4295                  * Setting the full flag means we reached the trace_seq buffer
4296                  * size and should have left via the partial-line condition above.
4297                  * One of the trace_seq_* functions is not used properly.
4298                  */
4299                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4300                           iter->ent->type);
4301         }
4302         trace_access_unlock(iter->cpu_file);
4303         trace_event_read_unlock();
4304
4305         /* Now copy what we have to the user */
4306         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4307         if (iter->seq.readpos >= iter->seq.len)
4308                 trace_seq_init(&iter->seq);
4309
4310         /*
4311          * If there was nothing to send to user, in spite of consuming trace
4312          * entries, go back to wait for more entries.
4313          */
4314         if (sret == -EBUSY)
4315                 goto waitagain;
4316
4317 out:
4318         mutex_unlock(&iter->mutex);
4319
4320         return sret;
4321 }
4322
4323 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4324                                      unsigned int idx)
4325 {
4326         __free_page(spd->pages[idx]);
4327 }
4328
4329 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4330         .can_merge              = 0,
4331         .map                    = generic_pipe_buf_map,
4332         .unmap                  = generic_pipe_buf_unmap,
4333         .confirm                = generic_pipe_buf_confirm,
4334         .release                = generic_pipe_buf_release,
4335         .steal                  = generic_pipe_buf_steal,
4336         .get                    = generic_pipe_buf_get,
4337 };
4338
4339 static size_t
4340 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4341 {
4342         size_t count;
4343         int ret;
4344
4345         /* Seq buffer is page-sized, exactly what we need. */
4346         for (;;) {
4347                 count = iter->seq.len;
4348                 ret = print_trace_line(iter);
4349                 count = iter->seq.len - count;
4350                 if (rem < count) {
4351                         rem = 0;
4352                         iter->seq.len -= count;
4353                         break;
4354                 }
4355                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4356                         iter->seq.len -= count;
4357                         break;
4358                 }
4359
4360                 if (ret != TRACE_TYPE_NO_CONSUME)
4361                         trace_consume(iter);
4362                 rem -= count;
4363                 if (!trace_find_next_entry_inc(iter)) {
4364                         rem = 0;
4365                         iter->ent = NULL;
4366                         break;
4367                 }
4368         }
4369
4370         return rem;
4371 }
4372
4373 static ssize_t tracing_splice_read_pipe(struct file *filp,
4374                                         loff_t *ppos,
4375                                         struct pipe_inode_info *pipe,
4376                                         size_t len,
4377                                         unsigned int flags)
4378 {
4379         struct page *pages_def[PIPE_DEF_BUFFERS];
4380         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4381         struct trace_iterator *iter = filp->private_data;
4382         struct splice_pipe_desc spd = {
4383                 .pages          = pages_def,
4384                 .partial        = partial_def,
4385                 .nr_pages       = 0, /* This gets updated below. */
4386                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4387                 .flags          = flags,
4388                 .ops            = &tracing_pipe_buf_ops,
4389                 .spd_release    = tracing_spd_release_pipe,
4390         };
4391         struct trace_array *tr = iter->tr;
4392         ssize_t ret;
4393         size_t rem;
4394         unsigned int i;
4395
4396         if (splice_grow_spd(pipe, &spd))
4397                 return -ENOMEM;
4398
4399         /* copy the tracer to avoid using a global lock all around */
4400         mutex_lock(&trace_types_lock);
4401         if (unlikely(iter->trace->name != tr->current_trace->name))
4402                 *iter->trace = *tr->current_trace;
4403         mutex_unlock(&trace_types_lock);
4404
4405         mutex_lock(&iter->mutex);
4406
4407         if (iter->trace->splice_read) {
4408                 ret = iter->trace->splice_read(iter, filp,
4409                                                ppos, pipe, len, flags);
4410                 if (ret)
4411                         goto out_err;
4412         }
4413
4414         ret = tracing_wait_pipe(filp);
4415         if (ret <= 0)
4416                 goto out_err;
4417
4418         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4419                 ret = -EFAULT;
4420                 goto out_err;
4421         }
4422
4423         trace_event_read_lock();
4424         trace_access_lock(iter->cpu_file);
4425
4426         /* Fill as many pages as possible. */
4427         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4428                 spd.pages[i] = alloc_page(GFP_KERNEL);
4429                 if (!spd.pages[i])
4430                         break;
4431
4432                 rem = tracing_fill_pipe_page(rem, iter);
4433
4434                 /* Copy the data into the page, so we can start over. */
4435                 ret = trace_seq_to_buffer(&iter->seq,
4436                                           page_address(spd.pages[i]),
4437                                           iter->seq.len);
4438                 if (ret < 0) {
4439                         __free_page(spd.pages[i]);
4440                         break;
4441                 }
4442                 spd.partial[i].offset = 0;
4443                 spd.partial[i].len = iter->seq.len;
4444
4445                 trace_seq_init(&iter->seq);
4446         }
4447
4448         trace_access_unlock(iter->cpu_file);
4449         trace_event_read_unlock();
4450         mutex_unlock(&iter->mutex);
4451
4452         spd.nr_pages = i;
4453
4454         ret = splice_to_pipe(pipe, &spd);
4455 out:
4456         splice_shrink_spd(&spd);
4457         return ret;
4458
4459 out_err:
4460         mutex_unlock(&iter->mutex);
4461         goto out;
4462 }
4463
4464 static ssize_t
4465 tracing_entries_read(struct file *filp, char __user *ubuf,
4466                      size_t cnt, loff_t *ppos)
4467 {
4468         struct inode *inode = file_inode(filp);
4469         struct trace_array *tr = inode->i_private;
4470         int cpu = tracing_get_cpu(inode);
4471         char buf[64];
4472         int r = 0;
4473         ssize_t ret;
4474
4475         mutex_lock(&trace_types_lock);
4476
4477         if (cpu == RING_BUFFER_ALL_CPUS) {
4478                 int cpu, buf_size_same;
4479                 unsigned long size;
4480
4481                 size = 0;
4482                 buf_size_same = 1;
4483                 /* check if all cpu sizes are the same */
4484                 for_each_tracing_cpu(cpu) {
4485                         /* fill in the size from the first enabled cpu */
4486                         if (size == 0)
4487                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4488                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4489                                 buf_size_same = 0;
4490                                 break;
4491                         }
4492                 }
4493
4494                 if (buf_size_same) {
4495                         if (!ring_buffer_expanded)
4496                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4497                                             size >> 10,
4498                                             trace_buf_size >> 10);
4499                         else
4500                                 r = sprintf(buf, "%lu\n", size >> 10);
4501                 } else
4502                         r = sprintf(buf, "X\n");
4503         } else
4504                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4505
4506         mutex_unlock(&trace_types_lock);
4507
4508         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4509         return ret;
4510 }
4511
4512 static ssize_t
4513 tracing_entries_write(struct file *filp, const char __user *ubuf,
4514                       size_t cnt, loff_t *ppos)
4515 {
4516         struct inode *inode = file_inode(filp);
4517         struct trace_array *tr = inode->i_private;
4518         unsigned long val;
4519         int ret;
4520
4521         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4522         if (ret)
4523                 return ret;
4524
4525         /* must have at least 1 entry */
4526         if (!val)
4527                 return -EINVAL;
4528
4529         /* value is in KB */
4530         val <<= 10;
4531         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4532         if (ret < 0)
4533                 return ret;
4534
4535         *ppos += cnt;
4536
4537         return cnt;
4538 }
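
/*
 * The value written here is in kilobytes and is applied per CPU;
 * tracing_get_cpu() above decides whether it targets one CPU or all of
 * them. For example, to give each CPU a 1 MB buffer:
 *
 *	# echo 1024 > buffer_size_kb
 */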
4539
4540 static ssize_t
4541 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4542                                 size_t cnt, loff_t *ppos)
4543 {
4544         struct trace_array *tr = filp->private_data;
4545         char buf[64];
4546         int r, cpu;
4547         unsigned long size = 0, expanded_size = 0;
4548
4549         mutex_lock(&trace_types_lock);
4550         for_each_tracing_cpu(cpu) {
4551                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4552                 if (!ring_buffer_expanded)
4553                         expanded_size += trace_buf_size >> 10;
4554         }
4555         if (ring_buffer_expanded)
4556                 r = sprintf(buf, "%lu\n", size);
4557         else
4558                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4559         mutex_unlock(&trace_types_lock);
4560
4561         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4562 }
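
/*
 * This backs the buffer_total_size_kb file from the mini-HOWTO above:
 * it reports the sum of all per-cpu buffer sizes in KB, with an
 * "(expanded: ...)" suffix while the buffers are still at their
 * boot-time minimum. The exact figure depends on the CPU count and the
 * configured buffer size; a possible reading is:
 *
 *	# cat buffer_total_size_kb
 *	5632
 */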
4563
4564 static ssize_t
4565 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4566                           size_t cnt, loff_t *ppos)
4567 {
4568         /*
4569          * There is no need to read what the user has written; this function
4570          * exists only so that "echo" into the file does not return an error
4571          */
4572
4573         *ppos += cnt;
4574
4575         return cnt;
4576 }
4577
4578 static int
4579 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4580 {
4581         struct trace_array *tr = inode->i_private;
4582
4583         /* disable tracing ? */
4584         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4585                 tracer_tracing_off(tr);
4586         /* resize the ring buffer to 0 */
4587         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4588
4589         trace_array_put(tr);
4590
4591         return 0;
4592 }
4593
4594 static ssize_t
4595 tracing_mark_write(struct file *filp, const char __user *ubuf,
4596                                         size_t cnt, loff_t *fpos)
4597 {
4598         unsigned long addr = (unsigned long)ubuf;
4599         struct trace_array *tr = filp->private_data;
4600         struct ring_buffer_event *event;
4601         struct ring_buffer *buffer;
4602         struct print_entry *entry;
4603         unsigned long irq_flags;
4604         struct page *pages[2];
4605         void *map_page[2];
4606         int nr_pages = 1;
4607         ssize_t written;
4608         int offset;
4609         int size;
4610         int len;
4611         int ret;
4612         int i;
4613
4614         if (tracing_disabled)
4615                 return -EINVAL;
4616
4617         if (!(trace_flags & TRACE_ITER_MARKERS))
4618                 return -EINVAL;
4619
4620         if (cnt > TRACE_BUF_SIZE)
4621                 cnt = TRACE_BUF_SIZE;
4622
4623         /*
4624          * Userspace is injecting traces into the kernel trace buffer.
4625          * We want to be as non-intrusive as possible.
4626          * To do so, we do not want to allocate any special buffers
4627          * or take any locks, but instead write the userspace data
4628          * straight into the ring buffer.
4629          *
4630          * First we need to pin the userspace buffer into memory. It is
4631          * most likely already resident, because userspace just referenced
4632          * it, but there is no guarantee. By using get_user_pages_fast()
4633          * and kmap_atomic/kunmap_atomic() we can get access to the
4634          * pages directly. We then write the data directly into the
4635          * ring buffer.
4636          */
4637         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4638
4639         /* check if we cross pages */
4640         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4641                 nr_pages = 2;
4642
4643         offset = addr & (PAGE_SIZE - 1);
4644         addr &= PAGE_MASK;
4645
4646         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4647         if (ret < nr_pages) {
4648                 while (--ret >= 0)
4649                         put_page(pages[ret]);
4650                 written = -EFAULT;
4651                 goto out;
4652         }
4653
4654         for (i = 0; i < nr_pages; i++)
4655                 map_page[i] = kmap_atomic(pages[i]);
4656
4657         local_save_flags(irq_flags);
4658         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4659         buffer = tr->trace_buffer.buffer;
4660         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4661                                           irq_flags, preempt_count());
4662         if (!event) {
4663                 /* Ring buffer disabled, return as if not open for write */
4664                 written = -EBADF;
4665                 goto out_unlock;
4666         }
4667
4668         entry = ring_buffer_event_data(event);
4669         entry->ip = _THIS_IP_;
4670
4671         if (nr_pages == 2) {
4672                 len = PAGE_SIZE - offset;
4673                 memcpy(&entry->buf, map_page[0] + offset, len);
4674                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4675         } else
4676                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4677
4678         if (entry->buf[cnt - 1] != '\n') {
4679                 entry->buf[cnt] = '\n';
4680                 entry->buf[cnt + 1] = '\0';
4681         } else
4682                 entry->buf[cnt] = '\0';
4683
4684         __buffer_unlock_commit(buffer, event);
4685
4686         written = cnt;
4687
4688         *fpos += written;
4689
4690  out_unlock:
4691         for (i = 0; i < nr_pages; i++) {
4692                 kunmap_atomic(map_page[i]);
4693                 put_page(pages[i]);
4694         }
4695  out:
4696         return written;
4697 }
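
/*
 * This handler sits behind the trace_marker file, so, with the tracing
 * directory mounted in its usual debugfs location, user space can
 * annotate the trace with something like:
 *
 *	# echo "hello world" > /sys/kernel/debug/tracing/trace_marker
 *
 * The write is capped at TRACE_BUF_SIZE bytes and a newline is appended
 * if the message does not already end in one.
 */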
4698
4699 static int tracing_clock_show(struct seq_file *m, void *v)
4700 {
4701         struct trace_array *tr = m->private;
4702         int i;
4703
4704         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4705                 seq_printf(m,
4706                         "%s%s%s%s", i ? " " : "",
4707                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4708                         i == tr->clock_id ? "]" : "");
4709         seq_putc(m, '\n');
4710
4711         return 0;
4712 }
4713
4714 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4715                                    size_t cnt, loff_t *fpos)
4716 {
4717         struct seq_file *m = filp->private_data;
4718         struct trace_array *tr = m->private;
4719         char buf[64];
4720         const char *clockstr;
4721         int i;
4722
4723         if (cnt >= sizeof(buf))
4724                 return -EINVAL;
4725
4726         if (copy_from_user(&buf, ubuf, cnt))
4727                 return -EFAULT;
4728
4729         buf[cnt] = 0;
4730
4731         clockstr = strstrip(buf);
4732
4733         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4734                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4735                         break;
4736         }
4737         if (i == ARRAY_SIZE(trace_clocks))
4738                 return -EINVAL;
4739
4740         mutex_lock(&trace_types_lock);
4741
4742         tr->clock_id = i;
4743
4744         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4745
4746         /*
4747          * New clock may not be consistent with the previous clock.
4748          * Reset the buffer so that it doesn't have incomparable timestamps.
4749          */
4750         tracing_reset_online_cpus(&tr->trace_buffer);
4751
4752 #ifdef CONFIG_TRACER_MAX_TRACE
4753         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4754                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4755         tracing_reset_online_cpus(&tr->max_buffer);
4756 #endif
4757
4758         mutex_unlock(&trace_types_lock);
4759
4760         *fpos += cnt;
4761
4762         return cnt;
4763 }
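
/*
 * Selecting a clock from user space, for example to get timestamps that
 * are comparable across CPUs (the "global" clock from the mini-HOWTO
 * above):
 *
 *	# echo global > trace_clock
 *
 * As the comment above notes, the buffers are reset on a clock change so
 * that timestamps taken with different clocks are never mixed.
 */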
4764
4765 static int tracing_clock_open(struct inode *inode, struct file *file)
4766 {
4767         struct trace_array *tr = inode->i_private;
4768         int ret;
4769
4770         if (tracing_disabled)
4771                 return -ENODEV;
4772
4773         if (trace_array_get(tr))
4774                 return -ENODEV;
4775
4776         ret = single_open(file, tracing_clock_show, inode->i_private);
4777         if (ret < 0)
4778                 trace_array_put(tr);
4779
4780         return ret;
4781 }
4782
4783 struct ftrace_buffer_info {
4784         struct trace_iterator   iter;
4785         void                    *spare;
4786         unsigned int            read;
4787 };
4788
4789 #ifdef CONFIG_TRACER_SNAPSHOT
4790 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4791 {
4792         struct trace_array *tr = inode->i_private;
4793         struct trace_iterator *iter;
4794         struct seq_file *m;
4795         int ret = 0;
4796
4797         if (trace_array_get(tr) < 0)
4798                 return -ENODEV;
4799
4800         if (file->f_mode & FMODE_READ) {
4801                 iter = __tracing_open(inode, file, true);
4802                 if (IS_ERR(iter))
4803                         ret = PTR_ERR(iter);
4804         } else {
4805                 /* Writes still need the seq_file to hold the private data */
4806                 ret = -ENOMEM;
4807                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4808                 if (!m)
4809                         goto out;
4810                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4811                 if (!iter) {
4812                         kfree(m);
4813                         goto out;
4814                 }
4815                 ret = 0;
4816
4817                 iter->tr = tr;
4818                 iter->trace_buffer = &tr->max_buffer;
4819                 iter->cpu_file = tracing_get_cpu(inode);
4820                 m->private = iter;
4821                 file->private_data = m;
4822         }
4823 out:
4824         if (ret < 0)
4825                 trace_array_put(tr);
4826
4827         return ret;
4828 }
4829
4830 static ssize_t
4831 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4832                        loff_t *ppos)
4833 {
4834         struct seq_file *m = filp->private_data;
4835         struct trace_iterator *iter = m->private;
4836         struct trace_array *tr = iter->tr;
4837         unsigned long val;
4838         int ret;
4839
4840         ret = tracing_update_buffers();
4841         if (ret < 0)
4842                 return ret;
4843
4844         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4845         if (ret)
4846                 return ret;
4847
4848         mutex_lock(&trace_types_lock);
4849
4850         if (tr->current_trace->use_max_tr) {
4851                 ret = -EBUSY;
4852                 goto out;
4853         }
4854
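        /*
         * Writing 0 frees the snapshot buffer, writing 1 allocates it (if
         * needed) and takes a snapshot, and any other value simply clears
         * the snapshot buffer contents.
         */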
4855         switch (val) {
4856         case 0:
4857                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4858                         ret = -EINVAL;
4859                         break;
4860                 }
4861                 if (tr->allocated_snapshot)
4862                         free_snapshot(tr);
4863                 break;
4864         case 1:
4865 /* Only allow per-cpu swap if the ring buffer supports it */
4866 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4867                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4868                         ret = -EINVAL;
4869                         break;
4870                 }
4871 #endif
4872                 if (!tr->allocated_snapshot) {
4873                         ret = alloc_snapshot(tr);
4874                         if (ret < 0)
4875                                 break;
4876                 }
4877                 local_irq_disable();
4878                 /* Now, we're going to swap */
4879                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4880                         update_max_tr(tr, current, smp_processor_id());
4881                 else
4882                         update_max_tr_single(tr, current, iter->cpu_file);
4883                 local_irq_enable();
4884                 break;
4885         default:
4886                 if (tr->allocated_snapshot) {
4887                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4888                                 tracing_reset_online_cpus(&tr->max_buffer);
4889                         else
4890                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4891                 }
4892                 break;
4893         }
4894
4895         if (ret >= 0) {
4896                 *ppos += cnt;
4897                 ret = cnt;
4898         }
4899 out:
4900         mutex_unlock(&trace_types_lock);
4901         return ret;
4902 }
4903
4904 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4905 {
4906         struct seq_file *m = file->private_data;
4907         int ret;
4908
4909         ret = tracing_release(inode, file);
4910
4911         if (file->f_mode & FMODE_READ)
4912                 return ret;
4913
4914         /* If write only, the seq_file is just a stub */
4915         if (m)
4916                 kfree(m->private);
4917         kfree(m);
4918
4919         return 0;
4920 }
4921
4922 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4923 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4924                                     size_t count, loff_t *ppos);
4925 static int tracing_buffers_release(struct inode *inode, struct file *file);
4926 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4927                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4928
4929 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4930 {
4931         struct ftrace_buffer_info *info;
4932         int ret;
4933
4934         ret = tracing_buffers_open(inode, filp);
4935         if (ret < 0)
4936                 return ret;
4937
4938         info = filp->private_data;
4939
4940         if (info->iter.trace->use_max_tr) {
4941                 tracing_buffers_release(inode, filp);
4942                 return -EBUSY;
4943         }
4944
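        /* Direct reads at the snapshot (max) buffer instead of the live buffer */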
4945         info->iter.snapshot = true;
4946         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4947
4948         return ret;
4949 }
4950
4951 #endif /* CONFIG_TRACER_SNAPSHOT */
4952
4953
4954 static const struct file_operations tracing_max_lat_fops = {
4955         .open           = tracing_open_generic,
4956         .read           = tracing_max_lat_read,
4957         .write          = tracing_max_lat_write,
4958         .llseek         = generic_file_llseek,
4959 };
4960
4961 static const struct file_operations set_tracer_fops = {
4962         .open           = tracing_open_generic,
4963         .read           = tracing_set_trace_read,
4964         .write          = tracing_set_trace_write,
4965         .llseek         = generic_file_llseek,
4966 };
4967
4968 static const struct file_operations tracing_pipe_fops = {
4969         .open           = tracing_open_pipe,
4970         .poll           = tracing_poll_pipe,
4971         .read           = tracing_read_pipe,
4972         .splice_read    = tracing_splice_read_pipe,
4973         .release        = tracing_release_pipe,
4974         .llseek         = no_llseek,
4975 };
4976
4977 static const struct file_operations tracing_entries_fops = {
4978         .open           = tracing_open_generic_tr,
4979         .read           = tracing_entries_read,
4980         .write          = tracing_entries_write,
4981         .llseek         = generic_file_llseek,
4982         .release        = tracing_release_generic_tr,
4983 };
4984
4985 static const struct file_operations tracing_total_entries_fops = {
4986         .open           = tracing_open_generic_tr,
4987         .read           = tracing_total_entries_read,
4988         .llseek         = generic_file_llseek,
4989         .release        = tracing_release_generic_tr,
4990 };
4991
4992 static const struct file_operations tracing_free_buffer_fops = {
4993         .open           = tracing_open_generic_tr,
4994         .write          = tracing_free_buffer_write,
4995         .release        = tracing_free_buffer_release,
4996 };
4997
4998 static const struct file_operations tracing_mark_fops = {
4999         .open           = tracing_open_generic_tr,
5000         .write          = tracing_mark_write,
5001         .llseek         = generic_file_llseek,
5002         .release        = tracing_release_generic_tr,
5003 };
5004
5005 static const struct file_operations trace_clock_fops = {
5006         .open           = tracing_clock_open,
5007         .read           = seq_read,
5008         .llseek         = seq_lseek,
5009         .release        = tracing_single_release_tr,
5010         .write          = tracing_clock_write,
5011 };
5012
5013 #ifdef CONFIG_TRACER_SNAPSHOT
5014 static const struct file_operations snapshot_fops = {
5015         .open           = tracing_snapshot_open,
5016         .read           = seq_read,
5017         .write          = tracing_snapshot_write,
5018         .llseek         = tracing_lseek,
5019         .release        = tracing_snapshot_release,
5020 };
5021
5022 static const struct file_operations snapshot_raw_fops = {
5023         .open           = snapshot_raw_open,
5024         .read           = tracing_buffers_read,
5025         .release        = tracing_buffers_release,
5026         .splice_read    = tracing_buffers_splice_read,
5027         .llseek         = no_llseek,
5028 };
5029
5030 #endif /* CONFIG_TRACER_SNAPSHOT */
5031
5032 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5033 {
5034         struct trace_array *tr = inode->i_private;
5035         struct ftrace_buffer_info *info;
5036         int ret;
5037
5038         if (tracing_disabled)
5039                 return -ENODEV;
5040
5041         if (trace_array_get(tr) < 0)
5042                 return -ENODEV;
5043
5044         info = kzalloc(sizeof(*info), GFP_KERNEL);
5045         if (!info) {
5046                 trace_array_put(tr);
5047                 return -ENOMEM;
5048         }
5049
5050         mutex_lock(&trace_types_lock);
5051
5052         info->iter.tr           = tr;
5053         info->iter.cpu_file     = tracing_get_cpu(inode);
5054         info->iter.trace        = tr->current_trace;
5055         info->iter.trace_buffer = &tr->trace_buffer;
5056         info->spare             = NULL;
5057         /* Force reading ring buffer for first read */
5058         info->read              = (unsigned int)-1;
5059
5060         filp->private_data = info;
5061
5062         mutex_unlock(&trace_types_lock);
5063
5064         ret = nonseekable_open(inode, filp);
5065         if (ret < 0)
5066                 trace_array_put(tr);
5067
5068         return ret;
5069 }
5070
5071 static unsigned int
5072 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5073 {
5074         struct ftrace_buffer_info *info = filp->private_data;
5075         struct trace_iterator *iter = &info->iter;
5076
5077         return trace_poll(iter, filp, poll_table);
5078 }
5079
5080 static ssize_t
5081 tracing_buffers_read(struct file *filp, char __user *ubuf,
5082                      size_t count, loff_t *ppos)
5083 {
5084         struct ftrace_buffer_info *info = filp->private_data;
5085         struct trace_iterator *iter = &info->iter;
5086         ssize_t ret;
5087         ssize_t size;
5088
5089         if (!count)
5090                 return 0;
5091
5092         mutex_lock(&trace_types_lock);
5093
5094 #ifdef CONFIG_TRACER_MAX_TRACE
5095         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5096                 size = -EBUSY;
5097                 goto out_unlock;
5098         }
5099 #endif
5100
5101         if (!info->spare)
5102                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5103                                                           iter->cpu_file);
5104         size = -ENOMEM;
5105         if (!info->spare)
5106                 goto out_unlock;
5107
5108         /* Do we have previous read data to read? */
5109         if (info->read < PAGE_SIZE)
5110                 goto read;
5111
5112  again:
5113         trace_access_lock(iter->cpu_file);
5114         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5115                                     &info->spare,
5116                                     count,
5117                                     iter->cpu_file, 0);
5118         trace_access_unlock(iter->cpu_file);
5119
5120         if (ret < 0) {
5121                 if (trace_empty(iter)) {
5122                         if ((filp->f_flags & O_NONBLOCK)) {
5123                                 size = -EAGAIN;
5124                                 goto out_unlock;
5125                         }
5126                         mutex_unlock(&trace_types_lock);
5127                         ret = iter->trace->wait_pipe(iter);
5128                         mutex_lock(&trace_types_lock);
5129                         if (ret) {
5130                                 size = ret;
5131                                 goto out_unlock;
5132                         }
5133                         if (signal_pending(current)) {
5134                                 size = -EINTR;
5135                                 goto out_unlock;
5136                         }
5137                         goto again;
5138                 }
5139                 size = 0;
5140                 goto out_unlock;
5141         }
5142
5143         info->read = 0;
5144  read:
5145         size = PAGE_SIZE - info->read;
5146         if (size > count)
5147                 size = count;
5148
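        /* copy_to_user() returns the number of bytes it could not copy */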
5149         ret = copy_to_user(ubuf, info->spare + info->read, size);
5150         if (ret == size) {
5151                 size = -EFAULT;
5152                 goto out_unlock;
5153         }
5154         size -= ret;
5155
5156         *ppos += size;
5157         info->read += size;
5158
5159  out_unlock:
5160         mutex_unlock(&trace_types_lock);
5161
5162         return size;
5163 }
5164
5165 static int tracing_buffers_release(struct inode *inode, struct file *file)
5166 {
5167         struct ftrace_buffer_info *info = file->private_data;
5168         struct trace_iterator *iter = &info->iter;
5169
5170         mutex_lock(&trace_types_lock);
5171
5172         __trace_array_put(iter->tr);
5173
5174         if (info->spare)
5175                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5176         kfree(info);
5177
5178         mutex_unlock(&trace_types_lock);
5179
5180         return 0;
5181 }
5182
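/*
 * A reference to a ring-buffer read page that has been handed to a pipe.
 * The page is returned to the ring buffer only when the last pipe buffer
 * holding it drops its reference (see buffer_pipe_buf_release() and
 * buffer_spd_release() below).
 */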
5183 struct buffer_ref {
5184         struct ring_buffer      *buffer;
5185         void                    *page;
5186         int                     ref;
5187 };
5188
5189 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5190                                     struct pipe_buffer *buf)
5191 {
5192         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5193
5194         if (--ref->ref)
5195                 return;
5196
5197         ring_buffer_free_read_page(ref->buffer, ref->page);
5198         kfree(ref);
5199         buf->private = 0;
5200 }
5201
5202 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5203                                 struct pipe_buffer *buf)
5204 {
5205         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5206
5207         ref->ref++;
5208 }
5209
5210 /* Pipe buffer operations for a buffer. */
5211 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5212         .can_merge              = 0,
5213         .map                    = generic_pipe_buf_map,
5214         .unmap                  = generic_pipe_buf_unmap,
5215         .confirm                = generic_pipe_buf_confirm,
5216         .release                = buffer_pipe_buf_release,
5217         .steal                  = generic_pipe_buf_steal,
5218         .get                    = buffer_pipe_buf_get,
5219 };
5220
5221 /*
5222  * Callback from splice_to_pipe(): release the pages still referenced
5223  * by the spd in case we errored out while filling the pipe.
5224  */
5225 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5226 {
5227         struct buffer_ref *ref =
5228                 (struct buffer_ref *)spd->partial[i].private;
5229
5230         if (--ref->ref)
5231                 return;
5232
5233         ring_buffer_free_read_page(ref->buffer, ref->page);
5234         kfree(ref);
5235         spd->partial[i].private = 0;
5236 }
5237
5238 static ssize_t
5239 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5240                             struct pipe_inode_info *pipe, size_t len,
5241                             unsigned int flags)
5242 {
5243         struct ftrace_buffer_info *info = file->private_data;
5244         struct trace_iterator *iter = &info->iter;
5245         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5246         struct page *pages_def[PIPE_DEF_BUFFERS];
5247         struct splice_pipe_desc spd = {
5248                 .pages          = pages_def,
5249                 .partial        = partial_def,
5250                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5251                 .flags          = flags,
5252                 .ops            = &buffer_pipe_buf_ops,
5253                 .spd_release    = buffer_spd_release,
5254         };
5255         struct buffer_ref *ref;
5256         int entries, size, i;
5257         ssize_t ret;
5258
5259         mutex_lock(&trace_types_lock);
5260
5261 #ifdef CONFIG_TRACER_MAX_TRACE
5262         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5263                 ret = -EBUSY;
5264                 goto out;
5265         }
5266 #endif
5267
5268         if (splice_grow_spd(pipe, &spd)) {
5269                 ret = -ENOMEM;
5270                 goto out;
5271         }
5272
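        /*
         * The raw ring-buffer data is spliced out in whole pages, so the
         * offset must be page aligned and the length is rounded down to a
         * multiple of the page size (asking for less than a page is invalid).
         */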
5273         if (*ppos & (PAGE_SIZE - 1)) {
5274                 ret = -EINVAL;
5275                 goto out;
5276         }
5277
5278         if (len & (PAGE_SIZE - 1)) {
5279                 if (len < PAGE_SIZE) {
5280                         ret = -EINVAL;
5281                         goto out;
5282                 }
5283                 len &= PAGE_MASK;
5284         }
5285
5286  again:
5287         trace_access_lock(iter->cpu_file);
5288         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5289
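        /*
         * Pull out up to one ring-buffer page per pipe buffer, stopping when
         * the pipe is full, the request is satisfied or the buffer runs empty.
         */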
5290         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5291                 struct page *page;
5292                 int r;
5293
5294                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5295                 if (!ref)
5296                         break;
5297
5298                 ref->ref = 1;
5299                 ref->buffer = iter->trace_buffer->buffer;
5300                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5301                 if (!ref->page) {
5302                         kfree(ref);
5303                         break;
5304                 }
5305
5306                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5307                                           len, iter->cpu_file, 1);
5308                 if (r < 0) {
5309                         ring_buffer_free_read_page(ref->buffer, ref->page);
5310                         kfree(ref);
5311                         break;
5312                 }
5313
5314                 /*
5315                  * Zero out any leftover data in the page, since it
5316                  * is going out to user land.
5317                  */
5318                 size = ring_buffer_page_len(ref->page);
5319                 if (size < PAGE_SIZE)
5320                         memset(ref->page + size, 0, PAGE_SIZE - size);
5321
5322                 page = virt_to_page(ref->page);
5323
5324                 spd.pages[i] = page;
5325                 spd.partial[i].len = PAGE_SIZE;
5326                 spd.partial[i].offset = 0;
5327                 spd.partial[i].private = (unsigned long)ref;
5328                 spd.nr_pages++;
5329                 *ppos += PAGE_SIZE;
5330
5331                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5332         }
5333
5334         trace_access_unlock(iter->cpu_file);
5335         spd.nr_pages = i;
5336
5337         /* did we read anything? */
5338         if (!spd.nr_pages) {
5339                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5340                         ret = -EAGAIN;
5341                         goto out;
5342                 }
5343                 mutex_unlock(&trace_types_lock);
5344                 ret = iter->trace->wait_pipe(iter);
5345                 mutex_lock(&trace_types_lock);
5346                 if (ret)
5347                         goto out;
5348                 if (signal_pending(current)) {
5349                         ret = -EINTR;
5350                         goto out;
5351                 }
5352                 goto again;
5353         }
5354
5355         ret = splice_to_pipe(pipe, &spd);
5356         splice_shrink_spd(&spd);
5357 out:
5358         mutex_unlock(&trace_types_lock);
5359
5360         return ret;
5361 }
5362
5363 static const struct file_operations tracing_buffers_fops = {
5364         .open           = tracing_buffers_open,
5365         .read           = tracing_buffers_read,
5366         .poll           = tracing_buffers_poll,
5367         .release        = tracing_buffers_release,
5368         .splice_read    = tracing_buffers_splice_read,
5369         .llseek         = no_llseek,
5370 };
5371
5372 static ssize_t
5373 tracing_stats_read(struct file *filp, char __user *ubuf,
5374                    size_t count, loff_t *ppos)
5375 {
5376         struct inode *inode = file_inode(filp);
5377         struct trace_array *tr = inode->i_private;
5378         struct trace_buffer *trace_buf = &tr->trace_buffer;
5379         int cpu = tracing_get_cpu(inode);
5380         struct trace_seq *s;
5381         unsigned long cnt;
5382         unsigned long long t;
5383         unsigned long usec_rem;
5384
5385         s = kmalloc(sizeof(*s), GFP_KERNEL);
5386         if (!s)
5387                 return -ENOMEM;
5388
5389         trace_seq_init(s);
5390
5391         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5392         trace_seq_printf(s, "entries: %ld\n", cnt);
5393
5394         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5395         trace_seq_printf(s, "overrun: %ld\n", cnt);
5396
5397         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5398         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5399
5400         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5401         trace_seq_printf(s, "bytes: %ld\n", cnt);
5402
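        /*
         * For nanosecond clocks the timestamps below are split into seconds
         * and microseconds; do_div() divides in place and returns the
         * remainder.
         */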
5403         if (trace_clocks[tr->clock_id].in_ns) {
5404                 /* local or global for trace_clock */
5405                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5406                 usec_rem = do_div(t, USEC_PER_SEC);
5407                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5408                                                                 t, usec_rem);
5409
5410                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5411                 usec_rem = do_div(t, USEC_PER_SEC);
5412                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5413         } else {
5414                 /* counter or tsc mode for trace_clock */
5415                 trace_seq_printf(s, "oldest event ts: %llu\n",
5416                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5417
5418                 trace_seq_printf(s, "now ts: %llu\n",
5419                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5420         }
5421
5422         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5423         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5424
5425         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5426         trace_seq_printf(s, "read events: %ld\n", cnt);
5427
5428         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5429
5430         kfree(s);
5431
5432         return count;
5433 }
5434
5435 static const struct file_operations tracing_stats_fops = {
5436         .open           = tracing_open_generic_tr,
5437         .read           = tracing_stats_read,
5438         .llseek         = generic_file_llseek,
5439         .release        = tracing_release_generic_tr,
5440 };
5441
5442 #ifdef CONFIG_DYNAMIC_FTRACE
5443
5444 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5445 {
5446         return 0;
5447 }
5448
5449 static ssize_t
5450 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5451                   size_t cnt, loff_t *ppos)
5452 {
5453         static char ftrace_dyn_info_buffer[1024];
5454         static DEFINE_MUTEX(dyn_info_mutex);
5455         unsigned long *p = filp->private_data;
5456         char *buf = ftrace_dyn_info_buffer;
5457         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5458         int r;
5459
5460         mutex_lock(&dyn_info_mutex);
5461         r = sprintf(buf, "%ld ", *p);
5462
5463         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5464         buf[r++] = '\n';
5465
5466         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5467
5468         mutex_unlock(&dyn_info_mutex);
5469
5470         return r;
5471 }
5472
5473 static const struct file_operations tracing_dyn_info_fops = {
5474         .open           = tracing_open_generic,
5475         .read           = tracing_read_dyn_info,
5476         .llseek         = generic_file_llseek,
5477 };
5478 #endif /* CONFIG_DYNAMIC_FTRACE */
5479
5480 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5481 static void
5482 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5483 {
5484         tracing_snapshot();
5485 }
5486
5487 static void
5488 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5489 {
5490         unsigned long *count = (unsigned long *)data;
5491
5492         if (!*count)
5493                 return;
5494
5495         if (*count != -1)
5496                 (*count)--;
5497
5498         tracing_snapshot();
5499 }
5500
5501 static int
5502 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5503                       struct ftrace_probe_ops *ops, void *data)
5504 {
5505         long count = (long)data;
5506
5507         seq_printf(m, "%ps:", (void *)ip);
5508
5509         seq_printf(m, "snapshot");
5510
5511         if (count == -1)
5512                 seq_printf(m, ":unlimited\n");
5513         else
5514                 seq_printf(m, ":count=%ld\n", count);
5515
5516         return 0;
5517 }
5518
5519 static struct ftrace_probe_ops snapshot_probe_ops = {
5520         .func                   = ftrace_snapshot,
5521         .print                  = ftrace_snapshot_print,
5522 };
5523
5524 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5525         .func                   = ftrace_count_snapshot,
5526         .print                  = ftrace_snapshot_print,
5527 };
5528
5529 static int
5530 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5531                                char *glob, char *cmd, char *param, int enable)
5532 {
5533         struct ftrace_probe_ops *ops;
5534         void *count = (void *)-1;
5535         char *number;
5536         int ret;
5537
5538         /* hash funcs only work with set_ftrace_filter */
5539         if (!enable)
5540                 return -EINVAL;
5541
5542         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5543
5544         if (glob[0] == '!') {
5545                 unregister_ftrace_function_probe_func(glob+1, ops);
5546                 return 0;
5547         }
5548
5549         if (!param)
5550                 goto out_reg;
5551
5552         number = strsep(&param, ":");
5553
5554         if (!strlen(number))
5555                 goto out_reg;
5556
5557         /*
5558          * We use the callback data field (which is a pointer)
5559          * as our counter.
5560          */
5561         ret = kstrtoul(number, 0, (unsigned long *)&count);
5562         if (ret)
5563                 return ret;
5564
5565  out_reg:
5566         ret = register_ftrace_function_probe(glob, ops, count);
5567
5568         if (ret >= 0)
5569                 alloc_snapshot(&global_trace);
5570
5571         return ret < 0 ? ret : 0;
5572 }
5573
5574 static struct ftrace_func_command ftrace_snapshot_cmd = {
5575         .name                   = "snapshot",
5576         .func                   = ftrace_trace_snapshot_callback,
5577 };
5578
5579 static __init int register_snapshot_cmd(void)
5580 {
5581         return register_ftrace_command(&ftrace_snapshot_cmd);
5582 }
5583 #else
5584 static inline __init int register_snapshot_cmd(void) { return 0; }
5585 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5586
5587 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5588 {
5589         if (tr->dir)
5590                 return tr->dir;
5591
5592         if (!debugfs_initialized())
5593                 return NULL;
5594
5595         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5596                 tr->dir = debugfs_create_dir("tracing", NULL);
5597
5598         if (!tr->dir)
5599                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5600
5601         return tr->dir;
5602 }
5603
5604 struct dentry *tracing_init_dentry(void)
5605 {
5606         return tracing_init_dentry_tr(&global_trace);
5607 }
5608
5609 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5610 {
5611         struct dentry *d_tracer;
5612
5613         if (tr->percpu_dir)
5614                 return tr->percpu_dir;
5615
5616         d_tracer = tracing_init_dentry_tr(tr);
5617         if (!d_tracer)
5618                 return NULL;
5619
5620         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5621
5622         WARN_ONCE(!tr->percpu_dir,
5623                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5624
5625         return tr->percpu_dir;
5626 }
5627
5628 static struct dentry *
5629 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5630                       void *data, long cpu, const struct file_operations *fops)
5631 {
5632         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5633
5634         if (ret) /* See tracing_get_cpu() */
5635                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5636         return ret;
5637 }
5638
5639 static void
5640 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5641 {
5642         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5643         struct dentry *d_cpu;
5644         char cpu_dir[30]; /* 30 characters should be more than enough */
5645
5646         if (!d_percpu)
5647                 return;
5648
5649         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5650         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5651         if (!d_cpu) {
5652                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5653                 return;
5654         }
5655
5656         /* per cpu trace_pipe */
5657         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5658                                 tr, cpu, &tracing_pipe_fops);
5659
5660         /* per cpu trace */
5661         trace_create_cpu_file("trace", 0644, d_cpu,
5662                                 tr, cpu, &tracing_fops);
5663
5664         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5665                                 tr, cpu, &tracing_buffers_fops);
5666
5667         trace_create_cpu_file("stats", 0444, d_cpu,
5668                                 tr, cpu, &tracing_stats_fops);
5669
5670         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5671                                 tr, cpu, &tracing_entries_fops);
5672
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5674         trace_create_cpu_file("snapshot", 0644, d_cpu,
5675                                 tr, cpu, &snapshot_fops);
5676
5677         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5678                                 tr, cpu, &snapshot_raw_fops);
5679 #endif
5680 }
5681
5682 #ifdef CONFIG_FTRACE_SELFTEST
5683 /* Let selftest have access to static functions in this file */
5684 #include "trace_selftest.c"
5685 #endif
5686
5687 struct trace_option_dentry {
5688         struct tracer_opt               *opt;
5689         struct tracer_flags             *flags;
5690         struct trace_array              *tr;
5691         struct dentry                   *entry;
5692 };
5693
5694 static ssize_t
5695 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5696                         loff_t *ppos)
5697 {
5698         struct trace_option_dentry *topt = filp->private_data;
5699         char *buf;
5700
5701         if (topt->flags->val & topt->opt->bit)
5702                 buf = "1\n";
5703         else
5704                 buf = "0\n";
5705
5706         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5707 }
5708
5709 static ssize_t
5710 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5711                          loff_t *ppos)
5712 {
5713         struct trace_option_dentry *topt = filp->private_data;
5714         unsigned long val;
5715         int ret;
5716
5717         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5718         if (ret)
5719                 return ret;
5720
5721         if (val != 0 && val != 1)
5722                 return -EINVAL;
5723
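        /* Only call into the tracer when the option value actually changes */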
5724         if (!!(topt->flags->val & topt->opt->bit) != val) {
5725                 mutex_lock(&trace_types_lock);
5726                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5727                                           topt->opt, !val);
5728                 mutex_unlock(&trace_types_lock);
5729                 if (ret)
5730                         return ret;
5731         }
5732
5733         *ppos += cnt;
5734
5735         return cnt;
5736 }
5737
5738
5739 static const struct file_operations trace_options_fops = {
5740         .open = tracing_open_generic,
5741         .read = trace_options_read,
5742         .write = trace_options_write,
5743         .llseek = generic_file_llseek,
5744 };
5745
5746 static ssize_t
5747 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5748                         loff_t *ppos)
5749 {
5750         long index = (long)filp->private_data;
5751         char *buf;
5752
5753         if (trace_flags & (1 << index))
5754                 buf = "1\n";
5755         else
5756                 buf = "0\n";
5757
5758         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5759 }
5760
5761 static ssize_t
5762 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5763                          loff_t *ppos)
5764 {
5765         struct trace_array *tr = &global_trace;
5766         long index = (long)filp->private_data;
5767         unsigned long val;
5768         int ret;
5769
5770         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5771         if (ret)
5772                 return ret;
5773
5774         if (val != 0 && val != 1)
5775                 return -EINVAL;
5776
5777         mutex_lock(&trace_types_lock);
5778         ret = set_tracer_flag(tr, 1 << index, val);
5779         mutex_unlock(&trace_types_lock);
5780
5781         if (ret < 0)
5782                 return ret;
5783
5784         *ppos += cnt;
5785
5786         return cnt;
5787 }
5788
5789 static const struct file_operations trace_options_core_fops = {
5790         .open = tracing_open_generic,
5791         .read = trace_options_core_read,
5792         .write = trace_options_core_write,
5793         .llseek = generic_file_llseek,
5794 };
5795
5796 struct dentry *trace_create_file(const char *name,
5797                                  umode_t mode,
5798                                  struct dentry *parent,
5799                                  void *data,
5800                                  const struct file_operations *fops)
5801 {
5802         struct dentry *ret;
5803
5804         ret = debugfs_create_file(name, mode, parent, data, fops);
5805         if (!ret)
5806                 pr_warning("Could not create debugfs '%s' entry\n", name);
5807
5808         return ret;
5809 }
5810
5811
5812 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5813 {
5814         struct dentry *d_tracer;
5815
5816         if (tr->options)
5817                 return tr->options;
5818
5819         d_tracer = tracing_init_dentry_tr(tr);
5820         if (!d_tracer)
5821                 return NULL;
5822
5823         tr->options = debugfs_create_dir("options", d_tracer);
5824         if (!tr->options) {
5825                 pr_warning("Could not create debugfs directory 'options'\n");
5826                 return NULL;
5827         }
5828
5829         return tr->options;
5830 }
5831
5832 static void
5833 create_trace_option_file(struct trace_array *tr,
5834                          struct trace_option_dentry *topt,
5835                          struct tracer_flags *flags,
5836                          struct tracer_opt *opt)
5837 {
5838         struct dentry *t_options;
5839
5840         t_options = trace_options_init_dentry(tr);
5841         if (!t_options)
5842                 return;
5843
5844         topt->flags = flags;
5845         topt->opt = opt;
5846         topt->tr = tr;
5847
5848         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5849                                     &trace_options_fops);
5850
5851 }
5852
5853 static struct trace_option_dentry *
5854 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5855 {
5856         struct trace_option_dentry *topts;
5857         struct tracer_flags *flags;
5858         struct tracer_opt *opts;
5859         int cnt;
5860
5861         if (!tracer)
5862                 return NULL;
5863
5864         flags = tracer->flags;
5865
5866         if (!flags || !flags->opts)
5867                 return NULL;
5868
5869         opts = flags->opts;
5870
5871         for (cnt = 0; opts[cnt].name; cnt++)
5872                 ;
5873
5874         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5875         if (!topts)
5876                 return NULL;
5877
5878         for (cnt = 0; opts[cnt].name; cnt++)
5879                 create_trace_option_file(tr, &topts[cnt], flags,
5880                                          &opts[cnt]);
5881
5882         return topts;
5883 }
5884
5885 static void
5886 destroy_trace_option_files(struct trace_option_dentry *topts)
5887 {
5888         int cnt;
5889
5890         if (!topts)
5891                 return;
5892
5893         for (cnt = 0; topts[cnt].opt; cnt++) {
5894                 if (topts[cnt].entry)
5895                         debugfs_remove(topts[cnt].entry);
5896         }
5897
5898         kfree(topts);
5899 }
5900
5901 static struct dentry *
5902 create_trace_option_core_file(struct trace_array *tr,
5903                               const char *option, long index)
5904 {
5905         struct dentry *t_options;
5906
5907         t_options = trace_options_init_dentry(tr);
5908         if (!t_options)
5909                 return NULL;
5910
5911         return trace_create_file(option, 0644, t_options, (void *)index,
5912                                     &trace_options_core_fops);
5913 }
5914
5915 static __init void create_trace_options_dir(struct trace_array *tr)
5916 {
5917         struct dentry *t_options;
5918         int i;
5919
5920         t_options = trace_options_init_dentry(tr);
5921         if (!t_options)
5922                 return;
5923
5924         for (i = 0; trace_options[i]; i++)
5925                 create_trace_option_core_file(tr, trace_options[i], i);
5926 }
5927
5928 static ssize_t
5929 rb_simple_read(struct file *filp, char __user *ubuf,
5930                size_t cnt, loff_t *ppos)
5931 {
5932         struct trace_array *tr = filp->private_data;
5933         char buf[64];
5934         int r;
5935
5936         r = tracer_tracing_is_on(tr);
5937         r = sprintf(buf, "%d\n", r);
5938
5939         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5940 }
5941
5942 static ssize_t
5943 rb_simple_write(struct file *filp, const char __user *ubuf,
5944                 size_t cnt, loff_t *ppos)
5945 {
5946         struct trace_array *tr = filp->private_data;
5947         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5948         unsigned long val;
5949         int ret;
5950
5951         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5952         if (ret)
5953                 return ret;
5954
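        /* A non-zero value turns the ring buffer (and tracer) on, zero turns it off */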
5955         if (buffer) {
5956                 mutex_lock(&trace_types_lock);
5957                 if (val) {
5958                         tracer_tracing_on(tr);
5959                         if (tr->current_trace->start)
5960                                 tr->current_trace->start(tr);
5961                 } else {
5962                         tracer_tracing_off(tr);
5963                         if (tr->current_trace->stop)
5964                                 tr->current_trace->stop(tr);
5965                 }
5966                 mutex_unlock(&trace_types_lock);
5967         }
5968
5969         (*ppos)++;
5970
5971         return cnt;
5972 }
5973
5974 static const struct file_operations rb_simple_fops = {
5975         .open           = tracing_open_generic_tr,
5976         .read           = rb_simple_read,
5977         .write          = rb_simple_write,
5978         .release        = tracing_release_generic_tr,
5979         .llseek         = default_llseek,
5980 };
5981
5982 struct dentry *trace_instance_dir;
5983
5984 static void
5985 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5986
5987 static int
5988 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5989 {
5990         enum ring_buffer_flags rb_flags;
5991
5992         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5993
5994         buf->tr = tr;
5995
5996         buf->buffer = ring_buffer_alloc(size, rb_flags);
5997         if (!buf->buffer)
5998                 return -ENOMEM;
5999
6000         buf->data = alloc_percpu(struct trace_array_cpu);
6001         if (!buf->data) {
6002                 ring_buffer_free(buf->buffer);
6003                 return -ENOMEM;
6004         }
6005
6006         /* Allocate the first page for all buffers */
6007         set_buffer_entries(&tr->trace_buffer,
6008                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6009
6010         return 0;
6011 }
6012
6013 static int allocate_trace_buffers(struct trace_array *tr, int size)
6014 {
6015         int ret;
6016
6017         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6018         if (ret)
6019                 return ret;
6020
6021 #ifdef CONFIG_TRACER_MAX_TRACE
6022         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6023                                     allocate_snapshot ? size : 1);
6024         if (WARN_ON(ret)) {
6025                 ring_buffer_free(tr->trace_buffer.buffer);
6026                 free_percpu(tr->trace_buffer.data);
6027                 return -ENOMEM;
6028         }
6029         tr->allocated_snapshot = allocate_snapshot;
6030
6031         /*
6032          * Only the top level trace array gets its snapshot allocated
6033          * from the kernel command line.
6034          */
6035         allocate_snapshot = false;
6036 #endif
6037         return 0;
6038 }
6039
6040 static int new_instance_create(const char *name)
6041 {
6042         struct trace_array *tr;
6043         int ret;
6044
6045         mutex_lock(&trace_types_lock);
6046
6047         ret = -EEXIST;
6048         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6049                 if (tr->name && strcmp(tr->name, name) == 0)
6050                         goto out_unlock;
6051         }
6052
6053         ret = -ENOMEM;
6054         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6055         if (!tr)
6056                 goto out_unlock;
6057
6058         tr->name = kstrdup(name, GFP_KERNEL);
6059         if (!tr->name)
6060                 goto out_free_tr;
6061
6062         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6063                 goto out_free_tr;
6064
6065         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6066
6067         raw_spin_lock_init(&tr->start_lock);
6068
6069         tr->current_trace = &nop_trace;
6070
6071         INIT_LIST_HEAD(&tr->systems);
6072         INIT_LIST_HEAD(&tr->events);
6073
6074         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6075                 goto out_free_tr;
6076
6077         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6078         if (!tr->dir)
6079                 goto out_free_tr;
6080
6081         ret = event_trace_add_tracer(tr->dir, tr);
6082         if (ret) {
6083                 debugfs_remove_recursive(tr->dir);
6084                 goto out_free_tr;
6085         }
6086
6087         init_tracer_debugfs(tr, tr->dir);
6088
6089         list_add(&tr->list, &ftrace_trace_arrays);
6090
6091         mutex_unlock(&trace_types_lock);
6092
6093         return 0;
6094
6095  out_free_tr:
6096         if (tr->trace_buffer.buffer)
6097                 ring_buffer_free(tr->trace_buffer.buffer);
6098         free_cpumask_var(tr->tracing_cpumask);
6099         kfree(tr->name);
6100         kfree(tr);
6101
6102  out_unlock:
6103         mutex_unlock(&trace_types_lock);
6104
6105         return ret;
6106
6107 }
6108
6109 static int instance_delete(const char *name)
6110 {
6111         struct trace_array *tr;
6112         int found = 0;
6113         int ret;
6114
6115         mutex_lock(&trace_types_lock);
6116
6117         ret = -ENODEV;
6118         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6119                 if (tr->name && strcmp(tr->name, name) == 0) {
6120                         found = 1;
6121                         break;
6122                 }
6123         }
6124         if (!found)
6125                 goto out_unlock;
6126
6127         ret = -EBUSY;
6128         if (tr->ref)
6129                 goto out_unlock;
6130
6131         list_del(&tr->list);
6132
6133         event_trace_del_tracer(tr);
6134         debugfs_remove_recursive(tr->dir);
6135         free_percpu(tr->trace_buffer.data);
6136         ring_buffer_free(tr->trace_buffer.buffer);
6137
6138         kfree(tr->name);
6139         kfree(tr);
6140
6141         ret = 0;
6142
6143  out_unlock:
6144         mutex_unlock(&trace_types_lock);
6145
6146         return ret;
6147 }
6148
6149 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6150 {
6151         struct dentry *parent;
6152         int ret;
6153
6154         /* Paranoid: Make sure the parent is the "instances" directory */
6155         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6156         if (WARN_ON_ONCE(parent != trace_instance_dir))
6157                 return -ENOENT;
6158
6159         /*
6160          * The inode mutex is locked, but debugfs_create_dir() will also
6161          * take the mutex. As the instances directory can not be destroyed
6162          * or changed in any other way, it is safe to unlock it, and
6163          * let the dentry try. If two users try to make the same dir at
6164          * the same time, then the new_instance_create() will determine the
6165          * winner.
6166          */
6167         mutex_unlock(&inode->i_mutex);
6168
6169         ret = new_instance_create(dentry->d_iname);
6170
6171         mutex_lock(&inode->i_mutex);
6172
6173         return ret;
6174 }
6175
6176 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6177 {
6178         struct dentry *parent;
6179         int ret;
6180
6181         /* Paranoid: Make sure the parent is the "instances" directory */
6182         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6183         if (WARN_ON_ONCE(parent != trace_instance_dir))
6184                 return -ENOENT;
6185
6186         /* The caller did a dget() on dentry */
6187         mutex_unlock(&dentry->d_inode->i_mutex);
6188
6189         /*
6190          * The inode mutex is locked, but debugfs_remove_recursive() will
6191          * also take the mutex. As the instances directory can not be
6192          * destroyed or changed in any other way, it is safe to unlock it,
6193          * and let the dentry try. If two users try to remove the same dir
6194          * at the same time, then instance_delete() will determine the
6195          * winner.
6196          */
6197         mutex_unlock(&inode->i_mutex);
6198
6199         ret = instance_delete(dentry->d_iname);
6200
6201         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6202         mutex_lock(&dentry->d_inode->i_mutex);
6203
6204         return ret;
6205 }
6206
6207 static const struct inode_operations instance_dir_inode_operations = {
6208         .lookup         = simple_lookup,
6209         .mkdir          = instance_mkdir,
6210         .rmdir          = instance_rmdir,
6211 };
6212
6213 static __init void create_trace_instances(struct dentry *d_tracer)
6214 {
6215         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6216         if (WARN_ON(!trace_instance_dir))
6217                 return;
6218
6219         /* Hijack the dir inode operations, to allow mkdir */
6220         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6221 }
6222
6223 static void
6224 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6225 {
6226         int cpu;
6227
6228         trace_create_file("tracing_cpumask", 0644, d_tracer,
6229                           tr, &tracing_cpumask_fops);
6230
6231         trace_create_file("trace_options", 0644, d_tracer,
6232                           tr, &tracing_iter_fops);
6233
6234         trace_create_file("trace", 0644, d_tracer,
6235                           tr, &tracing_fops);
6236
6237         trace_create_file("trace_pipe", 0444, d_tracer,
6238                           tr, &tracing_pipe_fops);
6239
6240         trace_create_file("buffer_size_kb", 0644, d_tracer,
6241                           tr, &tracing_entries_fops);
6242
6243         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6244                           tr, &tracing_total_entries_fops);
6245
6246         trace_create_file("free_buffer", 0200, d_tracer,
6247                           tr, &tracing_free_buffer_fops);
6248
6249         trace_create_file("trace_marker", 0220, d_tracer,
6250                           tr, &tracing_mark_fops);
6251
6252         trace_create_file("trace_clock", 0644, d_tracer, tr,
6253                           &trace_clock_fops);
6254
6255         trace_create_file("tracing_on", 0644, d_tracer,
6256                           tr, &rb_simple_fops);
6257
6258 #ifdef CONFIG_TRACER_SNAPSHOT
6259         trace_create_file("snapshot", 0644, d_tracer,
6260                           tr, &snapshot_fops);
6261 #endif
6262
6263         for_each_tracing_cpu(cpu)
6264                 tracing_init_debugfs_percpu(tr, cpu);
6265
6266 }
6267
6268 static __init int tracer_init_debugfs(void)
6269 {
6270         struct dentry *d_tracer;
6271
6272         trace_access_lock_init();
6273
6274         d_tracer = tracing_init_dentry();
6275         if (!d_tracer)
6276                 return 0;
6277
6278         init_tracer_debugfs(&global_trace, d_tracer);
6279
6280         trace_create_file("available_tracers", 0444, d_tracer,
6281                         &global_trace, &show_traces_fops);
6282
6283         trace_create_file("current_tracer", 0644, d_tracer,
6284                         &global_trace, &set_tracer_fops);
6285
6286 #ifdef CONFIG_TRACER_MAX_TRACE
6287         trace_create_file("tracing_max_latency", 0644, d_tracer,
6288                         &tracing_max_latency, &tracing_max_lat_fops);
6289 #endif
6290
6291         trace_create_file("tracing_thresh", 0644, d_tracer,
6292                         &tracing_thresh, &tracing_max_lat_fops);
6293
6294         trace_create_file("README", 0444, d_tracer,
6295                         NULL, &tracing_readme_fops);
6296
6297         trace_create_file("saved_cmdlines", 0444, d_tracer,
6298                         NULL, &tracing_saved_cmdlines_fops);
6299
6300 #ifdef CONFIG_DYNAMIC_FTRACE
6301         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6302                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6303 #endif
6304
6305         create_trace_instances(d_tracer);
6306
6307         create_trace_options_dir(&global_trace);
6308
6309         return 0;
6310 }
6311
6312 static int trace_panic_handler(struct notifier_block *this,
6313                                unsigned long event, void *unused)
6314 {
6315         if (ftrace_dump_on_oops)
6316                 ftrace_dump(ftrace_dump_on_oops);
6317         return NOTIFY_OK;
6318 }
6319
6320 static struct notifier_block trace_panic_notifier = {
6321         .notifier_call  = trace_panic_handler,
6322         .next           = NULL,
6323         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6324 };
6325
6326 static int trace_die_handler(struct notifier_block *self,
6327                              unsigned long val,
6328                              void *data)
6329 {
6330         switch (val) {
6331         case DIE_OOPS:
6332                 if (ftrace_dump_on_oops)
6333                         ftrace_dump(ftrace_dump_on_oops);
6334                 break;
6335         default:
6336                 break;
6337         }
6338         return NOTIFY_OK;
6339 }
6340
6341 static struct notifier_block trace_die_notifier = {
6342         .notifier_call = trace_die_handler,
6343         .priority = 200
6344 };
6345
6346 /*
6347  * printk is set to a max of 1024; we really don't need it that big.
6348  * Nothing should be printing 1000 characters anyway.
6349  */
6350 #define TRACE_MAX_PRINT         1000
6351
6352 /*
6353  * Define KERN_TRACE here so that we have one place to modify
6354  * it if we decide to change the log level that the ftrace dump
6355  * should be printed at.
6356  */
6357 #define KERN_TRACE              KERN_EMERG
6358
6359 void
6360 trace_printk_seq(struct trace_seq *s)
6361 {
6362         /* Probably should print a warning here. */
6363         if (s->len >= TRACE_MAX_PRINT)
6364                 s->len = TRACE_MAX_PRINT;
6365
6366         /* should already be NUL terminated, but we are paranoid. */
6367         s->buffer[s->len] = 0;
6368
6369         printk(KERN_TRACE "%s", s->buffer);
6370
6371         trace_seq_init(s);
6372 }
6373
6374 void trace_init_global_iter(struct trace_iterator *iter)
6375 {
6376         iter->tr = &global_trace;
6377         iter->trace = iter->tr->current_trace;
6378         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6379         iter->trace_buffer = &global_trace.trace_buffer;
6380
6381         if (iter->trace && iter->trace->open)
6382                 iter->trace->open(iter);
6383
6384         /* Annotate start of buffers if we had overruns */
6385         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6386                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6387
6388         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6389         if (trace_clocks[iter->tr->clock_id].in_ns)
6390                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6391 }
6392
6393 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6394 {
6395         /* use static because iter can be a bit big for the stack */
6396         static struct trace_iterator iter;
6397         static atomic_t dump_running;
6398         unsigned int old_userobj;
6399         unsigned long flags;
6400         int cnt = 0, cpu;
6401
6402         /* Only allow one dump user at a time. */
6403         if (atomic_inc_return(&dump_running) != 1) {
6404                 atomic_dec(&dump_running);
6405                 return;
6406         }
6407
6408         /*
6409          * Always turn off tracing when we dump.
6410          * We don't need to show trace output of what happens
6411          * between multiple crashes.
6412          *
6413          * If the user does a sysrq-z, then they can re-enable
6414          * tracing with echo 1 > tracing_on.
6415          */
6416         tracing_off();
6417
6418         local_irq_save(flags);
6419
6420         /* Simulate the iterator */
6421         trace_init_global_iter(&iter);
6422
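        /* Keep new events from being added to the buffers while we dump them */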
6423         for_each_tracing_cpu(cpu) {
6424                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6425         }
6426
6427         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6428
6429         /* don't look at user memory in panic mode */
6430         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6431
6432         switch (oops_dump_mode) {
6433         case DUMP_ALL:
6434                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6435                 break;
6436         case DUMP_ORIG:
6437                 iter.cpu_file = raw_smp_processor_id();
6438                 break;
6439         case DUMP_NONE:
6440                 goto out_enable;
6441         default:
6442                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6443                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6444         }
6445
6446         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6447
6448         /* Did function tracer already get disabled? */
6449         if (ftrace_is_dead()) {
6450                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6451                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6452         }
6453
6454         /*
6455          * We need to stop all tracing on all CPUs to read
6456          * the next buffer. This is a bit expensive, but is
6457          * not done often. We read everything we can,
6458          * and then release the locks again.
6459          */
6460
6461         while (!trace_empty(&iter)) {
6462
6463                 if (!cnt)
6464                         printk(KERN_TRACE "---------------------------------\n");
6465
6466                 cnt++;
6467
6468                 /* reset all but tr, trace, and overruns */
6469                 memset(&iter.seq, 0,
6470                        sizeof(struct trace_iterator) -
6471                        offsetof(struct trace_iterator, seq));
6472                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6473                 iter.pos = -1;
6474
6475                 if (trace_find_next_entry_inc(&iter) != NULL) {
6476                         int ret;
6477
6478                         ret = print_trace_line(&iter);
6479                         if (ret != TRACE_TYPE_NO_CONSUME)
6480                                 trace_consume(&iter);
6481                 }
6482                 touch_nmi_watchdog();
6483
6484                 trace_printk_seq(&iter.seq);
6485         }
6486
6487         if (!cnt)
6488                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6489         else
6490                 printk(KERN_TRACE "---------------------------------\n");
6491
6492  out_enable:
6493         trace_flags |= old_userobj;
6494
6495         for_each_tracing_cpu(cpu) {
6496                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6497         }
6498         atomic_dec(&dump_running);
6499         local_irq_restore(flags);
6500 }
6501 EXPORT_SYMBOL_GPL(ftrace_dump);
6502
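/*
 * tracer_alloc_buffers - early boot initialization of the tracing core.
 *
 * Allocates the tracing cpumasks, the trace_printk buffers (only when
 * trace_printk() users exist), the temp_buffer used by event triggers,
 * and the global trace array's ring buffers. It then registers the nop
 * tracer, the panic and die notifiers, and the snapshot command, hooks
 * global_trace into the list of trace arrays, and applies any options
 * collected in trace_boot_options. Runs as an early_initcall (see the
 * bottom of this file).
 */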
6503 __init static int tracer_alloc_buffers(void)
6504 {
6505         int ring_buf_size;
6506         int ret = -ENOMEM;
6507
6508
6509         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6510                 goto out;
6511
6512         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6513                 goto out_free_buffer_mask;
6514
6515         /* Only allocate trace_printk buffers if a trace_printk exists */
6516         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6517                 /* Must be called before global_trace.buffer is allocated */
6518                 trace_printk_init_buffers();
6519
6520         /* To save memory, keep the ring buffer size at its minimum */
6521         if (ring_buffer_expanded)
6522                 ring_buf_size = trace_buf_size;
6523         else
6524                 ring_buf_size = 1;
6525
6526         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6527         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6528
6529         raw_spin_lock_init(&global_trace.start_lock);
6530
6531         /* Used for event triggers */
6532         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6533         if (!temp_buffer)
6534                 goto out_free_cpumask;
6535
6536         /* TODO: make the number of buffers hotpluggable with CPUs */
6537         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6538                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6539                 WARN_ON(1);
6540                 goto out_free_temp_buffer;
6541         }
6542
6543         if (global_trace.buffer_disabled)
6544                 tracing_off();
6545
6546         trace_init_cmdlines();
6547
6548         /*
6549          * register_tracer() might reference current_trace, so it
6550          * needs to be set before we register anything. This is
6551          * just a bootstrap of current_trace anyway.
6552          */
6553         global_trace.current_trace = &nop_trace;
6554
6555         register_tracer(&nop_trace);
6556
6557         /* All seems OK, enable tracing */
6558         tracing_disabled = 0;
6559
6560         atomic_notifier_chain_register(&panic_notifier_list,
6561                                        &trace_panic_notifier);
6562
6563         register_die_notifier(&trace_die_notifier);
6564
6565         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6566
6567         INIT_LIST_HEAD(&global_trace.systems);
6568         INIT_LIST_HEAD(&global_trace.events);
6569         list_add(&global_trace.list, &ftrace_trace_arrays);
6570
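        /* Apply the comma-separated options collected at boot time. */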
6571         while (trace_boot_options) {
6572                 char *option;
6573
6574                 option = strsep(&trace_boot_options, ",");
6575                 trace_set_options(&global_trace, option);
6576         }
6577
6578         register_snapshot_cmd();
6579
6580         return 0;
6581
6582 out_free_temp_buffer:
6583         ring_buffer_free(temp_buffer);
6584 out_free_cpumask:
6585         free_percpu(global_trace.trace_buffer.data);
6586 #ifdef CONFIG_TRACER_MAX_TRACE
6587         free_percpu(global_trace.max_buffer.data);
6588 #endif
6589         free_cpumask_var(global_trace.tracing_cpumask);
6590 out_free_buffer_mask:
6591         free_cpumask_var(tracing_buffer_mask);
6592 out:
6593         return ret;
6594 }
6595
6596 __init static int clear_boot_tracer(void)
6597 {
6598         /*
6599          * The buffer holding the default bootup tracer name lives in
6600          * an init section. This function runs as a late_initcall; if
6601          * the boot tracer was never found, clear the pointer so that
6602          * a later tracer registration does not access the buffer that
6603          * is about to be freed.
6604          */
6605         if (!default_bootup_tracer)
6606                 return 0;
6607
6608         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6609                default_bootup_tracer);
6610         default_bootup_tracer = NULL;
6611
6612         return 0;
6613 }
6614
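/*
 * Initcall ordering: allocate the buffers as early as possible so that
 * tracing is usable early in boot, create the debugfs files once the
 * filesystem infrastructure is up, and clear a stale boot tracer name
 * only late, after every built-in tracer has had a chance to register.
 */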
6615 early_initcall(tracer_alloc_buffers);
6616 fs_initcall(tracer_init_debugfs);
6617 late_initcall(clear_boot_tracer);