tracing: Fix wraparound problems in "uptime" trace clock
kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 and is set back to zero only if the
91  * initialization of the tracer is successful; that is the only
92  * place that clears it.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
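/*
 * Editor's note: a minimal user-space sketch (not part of this file)
 * showing the runtime knob documented above.  Writing "1" selects a
 * dump of all CPU buffers, "2" only the buffer of the oopsing CPU.
 * The example program itself is hypothetical.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);
 *		close(fd);
 *		return 0;
 *	}
 */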
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
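/*
 * Editor's note: a hypothetical example of the boot parameters handled
 * above (and by set_buf_size()/set_tracing_thresh() further down),
 * combined on one kernel command line:
 *
 *	ftrace=function alloc_snapshot trace_buf_size=10M \
 *		trace_options=sym-addr,stacktrace ftrace_dump_on_oops
 *
 * The "function" tracer name and the option names are taken from the
 * trace_options[] table below; treat the exact values as illustrative
 * only.
 */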
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
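/*
 * Editor's note: the +500 above makes the division round to the
 * nearest microsecond instead of truncating, e.g. ns2usecs(1499)
 * returns 1 while ns2usecs(1500) returns 2.
 */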
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptors of the pages in memory are used to hold
196  * the linked list, by linking the lru item in each page descriptor
197  * to the other pages of that CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show if global_trace has been disabled
287  *
288  * Shows if the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" so it can be used in fast paths such as
290  * the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on(), which is a little
292  * slower but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to a low value of 16384.
311  * If a dump on oops happens, it is much appreciated not to
312  * have to wait for all that output. In any case, this is
313  * configurable at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a linked list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low-level protection.
331  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow other processes to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) The page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
408
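/*
 * Editor's note: a minimal sketch (not part of this file) of how a
 * reader path is expected to wrap buffer access with the primitives
 * above; example_consume() stands in for the real per-cpu consuming
 * code and is hypothetical.
 *
 *	static void example_read_cpu_buffer(int cpu)
 *	{
 *		trace_access_lock(cpu);
 *		example_consume(cpu);
 *		trace_access_unlock(cpu);
 *	}
 *
 * Passing RING_BUFFER_ALL_CPUS instead of a cpu number takes the
 * exclusive (write) side of all_cpu_access_lock, blocking all per-cpu
 * readers for the duration.
 */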
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races of where it gets disabled but we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457         int pc;
458
459         if (!(trace_flags & TRACE_ITER_PRINTK))
460                 return 0;
461
462         pc = preempt_count();
463
464         if (unlikely(tracing_selftest_running || tracing_disabled))
465                 return 0;
466
467         alloc = sizeof(*entry) + size + 2; /* possible \n added */
468
469         local_save_flags(irq_flags);
470         buffer = global_trace.trace_buffer.buffer;
471         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
472                                           irq_flags, pc);
473         if (!event)
474                 return 0;
475
476         entry = ring_buffer_event_data(event);
477         entry->ip = ip;
478
479         memcpy(&entry->buf, str, size);
480
481         /* Add a newline if necessary */
482         if (entry->buf[size - 1] != '\n') {
483                 entry->buf[size] = '\n';
484                 entry->buf[size + 1] = '\0';
485         } else
486                 entry->buf[size] = '\0';
487
488         __buffer_unlock_commit(buffer, event);
489         ftrace_trace_stack(buffer, irq_flags, 4, pc);
490
491         return size;
492 }
493 EXPORT_SYMBOL_GPL(__trace_puts);
494
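/*
 * Editor's note: a minimal usage sketch, not part of this file.
 * Callers normally reach this through the trace_puts() wrapper
 * (assumed to live in a shared header); a direct call looks like:
 *
 *	static void example_mark(void)
 *	{
 *		static const char msg[] = "reached the interesting point\n";
 *
 *		__trace_puts(_THIS_IP_, msg, strlen(msg));
 *	}
 *
 * The return value is the number of bytes recorded, or 0 if the printk
 * trace option is off, tracing is disabled, or the reserve fails.
 */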
495 /**
496  * __trace_bputs - write the pointer to a constant string into trace buffer
497  * @ip:    The address of the caller
498  * @str:   The constant string whose pointer is written to the buffer
499  */
500 int __trace_bputs(unsigned long ip, const char *str)
501 {
502         struct ring_buffer_event *event;
503         struct ring_buffer *buffer;
504         struct bputs_entry *entry;
505         unsigned long irq_flags;
506         int size = sizeof(struct bputs_entry);
507         int pc;
508
509         if (!(trace_flags & TRACE_ITER_PRINTK))
510                 return 0;
511
512         pc = preempt_count();
513
514         if (unlikely(tracing_selftest_running || tracing_disabled))
515                 return 0;
516
517         local_save_flags(irq_flags);
518         buffer = global_trace.trace_buffer.buffer;
519         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
520                                           irq_flags, pc);
521         if (!event)
522                 return 0;
523
524         entry = ring_buffer_event_data(event);
525         entry->ip                       = ip;
526         entry->str                      = str;
527
528         __buffer_unlock_commit(buffer, event);
529         ftrace_trace_stack(buffer, irq_flags, 4, pc);
530
531         return 1;
532 }
533 EXPORT_SYMBOL_GPL(__trace_bputs);
534
535 #ifdef CONFIG_TRACER_SNAPSHOT
536 /**
537  * trace_snapshot - take a snapshot of the current buffer.
538  *
539  * This causes a swap between the snapshot buffer and the current live
540  * tracing buffer. You can use this to take snapshots of the live
541  * trace when some condition is triggered, but continue to trace.
542  *
543  * Note, make sure to allocate the snapshot either with
544  * tracing_snapshot_alloc(), or by doing it manually
545  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
546  *
547  * If the snapshot buffer is not allocated, it will stop tracing.
548  * Basically making a permanent snapshot.
549  */
550 void tracing_snapshot(void)
551 {
552         struct trace_array *tr = &global_trace;
553         struct tracer *tracer = tr->current_trace;
554         unsigned long flags;
555
556         if (in_nmi()) {
557                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
558                 internal_trace_puts("*** snapshot is being ignored        ***\n");
559                 return;
560         }
561
562         if (!tr->allocated_snapshot) {
563                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
564                 internal_trace_puts("*** stopping trace here!   ***\n");
565                 tracing_off();
566                 return;
567         }
568
569         /* Note, snapshot can not be used when the tracer uses it */
570         if (tracer->use_max_tr) {
571                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
572                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
573                 return;
574         }
575
576         local_irq_save(flags);
577         update_max_tr(tr, current, smp_processor_id());
578         local_irq_restore(flags);
579 }
580 EXPORT_SYMBOL_GPL(tracing_snapshot);
581
582 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
583                                         struct trace_buffer *size_buf, int cpu_id);
584 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
585
586 static int alloc_snapshot(struct trace_array *tr)
587 {
588         int ret;
589
590         if (!tr->allocated_snapshot) {
591
592                 /* allocate spare buffer */
593                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
594                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
595                 if (ret < 0)
596                         return ret;
597
598                 tr->allocated_snapshot = true;
599         }
600
601         return 0;
602 }
603
604 void free_snapshot(struct trace_array *tr)
605 {
606         /*
607          * We don't free the ring buffer; instead, we resize it because
608          * the max_tr ring buffer has some state (e.g. ring->clock) and
609          * we want to preserve it.
610          */
611         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
612         set_buffer_entries(&tr->max_buffer, 1);
613         tracing_reset_online_cpus(&tr->max_buffer);
614         tr->allocated_snapshot = false;
615 }
616
617 /**
618  * tracing_alloc_snapshot - allocate snapshot buffer.
619  *
620  * This only allocates the snapshot buffer if it isn't already
621  * allocated - it doesn't also take a snapshot.
622  *
623  * This is meant to be used in cases where the snapshot buffer needs
624  * to be set up for events that can't sleep but need to be able to
625  * trigger a snapshot.
626  */
627 int tracing_alloc_snapshot(void)
628 {
629         struct trace_array *tr = &global_trace;
630         int ret;
631
632         ret = alloc_snapshot(tr);
633         WARN_ON(ret < 0);
634
635         return ret;
636 }
637 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
638
639 /**
640  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
641  *
642  * This is similar to trace_snapshot(), but it will allocate the
643  * snapshot buffer if it isn't already allocated. Use this only
644  * where it is safe to sleep, as the allocation may sleep.
645  *
646  * This causes a swap between the snapshot buffer and the current live
647  * tracing buffer. You can use this to take snapshots of the live
648  * trace when some condition is triggered, but continue to trace.
649  */
650 void tracing_snapshot_alloc(void)
651 {
652         int ret;
653
654         ret = tracing_alloc_snapshot();
655         if (ret < 0)
656                 return;
657
658         tracing_snapshot();
659 }
660 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
661 #else
662 void tracing_snapshot(void)
663 {
664         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
665 }
666 EXPORT_SYMBOL_GPL(tracing_snapshot);
667 int tracing_alloc_snapshot(void)
668 {
669         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
670         return -ENODEV;
671 }
672 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
673 void tracing_snapshot_alloc(void)
674 {
675         /* Give warning */
676         tracing_snapshot();
677 }
678 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
679 #endif /* CONFIG_TRACER_SNAPSHOT */
680
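/*
 * Editor's note: a minimal sketch (not part of this file) of the
 * calling pattern the kernel-doc above describes: allocate the
 * snapshot buffer once from a context that may sleep, then trigger
 * snapshots later when the interesting condition hits.
 *
 *	static int example_setup(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void example_condition_hit(void)
 *	{
 *		tracing_snapshot();
 *	}
 *
 * Without CONFIG_TRACER_SNAPSHOT both calls fall back to the stubs
 * above, which only WARN (and tracing_alloc_snapshot() returns
 * -ENODEV).
 */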
681 static void tracer_tracing_off(struct trace_array *tr)
682 {
683         if (tr->trace_buffer.buffer)
684                 ring_buffer_record_off(tr->trace_buffer.buffer);
685         /*
686          * This flag is looked at when buffers haven't been allocated
687          * yet, or by some tracers (like irqsoff), that just want to
688          * know if the ring buffer has been disabled, but it can handle
689          * races of where it gets disabled but we still do a record.
690          * As the check is in the fast path of the tracers, it is more
691          * important to be fast than accurate.
692          */
693         tr->buffer_disabled = 1;
694         /* Make the flag seen by readers */
695         smp_wmb();
696 }
697
698 /**
699  * tracing_off - turn off tracing buffers
700  *
701  * This function stops the tracing buffers from recording data.
702  * It does not disable any overhead the tracers themselves may
703  * be causing. This function simply causes all recording to
704  * the ring buffers to fail.
705  */
706 void tracing_off(void)
707 {
708         tracer_tracing_off(&global_trace);
709 }
710 EXPORT_SYMBOL_GPL(tracing_off);
711
712 void disable_trace_on_warning(void)
713 {
714         if (__disable_trace_on_warning)
715                 tracing_off();
716 }
717
718 /**
719  * tracer_tracing_is_on - show real state of ring buffer enabled
720  * @tr : the trace array to know if ring buffer is enabled
721  *
722  * Shows real state of the ring buffer if it is enabled or not.
723  */
724 static int tracer_tracing_is_on(struct trace_array *tr)
725 {
726         if (tr->trace_buffer.buffer)
727                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
728         return !tr->buffer_disabled;
729 }
730
731 /**
732  * tracing_is_on - show state of ring buffers enabled
733  */
734 int tracing_is_on(void)
735 {
736         return tracer_tracing_is_on(&global_trace);
737 }
738 EXPORT_SYMBOL_GPL(tracing_is_on);
739
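/*
 * Editor's note: a hypothetical sketch (not part of this file) of the
 * usual reason for calling tracing_off(): freezing the ring buffers as
 * soon as a driver notices a bad state, so the events leading up to it
 * stay in the buffer for later inspection.
 *
 *	static void example_handle_error(int err)
 *	{
 *		if (err && tracing_is_on())
 *			tracing_off();
 *	}
 */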
740 static int __init set_buf_size(char *str)
741 {
742         unsigned long buf_size;
743
744         if (!str)
745                 return 0;
746         buf_size = memparse(str, &str);
747         /* nr_entries can not be zero */
748         if (buf_size == 0)
749                 return 0;
750         trace_buf_size = buf_size;
751         return 1;
752 }
753 __setup("trace_buf_size=", set_buf_size);
754
755 static int __init set_tracing_thresh(char *str)
756 {
757         unsigned long threshold;
758         int ret;
759
760         if (!str)
761                 return 0;
762         ret = kstrtoul(str, 0, &threshold);
763         if (ret < 0)
764                 return 0;
765         tracing_thresh = threshold * 1000;
766         return 1;
767 }
768 __setup("tracing_thresh=", set_tracing_thresh);
769
770 unsigned long nsecs_to_usecs(unsigned long nsecs)
771 {
772         return nsecs / 1000;
773 }
774
775 /* These must match the bit positions in trace_iterator_flags */
776 static const char *trace_options[] = {
777         "print-parent",
778         "sym-offset",
779         "sym-addr",
780         "verbose",
781         "raw",
782         "hex",
783         "bin",
784         "block",
785         "stacktrace",
786         "trace_printk",
787         "ftrace_preempt",
788         "branch",
789         "annotate",
790         "userstacktrace",
791         "sym-userobj",
792         "printk-msg-only",
793         "context-info",
794         "latency-format",
795         "sleep-time",
796         "graph-time",
797         "record-cmd",
798         "overwrite",
799         "disable_on_free",
800         "irq-info",
801         "markers",
802         "function-trace",
803         NULL
804 };
805
806 static struct {
807         u64 (*func)(void);
808         const char *name;
809         int in_ns;              /* is this clock in nanoseconds? */
810 } trace_clocks[] = {
811         { trace_clock_local,    "local",        1 },
812         { trace_clock_global,   "global",       1 },
813         { trace_clock_counter,  "counter",      0 },
814         { trace_clock_jiffies,  "uptime",       0 },
815         { trace_clock,          "perf",         1 },
816         ARCH_TRACE_CLOCKS
817 };
818
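/*
 * Editor's note: the in_ns flag above tells the rest of the tracer
 * whether a clock's timestamps are in nanoseconds and may be converted
 * with helpers such as ns2usecs().  "counter" and the jiffies-based
 * "uptime" clock are not; the wraparound fix named in this commit
 * concerns that "uptime" entry.
 */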
819 /*
820  * trace_parser_get_init - gets the buffer for trace parser
821  */
822 int trace_parser_get_init(struct trace_parser *parser, int size)
823 {
824         memset(parser, 0, sizeof(*parser));
825
826         parser->buffer = kmalloc(size, GFP_KERNEL);
827         if (!parser->buffer)
828                 return 1;
829
830         parser->size = size;
831         return 0;
832 }
833
834 /*
835  * trace_parser_put - frees the buffer for trace parser
836  */
837 void trace_parser_put(struct trace_parser *parser)
838 {
839         kfree(parser->buffer);
840 }
841
842 /*
843  * trace_get_user - reads the user input string separated by space
844  * (matched by isspace(ch))
845  *
846  * For each string found, the 'struct trace_parser' is updated,
847  * and the function returns.
848  *
849  * Returns number of bytes read.
850  *
851  * See kernel/trace/trace.h for 'struct trace_parser' details.
852  */
853 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
854         size_t cnt, loff_t *ppos)
855 {
856         char ch;
857         size_t read = 0;
858         ssize_t ret;
859
860         if (!*ppos)
861                 trace_parser_clear(parser);
862
863         ret = get_user(ch, ubuf++);
864         if (ret)
865                 goto out;
866
867         read++;
868         cnt--;
869
870         /*
871          * If the parser is not finished with the last write,
872          * continue reading the user input without skipping spaces.
873          */
874         if (!parser->cont) {
875                 /* skip white space */
876                 while (cnt && isspace(ch)) {
877                         ret = get_user(ch, ubuf++);
878                         if (ret)
879                                 goto out;
880                         read++;
881                         cnt--;
882                 }
883
884                 /* only spaces were written */
885                 if (isspace(ch)) {
886                         *ppos += read;
887                         ret = read;
888                         goto out;
889                 }
890
891                 parser->idx = 0;
892         }
893
894         /* read the non-space input */
895         while (cnt && !isspace(ch)) {
896                 if (parser->idx < parser->size - 1)
897                         parser->buffer[parser->idx++] = ch;
898                 else {
899                         ret = -EINVAL;
900                         goto out;
901                 }
902                 ret = get_user(ch, ubuf++);
903                 if (ret)
904                         goto out;
905                 read++;
906                 cnt--;
907         }
908
909         /* We either got finished input or we have to wait for another call. */
910         if (isspace(ch)) {
911                 parser->buffer[parser->idx] = 0;
912                 parser->cont = false;
913         } else if (parser->idx < parser->size - 1) {
914                 parser->cont = true;
915                 parser->buffer[parser->idx++] = ch;
916         } else {
917                 ret = -EINVAL;
918                 goto out;
919         }
920
921         *ppos += read;
922         ret = read;
923
924 out:
925         return ret;
926 }
927
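/*
 * Editor's note: a minimal sketch (not part of this file) of how the
 * parser above is typically driven from a debugfs ->write() handler.
 * trace_parser_loaded() is assumed to be the trace.h helper that
 * reports whether a complete word is sitting in the buffer, and
 * example_handle_word() is hypothetical.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, 64))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read > 0 && trace_parser_loaded(&parser))
 *			example_handle_word(parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */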
928 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
929 {
930         int len;
931         int ret;
932
933         if (!cnt)
934                 return 0;
935
936         if (s->len <= s->readpos)
937                 return -EBUSY;
938
939         len = s->len - s->readpos;
940         if (cnt > len)
941                 cnt = len;
942         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
943         if (ret == cnt)
944                 return -EFAULT;
945
946         cnt -= ret;
947
948         s->readpos += cnt;
949         return cnt;
950 }
951
952 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
953 {
954         int len;
955
956         if (s->len <= s->readpos)
957                 return -EBUSY;
958
959         len = s->len - s->readpos;
960         if (cnt > len)
961                 cnt = len;
962         memcpy(buf, s->buffer + s->readpos, cnt);
963
964         s->readpos += cnt;
965         return cnt;
966 }
967
968 /*
969  * ftrace_max_lock is used to protect the swapping of buffers
970  * when taking a max snapshot. The buffers themselves are
971  * protected by per_cpu spinlocks. But the action of the swap
972  * needs its own lock.
973  *
974  * This is defined as an arch_spinlock_t in order to help
975  * with performance when lockdep debugging is enabled.
976  *
977  * It is also used in other places outside of update_max_tr(),
978  * so it needs to be defined outside of the
979  * CONFIG_TRACER_MAX_TRACE block.
980  */
981 static arch_spinlock_t ftrace_max_lock =
982         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
983
984 unsigned long __read_mostly     tracing_thresh;
985
986 #ifdef CONFIG_TRACER_MAX_TRACE
987 unsigned long __read_mostly     tracing_max_latency;
988
989 /*
990  * Copy the new maximum trace into the separate maximum-trace
991  * structure. (this way the maximum trace is permanently saved,
992  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
993  */
994 static void
995 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
996 {
997         struct trace_buffer *trace_buf = &tr->trace_buffer;
998         struct trace_buffer *max_buf = &tr->max_buffer;
999         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1000         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1001
1002         max_buf->cpu = cpu;
1003         max_buf->time_start = data->preempt_timestamp;
1004
1005         max_data->saved_latency = tracing_max_latency;
1006         max_data->critical_start = data->critical_start;
1007         max_data->critical_end = data->critical_end;
1008
1009         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1010         max_data->pid = tsk->pid;
1011         /*
1012          * If tsk == current, then use current_uid(), as that does not use
1013          * RCU. The irq tracer can be called out of RCU scope.
1014          */
1015         if (tsk == current)
1016                 max_data->uid = current_uid();
1017         else
1018                 max_data->uid = task_uid(tsk);
1019
1020         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1021         max_data->policy = tsk->policy;
1022         max_data->rt_priority = tsk->rt_priority;
1023
1024         /* record this task's comm */
1025         tracing_record_cmdline(tsk);
1026 }
1027
1028 /**
1029  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1030  * @tr: tracer
1031  * @tsk: the task with the latency
1032  * @cpu: The cpu that initiated the trace.
1033  *
1034  * Flip the buffers between the @tr and the max_tr and record information
1035  * about which task was the cause of this latency.
1036  */
1037 void
1038 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1039 {
1040         struct ring_buffer *buf;
1041
1042         if (tr->stop_count)
1043                 return;
1044
1045         WARN_ON_ONCE(!irqs_disabled());
1046
1047         if (!tr->allocated_snapshot) {
1048                 /* Only the nop tracer should hit this when disabling */
1049                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1050                 return;
1051         }
1052
1053         arch_spin_lock(&ftrace_max_lock);
1054
1055         buf = tr->trace_buffer.buffer;
1056         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1057         tr->max_buffer.buffer = buf;
1058
1059         __update_max_tr(tr, tsk, cpu);
1060         arch_spin_unlock(&ftrace_max_lock);
1061 }
1062
1063 /**
1064  * update_max_tr_single - only copy one trace over, and reset the rest
1065  * @tr: tracer
1066  * @tsk: task with the latency
1067  * @cpu: the cpu of the buffer to copy.
1068  *
1069  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1070  */
1071 void
1072 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1073 {
1074         int ret;
1075
1076         if (tr->stop_count)
1077                 return;
1078
1079         WARN_ON_ONCE(!irqs_disabled());
1080         if (!tr->allocated_snapshot) {
1081                 /* Only the nop tracer should hit this when disabling */
1082                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1083                 return;
1084         }
1085
1086         arch_spin_lock(&ftrace_max_lock);
1087
1088         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1089
1090         if (ret == -EBUSY) {
1091                 /*
1092                  * We failed to swap the buffer due to a commit taking
1093                  * place on this CPU. We fail to record, but we reset
1094                  * the max trace buffer (no one writes directly to it)
1095                  * and flag that it failed.
1096                  */
1097                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1098                         "Failed to swap buffers due to commit in progress\n");
1099         }
1100
1101         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1102
1103         __update_max_tr(tr, tsk, cpu);
1104         arch_spin_unlock(&ftrace_max_lock);
1105 }
1106 #endif /* CONFIG_TRACER_MAX_TRACE */
1107
1108 static int default_wait_pipe(struct trace_iterator *iter)
1109 {
1110         /* Iterators are static, they should be filled or empty */
1111         if (trace_buffer_iter(iter, iter->cpu_file))
1112                 return 0;
1113
1114         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1115 }
1116
1117 #ifdef CONFIG_FTRACE_STARTUP_TEST
1118 static int run_tracer_selftest(struct tracer *type)
1119 {
1120         struct trace_array *tr = &global_trace;
1121         struct tracer *saved_tracer = tr->current_trace;
1122         int ret;
1123
1124         if (!type->selftest || tracing_selftest_disabled)
1125                 return 0;
1126
1127         /*
1128          * Run a selftest on this tracer.
1129          * Here we reset the trace buffer, and set the current
1130          * tracer to be this tracer. The tracer can then run some
1131          * internal tracing to verify that everything is in order.
1132          * If we fail, we do not register this tracer.
1133          */
1134         tracing_reset_online_cpus(&tr->trace_buffer);
1135
1136         tr->current_trace = type;
1137
1138 #ifdef CONFIG_TRACER_MAX_TRACE
1139         if (type->use_max_tr) {
1140                 /* If we expanded the buffers, make sure the max is expanded too */
1141                 if (ring_buffer_expanded)
1142                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1143                                            RING_BUFFER_ALL_CPUS);
1144                 tr->allocated_snapshot = true;
1145         }
1146 #endif
1147
1148         /* the test is responsible for initializing and enabling */
1149         pr_info("Testing tracer %s: ", type->name);
1150         ret = type->selftest(type, tr);
1151         /* the test is responsible for resetting too */
1152         tr->current_trace = saved_tracer;
1153         if (ret) {
1154                 printk(KERN_CONT "FAILED!\n");
1155                 /* Add the warning after printing 'FAILED' */
1156                 WARN_ON(1);
1157                 return -1;
1158         }
1159         /* Only reset on passing, to avoid touching corrupted buffers */
1160         tracing_reset_online_cpus(&tr->trace_buffer);
1161
1162 #ifdef CONFIG_TRACER_MAX_TRACE
1163         if (type->use_max_tr) {
1164                 tr->allocated_snapshot = false;
1165
1166                 /* Shrink the max buffer again */
1167                 if (ring_buffer_expanded)
1168                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1169                                            RING_BUFFER_ALL_CPUS);
1170         }
1171 #endif
1172
1173         printk(KERN_CONT "PASSED\n");
1174         return 0;
1175 }
1176 #else
1177 static inline int run_tracer_selftest(struct tracer *type)
1178 {
1179         return 0;
1180 }
1181 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1182
1183 /**
1184  * register_tracer - register a tracer with the ftrace system.
1185  * @type: the plugin for the tracer
1186  *
1187  * Register a new plugin tracer.
1188  */
1189 int register_tracer(struct tracer *type)
1190 {
1191         struct tracer *t;
1192         int ret = 0;
1193
1194         if (!type->name) {
1195                 pr_info("Tracer must have a name\n");
1196                 return -1;
1197         }
1198
1199         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1200                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1201                 return -1;
1202         }
1203
1204         mutex_lock(&trace_types_lock);
1205
1206         tracing_selftest_running = true;
1207
1208         for (t = trace_types; t; t = t->next) {
1209                 if (strcmp(type->name, t->name) == 0) {
1210                         /* already found */
1211                         pr_info("Tracer %s already registered\n",
1212                                 type->name);
1213                         ret = -1;
1214                         goto out;
1215                 }
1216         }
1217
1218         if (!type->set_flag)
1219                 type->set_flag = &dummy_set_flag;
1220         if (!type->flags)
1221                 type->flags = &dummy_tracer_flags;
1222         else
1223                 if (!type->flags->opts)
1224                         type->flags->opts = dummy_tracer_opt;
1225         if (!type->wait_pipe)
1226                 type->wait_pipe = default_wait_pipe;
1227
1228         ret = run_tracer_selftest(type);
1229         if (ret < 0)
1230                 goto out;
1231
1232         type->next = trace_types;
1233         trace_types = type;
1234
1235  out:
1236         tracing_selftest_running = false;
1237         mutex_unlock(&trace_types_lock);
1238
1239         if (ret || !default_bootup_tracer)
1240                 goto out_unlock;
1241
1242         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1243                 goto out_unlock;
1244
1245         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1246         /* Do we want this tracer to start on bootup? */
1247         tracing_set_tracer(type->name);
1248         default_bootup_tracer = NULL;
1249         /* Disable other selftests, since this will break them. */
1250         tracing_selftest_disabled = true;
1251 #ifdef CONFIG_FTRACE_STARTUP_TEST
1252         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1253                type->name);
1254 #endif
1255
1256  out_unlock:
1257         return ret;
1258 }
1259
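/*
 * Editor's note: a minimal, hypothetical tracer registration, to show
 * the fields register_tracer() cares about (a name plus optional
 * callbacks; missing set_flag/flags/wait_pipe members are filled in
 * with the dummies above).
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 * register_tracer(&example_tracer) would then typically be called from
 * an __init function.
 */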
1260 void tracing_reset(struct trace_buffer *buf, int cpu)
1261 {
1262         struct ring_buffer *buffer = buf->buffer;
1263
1264         if (!buffer)
1265                 return;
1266
1267         ring_buffer_record_disable(buffer);
1268
1269         /* Make sure all commits have finished */
1270         synchronize_sched();
1271         ring_buffer_reset_cpu(buffer, cpu);
1272
1273         ring_buffer_record_enable(buffer);
1274 }
1275
1276 void tracing_reset_online_cpus(struct trace_buffer *buf)
1277 {
1278         struct ring_buffer *buffer = buf->buffer;
1279         int cpu;
1280
1281         if (!buffer)
1282                 return;
1283
1284         ring_buffer_record_disable(buffer);
1285
1286         /* Make sure all commits have finished */
1287         synchronize_sched();
1288
1289         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1290
1291         for_each_online_cpu(cpu)
1292                 ring_buffer_reset_cpu(buffer, cpu);
1293
1294         ring_buffer_record_enable(buffer);
1295 }
1296
1297 /* Must have trace_types_lock held */
1298 void tracing_reset_all_online_cpus(void)
1299 {
1300         struct trace_array *tr;
1301
1302         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1303                 tracing_reset_online_cpus(&tr->trace_buffer);
1304 #ifdef CONFIG_TRACER_MAX_TRACE
1305                 tracing_reset_online_cpus(&tr->max_buffer);
1306 #endif
1307         }
1308 }
1309
1310 #define SAVED_CMDLINES 128
1311 #define NO_CMDLINE_MAP UINT_MAX
1312 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1313 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1314 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1315 static int cmdline_idx;
1316 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1317
1318 /* temporarily disable recording */
1319 static atomic_t trace_record_cmdline_disabled __read_mostly;
1320
1321 static void trace_init_cmdlines(void)
1322 {
1323         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1324         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1325         cmdline_idx = 0;
1326 }
1327
1328 int is_tracing_stopped(void)
1329 {
1330         return global_trace.stop_count;
1331 }
1332
1333 /**
1334  * tracing_start - quick start of the tracer
1335  *
1336  * If tracing is enabled but was stopped by tracing_stop,
1337  * this will start the tracer back up.
1338  */
1339 void tracing_start(void)
1340 {
1341         struct ring_buffer *buffer;
1342         unsigned long flags;
1343
1344         if (tracing_disabled)
1345                 return;
1346
1347         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1348         if (--global_trace.stop_count) {
1349                 if (global_trace.stop_count < 0) {
1350                         /* Someone screwed up their debugging */
1351                         WARN_ON_ONCE(1);
1352                         global_trace.stop_count = 0;
1353                 }
1354                 goto out;
1355         }
1356
1357         /* Prevent the buffers from switching */
1358         arch_spin_lock(&ftrace_max_lock);
1359
1360         buffer = global_trace.trace_buffer.buffer;
1361         if (buffer)
1362                 ring_buffer_record_enable(buffer);
1363
1364 #ifdef CONFIG_TRACER_MAX_TRACE
1365         buffer = global_trace.max_buffer.buffer;
1366         if (buffer)
1367                 ring_buffer_record_enable(buffer);
1368 #endif
1369
1370         arch_spin_unlock(&ftrace_max_lock);
1371
1372  out:
1373         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1374 }
1375
1376 static void tracing_start_tr(struct trace_array *tr)
1377 {
1378         struct ring_buffer *buffer;
1379         unsigned long flags;
1380
1381         if (tracing_disabled)
1382                 return;
1383
1384         /* If global, we need to also start the max tracer */
1385         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1386                 return tracing_start();
1387
1388         raw_spin_lock_irqsave(&tr->start_lock, flags);
1389
1390         if (--tr->stop_count) {
1391                 if (tr->stop_count < 0) {
1392                         /* Someone screwed up their debugging */
1393                         WARN_ON_ONCE(1);
1394                         tr->stop_count = 0;
1395                 }
1396                 goto out;
1397         }
1398
1399         buffer = tr->trace_buffer.buffer;
1400         if (buffer)
1401                 ring_buffer_record_enable(buffer);
1402
1403  out:
1404         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1405 }
1406
1407 /**
1408  * tracing_stop - quick stop of the tracer
1409  *
1410  * Light weight way to stop tracing. Use in conjunction with
1411  * tracing_start.
1412  */
1413 void tracing_stop(void)
1414 {
1415         struct ring_buffer *buffer;
1416         unsigned long flags;
1417
1418         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1419         if (global_trace.stop_count++)
1420                 goto out;
1421
1422         /* Prevent the buffers from switching */
1423         arch_spin_lock(&ftrace_max_lock);
1424
1425         buffer = global_trace.trace_buffer.buffer;
1426         if (buffer)
1427                 ring_buffer_record_disable(buffer);
1428
1429 #ifdef CONFIG_TRACER_MAX_TRACE
1430         buffer = global_trace.max_buffer.buffer;
1431         if (buffer)
1432                 ring_buffer_record_disable(buffer);
1433 #endif
1434
1435         arch_spin_unlock(&ftrace_max_lock);
1436
1437  out:
1438         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1439 }
1440
1441 static void tracing_stop_tr(struct trace_array *tr)
1442 {
1443         struct ring_buffer *buffer;
1444         unsigned long flags;
1445
1446         /* If global, we need to also stop the max tracer */
1447         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1448                 return tracing_stop();
1449
1450         raw_spin_lock_irqsave(&tr->start_lock, flags);
1451         if (tr->stop_count++)
1452                 goto out;
1453
1454         buffer = tr->trace_buffer.buffer;
1455         if (buffer)
1456                 ring_buffer_record_disable(buffer);
1457
1458  out:
1459         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1460 }
1461
1462 void trace_stop_cmdline_recording(void);
1463
1464 static int trace_save_cmdline(struct task_struct *tsk)
1465 {
1466         unsigned pid, idx;
1467
1468         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1469                 return 0;
1470
1471         /*
1472          * It's not the end of the world if we don't get
1473          * the lock, but we also don't want to spin
1474          * nor do we want to disable interrupts,
1475          * so if we miss here, then better luck next time.
1476          */
1477         if (!arch_spin_trylock(&trace_cmdline_lock))
1478                 return 0;
1479
1480         idx = map_pid_to_cmdline[tsk->pid];
1481         if (idx == NO_CMDLINE_MAP) {
1482                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1483
1484                 /*
1485                  * Check whether the cmdline buffer at idx has a pid
1486                  * mapped. We are going to overwrite that entry so we
1487                  * need to clear the map_pid_to_cmdline. Otherwise we
1488                  * would read the new comm for the old pid.
1489                  */
1490                 pid = map_cmdline_to_pid[idx];
1491                 if (pid != NO_CMDLINE_MAP)
1492                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1493
1494                 map_cmdline_to_pid[idx] = tsk->pid;
1495                 map_pid_to_cmdline[tsk->pid] = idx;
1496
1497                 cmdline_idx = idx;
1498         }
1499
1500         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1501
1502         arch_spin_unlock(&trace_cmdline_lock);
1503
1504         return 1;
1505 }
1506
1507 void trace_find_cmdline(int pid, char comm[])
1508 {
1509         unsigned map;
1510
1511         if (!pid) {
1512                 strcpy(comm, "<idle>");
1513                 return;
1514         }
1515
1516         if (WARN_ON_ONCE(pid < 0)) {
1517                 strcpy(comm, "<XXX>");
1518                 return;
1519         }
1520
1521         if (pid > PID_MAX_DEFAULT) {
1522                 strcpy(comm, "<...>");
1523                 return;
1524         }
1525
1526         preempt_disable();
1527         arch_spin_lock(&trace_cmdline_lock);
1528         map = map_pid_to_cmdline[pid];
1529         if (map != NO_CMDLINE_MAP)
1530                 strcpy(comm, saved_cmdlines[map]);
1531         else
1532                 strcpy(comm, "<...>");
1533
1534         arch_spin_unlock(&trace_cmdline_lock);
1535         preempt_enable();
1536 }
1537
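/*
 * Editor's note: a minimal usage sketch (not part of this file).  The
 * pid -> comm cache above is read back like this when formatting trace
 * output:
 *
 *	static void example_lookup_comm(int pid)
 *	{
 *		char comm[TASK_COMM_LEN];
 *
 *		trace_find_cmdline(pid, comm);
 *		pr_info("pid %d last ran as %s\n", pid, comm);
 *	}
 *
 * If the pid was never recorded (or its slot was reused), comm ends up
 * as "<...>".
 */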
1538 void tracing_record_cmdline(struct task_struct *tsk)
1539 {
1540         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1541                 return;
1542
1543         if (!__this_cpu_read(trace_cmdline_save))
1544                 return;
1545
1546         if (trace_save_cmdline(tsk))
1547                 __this_cpu_write(trace_cmdline_save, false);
1548 }
1549
1550 void
1551 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1552                              int pc)
1553 {
1554         struct task_struct *tsk = current;
1555
1556         entry->preempt_count            = pc & 0xff;
1557         entry->pid                      = (tsk) ? tsk->pid : 0;
1558         entry->flags =
1559 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1560                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1561 #else
1562                 TRACE_FLAG_IRQS_NOSUPPORT |
1563 #endif
1564                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1565                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1566                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1567                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1568 }
1569 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1570
1571 struct ring_buffer_event *
1572 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1573                           int type,
1574                           unsigned long len,
1575                           unsigned long flags, int pc)
1576 {
1577         struct ring_buffer_event *event;
1578
1579         event = ring_buffer_lock_reserve(buffer, len);
1580         if (event != NULL) {
1581                 struct trace_entry *ent = ring_buffer_event_data(event);
1582
1583                 tracing_generic_entry_update(ent, flags, pc);
1584                 ent->type = type;
1585         }
1586
1587         return event;
1588 }
1589
1590 void
1591 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1592 {
1593         __this_cpu_write(trace_cmdline_save, true);
1594         ring_buffer_unlock_commit(buffer, event);
1595 }
1596
1597 static inline void
1598 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1599                              struct ring_buffer_event *event,
1600                              unsigned long flags, int pc)
1601 {
1602         __buffer_unlock_commit(buffer, event);
1603
1604         ftrace_trace_stack(buffer, flags, 6, pc);
1605         ftrace_trace_userstack(buffer, flags, pc);
1606 }
1607
1608 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1609                                 struct ring_buffer_event *event,
1610                                 unsigned long flags, int pc)
1611 {
1612         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1613 }
1614 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1615
1616 static struct ring_buffer *temp_buffer;
1617
1618 struct ring_buffer_event *
1619 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1620                           struct ftrace_event_file *ftrace_file,
1621                           int type, unsigned long len,
1622                           unsigned long flags, int pc)
1623 {
1624         struct ring_buffer_event *entry;
1625
1626         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1627         entry = trace_buffer_lock_reserve(*current_rb,
1628                                          type, len, flags, pc);
1629         /*
1630          * If tracing is off, but we have triggers enabled,
1631          * we still need to look at the event data. Use the temp_buffer
1632          * to store the trace event for the trigger to use. It's recursion
1633          * safe and will not be recorded anywhere.
1634          */
1635         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1636                 *current_rb = temp_buffer;
1637                 entry = trace_buffer_lock_reserve(*current_rb,
1638                                                   type, len, flags, pc);
1639         }
1640         return entry;
1641 }
1642 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1643
1644 struct ring_buffer_event *
1645 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1646                                   int type, unsigned long len,
1647                                   unsigned long flags, int pc)
1648 {
1649         *current_rb = global_trace.trace_buffer.buffer;
1650         return trace_buffer_lock_reserve(*current_rb,
1651                                          type, len, flags, pc);
1652 }
1653 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1654
1655 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1656                                         struct ring_buffer_event *event,
1657                                         unsigned long flags, int pc)
1658 {
1659         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1660 }
1661 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1662
1663 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1664                                      struct ring_buffer_event *event,
1665                                      unsigned long flags, int pc,
1666                                      struct pt_regs *regs)
1667 {
1668         __buffer_unlock_commit(buffer, event);
1669
1670         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1671         ftrace_trace_userstack(buffer, flags, pc);
1672 }
1673 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1674
1675 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1676                                          struct ring_buffer_event *event)
1677 {
1678         ring_buffer_discard_commit(buffer, event);
1679 }
1680 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1681
1682 void
1683 trace_function(struct trace_array *tr,
1684                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1685                int pc)
1686 {
1687         struct ftrace_event_call *call = &event_function;
1688         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1689         struct ring_buffer_event *event;
1690         struct ftrace_entry *entry;
1691
1692         /* If we are reading the ring buffer, don't trace */
1693         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1694                 return;
1695
1696         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1697                                           flags, pc);
1698         if (!event)
1699                 return;
1700         entry   = ring_buffer_event_data(event);
1701         entry->ip                       = ip;
1702         entry->parent_ip                = parent_ip;
1703
1704         if (!call_filter_check_discard(call, entry, buffer, event))
1705                 __buffer_unlock_commit(buffer, event);
1706 }
1707
1708 #ifdef CONFIG_STACKTRACE
1709
1710 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1711 struct ftrace_stack {
1712         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1713 };
1714
1715 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1716 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1717
1718 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1719                                  unsigned long flags,
1720                                  int skip, int pc, struct pt_regs *regs)
1721 {
1722         struct ftrace_event_call *call = &event_kernel_stack;
1723         struct ring_buffer_event *event;
1724         struct stack_entry *entry;
1725         struct stack_trace trace;
1726         int use_stack;
1727         int size = FTRACE_STACK_ENTRIES;
1728
1729         trace.nr_entries        = 0;
1730         trace.skip              = skip;
1731
1732         /*
1733          * Since events can happen in NMIs there's no safe way to
1734          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1735          * or NMI comes in, it will just have to use the default
1736          * FTRACE_STACK_ENTRIES sized stack that is in the event itself.
1737          */
1738         preempt_disable_notrace();
1739
1740         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1741         /*
1742          * We don't need any atomic variables, just a barrier.
1743          * If an interrupt comes in, we don't care, because it would
1744          * have exited and put the counter back to what we want.
1745          * We just need a barrier to keep gcc from moving things
1746          * around.
1747          */
1748         barrier();
1749         if (use_stack == 1) {
1750                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1751                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1752
1753                 if (regs)
1754                         save_stack_trace_regs(regs, &trace);
1755                 else
1756                         save_stack_trace(&trace);
1757
1758                 if (trace.nr_entries > size)
1759                         size = trace.nr_entries;
1760         } else
1761                 /* From now on, use_stack is a boolean */
1762                 use_stack = 0;
1763
1764         size *= sizeof(unsigned long);
1765
1766         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1767                                           sizeof(*entry) + size, flags, pc);
1768         if (!event)
1769                 goto out;
1770         entry = ring_buffer_event_data(event);
1771
1772         memset(&entry->caller, 0, size);
1773
1774         if (use_stack)
1775                 memcpy(&entry->caller, trace.entries,
1776                        trace.nr_entries * sizeof(unsigned long));
1777         else {
1778                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1779                 trace.entries           = entry->caller;
1780                 if (regs)
1781                         save_stack_trace_regs(regs, &trace);
1782                 else
1783                         save_stack_trace(&trace);
1784         }
1785
1786         entry->size = trace.nr_entries;
1787
1788         if (!call_filter_check_discard(call, entry, buffer, event))
1789                 __buffer_unlock_commit(buffer, event);
1790
1791  out:
1792         /* Again, don't let gcc optimize things here */
1793         barrier();
1794         __this_cpu_dec(ftrace_stack_reserve);
1795         preempt_enable_notrace();
1796
1797 }
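/*
 * The reservation above is just a per-cpu nesting counter plus compiler
 * barriers: the first user on a CPU (counter == 1) may use the large
 * per-cpu scratch stack, while a nested irq/NMI user falls back to the
 * space reserved in the event itself.  A minimal sketch of the same
 * pattern in isolation (names below are hypothetical):
 */
#if 0	/* illustrative only */
static DEFINE_PER_CPU(int, example_reserve);

static void example_reserved_section(void)
{
	int depth;

	preempt_disable_notrace();
	depth = __this_cpu_inc_return(example_reserve);
	barrier();		/* keep the compiler from reordering */

	if (depth == 1) {
		/* sole user on this CPU: the big per-cpu buffer is ours */
	} else {
		/* nested in irq/NMI: use a smaller, local fallback */
	}

	barrier();
	__this_cpu_dec(example_reserve);
	preempt_enable_notrace();
}
#endif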
1798
1799 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1800                              int skip, int pc, struct pt_regs *regs)
1801 {
1802         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1803                 return;
1804
1805         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1806 }
1807
1808 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1809                         int skip, int pc)
1810 {
1811         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1812                 return;
1813
1814         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1815 }
1816
1817 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1818                    int pc)
1819 {
1820         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1821 }
1822
1823 /**
1824  * trace_dump_stack - record a stack back trace in the trace buffer
1825  * @skip: Number of functions to skip (helper handlers)
1826  */
1827 void trace_dump_stack(int skip)
1828 {
1829         unsigned long flags;
1830
1831         if (tracing_disabled || tracing_selftest_running)
1832                 return;
1833
1834         local_save_flags(flags);
1835
1836         /*
1837          * Skip 3 more, seems to get us at the caller of
1838          * this function.
1839          * Skip 3 more, which seems to get us to the caller of
1840         skip += 3;
1841         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1842                              flags, skip, preempt_count(), NULL);
1843 }
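/*
 * Typical use of trace_dump_stack() is ad-hoc debugging: drop a call
 * into a suspect path and the backtrace of that call site is recorded
 * in the trace buffer alongside the other events.  A minimal sketch
 * (the surrounding condition and variables are hypothetical):
 */
#if 0	/* illustrative only */
	if (unlikely(retries > max_retries))
		trace_dump_stack(0);	/* 0 = do not skip extra callers */
#endif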
1844
1845 static DEFINE_PER_CPU(int, user_stack_count);
1846
1847 void
1848 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1849 {
1850         struct ftrace_event_call *call = &event_user_stack;
1851         struct ring_buffer_event *event;
1852         struct userstack_entry *entry;
1853         struct stack_trace trace;
1854
1855         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1856                 return;
1857
1858         /*
1859          * NMIs can not handle page faults, even with fixups.
1860          * Saving the user stack can (and often does) fault.
1861          */
1862         if (unlikely(in_nmi()))
1863                 return;
1864
1865         /*
1866          * prevent recursion, since the user stack tracing may
1867          * trigger other kernel events.
1868          */
1869         preempt_disable();
1870         if (__this_cpu_read(user_stack_count))
1871                 goto out;
1872
1873         __this_cpu_inc(user_stack_count);
1874
1875         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1876                                           sizeof(*entry), flags, pc);
1877         if (!event)
1878                 goto out_drop_count;
1879         entry   = ring_buffer_event_data(event);
1880
1881         entry->tgid             = current->tgid;
1882         memset(&entry->caller, 0, sizeof(entry->caller));
1883
1884         trace.nr_entries        = 0;
1885         trace.max_entries       = FTRACE_STACK_ENTRIES;
1886         trace.skip              = 0;
1887         trace.entries           = entry->caller;
1888
1889         save_stack_trace_user(&trace);
1890         if (!call_filter_check_discard(call, entry, buffer, event))
1891                 __buffer_unlock_commit(buffer, event);
1892
1893  out_drop_count:
1894         __this_cpu_dec(user_stack_count);
1895  out:
1896         preempt_enable();
1897 }
1898
1899 #ifdef UNUSED
1900 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1901 {
1902         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1903 }
1904 #endif /* UNUSED */
1905
1906 #endif /* CONFIG_STACKTRACE */
1907
1908 /* created for use with alloc_percpu */
1909 struct trace_buffer_struct {
1910         char buffer[TRACE_BUF_SIZE];
1911 };
1912
1913 static struct trace_buffer_struct *trace_percpu_buffer;
1914 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1915 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1916 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1917
1918 /*
1919  * The buffer used is dependent on the context. There is a per cpu
1920  * buffer for normal context, softirq context, hard irq context and
1921  * for NMI context. This allows for lockless recording.
1922  *
1923  * Note, if the buffers failed to be allocated, then this returns NULL
1924  */
1925 static char *get_trace_buf(void)
1926 {
1927         struct trace_buffer_struct *percpu_buffer;
1928
1929         /*
1930          * If we have allocated per cpu buffers, then we do not
1931          * need to do any locking.
1932          */
1933         if (in_nmi())
1934                 percpu_buffer = trace_percpu_nmi_buffer;
1935         else if (in_irq())
1936                 percpu_buffer = trace_percpu_irq_buffer;
1937         else if (in_softirq())
1938                 percpu_buffer = trace_percpu_sirq_buffer;
1939         else
1940                 percpu_buffer = trace_percpu_buffer;
1941
1942         if (!percpu_buffer)
1943                 return NULL;
1944
1945         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1946 }
1947
1948 static int alloc_percpu_trace_buffer(void)
1949 {
1950         struct trace_buffer_struct *buffers;
1951         struct trace_buffer_struct *sirq_buffers;
1952         struct trace_buffer_struct *irq_buffers;
1953         struct trace_buffer_struct *nmi_buffers;
1954
1955         buffers = alloc_percpu(struct trace_buffer_struct);
1956         if (!buffers)
1957                 goto err_warn;
1958
1959         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1960         if (!sirq_buffers)
1961                 goto err_sirq;
1962
1963         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1964         if (!irq_buffers)
1965                 goto err_irq;
1966
1967         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1968         if (!nmi_buffers)
1969                 goto err_nmi;
1970
1971         trace_percpu_buffer = buffers;
1972         trace_percpu_sirq_buffer = sirq_buffers;
1973         trace_percpu_irq_buffer = irq_buffers;
1974         trace_percpu_nmi_buffer = nmi_buffers;
1975
1976         return 0;
1977
1978  err_nmi:
1979         free_percpu(irq_buffers);
1980  err_irq:
1981         free_percpu(sirq_buffers);
1982  err_sirq:
1983         free_percpu(buffers);
1984  err_warn:
1985         WARN(1, "Could not allocate percpu trace_printk buffer");
1986         return -ENOMEM;
1987 }
1988
1989 static int buffers_allocated;
1990
1991 void trace_printk_init_buffers(void)
1992 {
1993         if (buffers_allocated)
1994                 return;
1995
1996         if (alloc_percpu_trace_buffer())
1997                 return;
1998
1999         pr_info("ftrace: Allocated trace_printk buffers\n");
2000
2001         /* Expand the buffers to set size */
2002         tracing_update_buffers();
2003
2004         buffers_allocated = 1;
2005
2006         /*
2007          * trace_printk_init_buffers() can be called by modules.
2008          * If that happens, then we need to start cmdline recording
2009          * directly here. If the global_trace.buffer is already
2010          * allocated here, then this was called by module code.
2011          */
2012         if (global_trace.trace_buffer.buffer)
2013                 tracing_start_cmdline_record();
2014 }
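/*
 * These buffers back trace_printk().  Once they are allocated (at boot,
 * or when a module using trace_printk() is loaded), a call like the
 * sketch below lands in the ring buffer as a bprint/print event rather
 * than in the printk log.  The example caller is hypothetical:
 */
#if 0	/* illustrative only */
static void example_poll_done(int budget, int done)
{
	trace_printk("poll finished: budget=%d done=%d\n", budget, done);
}
#endif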
2015
2016 void trace_printk_start_comm(void)
2017 {
2018         /* Start tracing comms if trace printk is set */
2019         if (!buffers_allocated)
2020                 return;
2021         tracing_start_cmdline_record();
2022 }
2023
2024 static void trace_printk_start_stop_comm(int enabled)
2025 {
2026         if (!buffers_allocated)
2027                 return;
2028
2029         if (enabled)
2030                 tracing_start_cmdline_record();
2031         else
2032                 tracing_stop_cmdline_record();
2033 }
2034
2035 /**
2036  * trace_vbprintk - write binary msg to tracing buffer
2037  *
2038  */
2039 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2040 {
2041         struct ftrace_event_call *call = &event_bprint;
2042         struct ring_buffer_event *event;
2043         struct ring_buffer *buffer;
2044         struct trace_array *tr = &global_trace;
2045         struct bprint_entry *entry;
2046         unsigned long flags;
2047         char *tbuffer;
2048         int len = 0, size, pc;
2049
2050         if (unlikely(tracing_selftest_running || tracing_disabled))
2051                 return 0;
2052
2053         /* Don't pollute graph traces with trace_vprintk internals */
2054         pause_graph_tracing();
2055
2056         pc = preempt_count();
2057         preempt_disable_notrace();
2058
2059         tbuffer = get_trace_buf();
2060         if (!tbuffer) {
2061                 len = 0;
2062                 goto out;
2063         }
2064
2065         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2066
2067         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2068                 goto out;
2069
2070         local_save_flags(flags);
2071         size = sizeof(*entry) + sizeof(u32) * len;
2072         buffer = tr->trace_buffer.buffer;
2073         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2074                                           flags, pc);
2075         if (!event)
2076                 goto out;
2077         entry = ring_buffer_event_data(event);
2078         entry->ip                       = ip;
2079         entry->fmt                      = fmt;
2080
2081         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2082         if (!call_filter_check_discard(call, entry, buffer, event)) {
2083                 __buffer_unlock_commit(buffer, event);
2084                 ftrace_trace_stack(buffer, flags, 6, pc);
2085         }
2086
2087 out:
2088         preempt_enable_notrace();
2089         unpause_graph_tracing();
2090
2091         return len;
2092 }
2093 EXPORT_SYMBOL_GPL(trace_vbprintk);
2094
2095 static int
2096 __trace_array_vprintk(struct ring_buffer *buffer,
2097                       unsigned long ip, const char *fmt, va_list args)
2098 {
2099         struct ftrace_event_call *call = &event_print;
2100         struct ring_buffer_event *event;
2101         int len = 0, size, pc;
2102         struct print_entry *entry;
2103         unsigned long flags;
2104         char *tbuffer;
2105
2106         if (tracing_disabled || tracing_selftest_running)
2107                 return 0;
2108
2109         /* Don't pollute graph traces with trace_vprintk internals */
2110         pause_graph_tracing();
2111
2112         pc = preempt_count();
2113         preempt_disable_notrace();
2114
2115
2116         tbuffer = get_trace_buf();
2117         if (!tbuffer) {
2118                 len = 0;
2119                 goto out;
2120         }
2121
2122         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2123         if (len > TRACE_BUF_SIZE)
2124                 goto out;
2125
2126         local_save_flags(flags);
2127         size = sizeof(*entry) + len + 1;
2128         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2129                                           flags, pc);
2130         if (!event)
2131                 goto out;
2132         entry = ring_buffer_event_data(event);
2133         entry->ip = ip;
2134
2135         memcpy(&entry->buf, tbuffer, len);
2136         entry->buf[len] = '\0';
2137         if (!call_filter_check_discard(call, entry, buffer, event)) {
2138                 __buffer_unlock_commit(buffer, event);
2139                 ftrace_trace_stack(buffer, flags, 6, pc);
2140         }
2141  out:
2142         preempt_enable_notrace();
2143         unpause_graph_tracing();
2144
2145         return len;
2146 }
2147
2148 int trace_array_vprintk(struct trace_array *tr,
2149                         unsigned long ip, const char *fmt, va_list args)
2150 {
2151         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2152 }
2153
2154 int trace_array_printk(struct trace_array *tr,
2155                        unsigned long ip, const char *fmt, ...)
2156 {
2157         int ret;
2158         va_list ap;
2159
2160         if (!(trace_flags & TRACE_ITER_PRINTK))
2161                 return 0;
2162
2163         va_start(ap, fmt);
2164         ret = trace_array_vprintk(tr, ip, fmt, ap);
2165         va_end(ap);
2166         return ret;
2167 }
2168
2169 int trace_array_printk_buf(struct ring_buffer *buffer,
2170                            unsigned long ip, const char *fmt, ...)
2171 {
2172         int ret;
2173         va_list ap;
2174
2175         if (!(trace_flags & TRACE_ITER_PRINTK))
2176                 return 0;
2177
2178         va_start(ap, fmt);
2179         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2180         va_end(ap);
2181         return ret;
2182 }
2183
2184 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2185 {
2186         return trace_array_vprintk(&global_trace, ip, fmt, args);
2187 }
2188 EXPORT_SYMBOL_GPL(trace_vprintk);
2189
2190 static void trace_iterator_increment(struct trace_iterator *iter)
2191 {
2192         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2193
2194         iter->idx++;
2195         if (buf_iter)
2196                 ring_buffer_read(buf_iter, NULL);
2197 }
2198
2199 static struct trace_entry *
2200 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2201                 unsigned long *lost_events)
2202 {
2203         struct ring_buffer_event *event;
2204         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2205
2206         if (buf_iter)
2207                 event = ring_buffer_iter_peek(buf_iter, ts);
2208         else
2209                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2210                                          lost_events);
2211
2212         if (event) {
2213                 iter->ent_size = ring_buffer_event_length(event);
2214                 return ring_buffer_event_data(event);
2215         }
2216         iter->ent_size = 0;
2217         return NULL;
2218 }
2219
2220 static struct trace_entry *
2221 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2222                   unsigned long *missing_events, u64 *ent_ts)
2223 {
2224         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2225         struct trace_entry *ent, *next = NULL;
2226         unsigned long lost_events = 0, next_lost = 0;
2227         int cpu_file = iter->cpu_file;
2228         u64 next_ts = 0, ts;
2229         int next_cpu = -1;
2230         int next_size = 0;
2231         int cpu;
2232
2233         /*
2234          * If we are in a per_cpu trace file, don't bother iterating over
2235          * all cpus; just peek at that cpu directly.
2236          */
2237         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2238                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2239                         return NULL;
2240                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2241                 if (ent_cpu)
2242                         *ent_cpu = cpu_file;
2243
2244                 return ent;
2245         }
2246
2247         for_each_tracing_cpu(cpu) {
2248
2249                 if (ring_buffer_empty_cpu(buffer, cpu))
2250                         continue;
2251
2252                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2253
2254                 /*
2255                  * Pick the entry with the smallest timestamp:
2256                  */
2257                 if (ent && (!next || ts < next_ts)) {
2258                         next = ent;
2259                         next_cpu = cpu;
2260                         next_ts = ts;
2261                         next_lost = lost_events;
2262                         next_size = iter->ent_size;
2263                 }
2264         }
2265
2266         iter->ent_size = next_size;
2267
2268         if (ent_cpu)
2269                 *ent_cpu = next_cpu;
2270
2271         if (ent_ts)
2272                 *ent_ts = next_ts;
2273
2274         if (missing_events)
2275                 *missing_events = next_lost;
2276
2277         return next;
2278 }
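/*
 * __find_next_entry() is effectively a k-way merge: each CPU buffer is
 * already ordered by timestamp, so the globally next event is simply
 * the oldest head among the per-cpu streams.  A minimal sketch of that
 * selection over plain arrays (types and names are hypothetical):
 */
#if 0	/* illustrative only */
struct demo_head {
	u64	ts;
	bool	empty;
};

static int demo_pick_next_cpu(struct demo_head *heads, int nr)
{
	int cpu, next_cpu = -1;
	u64 next_ts = 0;

	for (cpu = 0; cpu < nr; cpu++) {
		if (heads[cpu].empty)
			continue;
		if (next_cpu < 0 || heads[cpu].ts < next_ts) {
			next_cpu = cpu;
			next_ts = heads[cpu].ts;
		}
	}
	return next_cpu;	/* -1 once every stream is exhausted */
}
#endif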
2279
2280 /* Find the next real entry, without updating the iterator itself */
2281 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2282                                           int *ent_cpu, u64 *ent_ts)
2283 {
2284         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2285 }
2286
2287 /* Find the next real entry, and increment the iterator to the next entry */
2288 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2289 {
2290         iter->ent = __find_next_entry(iter, &iter->cpu,
2291                                       &iter->lost_events, &iter->ts);
2292
2293         if (iter->ent)
2294                 trace_iterator_increment(iter);
2295
2296         return iter->ent ? iter : NULL;
2297 }
2298
2299 static void trace_consume(struct trace_iterator *iter)
2300 {
2301         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2302                             &iter->lost_events);
2303 }
2304
2305 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2306 {
2307         struct trace_iterator *iter = m->private;
2308         int i = (int)*pos;
2309         void *ent;
2310
2311         WARN_ON_ONCE(iter->leftover);
2312
2313         (*pos)++;
2314
2315         /* can't go backwards */
2316         if (iter->idx > i)
2317                 return NULL;
2318
2319         if (iter->idx < 0)
2320                 ent = trace_find_next_entry_inc(iter);
2321         else
2322                 ent = iter;
2323
2324         while (ent && iter->idx < i)
2325                 ent = trace_find_next_entry_inc(iter);
2326
2327         iter->pos = *pos;
2328
2329         return ent;
2330 }
2331
2332 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2333 {
2334         struct ring_buffer_event *event;
2335         struct ring_buffer_iter *buf_iter;
2336         unsigned long entries = 0;
2337         u64 ts;
2338
2339         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2340
2341         buf_iter = trace_buffer_iter(iter, cpu);
2342         if (!buf_iter)
2343                 return;
2344
2345         ring_buffer_iter_reset(buf_iter);
2346
2347         /*
2348          * With the max latency tracers it can happen that a reset
2349          * never took place on a cpu. This shows up as timestamps
2350          * that are before the start of the buffer.
2351          */
2352         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2353                 if (ts >= iter->trace_buffer->time_start)
2354                         break;
2355                 entries++;
2356                 ring_buffer_read(buf_iter, NULL);
2357         }
2358
2359         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2360 }
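/*
 * Example of the skipped_entries accounting above: if the last latency
 * snapshot started at time_start == 1000 but this CPU's buffer still
 * holds three older events with timestamps 950, 970 and 990, the loop
 * steps past those three and records skipped_entries = 3, so the
 * per-cpu statistics only count events that belong to this trace.
 * (The numbers are illustrative.)
 */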
2361
2362 /*
2363  * The current tracer is copied to avoid taking a global lock
2364  * all around.
2365  */
2366 static void *s_start(struct seq_file *m, loff_t *pos)
2367 {
2368         struct trace_iterator *iter = m->private;
2369         struct trace_array *tr = iter->tr;
2370         int cpu_file = iter->cpu_file;
2371         void *p = NULL;
2372         loff_t l = 0;
2373         int cpu;
2374
2375         /*
2376          * copy the tracer to avoid using a global lock all around.
2377          * iter->trace is a copy of current_trace; the pointer to the
2378          * name may be used instead of a strcmp(), as iter->trace->name
2379          * will point to the same string as current_trace->name.
2380          */
2381         mutex_lock(&trace_types_lock);
2382         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2383                 *iter->trace = *tr->current_trace;
2384         mutex_unlock(&trace_types_lock);
2385
2386 #ifdef CONFIG_TRACER_MAX_TRACE
2387         if (iter->snapshot && iter->trace->use_max_tr)
2388                 return ERR_PTR(-EBUSY);
2389 #endif
2390
2391         if (!iter->snapshot)
2392                 atomic_inc(&trace_record_cmdline_disabled);
2393
2394         if (*pos != iter->pos) {
2395                 iter->ent = NULL;
2396                 iter->cpu = 0;
2397                 iter->idx = -1;
2398
2399                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2400                         for_each_tracing_cpu(cpu)
2401                                 tracing_iter_reset(iter, cpu);
2402                 } else
2403                         tracing_iter_reset(iter, cpu_file);
2404
2405                 iter->leftover = 0;
2406                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2407                         ;
2408
2409         } else {
2410                 /*
2411                  * If we overflowed the seq_file before, then we want
2412                  * to just reuse the trace_seq buffer again.
2413                  */
2414                 if (iter->leftover)
2415                         p = iter;
2416                 else {
2417                         l = *pos - 1;
2418                         p = s_next(m, p, &l);
2419                 }
2420         }
2421
2422         trace_event_read_lock();
2423         trace_access_lock(cpu_file);
2424         return p;
2425 }
2426
2427 static void s_stop(struct seq_file *m, void *p)
2428 {
2429         struct trace_iterator *iter = m->private;
2430
2431 #ifdef CONFIG_TRACER_MAX_TRACE
2432         if (iter->snapshot && iter->trace->use_max_tr)
2433                 return;
2434 #endif
2435
2436         if (!iter->snapshot)
2437                 atomic_dec(&trace_record_cmdline_disabled);
2438
2439         trace_access_unlock(iter->cpu_file);
2440         trace_event_read_unlock();
2441 }
2442
2443 static void
2444 get_total_entries(struct trace_buffer *buf,
2445                   unsigned long *total, unsigned long *entries)
2446 {
2447         unsigned long count;
2448         int cpu;
2449
2450         *total = 0;
2451         *entries = 0;
2452
2453         for_each_tracing_cpu(cpu) {
2454                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2455                 /*
2456                  * If this buffer has skipped entries, then we hold all
2457                  * entries for the trace and we need to ignore the
2458                  * ones before the time stamp.
2459                  */
2460                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2461                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2462                         /* total is the same as the entries */
2463                         *total += count;
2464                 } else
2465                         *total += count +
2466                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2467                 *entries += count;
2468         }
2469 }
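/*
 * Worked example of the entries/total split above (numbers are
 * illustrative): if a CPU currently has 1200 events in its buffer and
 * 300 older events were overwritten (overrun), then entries gains 1200
 * and total gains 1500 -- which is what the header later reports as
 * "entries-in-buffer/entries-written: 1200/1500".
 */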
2470
2471 static void print_lat_help_header(struct seq_file *m)
2472 {
2473         seq_puts(m, "#                  _------=> CPU#            \n");
2474         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2475         seq_puts(m, "#                | / _----=> need-resched    \n");
2476         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2477         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2478         seq_puts(m, "#                |||| /     delay             \n");
2479         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2480         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2481 }
2482
2483 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2484 {
2485         unsigned long total;
2486         unsigned long entries;
2487
2488         get_total_entries(buf, &total, &entries);
2489         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2490                    entries, total, num_online_cpus());
2491         seq_puts(m, "#\n");
2492 }
2493
2494 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2495 {
2496         print_event_info(buf, m);
2497         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2498         seq_puts(m, "#              | |       |          |         |\n");
2499 }
2500
2501 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2502 {
2503         print_event_info(buf, m);
2504         seq_puts(m, "#                              _-----=> irqs-off\n");
2505         seq_puts(m, "#                             / _----=> need-resched\n");
2506         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2507         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2508         seq_puts(m, "#                            ||| /     delay\n");
2509         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2510         seq_puts(m, "#              | |       |   ||||       |         |\n");
2511 }
2512
2513 void
2514 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2515 {
2516         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2517         struct trace_buffer *buf = iter->trace_buffer;
2518         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2519         struct tracer *type = iter->trace;
2520         unsigned long entries;
2521         unsigned long total;
2522         const char *name = "preemption";
2523
2524         name = type->name;
2525
2526         get_total_entries(buf, &total, &entries);
2527
2528         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2529                    name, UTS_RELEASE);
2530         seq_puts(m, "# -----------------------------------"
2531                  "---------------------------------\n");
2532         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2533                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2534                    nsecs_to_usecs(data->saved_latency),
2535                    entries,
2536                    total,
2537                    buf->cpu,
2538 #if defined(CONFIG_PREEMPT_NONE)
2539                    "server",
2540 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2541                    "desktop",
2542 #elif defined(CONFIG_PREEMPT)
2543                    "preempt",
2544 #else
2545                    "unknown",
2546 #endif
2547                    /* These are reserved for later use */
2548                    0, 0, 0, 0);
2549 #ifdef CONFIG_SMP
2550         seq_printf(m, " #P:%d)\n", num_online_cpus());
2551 #else
2552         seq_puts(m, ")\n");
2553 #endif
2554         seq_puts(m, "#    -----------------\n");
2555         seq_printf(m, "#    | task: %.16s-%d "
2556                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2557                    data->comm, data->pid,
2558                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2559                    data->policy, data->rt_priority);
2560         seq_puts(m, "#    -----------------\n");
2561
2562         if (data->critical_start) {
2563                 seq_puts(m, "#  => started at: ");
2564                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2565                 trace_print_seq(m, &iter->seq);
2566                 seq_puts(m, "\n#  => ended at:   ");
2567                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2568                 trace_print_seq(m, &iter->seq);
2569                 seq_puts(m, "\n#\n");
2570         }
2571
2572         seq_puts(m, "#\n");
2573 }
2574
2575 static void test_cpu_buff_start(struct trace_iterator *iter)
2576 {
2577         struct trace_seq *s = &iter->seq;
2578
2579         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2580                 return;
2581
2582         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2583                 return;
2584
2585         if (cpumask_test_cpu(iter->cpu, iter->started))
2586                 return;
2587
2588         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2589                 return;
2590
2591         cpumask_set_cpu(iter->cpu, iter->started);
2592
2593         /* Don't print started cpu buffer for the first entry of the trace */
2594         if (iter->idx > 1)
2595                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2596                                 iter->cpu);
2597 }
2598
2599 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2600 {
2601         struct trace_seq *s = &iter->seq;
2602         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2603         struct trace_entry *entry;
2604         struct trace_event *event;
2605
2606         entry = iter->ent;
2607
2608         test_cpu_buff_start(iter);
2609
2610         event = ftrace_find_event(entry->type);
2611
2612         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2613                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2614                         if (!trace_print_lat_context(iter))
2615                                 goto partial;
2616                 } else {
2617                         if (!trace_print_context(iter))
2618                                 goto partial;
2619                 }
2620         }
2621
2622         if (event)
2623                 return event->funcs->trace(iter, sym_flags, event);
2624
2625         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2626                 goto partial;
2627
2628         return TRACE_TYPE_HANDLED;
2629 partial:
2630         return TRACE_TYPE_PARTIAL_LINE;
2631 }
2632
2633 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2634 {
2635         struct trace_seq *s = &iter->seq;
2636         struct trace_entry *entry;
2637         struct trace_event *event;
2638
2639         entry = iter->ent;
2640
2641         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2642                 if (!trace_seq_printf(s, "%d %d %llu ",
2643                                       entry->pid, iter->cpu, iter->ts))
2644                         goto partial;
2645         }
2646
2647         event = ftrace_find_event(entry->type);
2648         if (event)
2649                 return event->funcs->raw(iter, 0, event);
2650
2651         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2652                 goto partial;
2653
2654         return TRACE_TYPE_HANDLED;
2655 partial:
2656         return TRACE_TYPE_PARTIAL_LINE;
2657 }
2658
2659 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2660 {
2661         struct trace_seq *s = &iter->seq;
2662         unsigned char newline = '\n';
2663         struct trace_entry *entry;
2664         struct trace_event *event;
2665
2666         entry = iter->ent;
2667
2668         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2669                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2670                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2671                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2672         }
2673
2674         event = ftrace_find_event(entry->type);
2675         if (event) {
2676                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2677                 if (ret != TRACE_TYPE_HANDLED)
2678                         return ret;
2679         }
2680
2681         SEQ_PUT_FIELD_RET(s, newline);
2682
2683         return TRACE_TYPE_HANDLED;
2684 }
2685
2686 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2687 {
2688         struct trace_seq *s = &iter->seq;
2689         struct trace_entry *entry;
2690         struct trace_event *event;
2691
2692         entry = iter->ent;
2693
2694         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2695                 SEQ_PUT_FIELD_RET(s, entry->pid);
2696                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2697                 SEQ_PUT_FIELD_RET(s, iter->ts);
2698         }
2699
2700         event = ftrace_find_event(entry->type);
2701         return event ? event->funcs->binary(iter, 0, event) :
2702                 TRACE_TYPE_HANDLED;
2703 }
2704
2705 int trace_empty(struct trace_iterator *iter)
2706 {
2707         struct ring_buffer_iter *buf_iter;
2708         int cpu;
2709
2710         /* If we are looking at one CPU buffer, only check that one */
2711         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2712                 cpu = iter->cpu_file;
2713                 buf_iter = trace_buffer_iter(iter, cpu);
2714                 if (buf_iter) {
2715                         if (!ring_buffer_iter_empty(buf_iter))
2716                                 return 0;
2717                 } else {
2718                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2719                                 return 0;
2720                 }
2721                 return 1;
2722         }
2723
2724         for_each_tracing_cpu(cpu) {
2725                 buf_iter = trace_buffer_iter(iter, cpu);
2726                 if (buf_iter) {
2727                         if (!ring_buffer_iter_empty(buf_iter))
2728                                 return 0;
2729                 } else {
2730                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2731                                 return 0;
2732                 }
2733         }
2734
2735         return 1;
2736 }
2737
2738 /*  Called with trace_event_read_lock() held. */
2739 enum print_line_t print_trace_line(struct trace_iterator *iter)
2740 {
2741         enum print_line_t ret;
2742
2743         if (iter->lost_events &&
2744             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2745                                  iter->cpu, iter->lost_events))
2746                 return TRACE_TYPE_PARTIAL_LINE;
2747
2748         if (iter->trace && iter->trace->print_line) {
2749                 ret = iter->trace->print_line(iter);
2750                 if (ret != TRACE_TYPE_UNHANDLED)
2751                         return ret;
2752         }
2753
2754         if (iter->ent->type == TRACE_BPUTS &&
2755                         trace_flags & TRACE_ITER_PRINTK &&
2756                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2757                 return trace_print_bputs_msg_only(iter);
2758
2759         if (iter->ent->type == TRACE_BPRINT &&
2760                         trace_flags & TRACE_ITER_PRINTK &&
2761                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2762                 return trace_print_bprintk_msg_only(iter);
2763
2764         if (iter->ent->type == TRACE_PRINT &&
2765                         trace_flags & TRACE_ITER_PRINTK &&
2766                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2767                 return trace_print_printk_msg_only(iter);
2768
2769         if (trace_flags & TRACE_ITER_BIN)
2770                 return print_bin_fmt(iter);
2771
2772         if (trace_flags & TRACE_ITER_HEX)
2773                 return print_hex_fmt(iter);
2774
2775         if (trace_flags & TRACE_ITER_RAW)
2776                 return print_raw_fmt(iter);
2777
2778         return print_trace_fmt(iter);
2779 }
2780
2781 void trace_latency_header(struct seq_file *m)
2782 {
2783         struct trace_iterator *iter = m->private;
2784
2785         /* print nothing if the buffers are empty */
2786         if (trace_empty(iter))
2787                 return;
2788
2789         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2790                 print_trace_header(m, iter);
2791
2792         if (!(trace_flags & TRACE_ITER_VERBOSE))
2793                 print_lat_help_header(m);
2794 }
2795
2796 void trace_default_header(struct seq_file *m)
2797 {
2798         struct trace_iterator *iter = m->private;
2799
2800         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2801                 return;
2802
2803         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2804                 /* print nothing if the buffers are empty */
2805                 if (trace_empty(iter))
2806                         return;
2807                 print_trace_header(m, iter);
2808                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2809                         print_lat_help_header(m);
2810         } else {
2811                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2812                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2813                                 print_func_help_header_irq(iter->trace_buffer, m);
2814                         else
2815                                 print_func_help_header(iter->trace_buffer, m);
2816                 }
2817         }
2818 }
2819
2820 static void test_ftrace_alive(struct seq_file *m)
2821 {
2822         if (!ftrace_is_dead())
2823                 return;
2824         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2825         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2826 }
2827
2828 #ifdef CONFIG_TRACER_MAX_TRACE
2829 static void show_snapshot_main_help(struct seq_file *m)
2830 {
2831         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2832         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2833         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2834         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2835         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2836         seq_printf(m, "#                       is not a '0' or '1')\n");
2837 }
2838
2839 static void show_snapshot_percpu_help(struct seq_file *m)
2840 {
2841         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2842 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2843         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2844         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2845 #else
2846         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2847         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2848 #endif
2849         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2850         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2851         seq_printf(m, "#                       is not a '0' or '1')\n");
2852 }
2853
2854 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2855 {
2856         if (iter->tr->allocated_snapshot)
2857                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2858         else
2859                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2860
2861         seq_printf(m, "# Snapshot commands:\n");
2862         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2863                 show_snapshot_main_help(m);
2864         else
2865                 show_snapshot_percpu_help(m);
2866 }
2867 #else
2868 /* Should never be called */
2869 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2870 #endif
2871
2872 static int s_show(struct seq_file *m, void *v)
2873 {
2874         struct trace_iterator *iter = v;
2875         int ret;
2876
2877         if (iter->ent == NULL) {
2878                 if (iter->tr) {
2879                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2880                         seq_puts(m, "#\n");
2881                         test_ftrace_alive(m);
2882                 }
2883                 if (iter->snapshot && trace_empty(iter))
2884                         print_snapshot_help(m, iter);
2885                 else if (iter->trace && iter->trace->print_header)
2886                         iter->trace->print_header(m);
2887                 else
2888                         trace_default_header(m);
2889
2890         } else if (iter->leftover) {
2891                 /*
2892                  * If we filled the seq_file buffer earlier, we
2893                  * want to just show it now.
2894                  */
2895                 ret = trace_print_seq(m, &iter->seq);
2896
2897                 /* ret should this time be zero, but you never know */
2898                 iter->leftover = ret;
2899
2900         } else {
2901                 print_trace_line(iter);
2902                 ret = trace_print_seq(m, &iter->seq);
2903                 /*
2904                  * If we overflow the seq_file buffer, then it will
2905                  * ask us for this data again at start up.
2906                  * Use that instead.
2907                  *  ret is 0 if seq_file write succeeded.
2908                  *        -1 otherwise.
2909                  */
2910                 iter->leftover = ret;
2911         }
2912
2913         return 0;
2914 }
2915
2916 /*
2917  * Should be used after trace_array_get(), trace_types_lock
2918  * ensures that i_cdev was already initialized.
2919  */
2920 static inline int tracing_get_cpu(struct inode *inode)
2921 {
2922         if (inode->i_cdev) /* See trace_create_cpu_file() */
2923                 return (long)inode->i_cdev - 1;
2924         return RING_BUFFER_ALL_CPUS;
2925 }
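/*
 * The per_cpu/cpu<N> files store "cpu + 1" in i_cdev when they are
 * created, so a NULL i_cdev still means "all CPUs" here.  A minimal
 * sketch of the encoding side (the function name is hypothetical):
 */
#if 0	/* illustrative only */
static void demo_tag_cpu_inode(struct inode *inode, long cpu)
{
	inode->i_cdev = (void *)(cpu + 1);	/* undone by tracing_get_cpu() */
}
#endif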
2926
2927 static const struct seq_operations tracer_seq_ops = {
2928         .start          = s_start,
2929         .next           = s_next,
2930         .stop           = s_stop,
2931         .show           = s_show,
2932 };
2933
2934 static struct trace_iterator *
2935 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2936 {
2937         struct trace_array *tr = inode->i_private;
2938         struct trace_iterator *iter;
2939         int cpu;
2940
2941         if (tracing_disabled)
2942                 return ERR_PTR(-ENODEV);
2943
2944         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2945         if (!iter)
2946                 return ERR_PTR(-ENOMEM);
2947
2948         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2949                                     GFP_KERNEL);
2950         if (!iter->buffer_iter)
2951                 goto release;
2952
2953         /*
2954          * We make a copy of the current tracer to avoid concurrent
2955          * changes on it while we are reading.
2956          */
2957         mutex_lock(&trace_types_lock);
2958         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2959         if (!iter->trace)
2960                 goto fail;
2961
2962         *iter->trace = *tr->current_trace;
2963
2964         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2965                 goto fail;
2966
2967         iter->tr = tr;
2968
2969 #ifdef CONFIG_TRACER_MAX_TRACE
2970         /* Currently only the top directory has a snapshot */
2971         if (tr->current_trace->print_max || snapshot)
2972                 iter->trace_buffer = &tr->max_buffer;
2973         else
2974 #endif
2975                 iter->trace_buffer = &tr->trace_buffer;
2976         iter->snapshot = snapshot;
2977         iter->pos = -1;
2978         iter->cpu_file = tracing_get_cpu(inode);
2979         mutex_init(&iter->mutex);
2980
2981         /* Notify the tracer early; before we stop tracing. */
2982         if (iter->trace && iter->trace->open)
2983                 iter->trace->open(iter);
2984
2985         /* Annotate start of buffers if we had overruns */
2986         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2987                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2988
2989         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2990         if (trace_clocks[tr->clock_id].in_ns)
2991                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2992
2993         /* stop the trace while dumping if we are not opening "snapshot" */
2994         if (!iter->snapshot)
2995                 tracing_stop_tr(tr);
2996
2997         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2998                 for_each_tracing_cpu(cpu) {
2999                         iter->buffer_iter[cpu] =
3000                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3001                 }
3002                 ring_buffer_read_prepare_sync();
3003                 for_each_tracing_cpu(cpu) {
3004                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3005                         tracing_iter_reset(iter, cpu);
3006                 }
3007         } else {
3008                 cpu = iter->cpu_file;
3009                 iter->buffer_iter[cpu] =
3010                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3011                 ring_buffer_read_prepare_sync();
3012                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3013                 tracing_iter_reset(iter, cpu);
3014         }
3015
3016         mutex_unlock(&trace_types_lock);
3017
3018         return iter;
3019
3020  fail:
3021         mutex_unlock(&trace_types_lock);
3022         kfree(iter->trace);
3023         kfree(iter->buffer_iter);
3024 release:
3025         seq_release_private(inode, file);
3026         return ERR_PTR(-ENOMEM);
3027 }
3028
3029 int tracing_open_generic(struct inode *inode, struct file *filp)
3030 {
3031         if (tracing_disabled)
3032                 return -ENODEV;
3033
3034         filp->private_data = inode->i_private;
3035         return 0;
3036 }
3037
3038 bool tracing_is_disabled(void)
3039 {
3040         return (tracing_disabled) ? true : false;
3041 }
3042
3043 /*
3044  * Open and update trace_array ref count.
3045  * Must have the current trace_array passed to it.
3046  */
3047 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3048 {
3049         struct trace_array *tr = inode->i_private;
3050
3051         if (tracing_disabled)
3052                 return -ENODEV;
3053
3054         if (trace_array_get(tr) < 0)
3055                 return -ENODEV;
3056
3057         filp->private_data = inode->i_private;
3058
3059         return 0;
3060 }
3061
3062 static int tracing_release(struct inode *inode, struct file *file)
3063 {
3064         struct trace_array *tr = inode->i_private;
3065         struct seq_file *m = file->private_data;
3066         struct trace_iterator *iter;
3067         int cpu;
3068
3069         if (!(file->f_mode & FMODE_READ)) {
3070                 trace_array_put(tr);
3071                 return 0;
3072         }
3073
3074         /* Writes do not use seq_file */
3075         iter = m->private;
3076         mutex_lock(&trace_types_lock);
3077
3078         for_each_tracing_cpu(cpu) {
3079                 if (iter->buffer_iter[cpu])
3080                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3081         }
3082
3083         if (iter->trace && iter->trace->close)
3084                 iter->trace->close(iter);
3085
3086         if (!iter->snapshot)
3087                 /* reenable tracing if it was previously enabled */
3088                 tracing_start_tr(tr);
3089
3090         __trace_array_put(tr);
3091
3092         mutex_unlock(&trace_types_lock);
3093
3094         mutex_destroy(&iter->mutex);
3095         free_cpumask_var(iter->started);
3096         kfree(iter->trace);
3097         kfree(iter->buffer_iter);
3098         seq_release_private(inode, file);
3099
3100         return 0;
3101 }
3102
3103 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3104 {
3105         struct trace_array *tr = inode->i_private;
3106
3107         trace_array_put(tr);
3108         return 0;
3109 }
3110
3111 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3112 {
3113         struct trace_array *tr = inode->i_private;
3114
3115         trace_array_put(tr);
3116
3117         return single_release(inode, file);
3118 }
3119
3120 static int tracing_open(struct inode *inode, struct file *file)
3121 {
3122         struct trace_array *tr = inode->i_private;
3123         struct trace_iterator *iter;
3124         int ret = 0;
3125
3126         if (trace_array_get(tr) < 0)
3127                 return -ENODEV;
3128
3129         /* If this file was open for write, then erase contents */
3130         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3131                 int cpu = tracing_get_cpu(inode);
3132
3133                 if (cpu == RING_BUFFER_ALL_CPUS)
3134                         tracing_reset_online_cpus(&tr->trace_buffer);
3135                 else
3136                         tracing_reset(&tr->trace_buffer, cpu);
3137         }
3138
3139         if (file->f_mode & FMODE_READ) {
3140                 iter = __tracing_open(inode, file, false);
3141                 if (IS_ERR(iter))
3142                         ret = PTR_ERR(iter);
3143                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3144                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3145         }
3146
3147         if (ret < 0)
3148                 trace_array_put(tr);
3149
3150         return ret;
3151 }
3152
3153 static void *
3154 t_next(struct seq_file *m, void *v, loff_t *pos)
3155 {
3156         struct tracer *t = v;
3157
3158         (*pos)++;
3159
3160         if (t)
3161                 t = t->next;
3162
3163         return t;
3164 }
3165
3166 static void *t_start(struct seq_file *m, loff_t *pos)
3167 {
3168         struct tracer *t;
3169         loff_t l = 0;
3170
3171         mutex_lock(&trace_types_lock);
3172         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3173                 ;
3174
3175         return t;
3176 }
3177
3178 static void t_stop(struct seq_file *m, void *p)
3179 {
3180         mutex_unlock(&trace_types_lock);
3181 }
3182
3183 static int t_show(struct seq_file *m, void *v)
3184 {
3185         struct tracer *t = v;
3186
3187         if (!t)
3188                 return 0;
3189
3190         seq_printf(m, "%s", t->name);
3191         if (t->next)
3192                 seq_putc(m, ' ');
3193         else
3194                 seq_putc(m, '\n');
3195
3196         return 0;
3197 }
3198
3199 static const struct seq_operations show_traces_seq_ops = {
3200         .start          = t_start,
3201         .next           = t_next,
3202         .stop           = t_stop,
3203         .show           = t_show,
3204 };
3205
3206 static int show_traces_open(struct inode *inode, struct file *file)
3207 {
3208         if (tracing_disabled)
3209                 return -ENODEV;
3210
3211         return seq_open(file, &show_traces_seq_ops);
3212 }
3213
3214 static ssize_t
3215 tracing_write_stub(struct file *filp, const char __user *ubuf,
3216                    size_t count, loff_t *ppos)
3217 {
3218         return count;
3219 }
3220
3221 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3222 {
3223         int ret;
3224
3225         if (file->f_mode & FMODE_READ)
3226                 ret = seq_lseek(file, offset, whence);
3227         else
3228                 file->f_pos = ret = 0;
3229
3230         return ret;
3231 }
3232
3233 static const struct file_operations tracing_fops = {
3234         .open           = tracing_open,
3235         .read           = seq_read,
3236         .write          = tracing_write_stub,
3237         .llseek         = tracing_lseek,
3238         .release        = tracing_release,
3239 };
3240
3241 static const struct file_operations show_traces_fops = {
3242         .open           = show_traces_open,
3243         .read           = seq_read,
3244         .release        = seq_release,
3245         .llseek         = seq_lseek,
3246 };
3247
3248 /*
3249  * The tracer itself will not take this lock, but still we want
3250  * to provide a consistent cpumask to user-space:
3251  */
3252 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3253
3254 /*
3255  * Temporary storage for the character representation of the
3256  * CPU bitmask (and one more byte for the newline):
3257  */
3258 static char mask_str[NR_CPUS + 1];
3259
3260 static ssize_t
3261 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3262                      size_t count, loff_t *ppos)
3263 {
3264         struct trace_array *tr = file_inode(filp)->i_private;
3265         int len;
3266
3267         mutex_lock(&tracing_cpumask_update_lock);
3268
3269         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3270         if (count - len < 2) {
3271                 count = -EINVAL;
3272                 goto out_err;
3273         }
3274         len += sprintf(mask_str + len, "\n");
3275         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3276
3277 out_err:
3278         mutex_unlock(&tracing_cpumask_update_lock);
3279
3280         return count;
3281 }
3282
3283 static ssize_t
3284 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3285                       size_t count, loff_t *ppos)
3286 {
3287         struct trace_array *tr = file_inode(filp)->i_private;
3288         cpumask_var_t tracing_cpumask_new;
3289         int err, cpu;
3290
3291         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3292                 return -ENOMEM;
3293
3294         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3295         if (err)
3296                 goto err_unlock;
3297
3298         mutex_lock(&tracing_cpumask_update_lock);
3299
3300         local_irq_disable();
3301         arch_spin_lock(&ftrace_max_lock);
3302         for_each_tracing_cpu(cpu) {
3303                 /*
3304                  * Increase/decrease the disabled counter if we are
3305                  * about to flip a bit in the cpumask:
3306                  */
3307                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3308                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3309                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3310                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3311                 }
3312                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3313                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3314                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3315                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3316                 }
3317         }
3318         arch_spin_unlock(&ftrace_max_lock);
3319         local_irq_enable();
3320
3321         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3322
3323         mutex_unlock(&tracing_cpumask_update_lock);
3324         free_cpumask_var(tracing_cpumask_new);
3325
3326         return count;
3327
3328 err_unlock:
3329         free_cpumask_var(tracing_cpumask_new);
3330
3331         return err;
3332 }
3333
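/*
 * Example usage from user space (illustrative; tracefs is commonly mounted
 * at /sys/kernel/debug/tracing): cpumask_parse_user() above expects a hex
 * cpumask, so limiting tracing to CPUs 0-3 looks like:
 *
 *	# echo f > tracing_cpumask
 *
 * Reading the file returns the current mask in the same hex form.
 */
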
3334 static const struct file_operations tracing_cpumask_fops = {
3335         .open           = tracing_open_generic_tr,
3336         .read           = tracing_cpumask_read,
3337         .write          = tracing_cpumask_write,
3338         .release        = tracing_release_generic_tr,
3339         .llseek         = generic_file_llseek,
3340 };
3341
3342 static int tracing_trace_options_show(struct seq_file *m, void *v)
3343 {
3344         struct tracer_opt *trace_opts;
3345         struct trace_array *tr = m->private;
3346         u32 tracer_flags;
3347         int i;
3348
3349         mutex_lock(&trace_types_lock);
3350         tracer_flags = tr->current_trace->flags->val;
3351         trace_opts = tr->current_trace->flags->opts;
3352
3353         for (i = 0; trace_options[i]; i++) {
3354                 if (trace_flags & (1 << i))
3355                         seq_printf(m, "%s\n", trace_options[i]);
3356                 else
3357                         seq_printf(m, "no%s\n", trace_options[i]);
3358         }
3359
3360         for (i = 0; trace_opts[i].name; i++) {
3361                 if (tracer_flags & trace_opts[i].bit)
3362                         seq_printf(m, "%s\n", trace_opts[i].name);
3363                 else
3364                         seq_printf(m, "no%s\n", trace_opts[i].name);
3365         }
3366         mutex_unlock(&trace_types_lock);
3367
3368         return 0;
3369 }
3370
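/*
 * Sketch of the per-tracer options displayed above and toggled by the
 * helpers below (hypothetical names, assuming the TRACER_OPT() helper from
 * trace.h): a tracer exposes its options by pointing its ->flags at a
 * table such as:
 *
 *	static struct tracer_opt my_tracer_opts[] = {
 *		{ TRACER_OPT(my-option, 0x1) },
 *		{ }
 *	};
 *
 *	static struct tracer_flags my_tracer_flags = {
 *		.val  = 0,
 *		.opts = my_tracer_opts,
 *	};
 *
 * Each named option then shows up in trace_options next to the core flags.
 */
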
3371 static int __set_tracer_option(struct tracer *trace,
3372                                struct tracer_flags *tracer_flags,
3373                                struct tracer_opt *opts, int neg)
3374 {
3375         int ret;
3376
3377         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3378         if (ret)
3379                 return ret;
3380
3381         if (neg)
3382                 tracer_flags->val &= ~opts->bit;
3383         else
3384                 tracer_flags->val |= opts->bit;
3385         return 0;
3386 }
3387
3388 /* Try to assign a tracer specific option */
3389 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3390 {
3391         struct tracer_flags *tracer_flags = trace->flags;
3392         struct tracer_opt *opts = NULL;
3393         int i;
3394
3395         for (i = 0; tracer_flags->opts[i].name; i++) {
3396                 opts = &tracer_flags->opts[i];
3397
3398                 if (strcmp(cmp, opts->name) == 0)
3399                         return __set_tracer_option(trace, trace->flags,
3400                                                    opts, neg);
3401         }
3402
3403         return -EINVAL;
3404 }
3405
3406 /* Some tracers require overwrite to stay enabled */
3407 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3408 {
3409         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3410                 return -1;
3411
3412         return 0;
3413 }
3414
3415 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3416 {
3417         /* do nothing if the flag already has the requested state */
3418         if (!!(trace_flags & mask) == !!enabled)
3419                 return 0;
3420
3421         /* Give the tracer a chance to approve the change */
3422         if (tr->current_trace->flag_changed)
3423                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3424                         return -EINVAL;
3425
3426         if (enabled)
3427                 trace_flags |= mask;
3428         else
3429                 trace_flags &= ~mask;
3430
3431         if (mask == TRACE_ITER_RECORD_CMD)
3432                 trace_event_enable_cmd_record(enabled);
3433
3434         if (mask == TRACE_ITER_OVERWRITE) {
3435                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3436 #ifdef CONFIG_TRACER_MAX_TRACE
3437                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3438 #endif
3439         }
3440
3441         if (mask == TRACE_ITER_PRINTK)
3442                 trace_printk_start_stop_comm(enabled);
3443
3444         return 0;
3445 }
3446
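/*
 * Minimal sketch of an in-kernel caller (hypothetical; the real callers in
 * this file go through trace_set_options() below): with trace_types_lock
 * held, a core flag can be flipped directly, e.g.
 *
 *	set_tracer_flag(&global_trace, TRACE_ITER_OVERWRITE, 0);
 *
 * which lets the current tracer veto the change through its flag_changed()
 * callback before the flag and the ring buffer state are updated.
 */
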
3447 static int trace_set_options(struct trace_array *tr, char *option)
3448 {
3449         char *cmp;
3450         int neg = 0;
3451         int ret = -ENODEV;
3452         int i;
3453
3454         cmp = strstrip(option);
3455
3456         if (strncmp(cmp, "no", 2) == 0) {
3457                 neg = 1;
3458                 cmp += 2;
3459         }
3460
3461         mutex_lock(&trace_types_lock);
3462
3463         for (i = 0; trace_options[i]; i++) {
3464                 if (strcmp(cmp, trace_options[i]) == 0) {
3465                         ret = set_tracer_flag(tr, 1 << i, !neg);
3466                         break;
3467                 }
3468         }
3469
3470         /* If no option could be set, test the specific tracer options */
3471         if (!trace_options[i])
3472                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3473
3474         mutex_unlock(&trace_types_lock);
3475
3476         return ret;
3477 }
3478
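/*
 * Example usage (illustrative): a core option is enabled by writing its
 * name and disabled by writing it with a "no" prefix; anything that does
 * not match a core option falls through to the current tracer's own
 * options via set_tracer_option():
 *
 *	# echo overwrite > trace_options
 *	# echo nooverwrite > trace_options
 */
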
3479 static ssize_t
3480 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3481                         size_t cnt, loff_t *ppos)
3482 {
3483         struct seq_file *m = filp->private_data;
3484         struct trace_array *tr = m->private;
3485         char buf[64];
3486         int ret;
3487
3488         if (cnt >= sizeof(buf))
3489                 return -EINVAL;
3490
3491         if (copy_from_user(&buf, ubuf, cnt))
3492                 return -EFAULT;
3493
3494         buf[cnt] = 0;
3495
3496         ret = trace_set_options(tr, buf);
3497         if (ret < 0)
3498                 return ret;
3499
3500         *ppos += cnt;
3501
3502         return cnt;
3503 }
3504
3505 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3506 {
3507         struct trace_array *tr = inode->i_private;
3508         int ret;
3509
3510         if (tracing_disabled)
3511                 return -ENODEV;
3512
3513         if (trace_array_get(tr) < 0)
3514                 return -ENODEV;
3515
3516         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3517         if (ret < 0)
3518                 trace_array_put(tr);
3519
3520         return ret;
3521 }
3522
3523 static const struct file_operations tracing_iter_fops = {
3524         .open           = tracing_trace_options_open,
3525         .read           = seq_read,
3526         .llseek         = seq_lseek,
3527         .release        = tracing_single_release_tr,
3528         .write          = tracing_trace_options_write,
3529 };
3530
3531 static const char readme_msg[] =
3532         "tracing mini-HOWTO:\n\n"
3533         "# echo 0 > tracing_on : quick way to disable tracing\n"
3534         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3535         " Important files:\n"
3536         "  trace\t\t\t- The static contents of the buffer\n"
3537         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3538         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3539         "  current_tracer\t- function and latency tracers\n"
3540         "  available_tracers\t- list of configured tracers for current_tracer\n"
3541         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3542         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3543         "  trace_clock\t\t-change the clock used to order events\n"
3544         "       local:   Per cpu clock but may not be synced across CPUs\n"
3545         "      global:   Synced across CPUs but slows tracing down.\n"
3546         "     counter:   Not a clock, but just an increment\n"
3547         "      uptime:   Jiffy counter from time of boot\n"
3548         "        perf:   Same clock that perf events use\n"
3549 #ifdef CONFIG_X86_64
3550         "     x86-tsc:   TSC cycle counter\n"
3551 #endif
3552         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3553         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3554         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3555         "\t\t\t  Remove sub-buffer with rmdir\n"
3556         "  trace_options\t\t- Set format or modify how tracing happens\n"
3557         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3558         "\t\t\t  option name\n"
3559 #ifdef CONFIG_DYNAMIC_FTRACE
3560         "\n  available_filter_functions - list of functions that can be filtered on\n"
3561         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3562         "\t\t\t  functions\n"
3563         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3564         "\t     modules: Can select a group via module\n"
3565         "\t      Format: :mod:<module-name>\n"
3566         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3567         "\t    triggers: a command to perform when function is hit\n"
3568         "\t      Format: <function>:<trigger>[:count]\n"
3569         "\t     trigger: traceon, traceoff\n"
3570         "\t\t      enable_event:<system>:<event>\n"
3571         "\t\t      disable_event:<system>:<event>\n"
3572 #ifdef CONFIG_STACKTRACE
3573         "\t\t      stacktrace\n"
3574 #endif
3575 #ifdef CONFIG_TRACER_SNAPSHOT
3576         "\t\t      snapshot\n"
3577 #endif
3578         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3579         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3580         "\t     The first one will disable tracing every time do_fault is hit\n"
3581         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3582         "\t       The first time do trap is hit and it disables tracing, the\n"
3583         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3584         "\t       the counter will not decrement. It only decrements when the\n"
3585         "\t       trigger did work\n"
3586         "\t     To remove trigger without count:\n"
3587         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3588         "\t     To remove trigger with a count:\n"
3589         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3590         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3591         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3592         "\t    modules: Can select a group via module command :mod:\n"
3593         "\t    Does not accept triggers\n"
3594 #endif /* CONFIG_DYNAMIC_FTRACE */
3595 #ifdef CONFIG_FUNCTION_TRACER
3596         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3597         "\t\t    (function)\n"
3598 #endif
3599 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3600         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3601         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3602 #endif
3603 #ifdef CONFIG_TRACER_SNAPSHOT
3604         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3605         "\t\t\t  snapshot buffer. Read the contents for more\n"
3606         "\t\t\t  information\n"
3607 #endif
3608 #ifdef CONFIG_STACK_TRACER
3609         "  stack_trace\t\t- Shows the max stack trace when active\n"
3610         "  stack_max_size\t- Shows current max stack size that was traced\n"
3611         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3612         "\t\t\t  new trace)\n"
3613 #ifdef CONFIG_DYNAMIC_FTRACE
3614         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3615         "\t\t\t  traces\n"
3616 #endif
3617 #endif /* CONFIG_STACK_TRACER */
3618         "  events/\t\t- Directory containing all trace event subsystems:\n"
3619         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3620         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3621         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3622         "\t\t\t  events\n"
3623         "      filter\t\t- If set, only events passing filter are traced\n"
3624         "  events/<system>/<event>/\t- Directory containing control files for\n"
3625         "\t\t\t  <event>:\n"
3626         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3627         "      filter\t\t- If set, only events passing filter are traced\n"
3628         "      trigger\t\t- If set, a command to perform when event is hit\n"
3629         "\t    Format: <trigger>[:count][if <filter>]\n"
3630         "\t   trigger: traceon, traceoff\n"
3631         "\t            enable_event:<system>:<event>\n"
3632         "\t            disable_event:<system>:<event>\n"
3633 #ifdef CONFIG_STACKTRACE
3634         "\t\t    stacktrace\n"
3635 #endif
3636 #ifdef CONFIG_TRACER_SNAPSHOT
3637         "\t\t    snapshot\n"
3638 #endif
3639         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3640         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3641         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3642         "\t                  events/block/block_unplug/trigger\n"
3643         "\t   The first disables tracing every time block_unplug is hit.\n"
3644         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3645         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3646         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3647         "\t   Like function triggers, the counter is only decremented if it\n"
3648         "\t    enabled or disabled tracing.\n"
3649         "\t   To remove a trigger without a count:\n"
3650         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3651         "\t   To remove a trigger with a count:\n"
3652         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3653         "\t   Filters can be ignored when removing a trigger.\n"
3654 ;
3655
3656 static ssize_t
3657 tracing_readme_read(struct file *filp, char __user *ubuf,
3658                        size_t cnt, loff_t *ppos)
3659 {
3660         return simple_read_from_buffer(ubuf, cnt, ppos,
3661                                         readme_msg, strlen(readme_msg));
3662 }
3663
3664 static const struct file_operations tracing_readme_fops = {
3665         .open           = tracing_open_generic,
3666         .read           = tracing_readme_read,
3667         .llseek         = generic_file_llseek,
3668 };
3669
3670 static ssize_t
3671 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3672                                 size_t cnt, loff_t *ppos)
3673 {
3674         char *buf_comm;
3675         char *file_buf;
3676         char *buf;
3677         int len = 0;
3678         int pid;
3679         int i;
3680
3681         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3682         if (!file_buf)
3683                 return -ENOMEM;
3684
3685         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3686         if (!buf_comm) {
3687                 kfree(file_buf);
3688                 return -ENOMEM;
3689         }
3690
3691         buf = file_buf;
3692
3693         for (i = 0; i < SAVED_CMDLINES; i++) {
3694                 int r;
3695
3696                 pid = map_cmdline_to_pid[i];
3697                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3698                         continue;
3699
3700                 trace_find_cmdline(pid, buf_comm);
3701                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3702                 buf += r;
3703                 len += r;
3704         }
3705
3706         len = simple_read_from_buffer(ubuf, cnt, ppos,
3707                                       file_buf, len);
3708
3709         kfree(file_buf);
3710         kfree(buf_comm);
3711
3712         return len;
3713 }
3714
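/*
 * Example output (illustrative pids and comms): each record produced above
 * is "<pid> <comm>", so a read of saved_cmdlines looks like:
 *
 *	# cat saved_cmdlines
 *	1 systemd
 *	1234 bash
 */
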
3715 static const struct file_operations tracing_saved_cmdlines_fops = {
3716     .open       = tracing_open_generic,
3717     .read       = tracing_saved_cmdlines_read,
3718     .llseek     = generic_file_llseek,
3719 };
3720
3721 static ssize_t
3722 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3723                        size_t cnt, loff_t *ppos)
3724 {
3725         struct trace_array *tr = filp->private_data;
3726         char buf[MAX_TRACER_SIZE+2];
3727         int r;
3728
3729         mutex_lock(&trace_types_lock);
3730         r = sprintf(buf, "%s\n", tr->current_trace->name);
3731         mutex_unlock(&trace_types_lock);
3732
3733         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3734 }
3735
3736 int tracer_init(struct tracer *t, struct trace_array *tr)
3737 {
3738         tracing_reset_online_cpus(&tr->trace_buffer);
3739         return t->init(tr);
3740 }
3741
3742 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3743 {
3744         int cpu;
3745
3746         for_each_tracing_cpu(cpu)
3747                 per_cpu_ptr(buf->data, cpu)->entries = val;
3748 }
3749
3750 #ifdef CONFIG_TRACER_MAX_TRACE
3751 /* resize @trace_buf's per-cpu entries to match @size_buf */
3752 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3753                                         struct trace_buffer *size_buf, int cpu_id)
3754 {
3755         int cpu, ret = 0;
3756
3757         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3758                 for_each_tracing_cpu(cpu) {
3759                         ret = ring_buffer_resize(trace_buf->buffer,
3760                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3761                         if (ret < 0)
3762                                 break;
3763                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3764                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3765                 }
3766         } else {
3767                 ret = ring_buffer_resize(trace_buf->buffer,
3768                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3769                 if (ret == 0)
3770                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3771                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3772         }
3773
3774         return ret;
3775 }
3776 #endif /* CONFIG_TRACER_MAX_TRACE */
3777
3778 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3779                                         unsigned long size, int cpu)
3780 {
3781         int ret;
3782
3783         /*
3784          * If kernel or user changes the size of the ring buffer
3785          * we use the size that was given, and we can forget about
3786          * expanding it later.
3787          */
3788         ring_buffer_expanded = true;
3789
3790         /* May be called before buffers are initialized */
3791         if (!tr->trace_buffer.buffer)
3792                 return 0;
3793
3794         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3795         if (ret < 0)
3796                 return ret;
3797
3798 #ifdef CONFIG_TRACER_MAX_TRACE
3799         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3800             !tr->current_trace->use_max_tr)
3801                 goto out;
3802
3803         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3804         if (ret < 0) {
3805                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3806                                                      &tr->trace_buffer, cpu);
3807                 if (r < 0) {
3808                         /*
3809                          * AARGH! We are left with different
3810                          * size max buffer!!!!
3811                          * The max buffer is our "snapshot" buffer.
3812                          * When a tracer needs a snapshot (one of the
3813                          * latency tracers), it swaps the max buffer
3814                  * with the saved snapshot. We succeeded in
3815                  * updating the size of the main buffer, but failed to
3816                          * update the size of the max buffer. But when we tried
3817                          * to reset the main buffer to the original size, we
3818                          * failed there too. This is very unlikely to
3819                          * happen, but if it does, warn and kill all
3820                          * tracing.
3821                          */
3822                         WARN_ON(1);
3823                         tracing_disabled = 1;
3824                 }
3825                 return ret;
3826         }
3827
3828         if (cpu == RING_BUFFER_ALL_CPUS)
3829                 set_buffer_entries(&tr->max_buffer, size);
3830         else
3831                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3832
3833  out:
3834 #endif /* CONFIG_TRACER_MAX_TRACE */
3835
3836         if (cpu == RING_BUFFER_ALL_CPUS)
3837                 set_buffer_entries(&tr->trace_buffer, size);
3838         else
3839                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3840
3841         return ret;
3842 }
3843
3844 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3845                                           unsigned long size, int cpu_id)
3846 {
3847         int ret = size;
3848
3849         mutex_lock(&trace_types_lock);
3850
3851         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3852                 /* make sure this cpu is enabled in the mask */
3853                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3854                         ret = -EINVAL;
3855                         goto out;
3856                 }
3857         }
3858
3859         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3860         if (ret < 0)
3861                 ret = -ENOMEM;
3862
3863 out:
3864         mutex_unlock(&trace_types_lock);
3865
3866         return ret;
3867 }
3868
3869
3870 /**
3871  * tracing_update_buffers - used by tracing facility to expand ring buffers
3872  *
3873  * To save memory when tracing is never used on a system that has it
3874  * configured in, the ring buffers are set to a minimum size. But once
3875  * a user starts to use the tracing facility, they need to grow
3876  * to their default size.
3877  *
3878  * This function is to be called when a tracer is about to be used.
3879  */
3880 int tracing_update_buffers(void)
3881 {
3882         int ret = 0;
3883
3884         mutex_lock(&trace_types_lock);
3885         if (!ring_buffer_expanded)
3886                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3887                                                 RING_BUFFER_ALL_CPUS);
3888         mutex_unlock(&trace_types_lock);
3889
3890         return ret;
3891 }
3892
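/*
 * Sketch of the expected call pattern (hypothetical caller, shown only for
 * illustration): code that is about to start generating trace data asks
 * for the buffers to be expanded to their default size first:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * A negative return is simply the error from the underlying resize.
 */
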
3893 struct trace_option_dentry;
3894
3895 static struct trace_option_dentry *
3896 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3897
3898 static void
3899 destroy_trace_option_files(struct trace_option_dentry *topts);
3900
3901 static int tracing_set_tracer(const char *buf)
3902 {
3903         static struct trace_option_dentry *topts;
3904         struct trace_array *tr = &global_trace;
3905         struct tracer *t;
3906 #ifdef CONFIG_TRACER_MAX_TRACE
3907         bool had_max_tr;
3908 #endif
3909         int ret = 0;
3910
3911         mutex_lock(&trace_types_lock);
3912
3913         if (!ring_buffer_expanded) {
3914                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3915                                                 RING_BUFFER_ALL_CPUS);
3916                 if (ret < 0)
3917                         goto out;
3918                 ret = 0;
3919         }
3920
3921         for (t = trace_types; t; t = t->next) {
3922                 if (strcmp(t->name, buf) == 0)
3923                         break;
3924         }
3925         if (!t) {
3926                 ret = -EINVAL;
3927                 goto out;
3928         }
3929         if (t == tr->current_trace)
3930                 goto out;
3931
3932         trace_branch_disable();
3933
3934         tr->current_trace->enabled = false;
3935
3936         if (tr->current_trace->reset)
3937                 tr->current_trace->reset(tr);
3938
3939         /* Current trace needs to be nop_trace before synchronize_sched */
3940         tr->current_trace = &nop_trace;
3941
3942 #ifdef CONFIG_TRACER_MAX_TRACE
3943         had_max_tr = tr->allocated_snapshot;
3944
3945         if (had_max_tr && !t->use_max_tr) {
3946                 /*
3947                  * We need to make sure that the update_max_tr sees that
3948                  * current_trace changed to nop_trace to keep it from
3949                  * swapping the buffers after we resize it.
3950                  * The update_max_tr is called with interrupts disabled,
3951                  * so a synchronize_sched() is sufficient.
3952                  */
3953                 synchronize_sched();
3954                 free_snapshot(tr);
3955         }
3956 #endif
3957         destroy_trace_option_files(topts);
3958
3959         topts = create_trace_option_files(tr, t);
3960
3961 #ifdef CONFIG_TRACER_MAX_TRACE
3962         if (t->use_max_tr && !had_max_tr) {
3963                 ret = alloc_snapshot(tr);
3964                 if (ret < 0)
3965                         goto out;
3966         }
3967 #endif
3968
3969         if (t->init) {
3970                 ret = tracer_init(t, tr);
3971                 if (ret)
3972                         goto out;
3973         }
3974
3975         tr->current_trace = t;
3976         tr->current_trace->enabled = true;
3977         trace_branch_enable(tr);
3978  out:
3979         mutex_unlock(&trace_types_lock);
3980
3981         return ret;
3982 }
3983
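/*
 * Example usage (illustrative): a tracer is selected by writing one of the
 * names listed in available_tracers, and writing "nop" switches back to
 * the no-op tracer:
 *
 *	# echo function > current_tracer
 *	# echo nop > current_tracer
 */
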
3984 static ssize_t
3985 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3986                         size_t cnt, loff_t *ppos)
3987 {
3988         char buf[MAX_TRACER_SIZE+1];
3989         int i;
3990         size_t ret;
3991         int err;
3992
3993         ret = cnt;
3994
3995         if (cnt > MAX_TRACER_SIZE)
3996                 cnt = MAX_TRACER_SIZE;
3997
3998         if (copy_from_user(&buf, ubuf, cnt))
3999                 return -EFAULT;
4000
4001         buf[cnt] = 0;
4002
4003         /* strip trailing whitespace. */
4004         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4005                 buf[i] = 0;
4006
4007         err = tracing_set_tracer(buf);
4008         if (err)
4009                 return err;
4010
4011         *ppos += ret;
4012
4013         return ret;
4014 }
4015
4016 static ssize_t
4017 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4018                      size_t cnt, loff_t *ppos)
4019 {
4020         unsigned long *ptr = filp->private_data;
4021         char buf[64];
4022         int r;
4023
4024         r = snprintf(buf, sizeof(buf), "%ld\n",
4025                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4026         if (r > sizeof(buf))
4027                 r = sizeof(buf);
4028         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4029 }
4030
4031 static ssize_t
4032 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4033                       size_t cnt, loff_t *ppos)
4034 {
4035         unsigned long *ptr = filp->private_data;
4036         unsigned long val;
4037         int ret;
4038
4039         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4040         if (ret)
4041                 return ret;
4042
4043         *ptr = val * 1000;
4044
4045         return cnt;
4046 }
4047
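/*
 * Example usage (illustrative): the value is written in microseconds and
 * stored internally in nanoseconds (val * 1000 above), so resetting the
 * recorded maximum latency is simply:
 *
 *	# echo 0 > tracing_max_latency
 */
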
4048 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4049 {
4050         struct trace_array *tr = inode->i_private;
4051         struct trace_iterator *iter;
4052         int ret = 0;
4053
4054         if (tracing_disabled)
4055                 return -ENODEV;
4056
4057         if (trace_array_get(tr) < 0)
4058                 return -ENODEV;
4059
4060         mutex_lock(&trace_types_lock);
4061
4062         /* create a buffer to store the information to pass to userspace */
4063         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4064         if (!iter) {
4065                 ret = -ENOMEM;
4066                 __trace_array_put(tr);
4067                 goto out;
4068         }
4069
4070         /*
4071          * We make a copy of the current tracer to avoid concurrent
4072          * changes on it while we are reading.
4073          */
4074         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4075         if (!iter->trace) {
4076                 ret = -ENOMEM;
4077                 goto fail;
4078         }
4079         *iter->trace = *tr->current_trace;
4080
4081         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4082                 ret = -ENOMEM;
4083                 goto fail;
4084         }
4085
4086         /* trace pipe does not show start of buffer */
4087         cpumask_setall(iter->started);
4088
4089         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4090                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4091
4092         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4093         if (trace_clocks[tr->clock_id].in_ns)
4094                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4095
4096         iter->tr = tr;
4097         iter->trace_buffer = &tr->trace_buffer;
4098         iter->cpu_file = tracing_get_cpu(inode);
4099         mutex_init(&iter->mutex);
4100         filp->private_data = iter;
4101
4102         if (iter->trace->pipe_open)
4103                 iter->trace->pipe_open(iter);
4104
4105         nonseekable_open(inode, filp);
4106 out:
4107         mutex_unlock(&trace_types_lock);
4108         return ret;
4109
4110 fail:
4111         kfree(iter->trace);
4112         kfree(iter);
4113         __trace_array_put(tr);
4114         mutex_unlock(&trace_types_lock);
4115         return ret;
4116 }
4117
4118 static int tracing_release_pipe(struct inode *inode, struct file *file)
4119 {
4120         struct trace_iterator *iter = file->private_data;
4121         struct trace_array *tr = inode->i_private;
4122
4123         mutex_lock(&trace_types_lock);
4124
4125         if (iter->trace->pipe_close)
4126                 iter->trace->pipe_close(iter);
4127
4128         mutex_unlock(&trace_types_lock);
4129
4130         free_cpumask_var(iter->started);
4131         mutex_destroy(&iter->mutex);
4132         kfree(iter->trace);
4133         kfree(iter);
4134
4135         trace_array_put(tr);
4136
4137         return 0;
4138 }
4139
4140 static unsigned int
4141 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4142 {
4143         /* Iterators are static, they should be filled or empty */
4144         if (trace_buffer_iter(iter, iter->cpu_file))
4145                 return POLLIN | POLLRDNORM;
4146
4147         if (trace_flags & TRACE_ITER_BLOCK)
4148                 /*
4149                  * Always select as readable when in blocking mode
4150                  */
4151                 return POLLIN | POLLRDNORM;
4152         else
4153                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4154                                              filp, poll_table);
4155 }
4156
4157 static unsigned int
4158 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4159 {
4160         struct trace_iterator *iter = filp->private_data;
4161
4162         return trace_poll(iter, filp, poll_table);
4163 }
4164
4165 /*
4166  * This is a makeshift waitqueue.
4167  * A tracer might use this callback in some rare cases:
4168  *
4169  *  1) the current tracer might hold the runqueue lock when it wakes up
4170  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4171  *  2) the function tracers trace all functions, and we don't want
4172  *     the overhead of calling wake_up and friends
4173  *     (and tracing them too)
4174  *
4175  *     Anyway, this is a very primitive wakeup.
4176  */
4177 int poll_wait_pipe(struct trace_iterator *iter)
4178 {
4179         set_current_state(TASK_INTERRUPTIBLE);
4180         /* sleep for 100 msecs, and try again. */
4181         schedule_timeout(HZ / 10);
4182         return 0;
4183 }
4184
4185 /* Must be called with iter->mutex held. */
4186 static int tracing_wait_pipe(struct file *filp)
4187 {
4188         struct trace_iterator *iter = filp->private_data;
4189         int ret;
4190
4191         while (trace_empty(iter)) {
4192
4193                 if ((filp->f_flags & O_NONBLOCK)) {
4194                         return -EAGAIN;
4195                 }
4196
4197                 mutex_unlock(&iter->mutex);
4198
4199                 ret = iter->trace->wait_pipe(iter);
4200
4201                 mutex_lock(&iter->mutex);
4202
4203                 if (ret)
4204                         return ret;
4205
4206                 if (signal_pending(current))
4207                         return -EINTR;
4208
4209                 /*
4210                  * We block until we read something and tracing is disabled.
4211                  * We still block if tracing is disabled, but we have never
4212                  * read anything. This allows a user to cat this file, and
4213                  * then enable tracing. But after we have read something,
4214                  * we give an EOF when tracing is again disabled.
4215                  *
4216                  * iter->pos will be 0 if we haven't read anything.
4217                  */
4218                 if (!tracing_is_on() && iter->pos)
4219                         break;
4220         }
4221
4222         return 1;
4223 }
4224
4225 /*
4226  * Consumer reader.
4227  */
4228 static ssize_t
4229 tracing_read_pipe(struct file *filp, char __user *ubuf,
4230                   size_t cnt, loff_t *ppos)
4231 {
4232         struct trace_iterator *iter = filp->private_data;
4233         struct trace_array *tr = iter->tr;
4234         ssize_t sret;
4235
4236         /* return any leftover data */
4237         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4238         if (sret != -EBUSY)
4239                 return sret;
4240
4241         trace_seq_init(&iter->seq);
4242
4243         /* copy the tracer to avoid using a global lock all around */
4244         mutex_lock(&trace_types_lock);
4245         if (unlikely(iter->trace->name != tr->current_trace->name))
4246                 *iter->trace = *tr->current_trace;
4247         mutex_unlock(&trace_types_lock);
4248
4249         /*
4250          * Avoid more than one consumer on a single file descriptor.
4251          * This is just a matter of trace coherency; the ring buffer itself
4252          * is protected.
4253          */
4254         mutex_lock(&iter->mutex);
4255         if (iter->trace->read) {
4256                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4257                 if (sret)
4258                         goto out;
4259         }
4260
4261 waitagain:
4262         sret = tracing_wait_pipe(filp);
4263         if (sret <= 0)
4264                 goto out;
4265
4266         /* stop when tracing is finished */
4267         if (trace_empty(iter)) {
4268                 sret = 0;
4269                 goto out;
4270         }
4271
4272         if (cnt >= PAGE_SIZE)
4273                 cnt = PAGE_SIZE - 1;
4274
4275         /* reset all but tr, trace, and overruns */
4276         memset(&iter->seq, 0,
4277                sizeof(struct trace_iterator) -
4278                offsetof(struct trace_iterator, seq));
4279         cpumask_clear(iter->started);
4280         iter->pos = -1;
4281
4282         trace_event_read_lock();
4283         trace_access_lock(iter->cpu_file);
4284         while (trace_find_next_entry_inc(iter) != NULL) {
4285                 enum print_line_t ret;
4286                 int len = iter->seq.len;
4287
4288                 ret = print_trace_line(iter);
4289                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4290                         /* don't print partial lines */
4291                         iter->seq.len = len;
4292                         break;
4293                 }
4294                 if (ret != TRACE_TYPE_NO_CONSUME)
4295                         trace_consume(iter);
4296
4297                 if (iter->seq.len >= cnt)
4298                         break;
4299
4300                 /*
4301                  * Setting the full flag means we reached the trace_seq buffer
4302                  * size and we should have left via the partial-output condition
4303                  * above. One of the trace_seq_* functions is not being used properly.
4304                  */
4305                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4306                           iter->ent->type);
4307         }
4308         trace_access_unlock(iter->cpu_file);
4309         trace_event_read_unlock();
4310
4311         /* Now copy what we have to the user */
4312         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4313         if (iter->seq.readpos >= iter->seq.len)
4314                 trace_seq_init(&iter->seq);
4315
4316         /*
4317          * If there was nothing to send to user, in spite of consuming trace
4318          * entries, go back to wait for more entries.
4319          */
4320         if (sret == -EBUSY)
4321                 goto waitagain;
4322
4323 out:
4324         mutex_unlock(&iter->mutex);
4325
4326         return sret;
4327 }
4328
4329 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4330                                      unsigned int idx)
4331 {
4332         __free_page(spd->pages[idx]);
4333 }
4334
4335 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4336         .can_merge              = 0,
4337         .map                    = generic_pipe_buf_map,
4338         .unmap                  = generic_pipe_buf_unmap,
4339         .confirm                = generic_pipe_buf_confirm,
4340         .release                = generic_pipe_buf_release,
4341         .steal                  = generic_pipe_buf_steal,
4342         .get                    = generic_pipe_buf_get,
4343 };
4344
4345 static size_t
4346 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4347 {
4348         size_t count;
4349         int ret;
4350
4351         /* Seq buffer is page-sized, exactly what we need. */
4352         for (;;) {
4353                 count = iter->seq.len;
4354                 ret = print_trace_line(iter);
4355                 count = iter->seq.len - count;
4356                 if (rem < count) {
4357                         rem = 0;
4358                         iter->seq.len -= count;
4359                         break;
4360                 }
4361                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4362                         iter->seq.len -= count;
4363                         break;
4364                 }
4365
4366                 if (ret != TRACE_TYPE_NO_CONSUME)
4367                         trace_consume(iter);
4368                 rem -= count;
4369                 if (!trace_find_next_entry_inc(iter))   {
4370                         rem = 0;
4371                         iter->ent = NULL;
4372                         break;
4373                 }
4374         }
4375
4376         return rem;
4377 }
4378
4379 static ssize_t tracing_splice_read_pipe(struct file *filp,
4380                                         loff_t *ppos,
4381                                         struct pipe_inode_info *pipe,
4382                                         size_t len,
4383                                         unsigned int flags)
4384 {
4385         struct page *pages_def[PIPE_DEF_BUFFERS];
4386         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4387         struct trace_iterator *iter = filp->private_data;
4388         struct splice_pipe_desc spd = {
4389                 .pages          = pages_def,
4390                 .partial        = partial_def,
4391                 .nr_pages       = 0, /* This gets updated below. */
4392                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4393                 .flags          = flags,
4394                 .ops            = &tracing_pipe_buf_ops,
4395                 .spd_release    = tracing_spd_release_pipe,
4396         };
4397         struct trace_array *tr = iter->tr;
4398         ssize_t ret;
4399         size_t rem;
4400         unsigned int i;
4401
4402         if (splice_grow_spd(pipe, &spd))
4403                 return -ENOMEM;
4404
4405         /* copy the tracer to avoid using a global lock all around */
4406         mutex_lock(&trace_types_lock);
4407         if (unlikely(iter->trace->name != tr->current_trace->name))
4408                 *iter->trace = *tr->current_trace;
4409         mutex_unlock(&trace_types_lock);
4410
4411         mutex_lock(&iter->mutex);
4412
4413         if (iter->trace->splice_read) {
4414                 ret = iter->trace->splice_read(iter, filp,
4415                                                ppos, pipe, len, flags);
4416                 if (ret)
4417                         goto out_err;
4418         }
4419
4420         ret = tracing_wait_pipe(filp);
4421         if (ret <= 0)
4422                 goto out_err;
4423
4424         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4425                 ret = -EFAULT;
4426                 goto out_err;
4427         }
4428
4429         trace_event_read_lock();
4430         trace_access_lock(iter->cpu_file);
4431
4432         /* Fill as many pages as possible. */
4433         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4434                 spd.pages[i] = alloc_page(GFP_KERNEL);
4435                 if (!spd.pages[i])
4436                         break;
4437
4438                 rem = tracing_fill_pipe_page(rem, iter);
4439
4440                 /* Copy the data into the page, so we can start over. */
4441                 ret = trace_seq_to_buffer(&iter->seq,
4442                                           page_address(spd.pages[i]),
4443                                           iter->seq.len);
4444                 if (ret < 0) {
4445                         __free_page(spd.pages[i]);
4446                         break;
4447                 }
4448                 spd.partial[i].offset = 0;
4449                 spd.partial[i].len = iter->seq.len;
4450
4451                 trace_seq_init(&iter->seq);
4452         }
4453
4454         trace_access_unlock(iter->cpu_file);
4455         trace_event_read_unlock();
4456         mutex_unlock(&iter->mutex);
4457
4458         spd.nr_pages = i;
4459
4460         ret = splice_to_pipe(pipe, &spd);
4461 out:
4462         splice_shrink_spd(&spd);
4463         return ret;
4464
4465 out_err:
4466         mutex_unlock(&iter->mutex);
4467         goto out;
4468 }
4469
4470 static ssize_t
4471 tracing_entries_read(struct file *filp, char __user *ubuf,
4472                      size_t cnt, loff_t *ppos)
4473 {
4474         struct inode *inode = file_inode(filp);
4475         struct trace_array *tr = inode->i_private;
4476         int cpu = tracing_get_cpu(inode);
4477         char buf[64];
4478         int r = 0;
4479         ssize_t ret;
4480
4481         mutex_lock(&trace_types_lock);
4482
4483         if (cpu == RING_BUFFER_ALL_CPUS) {
4484                 int cpu, buf_size_same;
4485                 unsigned long size;
4486
4487                 size = 0;
4488                 buf_size_same = 1;
4489                 /* check if all cpu sizes are the same */
4490                 for_each_tracing_cpu(cpu) {
4491                         /* fill in the size from the first enabled cpu */
4492                         if (size == 0)
4493                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4494                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4495                                 buf_size_same = 0;
4496                                 break;
4497                         }
4498                 }
4499
4500                 if (buf_size_same) {
4501                         if (!ring_buffer_expanded)
4502                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4503                                             size >> 10,
4504                                             trace_buf_size >> 10);
4505                         else
4506                                 r = sprintf(buf, "%lu\n", size >> 10);
4507                 } else
4508                         r = sprintf(buf, "X\n");
4509         } else
4510                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4511
4512         mutex_unlock(&trace_types_lock);
4513
4514         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4515         return ret;
4516 }
4517
4518 static ssize_t
4519 tracing_entries_write(struct file *filp, const char __user *ubuf,
4520                       size_t cnt, loff_t *ppos)
4521 {
4522         struct inode *inode = file_inode(filp);
4523         struct trace_array *tr = inode->i_private;
4524         unsigned long val;
4525         int ret;
4526
4527         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4528         if (ret)
4529                 return ret;
4530
4531         /* must have at least 1 entry */
4532         if (!val)
4533                 return -EINVAL;
4534
4535         /* value is in KB */
4536         val <<= 10;
4537         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4538         if (ret < 0)
4539                 return ret;
4540
4541         *ppos += cnt;
4542
4543         return cnt;
4544 }
4545
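/*
 * Example usage (illustrative): the value written is in KiB per cpu; the
 * top-level buffer_size_kb file resizes every cpu, while the per_cpu/cpuN
 * copies of the file resize a single cpu:
 *
 *	# echo 4096 > buffer_size_kb
 *	# echo 1024 > per_cpu/cpu0/buffer_size_kb
 */
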
4546 static ssize_t
4547 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4548                                 size_t cnt, loff_t *ppos)
4549 {
4550         struct trace_array *tr = filp->private_data;
4551         char buf[64];
4552         int r, cpu;
4553         unsigned long size = 0, expanded_size = 0;
4554
4555         mutex_lock(&trace_types_lock);
4556         for_each_tracing_cpu(cpu) {
4557                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4558                 if (!ring_buffer_expanded)
4559                         expanded_size += trace_buf_size >> 10;
4560         }
4561         if (ring_buffer_expanded)
4562                 r = sprintf(buf, "%lu\n", size);
4563         else
4564                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4565         mutex_unlock(&trace_types_lock);
4566
4567         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4568 }
4569
4570 static ssize_t
4571 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4572                           size_t cnt, loff_t *ppos)
4573 {
4574         /*
4575          * There is no need to read what the user has written; this function
4576          * exists just to make sure that "echo" does not return an error.
4577          */
4578
4579         *ppos += cnt;
4580
4581         return cnt;
4582 }
4583
4584 static int
4585 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4586 {
4587         struct trace_array *tr = inode->i_private;
4588
4589         /* disable tracing ? */
4590         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4591                 tracer_tracing_off(tr);
4592         /* resize the ring buffer to 0 */
4593         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4594
4595         trace_array_put(tr);
4596
4597         return 0;
4598 }
4599
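/*
 * Example usage (illustrative): any write is accepted; the release above
 * does the real work when the file is closed, shrinking the ring buffer to
 * zero and, if the stop-on-free option is set, turning tracing off:
 *
 *	# echo > free_buffer
 */
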
4600 static ssize_t
4601 tracing_mark_write(struct file *filp, const char __user *ubuf,
4602                                         size_t cnt, loff_t *fpos)
4603 {
4604         unsigned long addr = (unsigned long)ubuf;
4605         struct trace_array *tr = filp->private_data;
4606         struct ring_buffer_event *event;
4607         struct ring_buffer *buffer;
4608         struct print_entry *entry;
4609         unsigned long irq_flags;
4610         struct page *pages[2];
4611         void *map_page[2];
4612         int nr_pages = 1;
4613         ssize_t written;
4614         int offset;
4615         int size;
4616         int len;
4617         int ret;
4618         int i;
4619
4620         if (tracing_disabled)
4621                 return -EINVAL;
4622
4623         if (!(trace_flags & TRACE_ITER_MARKERS))
4624                 return -EINVAL;
4625
4626         if (cnt > TRACE_BUF_SIZE)
4627                 cnt = TRACE_BUF_SIZE;
4628
4629         /*
4630          * Userspace is injecting traces into the kernel trace buffer.
4631          * We want to be as non intrusive as possible.
4632          * To do so, we do not want to allocate any special buffers
4633          * or take any locks, but instead write the userspace data
4634          * straight into the ring buffer.
4635          *
4636          * First we need to pin the userspace buffer into memory,
4637          * which most likely it already is, because the caller just referenced it.
4638          * But there's no guarantee that it is. By using get_user_pages_fast()
4639          * and kmap_atomic/kunmap_atomic() we can get access to the
4640          * pages directly. We then write the data directly into the
4641          * ring buffer.
4642          */
4643         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4644
4645         /* check if we cross pages */
4646         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4647                 nr_pages = 2;
4648
4649         offset = addr & (PAGE_SIZE - 1);
4650         addr &= PAGE_MASK;
4651
4652         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4653         if (ret < nr_pages) {
4654                 while (--ret >= 0)
4655                         put_page(pages[ret]);
4656                 written = -EFAULT;
4657                 goto out;
4658         }
4659
4660         for (i = 0; i < nr_pages; i++)
4661                 map_page[i] = kmap_atomic(pages[i]);
4662
4663         local_save_flags(irq_flags);
4664         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4665         buffer = tr->trace_buffer.buffer;
4666         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4667                                           irq_flags, preempt_count());
4668         if (!event) {
4669                 /* Ring buffer disabled, return as if not open for write */
4670                 written = -EBADF;
4671                 goto out_unlock;
4672         }
4673
4674         entry = ring_buffer_event_data(event);
4675         entry->ip = _THIS_IP_;
4676
4677         if (nr_pages == 2) {
4678                 len = PAGE_SIZE - offset;
4679                 memcpy(&entry->buf, map_page[0] + offset, len);
4680                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4681         } else
4682                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4683
4684         if (entry->buf[cnt - 1] != '\n') {
4685                 entry->buf[cnt] = '\n';
4686                 entry->buf[cnt + 1] = '\0';
4687         } else
4688                 entry->buf[cnt] = '\0';
4689
4690         __buffer_unlock_commit(buffer, event);
4691
4692         written = cnt;
4693
4694         *fpos += written;
4695
4696  out_unlock:
4697         for (i = 0; i < nr_pages; i++){
4698                 kunmap_atomic(map_page[i]);
4699                 put_page(pages[i]);
4700         }
4701  out:
4702         return written;
4703 }
4704
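/*
 * Example usage (illustrative): writing a string injects a print entry
 * straight into the ring buffer via tracing_mark_write() above, and it
 * appears in the trace output interleaved with kernel events:
 *
 *	# echo "hello world" > trace_marker
 */
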
4705 static int tracing_clock_show(struct seq_file *m, void *v)
4706 {
4707         struct trace_array *tr = m->private;
4708         int i;
4709
4710         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4711                 seq_printf(m,
4712                         "%s%s%s%s", i ? " " : "",
4713                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4714                         i == tr->clock_id ? "]" : "");
4715         seq_putc(m, '\n');
4716
4717         return 0;
4718 }
4719
4720 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4721                                    size_t cnt, loff_t *fpos)
4722 {
4723         struct seq_file *m = filp->private_data;
4724         struct trace_array *tr = m->private;
4725         char buf[64];
4726         const char *clockstr;
4727         int i;
4728
4729         if (cnt >= sizeof(buf))
4730                 return -EINVAL;
4731
4732         if (copy_from_user(&buf, ubuf, cnt))
4733                 return -EFAULT;
4734
4735         buf[cnt] = 0;
4736
4737         clockstr = strstrip(buf);
4738
4739         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4740                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4741                         break;
4742         }
4743         if (i == ARRAY_SIZE(trace_clocks))
4744                 return -EINVAL;
4745
4746         mutex_lock(&trace_types_lock);
4747
4748         tr->clock_id = i;
4749
4750         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4751
4752         /*
4753          * New clock may not be consistent with the previous clock.
4754          * Reset the buffer so that it doesn't have incomparable timestamps.
4755          */
4756         tracing_reset_online_cpus(&tr->trace_buffer);
4757
4758 #ifdef CONFIG_TRACER_MAX_TRACE
4759         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4760                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4761         tracing_reset_online_cpus(&tr->max_buffer);
4762 #endif
4763
4764         mutex_unlock(&trace_types_lock);
4765
4766         *fpos += cnt;
4767
4768         return cnt;
4769 }
4770
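/*
 * Example usage (illustrative; the exact clock list depends on the
 * architecture): reading shows the available clocks with the current one
 * in brackets, and writing one of the listed names switches the clock and
 * resets the buffers:
 *
 *	# cat trace_clock
 *	[local] global counter uptime perf
 *	# echo global > trace_clock
 */
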
4771 static int tracing_clock_open(struct inode *inode, struct file *file)
4772 {
4773         struct trace_array *tr = inode->i_private;
4774         int ret;
4775
4776         if (tracing_disabled)
4777                 return -ENODEV;
4778
4779         if (trace_array_get(tr))
4780                 return -ENODEV;
4781
4782         ret = single_open(file, tracing_clock_show, inode->i_private);
4783         if (ret < 0)
4784                 trace_array_put(tr);
4785
4786         return ret;
4787 }
4788
4789 struct ftrace_buffer_info {
4790         struct trace_iterator   iter;
4791         void                    *spare;
4792         unsigned int            read;
4793 };
4794
4795 #ifdef CONFIG_TRACER_SNAPSHOT
4796 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4797 {
4798         struct trace_array *tr = inode->i_private;
4799         struct trace_iterator *iter;
4800         struct seq_file *m;
4801         int ret = 0;
4802
4803         if (trace_array_get(tr) < 0)
4804                 return -ENODEV;
4805
4806         if (file->f_mode & FMODE_READ) {
4807                 iter = __tracing_open(inode, file, true);
4808                 if (IS_ERR(iter))
4809                         ret = PTR_ERR(iter);
4810         } else {
4811                 /* Writes still need the seq_file to hold the private data */
4812                 ret = -ENOMEM;
4813                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4814                 if (!m)
4815                         goto out;
4816                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4817                 if (!iter) {
4818                         kfree(m);
4819                         goto out;
4820                 }
4821                 ret = 0;
4822
4823                 iter->tr = tr;
4824                 iter->trace_buffer = &tr->max_buffer;
4825                 iter->cpu_file = tracing_get_cpu(inode);
4826                 m->private = iter;
4827                 file->private_data = m;
4828         }
4829 out:
4830         if (ret < 0)
4831                 trace_array_put(tr);
4832
4833         return ret;
4834 }
4835
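     /*
      * Handle writes to the "snapshot" file.  The value written selects
      * the action implemented below:
      *   0  - free the snapshot buffer (only valid for the whole buffer)
      *   1  - allocate the snapshot buffer if needed and swap it with
      *        the live buffer (per-cpu swap only where supported)
      *   >1 - clear the snapshot buffer without swapping
      */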
4836 static ssize_t
4837 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4838                        loff_t *ppos)
4839 {
4840         struct seq_file *m = filp->private_data;
4841         struct trace_iterator *iter = m->private;
4842         struct trace_array *tr = iter->tr;
4843         unsigned long val;
4844         int ret;
4845
4846         ret = tracing_update_buffers();
4847         if (ret < 0)
4848                 return ret;
4849
4850         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4851         if (ret)
4852                 return ret;
4853
4854         mutex_lock(&trace_types_lock);
4855
4856         if (tr->current_trace->use_max_tr) {
4857                 ret = -EBUSY;
4858                 goto out;
4859         }
4860
4861         switch (val) {
4862         case 0:
4863                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4864                         ret = -EINVAL;
4865                         break;
4866                 }
4867                 if (tr->allocated_snapshot)
4868                         free_snapshot(tr);
4869                 break;
4870         case 1:
4871 /* Only allow per-cpu swap if the ring buffer supports it */
4872 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4873                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4874                         ret = -EINVAL;
4875                         break;
4876                 }
4877 #endif
4878                 if (!tr->allocated_snapshot) {
4879                         ret = alloc_snapshot(tr);
4880                         if (ret < 0)
4881                                 break;
4882                 }
4883                 local_irq_disable();
4884                 /* Now, we're going to swap */
4885                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4886                         update_max_tr(tr, current, smp_processor_id());
4887                 else
4888                         update_max_tr_single(tr, current, iter->cpu_file);
4889                 local_irq_enable();
4890                 break;
4891         default:
4892                 if (tr->allocated_snapshot) {
4893                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4894                                 tracing_reset_online_cpus(&tr->max_buffer);
4895                         else
4896                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4897                 }
4898                 break;
4899         }
4900
4901         if (ret >= 0) {
4902                 *ppos += cnt;
4903                 ret = cnt;
4904         }
4905 out:
4906         mutex_unlock(&trace_types_lock);
4907         return ret;
4908 }
4909
4910 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4911 {
4912         struct seq_file *m = file->private_data;
4913         int ret;
4914
4915         ret = tracing_release(inode, file);
4916
4917         if (file->f_mode & FMODE_READ)
4918                 return ret;
4919
4920         /* If write only, the seq_file is just a stub */
4921         if (m)
4922                 kfree(m->private);
4923         kfree(m);
4924
4925         return 0;
4926 }
4927
4928 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4929 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4930                                     size_t count, loff_t *ppos);
4931 static int tracing_buffers_release(struct inode *inode, struct file *file);
4932 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4933                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4934
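     /*
      * "snapshot_raw" open: reuse tracing_buffers_open() but point the
      * iterator at max_buffer, so raw pages are read from the snapshot
      * instead of the live trace buffer.
      */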
4935 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4936 {
4937         struct ftrace_buffer_info *info;
4938         int ret;
4939
4940         ret = tracing_buffers_open(inode, filp);
4941         if (ret < 0)
4942                 return ret;
4943
4944         info = filp->private_data;
4945
4946         if (info->iter.trace->use_max_tr) {
4947                 tracing_buffers_release(inode, filp);
4948                 return -EBUSY;
4949         }
4950
4951         info->iter.snapshot = true;
4952         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4953
4954         return ret;
4955 }
4956
4957 #endif /* CONFIG_TRACER_SNAPSHOT */
4958
4959
4960 static const struct file_operations tracing_max_lat_fops = {
4961         .open           = tracing_open_generic,
4962         .read           = tracing_max_lat_read,
4963         .write          = tracing_max_lat_write,
4964         .llseek         = generic_file_llseek,
4965 };
4966
4967 static const struct file_operations set_tracer_fops = {
4968         .open           = tracing_open_generic,
4969         .read           = tracing_set_trace_read,
4970         .write          = tracing_set_trace_write,
4971         .llseek         = generic_file_llseek,
4972 };
4973
4974 static const struct file_operations tracing_pipe_fops = {
4975         .open           = tracing_open_pipe,
4976         .poll           = tracing_poll_pipe,
4977         .read           = tracing_read_pipe,
4978         .splice_read    = tracing_splice_read_pipe,
4979         .release        = tracing_release_pipe,
4980         .llseek         = no_llseek,
4981 };
4982
4983 static const struct file_operations tracing_entries_fops = {
4984         .open           = tracing_open_generic_tr,
4985         .read           = tracing_entries_read,
4986         .write          = tracing_entries_write,
4987         .llseek         = generic_file_llseek,
4988         .release        = tracing_release_generic_tr,
4989 };
4990
4991 static const struct file_operations tracing_total_entries_fops = {
4992         .open           = tracing_open_generic_tr,
4993         .read           = tracing_total_entries_read,
4994         .llseek         = generic_file_llseek,
4995         .release        = tracing_release_generic_tr,
4996 };
4997
4998 static const struct file_operations tracing_free_buffer_fops = {
4999         .open           = tracing_open_generic_tr,
5000         .write          = tracing_free_buffer_write,
5001         .release        = tracing_free_buffer_release,
5002 };
5003
5004 static const struct file_operations tracing_mark_fops = {
5005         .open           = tracing_open_generic_tr,
5006         .write          = tracing_mark_write,
5007         .llseek         = generic_file_llseek,
5008         .release        = tracing_release_generic_tr,
5009 };
5010
5011 static const struct file_operations trace_clock_fops = {
5012         .open           = tracing_clock_open,
5013         .read           = seq_read,
5014         .llseek         = seq_lseek,
5015         .release        = tracing_single_release_tr,
5016         .write          = tracing_clock_write,
5017 };
5018
5019 #ifdef CONFIG_TRACER_SNAPSHOT
5020 static const struct file_operations snapshot_fops = {
5021         .open           = tracing_snapshot_open,
5022         .read           = seq_read,
5023         .write          = tracing_snapshot_write,
5024         .llseek         = tracing_lseek,
5025         .release        = tracing_snapshot_release,
5026 };
5027
5028 static const struct file_operations snapshot_raw_fops = {
5029         .open           = snapshot_raw_open,
5030         .read           = tracing_buffers_read,
5031         .release        = tracing_buffers_release,
5032         .splice_read    = tracing_buffers_splice_read,
5033         .llseek         = no_llseek,
5034 };
5035
5036 #endif /* CONFIG_TRACER_SNAPSHOT */
5037
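     /* Open handler for the per-cpu "trace_pipe_raw" binary reader. */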
5038 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5039 {
5040         struct trace_array *tr = inode->i_private;
5041         struct ftrace_buffer_info *info;
5042         int ret;
5043
5044         if (tracing_disabled)
5045                 return -ENODEV;
5046
5047         if (trace_array_get(tr) < 0)
5048                 return -ENODEV;
5049
5050         info = kzalloc(sizeof(*info), GFP_KERNEL);
5051         if (!info) {
5052                 trace_array_put(tr);
5053                 return -ENOMEM;
5054         }
5055
5056         mutex_lock(&trace_types_lock);
5057
5058         info->iter.tr           = tr;
5059         info->iter.cpu_file     = tracing_get_cpu(inode);
5060         info->iter.trace        = tr->current_trace;
5061         info->iter.trace_buffer = &tr->trace_buffer;
5062         info->spare             = NULL;
5063         /* Force reading ring buffer for first read */
5064         info->read              = (unsigned int)-1;
5065
5066         filp->private_data = info;
5067
5068         mutex_unlock(&trace_types_lock);
5069
5070         ret = nonseekable_open(inode, filp);
5071         if (ret < 0)
5072                 trace_array_put(tr);
5073
5074         return ret;
5075 }
5076
5077 static unsigned int
5078 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5079 {
5080         struct ftrace_buffer_info *info = filp->private_data;
5081         struct trace_iterator *iter = &info->iter;
5082
5083         return trace_poll(iter, filp, poll_table);
5084 }
5085
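     /*
      * Read whole ring-buffer pages into user space.  A single "spare"
      * page is allocated lazily and ring_buffer_read_page() fills it;
      * info->read remembers how much of the current page has already
      * been handed out, so partial reads resume where they left off.
      */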
5086 static ssize_t
5087 tracing_buffers_read(struct file *filp, char __user *ubuf,
5088                      size_t count, loff_t *ppos)
5089 {
5090         struct ftrace_buffer_info *info = filp->private_data;
5091         struct trace_iterator *iter = &info->iter;
5092         ssize_t ret;
5093         ssize_t size;
5094
5095         if (!count)
5096                 return 0;
5097
5098         mutex_lock(&trace_types_lock);
5099
5100 #ifdef CONFIG_TRACER_MAX_TRACE
5101         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5102                 size = -EBUSY;
5103                 goto out_unlock;
5104         }
5105 #endif
5106
5107         if (!info->spare)
5108                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5109                                                           iter->cpu_file);
5110         size = -ENOMEM;
5111         if (!info->spare)
5112                 goto out_unlock;
5113
5114         /* Do we have previous read data to read? */
5115         if (info->read < PAGE_SIZE)
5116                 goto read;
5117
5118  again:
5119         trace_access_lock(iter->cpu_file);
5120         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5121                                     &info->spare,
5122                                     count,
5123                                     iter->cpu_file, 0);
5124         trace_access_unlock(iter->cpu_file);
5125
5126         if (ret < 0) {
5127                 if (trace_empty(iter)) {
5128                         if ((filp->f_flags & O_NONBLOCK)) {
5129                                 size = -EAGAIN;
5130                                 goto out_unlock;
5131                         }
5132                         mutex_unlock(&trace_types_lock);
5133                         ret = iter->trace->wait_pipe(iter);
5134                         mutex_lock(&trace_types_lock);
5135                         if (ret) {
5136                                 size = ret;
5137                                 goto out_unlock;
5138                         }
5139                         if (signal_pending(current)) {
5140                                 size = -EINTR;
5141                                 goto out_unlock;
5142                         }
5143                         goto again;
5144                 }
5145                 size = 0;
5146                 goto out_unlock;
5147         }
5148
5149         info->read = 0;
5150  read:
5151         size = PAGE_SIZE - info->read;
5152         if (size > count)
5153                 size = count;
5154
5155         ret = copy_to_user(ubuf, info->spare + info->read, size);
5156         if (ret == size) {
5157                 size = -EFAULT;
5158                 goto out_unlock;
5159         }
5160         size -= ret;
5161
5162         *ppos += size;
5163         info->read += size;
5164
5165  out_unlock:
5166         mutex_unlock(&trace_types_lock);
5167
5168         return size;
5169 }
5170
5171 static int tracing_buffers_release(struct inode *inode, struct file *file)
5172 {
5173         struct ftrace_buffer_info *info = file->private_data;
5174         struct trace_iterator *iter = &info->iter;
5175
5176         mutex_lock(&trace_types_lock);
5177
5178         __trace_array_put(iter->tr);
5179
5180         if (info->spare)
5181                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5182         kfree(info);
5183
5184         mutex_unlock(&trace_types_lock);
5185
5186         return 0;
5187 }
5188
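     /*
      * A buffer_ref pins one ring-buffer page while it is referenced
      * from a pipe (see tracing_buffers_splice_read() below).  The page
      * is returned to the ring buffer when the last reference drops.
      */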
5189 struct buffer_ref {
5190         struct ring_buffer      *buffer;
5191         void                    *page;
5192         int                     ref;
5193 };
5194
5195 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5196                                     struct pipe_buffer *buf)
5197 {
5198         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5199
5200         if (--ref->ref)
5201                 return;
5202
5203         ring_buffer_free_read_page(ref->buffer, ref->page);
5204         kfree(ref);
5205         buf->private = 0;
5206 }
5207
5208 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5209                                 struct pipe_buffer *buf)
5210 {
5211         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5212
5213         ref->ref++;
5214 }
5215
5216 /* Pipe buffer operations for a buffer. */
5217 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5218         .can_merge              = 0,
5219         .map                    = generic_pipe_buf_map,
5220         .unmap                  = generic_pipe_buf_unmap,
5221         .confirm                = generic_pipe_buf_confirm,
5222         .release                = buffer_pipe_buf_release,
5223         .steal                  = generic_pipe_buf_steal,
5224         .get                    = buffer_pipe_buf_get,
5225 };
5226
5227 /*
5228  * Callback from splice_to_pipe(), if we need to release some pages
5229  * at the end of the spd in case we error'ed out in filling the pipe.
5230  */
5231 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5232 {
5233         struct buffer_ref *ref =
5234                 (struct buffer_ref *)spd->partial[i].private;
5235
5236         if (--ref->ref)
5237                 return;
5238
5239         ring_buffer_free_read_page(ref->buffer, ref->page);
5240         kfree(ref);
5241         spd->partial[i].private = 0;
5242 }
5243
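     /*
      * Splice ring-buffer pages into a pipe without copying: each page
      * is wrapped in a buffer_ref and inserted into the pipe as-is,
      * which is how tools such as trace-cmd can consume trace_pipe_raw.
      */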
5244 static ssize_t
5245 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5246                             struct pipe_inode_info *pipe, size_t len,
5247                             unsigned int flags)
5248 {
5249         struct ftrace_buffer_info *info = file->private_data;
5250         struct trace_iterator *iter = &info->iter;
5251         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5252         struct page *pages_def[PIPE_DEF_BUFFERS];
5253         struct splice_pipe_desc spd = {
5254                 .pages          = pages_def,
5255                 .partial        = partial_def,
5256                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5257                 .flags          = flags,
5258                 .ops            = &buffer_pipe_buf_ops,
5259                 .spd_release    = buffer_spd_release,
5260         };
5261         struct buffer_ref *ref;
5262         int entries, size, i;
5263         ssize_t ret;
5264
5265         mutex_lock(&trace_types_lock);
5266
5267 #ifdef CONFIG_TRACER_MAX_TRACE
5268         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5269                 ret = -EBUSY;
5270                 goto out;
5271         }
5272 #endif
5273
5274         if (splice_grow_spd(pipe, &spd)) {
5275                 ret = -ENOMEM;
5276                 goto out;
5277         }
5278
5279         if (*ppos & (PAGE_SIZE - 1)) {
5280                 ret = -EINVAL;
5281                 goto out;
5282         }
5283
5284         if (len & (PAGE_SIZE - 1)) {
5285                 if (len < PAGE_SIZE) {
5286                         ret = -EINVAL;
5287                         goto out;
5288                 }
5289                 len &= PAGE_MASK;
5290         }
5291
5292  again:
5293         trace_access_lock(iter->cpu_file);
5294         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5295
5296         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5297                 struct page *page;
5298                 int r;
5299
5300                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5301                 if (!ref)
5302                         break;
5303
5304                 ref->ref = 1;
5305                 ref->buffer = iter->trace_buffer->buffer;
5306                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5307                 if (!ref->page) {
5308                         kfree(ref);
5309                         break;
5310                 }
5311
5312                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5313                                           len, iter->cpu_file, 1);
5314                 if (r < 0) {
5315                         ring_buffer_free_read_page(ref->buffer, ref->page);
5316                         kfree(ref);
5317                         break;
5318                 }
5319
5320                 /*
5321                  * Zero out any leftover data; this page is going to
5322                  * user land.
5323                  */
5324                 size = ring_buffer_page_len(ref->page);
5325                 if (size < PAGE_SIZE)
5326                         memset(ref->page + size, 0, PAGE_SIZE - size);
5327
5328                 page = virt_to_page(ref->page);
5329
5330                 spd.pages[i] = page;
5331                 spd.partial[i].len = PAGE_SIZE;
5332                 spd.partial[i].offset = 0;
5333                 spd.partial[i].private = (unsigned long)ref;
5334                 spd.nr_pages++;
5335                 *ppos += PAGE_SIZE;
5336
5337                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5338         }
5339
5340         trace_access_unlock(iter->cpu_file);
5341         spd.nr_pages = i;
5342
5343         /* did we read anything? */
5344         if (!spd.nr_pages) {
5345                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5346                         ret = -EAGAIN;
5347                         goto out;
5348                 }
5349                 mutex_unlock(&trace_types_lock);
5350                 ret = iter->trace->wait_pipe(iter);
5351                 mutex_lock(&trace_types_lock);
5352                 if (ret)
5353                         goto out;
5354                 if (signal_pending(current)) {
5355                         ret = -EINTR;
5356                         goto out;
5357                 }
5358                 goto again;
5359         }
5360
5361         ret = splice_to_pipe(pipe, &spd);
5362         splice_shrink_spd(&spd);
5363 out:
5364         mutex_unlock(&trace_types_lock);
5365
5366         return ret;
5367 }
5368
5369 static const struct file_operations tracing_buffers_fops = {
5370         .open           = tracing_buffers_open,
5371         .read           = tracing_buffers_read,
5372         .poll           = tracing_buffers_poll,
5373         .release        = tracing_buffers_release,
5374         .splice_read    = tracing_buffers_splice_read,
5375         .llseek         = no_llseek,
5376 };
5377
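     /*
      * Per-cpu "stats" file: dump entry/overrun/byte counters plus the
      * oldest and current timestamps.  Timestamps are shown as sec.usec
      * only when the selected trace clock counts in nanoseconds;
      * counter/tsc style clocks are printed raw.
      */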
5378 static ssize_t
5379 tracing_stats_read(struct file *filp, char __user *ubuf,
5380                    size_t count, loff_t *ppos)
5381 {
5382         struct inode *inode = file_inode(filp);
5383         struct trace_array *tr = inode->i_private;
5384         struct trace_buffer *trace_buf = &tr->trace_buffer;
5385         int cpu = tracing_get_cpu(inode);
5386         struct trace_seq *s;
5387         unsigned long cnt;
5388         unsigned long long t;
5389         unsigned long usec_rem;
5390
5391         s = kmalloc(sizeof(*s), GFP_KERNEL);
5392         if (!s)
5393                 return -ENOMEM;
5394
5395         trace_seq_init(s);
5396
5397         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5398         trace_seq_printf(s, "entries: %ld\n", cnt);
5399
5400         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5401         trace_seq_printf(s, "overrun: %ld\n", cnt);
5402
5403         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5404         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5405
5406         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5407         trace_seq_printf(s, "bytes: %ld\n", cnt);
5408
5409         if (trace_clocks[tr->clock_id].in_ns) {
5410                 /* local or global for trace_clock */
5411                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5412                 usec_rem = do_div(t, USEC_PER_SEC);
5413                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5414                                                                 t, usec_rem);
5415
5416                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5417                 usec_rem = do_div(t, USEC_PER_SEC);
5418                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5419         } else {
5420                 /* counter or tsc mode for trace_clock */
5421                 trace_seq_printf(s, "oldest event ts: %llu\n",
5422                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5423
5424                 trace_seq_printf(s, "now ts: %llu\n",
5425                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5426         }
5427
5428         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5429         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5430
5431         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5432         trace_seq_printf(s, "read events: %ld\n", cnt);
5433
5434         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5435
5436         kfree(s);
5437
5438         return count;
5439 }
5440
5441 static const struct file_operations tracing_stats_fops = {
5442         .open           = tracing_open_generic_tr,
5443         .read           = tracing_stats_read,
5444         .llseek         = generic_file_llseek,
5445         .release        = tracing_release_generic_tr,
5446 };
5447
5448 #ifdef CONFIG_DYNAMIC_FTRACE
5449
5450 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5451 {
5452         return 0;
5453 }
5454
5455 static ssize_t
5456 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5457                   size_t cnt, loff_t *ppos)
5458 {
5459         static char ftrace_dyn_info_buffer[1024];
5460         static DEFINE_MUTEX(dyn_info_mutex);
5461         unsigned long *p = filp->private_data;
5462         char *buf = ftrace_dyn_info_buffer;
5463         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5464         int r;
5465
5466         mutex_lock(&dyn_info_mutex);
5467         r = sprintf(buf, "%ld ", *p);
5468
5469         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5470         buf[r++] = '\n';
5471
5472         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5473
5474         mutex_unlock(&dyn_info_mutex);
5475
5476         return r;
5477 }
5478
5479 static const struct file_operations tracing_dyn_info_fops = {
5480         .open           = tracing_open_generic,
5481         .read           = tracing_read_dyn_info,
5482         .llseek         = generic_file_llseek,
5483 };
5484 #endif /* CONFIG_DYNAMIC_FTRACE */
5485
5486 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5487 static void
5488 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5489 {
5490         tracing_snapshot();
5491 }
5492
5493 static void
5494 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5495 {
5496         unsigned long *count = (unsigned long *)data;
5497
5498         if (!*count)
5499                 return;
5500
5501         if (*count != -1)
5502                 (*count)--;
5503
5504         tracing_snapshot();
5505 }
5506
5507 static int
5508 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5509                       struct ftrace_probe_ops *ops, void *data)
5510 {
5511         long count = (long)data;
5512
5513         seq_printf(m, "%ps:", (void *)ip);
5514
5515         seq_printf(m, "snapshot");
5516
5517         if (count == -1)
5518                 seq_printf(m, ":unlimited\n");
5519         else
5520                 seq_printf(m, ":count=%ld\n", count);
5521
5522         return 0;
5523 }
5524
5525 static struct ftrace_probe_ops snapshot_probe_ops = {
5526         .func                   = ftrace_snapshot,
5527         .print                  = ftrace_snapshot_print,
5528 };
5529
5530 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5531         .func                   = ftrace_count_snapshot,
5532         .print                  = ftrace_snapshot_print,
5533 };
5534
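     /*
      * The "snapshot" command for set_ftrace_filter.  Writing e.g.
      * '<function>:snapshot:<count>' arms a probe that takes a tracing
      * snapshot the first <count> times <function> is hit; with no
      * count it triggers on every hit, and a leading '!' removes the
      * probe again.
      */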
5535 static int
5536 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5537                                char *glob, char *cmd, char *param, int enable)
5538 {
5539         struct ftrace_probe_ops *ops;
5540         void *count = (void *)-1;
5541         char *number;
5542         int ret;
5543
5544         /* hash funcs only work with set_ftrace_filter */
5545         if (!enable)
5546                 return -EINVAL;
5547
5548         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5549
5550         if (glob[0] == '!') {
5551                 unregister_ftrace_function_probe_func(glob+1, ops);
5552                 return 0;
5553         }
5554
5555         if (!param)
5556                 goto out_reg;
5557
5558         number = strsep(&param, ":");
5559
5560         if (!strlen(number))
5561                 goto out_reg;
5562
5563         /*
5564          * We use the callback data field (which is a pointer)
5565          * as our counter.
5566          */
5567         ret = kstrtoul(number, 0, (unsigned long *)&count);
5568         if (ret)
5569                 return ret;
5570
5571  out_reg:
5572         ret = register_ftrace_function_probe(glob, ops, count);
5573
5574         if (ret >= 0)
5575                 alloc_snapshot(&global_trace);
5576
5577         return ret < 0 ? ret : 0;
5578 }
5579
5580 static struct ftrace_func_command ftrace_snapshot_cmd = {
5581         .name                   = "snapshot",
5582         .func                   = ftrace_trace_snapshot_callback,
5583 };
5584
5585 static __init int register_snapshot_cmd(void)
5586 {
5587         return register_ftrace_command(&ftrace_snapshot_cmd);
5588 }
5589 #else
5590 static inline __init int register_snapshot_cmd(void) { return 0; }
5591 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5592
5593 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5594 {
5595         if (tr->dir)
5596                 return tr->dir;
5597
5598         if (!debugfs_initialized())
5599                 return NULL;
5600
5601         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5602                 tr->dir = debugfs_create_dir("tracing", NULL);
5603
5604         if (!tr->dir)
5605                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5606
5607         return tr->dir;
5608 }
5609
5610 struct dentry *tracing_init_dentry(void)
5611 {
5612         return tracing_init_dentry_tr(&global_trace);
5613 }
5614
5615 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5616 {
5617         struct dentry *d_tracer;
5618
5619         if (tr->percpu_dir)
5620                 return tr->percpu_dir;
5621
5622         d_tracer = tracing_init_dentry_tr(tr);
5623         if (!d_tracer)
5624                 return NULL;
5625
5626         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5627
5628         WARN_ONCE(!tr->percpu_dir,
5629                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5630
5631         return tr->percpu_dir;
5632 }
5633
5634 static struct dentry *
5635 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5636                       void *data, long cpu, const struct file_operations *fops)
5637 {
5638         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5639
5640         if (ret) /* See tracing_get_cpu() */
5641                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5642         return ret;
5643 }
5644
5645 static void
5646 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5647 {
5648         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5649         struct dentry *d_cpu;
5650         char cpu_dir[30]; /* 30 characters should be more than enough */
5651
5652         if (!d_percpu)
5653                 return;
5654
5655         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5656         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5657         if (!d_cpu) {
5658                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5659                 return;
5660         }
5661
5662         /* per cpu trace_pipe */
5663         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5664                                 tr, cpu, &tracing_pipe_fops);
5665
5666         /* per cpu trace */
5667         trace_create_cpu_file("trace", 0644, d_cpu,
5668                                 tr, cpu, &tracing_fops);
5669
5670         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5671                                 tr, cpu, &tracing_buffers_fops);
5672
5673         trace_create_cpu_file("stats", 0444, d_cpu,
5674                                 tr, cpu, &tracing_stats_fops);
5675
5676         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5677                                 tr, cpu, &tracing_entries_fops);
5678
5679 #ifdef CONFIG_TRACER_SNAPSHOT
5680         trace_create_cpu_file("snapshot", 0644, d_cpu,
5681                                 tr, cpu, &snapshot_fops);
5682
5683         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5684                                 tr, cpu, &snapshot_raw_fops);
5685 #endif
5686 }
5687
5688 #ifdef CONFIG_FTRACE_SELFTEST
5689 /* Let selftest have access to static functions in this file */
5690 #include "trace_selftest.c"
5691 #endif
5692
5693 struct trace_option_dentry {
5694         struct tracer_opt               *opt;
5695         struct tracer_flags             *flags;
5696         struct trace_array              *tr;
5697         struct dentry                   *entry;
5698 };
5699
5700 static ssize_t
5701 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5702                         loff_t *ppos)
5703 {
5704         struct trace_option_dentry *topt = filp->private_data;
5705         char *buf;
5706
5707         if (topt->flags->val & topt->opt->bit)
5708                 buf = "1\n";
5709         else
5710                 buf = "0\n";
5711
5712         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5713 }
5714
5715 static ssize_t
5716 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5717                          loff_t *ppos)
5718 {
5719         struct trace_option_dentry *topt = filp->private_data;
5720         unsigned long val;
5721         int ret;
5722
5723         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5724         if (ret)
5725                 return ret;
5726
5727         if (val != 0 && val != 1)
5728                 return -EINVAL;
5729
5730         if (!!(topt->flags->val & topt->opt->bit) != val) {
5731                 mutex_lock(&trace_types_lock);
5732                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5733                                           topt->opt, !val);
5734                 mutex_unlock(&trace_types_lock);
5735                 if (ret)
5736                         return ret;
5737         }
5738
5739         *ppos += cnt;
5740
5741         return cnt;
5742 }
5743
5744
5745 static const struct file_operations trace_options_fops = {
5746         .open = tracing_open_generic,
5747         .read = trace_options_read,
5748         .write = trace_options_write,
5749         .llseek = generic_file_llseek,
5750 };
5751
5752 static ssize_t
5753 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5754                         loff_t *ppos)
5755 {
5756         long index = (long)filp->private_data;
5757         char *buf;
5758
5759         if (trace_flags & (1 << index))
5760                 buf = "1\n";
5761         else
5762                 buf = "0\n";
5763
5764         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5765 }
5766
5767 static ssize_t
5768 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5769                          loff_t *ppos)
5770 {
5771         struct trace_array *tr = &global_trace;
5772         long index = (long)filp->private_data;
5773         unsigned long val;
5774         int ret;
5775
5776         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5777         if (ret)
5778                 return ret;
5779
5780         if (val != 0 && val != 1)
5781                 return -EINVAL;
5782
5783         mutex_lock(&trace_types_lock);
5784         ret = set_tracer_flag(tr, 1 << index, val);
5785         mutex_unlock(&trace_types_lock);
5786
5787         if (ret < 0)
5788                 return ret;
5789
5790         *ppos += cnt;
5791
5792         return cnt;
5793 }
5794
5795 static const struct file_operations trace_options_core_fops = {
5796         .open = tracing_open_generic,
5797         .read = trace_options_core_read,
5798         .write = trace_options_core_write,
5799         .llseek = generic_file_llseek,
5800 };
5801
5802 struct dentry *trace_create_file(const char *name,
5803                                  umode_t mode,
5804                                  struct dentry *parent,
5805                                  void *data,
5806                                  const struct file_operations *fops)
5807 {
5808         struct dentry *ret;
5809
5810         ret = debugfs_create_file(name, mode, parent, data, fops);
5811         if (!ret)
5812                 pr_warning("Could not create debugfs '%s' entry\n", name);
5813
5814         return ret;
5815 }
5816
5817
5818 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5819 {
5820         struct dentry *d_tracer;
5821
5822         if (tr->options)
5823                 return tr->options;
5824
5825         d_tracer = tracing_init_dentry_tr(tr);
5826         if (!d_tracer)
5827                 return NULL;
5828
5829         tr->options = debugfs_create_dir("options", d_tracer);
5830         if (!tr->options) {
5831                 pr_warning("Could not create debugfs directory 'options'\n");
5832                 return NULL;
5833         }
5834
5835         return tr->options;
5836 }
5837
5838 static void
5839 create_trace_option_file(struct trace_array *tr,
5840                          struct trace_option_dentry *topt,
5841                          struct tracer_flags *flags,
5842                          struct tracer_opt *opt)
5843 {
5844         struct dentry *t_options;
5845
5846         t_options = trace_options_init_dentry(tr);
5847         if (!t_options)
5848                 return;
5849
5850         topt->flags = flags;
5851         topt->opt = opt;
5852         topt->tr = tr;
5853
5854         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5855                                     &trace_options_fops);
5856
5857 }
5858
5859 static struct trace_option_dentry *
5860 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5861 {
5862         struct trace_option_dentry *topts;
5863         struct tracer_flags *flags;
5864         struct tracer_opt *opts;
5865         int cnt;
5866
5867         if (!tracer)
5868                 return NULL;
5869
5870         flags = tracer->flags;
5871
5872         if (!flags || !flags->opts)
5873                 return NULL;
5874
5875         opts = flags->opts;
5876
5877         for (cnt = 0; opts[cnt].name; cnt++)
5878                 ;
5879
5880         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5881         if (!topts)
5882                 return NULL;
5883
5884         for (cnt = 0; opts[cnt].name; cnt++)
5885                 create_trace_option_file(tr, &topts[cnt], flags,
5886                                          &opts[cnt]);
5887
5888         return topts;
5889 }
5890
5891 static void
5892 destroy_trace_option_files(struct trace_option_dentry *topts)
5893 {
5894         int cnt;
5895
5896         if (!topts)
5897                 return;
5898
5899         for (cnt = 0; topts[cnt].opt; cnt++) {
5900                 if (topts[cnt].entry)
5901                         debugfs_remove(topts[cnt].entry);
5902         }
5903
5904         kfree(topts);
5905 }
5906
5907 static struct dentry *
5908 create_trace_option_core_file(struct trace_array *tr,
5909                               const char *option, long index)
5910 {
5911         struct dentry *t_options;
5912
5913         t_options = trace_options_init_dentry(tr);
5914         if (!t_options)
5915                 return NULL;
5916
5917         return trace_create_file(option, 0644, t_options, (void *)index,
5918                                     &trace_options_core_fops);
5919 }
5920
5921 static __init void create_trace_options_dir(struct trace_array *tr)
5922 {
5923         struct dentry *t_options;
5924         int i;
5925
5926         t_options = trace_options_init_dentry(tr);
5927         if (!t_options)
5928                 return;
5929
5930         for (i = 0; trace_options[i]; i++)
5931                 create_trace_option_core_file(tr, trace_options[i], i);
5932 }
5933
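     /*
      * "tracing_on" file: reading reports whether the ring buffer is
      * currently recording; writing 0 or 1 stops or restarts recording
      * for this trace array without freeing the buffers.
      */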
5934 static ssize_t
5935 rb_simple_read(struct file *filp, char __user *ubuf,
5936                size_t cnt, loff_t *ppos)
5937 {
5938         struct trace_array *tr = filp->private_data;
5939         char buf[64];
5940         int r;
5941
5942         r = tracer_tracing_is_on(tr);
5943         r = sprintf(buf, "%d\n", r);
5944
5945         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5946 }
5947
5948 static ssize_t
5949 rb_simple_write(struct file *filp, const char __user *ubuf,
5950                 size_t cnt, loff_t *ppos)
5951 {
5952         struct trace_array *tr = filp->private_data;
5953         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5954         unsigned long val;
5955         int ret;
5956
5957         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5958         if (ret)
5959                 return ret;
5960
5961         if (buffer) {
5962                 mutex_lock(&trace_types_lock);
5963                 if (val) {
5964                         tracer_tracing_on(tr);
5965                         if (tr->current_trace->start)
5966                                 tr->current_trace->start(tr);
5967                 } else {
5968                         tracer_tracing_off(tr);
5969                         if (tr->current_trace->stop)
5970                                 tr->current_trace->stop(tr);
5971                 }
5972                 mutex_unlock(&trace_types_lock);
5973         }
5974
5975         (*ppos)++;
5976
5977         return cnt;
5978 }
5979
5980 static const struct file_operations rb_simple_fops = {
5981         .open           = tracing_open_generic_tr,
5982         .read           = rb_simple_read,
5983         .write          = rb_simple_write,
5984         .release        = tracing_release_generic_tr,
5985         .llseek         = default_llseek,
5986 };
5987
5988 struct dentry *trace_instance_dir;
5989
5990 static void
5991 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5992
5993 static int
5994 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5995 {
5996         enum ring_buffer_flags rb_flags;
5997
5998         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5999
6000         buf->tr = tr;
6001
6002         buf->buffer = ring_buffer_alloc(size, rb_flags);
6003         if (!buf->buffer)
6004                 return -ENOMEM;
6005
6006         buf->data = alloc_percpu(struct trace_array_cpu);
6007         if (!buf->data) {
6008                 ring_buffer_free(buf->buffer);
6009                 return -ENOMEM;
6010         }
6011
6012         /* Allocate the first page for all buffers */
6013         set_buffer_entries(&tr->trace_buffer,
6014                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6015
6016         return 0;
6017 }
6018
6019 static int allocate_trace_buffers(struct trace_array *tr, int size)
6020 {
6021         int ret;
6022
6023         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6024         if (ret)
6025                 return ret;
6026
6027 #ifdef CONFIG_TRACER_MAX_TRACE
6028         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6029                                     allocate_snapshot ? size : 1);
6030         if (WARN_ON(ret)) {
6031                 ring_buffer_free(tr->trace_buffer.buffer);
6032                 free_percpu(tr->trace_buffer.data);
6033                 return -ENOMEM;
6034         }
6035         tr->allocated_snapshot = allocate_snapshot;
6036
6037         /*
6038          * Only the top level trace array gets its snapshot allocated
6039          * from the kernel command line.
6040          */
6041         allocate_snapshot = false;
6042 #endif
6043         return 0;
6044 }
6045
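     /*
      * Back end of "mkdir instances/<name>": allocate a new trace_array
      * with its own ring buffer, events and debugfs tree.  Typically
      * triggered by something like
      *
      *   # mkdir /sys/kernel/debug/tracing/instances/foo
      */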
6046 static int new_instance_create(const char *name)
6047 {
6048         struct trace_array *tr;
6049         int ret;
6050
6051         mutex_lock(&trace_types_lock);
6052
6053         ret = -EEXIST;
6054         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6055                 if (tr->name && strcmp(tr->name, name) == 0)
6056                         goto out_unlock;
6057         }
6058
6059         ret = -ENOMEM;
6060         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6061         if (!tr)
6062                 goto out_unlock;
6063
6064         tr->name = kstrdup(name, GFP_KERNEL);
6065         if (!tr->name)
6066                 goto out_free_tr;
6067
6068         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6069                 goto out_free_tr;
6070
6071         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6072
6073         raw_spin_lock_init(&tr->start_lock);
6074
6075         tr->current_trace = &nop_trace;
6076
6077         INIT_LIST_HEAD(&tr->systems);
6078         INIT_LIST_HEAD(&tr->events);
6079
6080         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6081                 goto out_free_tr;
6082
6083         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6084         if (!tr->dir)
6085                 goto out_free_tr;
6086
6087         ret = event_trace_add_tracer(tr->dir, tr);
6088         if (ret) {
6089                 debugfs_remove_recursive(tr->dir);
6090                 goto out_free_tr;
6091         }
6092
6093         init_tracer_debugfs(tr, tr->dir);
6094
6095         list_add(&tr->list, &ftrace_trace_arrays);
6096
6097         mutex_unlock(&trace_types_lock);
6098
6099         return 0;
6100
6101  out_free_tr:
6102         if (tr->trace_buffer.buffer)
6103                 ring_buffer_free(tr->trace_buffer.buffer);
6104         free_cpumask_var(tr->tracing_cpumask);
6105         kfree(tr->name);
6106         kfree(tr);
6107
6108  out_unlock:
6109         mutex_unlock(&trace_types_lock);
6110
6111         return ret;
6112
6113 }
6114
6115 static int instance_delete(const char *name)
6116 {
6117         struct trace_array *tr;
6118         int found = 0;
6119         int ret;
6120
6121         mutex_lock(&trace_types_lock);
6122
6123         ret = -ENODEV;
6124         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6125                 if (tr->name && strcmp(tr->name, name) == 0) {
6126                         found = 1;
6127                         break;
6128                 }
6129         }
6130         if (!found)
6131                 goto out_unlock;
6132
6133         ret = -EBUSY;
6134         if (tr->ref)
6135                 goto out_unlock;
6136
6137         list_del(&tr->list);
6138
6139         event_trace_del_tracer(tr);
6140         debugfs_remove_recursive(tr->dir);
6141         free_percpu(tr->trace_buffer.data);
6142         ring_buffer_free(tr->trace_buffer.buffer);
6143
6144         kfree(tr->name);
6145         kfree(tr);
6146
6147         ret = 0;
6148
6149  out_unlock:
6150         mutex_unlock(&trace_types_lock);
6151
6152         return ret;
6153 }
6154
6155 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6156 {
6157         struct dentry *parent;
6158         int ret;
6159
6160         /* Paranoid: Make sure the parent is the "instances" directory */
6161         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6162         if (WARN_ON_ONCE(parent != trace_instance_dir))
6163                 return -ENOENT;
6164
6165         /*
6166          * The inode mutex is locked, but debugfs_create_dir() will also
6167          * take the mutex. As the instances directory can not be destroyed
6168          * or changed in any other way, it is safe to unlock it, and
6169          * let the dentry try. If two users try to make the same dir at
6170          * the same time, then the new_instance_create() will determine the
6171          * winner.
6172          */
6173         mutex_unlock(&inode->i_mutex);
6174
6175         ret = new_instance_create(dentry->d_iname);
6176
6177         mutex_lock(&inode->i_mutex);
6178
6179         return ret;
6180 }
6181
6182 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6183 {
6184         struct dentry *parent;
6185         int ret;
6186
6187         /* Paranoid: Make sure the parent is the "instances" directory */
6188         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6189         if (WARN_ON_ONCE(parent != trace_instance_dir))
6190                 return -ENOENT;
6191
6192         /* The caller did a dget() on dentry */
6193         mutex_unlock(&dentry->d_inode->i_mutex);
6194
6195         /*
6196          * The inode mutex is locked, but debugfs_remove_recursive() will
6197          * also take the mutex. As the instances directory can not be
6198          * destroyed or changed in any other way, it is safe to unlock it,
6199          * and let the dentry try. If two users try to remove the same dir
6200          * at the same time, then instance_delete() will determine the
6201          * winner.
6202          */
6203         mutex_unlock(&inode->i_mutex);
6204
6205         ret = instance_delete(dentry->d_iname);
6206
6207         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6208         mutex_lock(&dentry->d_inode->i_mutex);
6209
6210         return ret;
6211 }
6212
6213 static const struct inode_operations instance_dir_inode_operations = {
6214         .lookup         = simple_lookup,
6215         .mkdir          = instance_mkdir,
6216         .rmdir          = instance_rmdir,
6217 };
6218
6219 static __init void create_trace_instances(struct dentry *d_tracer)
6220 {
6221         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6222         if (WARN_ON(!trace_instance_dir))
6223                 return;
6224
6225         /* Hijack the dir inode operations, to allow mkdir */
6226         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6227 }
6228
6229 static void
6230 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6231 {
6232         int cpu;
6233
6234         trace_create_file("tracing_cpumask", 0644, d_tracer,
6235                           tr, &tracing_cpumask_fops);
6236
6237         trace_create_file("trace_options", 0644, d_tracer,
6238                           tr, &tracing_iter_fops);
6239
6240         trace_create_file("trace", 0644, d_tracer,
6241                           tr, &tracing_fops);
6242
6243         trace_create_file("trace_pipe", 0444, d_tracer,
6244                           tr, &tracing_pipe_fops);
6245
6246         trace_create_file("buffer_size_kb", 0644, d_tracer,
6247                           tr, &tracing_entries_fops);
6248
6249         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6250                           tr, &tracing_total_entries_fops);
6251
6252         trace_create_file("free_buffer", 0200, d_tracer,
6253                           tr, &tracing_free_buffer_fops);
6254
6255         trace_create_file("trace_marker", 0220, d_tracer,
6256                           tr, &tracing_mark_fops);
6257
6258         trace_create_file("trace_clock", 0644, d_tracer, tr,
6259                           &trace_clock_fops);
6260
6261         trace_create_file("tracing_on", 0644, d_tracer,
6262                           tr, &rb_simple_fops);
6263
6264 #ifdef CONFIG_TRACER_SNAPSHOT
6265         trace_create_file("snapshot", 0644, d_tracer,
6266                           tr, &snapshot_fops);
6267 #endif
6268
6269         for_each_tracing_cpu(cpu)
6270                 tracing_init_debugfs_percpu(tr, cpu);
6271
6272 }
6273
6274 static __init int tracer_init_debugfs(void)
6275 {
6276         struct dentry *d_tracer;
6277
6278         trace_access_lock_init();
6279
6280         d_tracer = tracing_init_dentry();
6281         if (!d_tracer)
6282                 return 0;
6283
6284         init_tracer_debugfs(&global_trace, d_tracer);
6285
6286         trace_create_file("available_tracers", 0444, d_tracer,
6287                         &global_trace, &show_traces_fops);
6288
6289         trace_create_file("current_tracer", 0644, d_tracer,
6290                         &global_trace, &set_tracer_fops);
6291
6292 #ifdef CONFIG_TRACER_MAX_TRACE
6293         trace_create_file("tracing_max_latency", 0644, d_tracer,
6294                         &tracing_max_latency, &tracing_max_lat_fops);
6295 #endif
6296
6297         trace_create_file("tracing_thresh", 0644, d_tracer,
6298                         &tracing_thresh, &tracing_max_lat_fops);
6299
6300         trace_create_file("README", 0444, d_tracer,
6301                         NULL, &tracing_readme_fops);
6302
6303         trace_create_file("saved_cmdlines", 0444, d_tracer,
6304                         NULL, &tracing_saved_cmdlines_fops);
6305
6306 #ifdef CONFIG_DYNAMIC_FTRACE
6307         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6308                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6309 #endif
6310
6311         create_trace_instances(d_tracer);
6312
6313         create_trace_options_dir(&global_trace);
6314
6315         return 0;
6316 }
6317
6318 static int trace_panic_handler(struct notifier_block *this,
6319                                unsigned long event, void *unused)
6320 {
6321         if (ftrace_dump_on_oops)
6322                 ftrace_dump(ftrace_dump_on_oops);
6323         return NOTIFY_OK;
6324 }
6325
6326 static struct notifier_block trace_panic_notifier = {
6327         .notifier_call  = trace_panic_handler,
6328         .next           = NULL,
6329         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6330 };
6331
6332 static int trace_die_handler(struct notifier_block *self,
6333                              unsigned long val,
6334                              void *data)
6335 {
6336         switch (val) {
6337         case DIE_OOPS:
6338                 if (ftrace_dump_on_oops)
6339                         ftrace_dump(ftrace_dump_on_oops);
6340                 break;
6341         default:
6342                 break;
6343         }
6344         return NOTIFY_OK;
6345 }
6346
6347 static struct notifier_block trace_die_notifier = {
6348         .notifier_call = trace_die_handler,
6349         .priority = 200
6350 };
6351
6352 /*
6353  * printk is set to max of 1024, we really don't need it that big.
6354  * Nothing should be printing 1000 characters anyway.
6355  */
6356 #define TRACE_MAX_PRINT         1000
6357
6358 /*
6359  * Define here KERN_TRACE so that we have one place to modify
6360  * it if we decide to change what log level the ftrace dump
6361  * should be at.
6362  */
6363 #define KERN_TRACE              KERN_EMERG
6364
6365 void
6366 trace_printk_seq(struct trace_seq *s)
6367 {
6368         /* Probably should print a warning here. */
6369         if (s->len >= TRACE_MAX_PRINT)
6370                 s->len = TRACE_MAX_PRINT;
6371
6372         /* Should be zero terminated, but we are paranoid. */
6373         s->buffer[s->len] = 0;
6374
6375         printk(KERN_TRACE "%s", s->buffer);
6376
6377         trace_seq_init(s);
6378 }
6379
6380 void trace_init_global_iter(struct trace_iterator *iter)
6381 {
6382         iter->tr = &global_trace;
6383         iter->trace = iter->tr->current_trace;
6384         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6385         iter->trace_buffer = &global_trace.trace_buffer;
6386
6387         if (iter->trace && iter->trace->open)
6388                 iter->trace->open(iter);
6389
6390         /* Annotate start of buffers if we had overruns */
6391         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6392                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6393
6394         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6395         if (trace_clocks[iter->tr->clock_id].in_ns)
6396                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6397 }
6398
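     /*
      * Dump the ring buffer to the console at KERN_TRACE (KERN_EMERG)
      * level.  Used from the oops/panic notifiers above when
      * ftrace_dump_on_oops is set, and from sysrq-z; tracing is turned
      * off first so the dump itself does not add new events.
      */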
6399 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6400 {
6401         /* use static because iter can be a bit big for the stack */
6402         static struct trace_iterator iter;
6403         static atomic_t dump_running;
6404         unsigned int old_userobj;
6405         unsigned long flags;
6406         int cnt = 0, cpu;
6407
6408         /* Only allow one dump user at a time. */
6409         if (atomic_inc_return(&dump_running) != 1) {
6410                 atomic_dec(&dump_running);
6411                 return;
6412         }
6413
6414         /*
6415          * Always turn off tracing when we dump.
6416          * We don't need to show trace output of what happens
6417          * between multiple crashes.
6418          *
6419          * If the user does a sysrq-z, then they can re-enable
6420          * tracing with echo 1 > tracing_on.
6421          */
6422         tracing_off();
6423
6424         local_irq_save(flags);
6425
6426         /* Simulate the iterator */
6427         trace_init_global_iter(&iter);
6428
6429         for_each_tracing_cpu(cpu) {
6430                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6431         }
6432
6433         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6434
6435         /* don't look at user memory in panic mode */
6436         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6437
6438         switch (oops_dump_mode) {
6439         case DUMP_ALL:
6440                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6441                 break;
6442         case DUMP_ORIG:
6443                 iter.cpu_file = raw_smp_processor_id();
6444                 break;
6445         case DUMP_NONE:
6446                 goto out_enable;
6447         default:
6448                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6449                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6450         }
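
        /*
         * DUMP_ORIG restricts the dump to the CPU this code is running on,
         * i.e. the one that hit the oops; it corresponds to booting with
         * "ftrace_dump_on_oops=orig_cpu" (the usual spelling of that option,
         * not something defined here).
         */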
6451
6452         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6453
6454         /* Did function tracer already get disabled? */
6455         if (ftrace_is_dead()) {
6456                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6457                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6458         }
6459
6460         /*
6461          * We need to stop all tracing on all CPUs to read
6462          * the next buffer.  This is a bit expensive, but it is
6463          * not done often.  We read everything we can,
6464          * and then release the locks again.
6465          */
6466
6467         while (!trace_empty(&iter)) {
6468
6469                 if (!cnt)
6470                         printk(KERN_TRACE "---------------------------------\n");
6471
6472                 cnt++;
6473
6474                 /* reset all but tr, trace, and overruns */
6475                 memset(&iter.seq, 0,
6476                        sizeof(struct trace_iterator) -
6477                        offsetof(struct trace_iterator, seq));
6478                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6479                 iter.pos = -1;
6480
6481                 if (trace_find_next_entry_inc(&iter) != NULL) {
6482                         int ret;
6483
6484                         ret = print_trace_line(&iter);
6485                         if (ret != TRACE_TYPE_NO_CONSUME)
6486                                 trace_consume(&iter);
6487                 }
6488                 touch_nmi_watchdog();
6489
6490                 trace_printk_seq(&iter.seq);
6491         }
6492
6493         if (!cnt)
6494                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6495         else
6496                 printk(KERN_TRACE "---------------------------------\n");
6497
6498  out_enable:
6499         trace_flags |= old_userobj;
6500
6501         for_each_tracing_cpu(cpu) {
6502                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6503         }
6504         atomic_dec(&dump_running);
6505         local_irq_restore(flags);
6506 }
6507 EXPORT_SYMBOL_GPL(ftrace_dump);
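
/*
 * Because ftrace_dump() is exported, module code can trigger a dump from a
 * fatal error path of its own.  A sketch (my_driver_fatal_error() is a
 * hypothetical helper, not part of any real driver):
 *
 *	static void my_driver_fatal_error(void)
 *	{
 *		pr_emerg("mydrv: hardware wedged, dumping ftrace buffer\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 *
 * Note that ftrace_dump() turns tracing off as a side effect, so it is only
 * suitable for one-shot, fatal situations.
 */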
6508
6509 __init static int tracer_alloc_buffers(void)
6510 {
6511         int ring_buf_size;
6512         int ret = -ENOMEM;
6513
6514
6515         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6516                 goto out;
6517
6518         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6519                 goto out_free_buffer_mask;
6520
6521         /* Only allocate trace_printk buffers if a trace_printk exists */
6522         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6523                 /* Must be called before global_trace.buffer is allocated */
6524                 trace_printk_init_buffers();
6525
6526         /* To save memory, keep the ring buffer at its minimum size */
6527         if (ring_buffer_expanded)
6528                 ring_buf_size = trace_buf_size;
6529         else
6530                 ring_buf_size = 1;
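
        /*
         * A size of 1 makes the ring-buffer code fall back to its minimum
         * per-CPU allocation; the buffers only grow to trace_buf_size once
         * tracing is actually used and ring_buffer_expanded is set.
         */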
6531
6532         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6533         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6534
6535         raw_spin_lock_init(&global_trace.start_lock);
6536
6537         /* Used for event triggers */
6538         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6539         if (!temp_buffer)
6540                 goto out_free_cpumask;
6541
6542         /* TODO: make the number of buffers hot pluggable with CPUs */
6543         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6544                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6545                 WARN_ON(1);
6546                 goto out_free_temp_buffer;
6547         }
6548
6549         if (global_trace.buffer_disabled)
6550                 tracing_off();
6551
6552         trace_init_cmdlines();
6553
6554         /*
6555          * register_tracer() might reference current_trace, so it
6556          * needs to be set before we register anything. This is
6557          * just a bootstrap of current_trace anyway.
6558          */
6559         global_trace.current_trace = &nop_trace;
6560
6561         register_tracer(&nop_trace);
6562
6563         /* All seems OK, enable tracing */
6564         tracing_disabled = 0;
6565
6566         atomic_notifier_chain_register(&panic_notifier_list,
6567                                        &trace_panic_notifier);
6568
6569         register_die_notifier(&trace_die_notifier);
6570
6571         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6572
6573         INIT_LIST_HEAD(&global_trace.systems);
6574         INIT_LIST_HEAD(&global_trace.events);
6575         list_add(&global_trace.list, &ftrace_trace_arrays);
6576
6577         while (trace_boot_options) {
6578                 char *option;
6579
6580                 option = strsep(&trace_boot_options, ",");
6581                 trace_set_options(&global_trace, option);
6582         }
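
        /*
         * The loop above applies the comma-separated list handed in via the
         * "trace_options=" boot parameter, e.g. booting with
         *
         *	trace_options=stacktrace,sym-offset
         *
         * enables those options on the global trace array before any output
         * is produced ("stacktrace" and "sym-offset" are existing option
         * names used purely as examples here).
         */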
6583
6584         register_snapshot_cmd();
6585
6586         return 0;
6587
6588 out_free_temp_buffer:
6589         ring_buffer_free(temp_buffer);
6590 out_free_cpumask:
6591         free_percpu(global_trace.trace_buffer.data);
6592 #ifdef CONFIG_TRACER_MAX_TRACE
6593         free_percpu(global_trace.max_buffer.data);
6594 #endif
6595         free_cpumask_var(global_trace.tracing_cpumask);
6596 out_free_buffer_mask:
6597         free_cpumask_var(tracing_buffer_mask);
6598 out:
6599         return ret;
6600 }
6601
6602 __init static int clear_boot_tracer(void)
6603 {
6604         /*
6605          * The buffer holding the default bootup tracer's name lives in
6606          * an init section that is freed once boot completes.  This
6607          * function runs as a late initcall; if the boot tracer was never
6608          * registered by then, clear the pointer so that a later
6609          * registration cannot access the buffer that is about to be freed.
6610          */
6611         if (!default_bootup_tracer)
6612                 return 0;
6613
6614         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6615                default_bootup_tracer);
6616         default_bootup_tracer = NULL;
6617
6618         return 0;
6619 }
6620
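/*
 * Initcall ordering matters here: tracer_alloc_buffers() runs as an early
 * initcall so the ring buffers and the nop tracer exist before
 * tracer_init_debugfs() creates the debugfs interface at fs_initcall time,
 * and clear_boot_tracer() runs at late_initcall time so a never-registered
 * bootup tracer name is dropped before its init-section buffer is freed.
 */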
6621 early_initcall(tracer_alloc_buffers);
6622 fs_initcall(tracer_init_debugfs);
6623 late_initcall(clear_boot_tracer);