kernel/trace/trace.c  (platform/adaptation/renesas_rcar/renesas_kernel.git, commit fd21e601a891f62e7bd87934b07eda865e82d98e)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring-buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring-buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will be set to zero if the initialization
91  * of the tracer is successful. That is the only place that sets
92  * it back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
113  * the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
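/*
 * A minimal userspace sketch of using the runtime knob documented above;
 * it is not part of this file and assumes procfs is mounted at /proc.
 * Writing 1 dumps the buffers of all CPUs on an oops, 2 dumps only the
 * buffer of the CPU that oopsed.
 */
#include <stdio.h>

static int enable_ftrace_dump_on_oops(int mode)
{
	FILE *f = fopen("/proc/sys/kernel/ftrace_dump_on_oops", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", mode);
	return fclose(f);
}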
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptors of the pages in memory are used to hold
196  * this linked list, by linking the lru item in each page descriptor
197  * to the other pages of that CPU's buffer.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show if global_trace is enabled
287  *
288  * Shows whether the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" so it can be used in fast paths such as
290  * the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on(), which is a little
292  * slower but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to the low value of 16384.
311  * If a dump on oops happens, it is much appreciated not to
312  * have to wait for all that output. In any case, this is
313  * configurable at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a link list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low level protection.
331  * The validity of the events (returned by ring_buffer_peek() etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow other processes to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) the page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different cpu ring
343  * buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
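/*
 * A minimal sketch (hypothetical helper, not part of the original code) of
 * the reader-side pattern the primitives above provide: hold the access
 * lock for @cpu (or RING_BUFFER_ALL_CPUS) around any consuming read.
 */
static void __maybe_unused example_consume_cpu(struct trace_buffer *buf, int cpu)
{
	trace_access_lock(cpu);
	/* ... consume events of @cpu from buf->buffer here ... */
	trace_access_unlock(cpu);
}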
408
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff) that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races where it gets disabled while we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         if (unlikely(tracing_selftest_running || tracing_disabled))
459                 return 0;
460
461         alloc = sizeof(*entry) + size + 2; /* possible \n added */
462
463         local_save_flags(irq_flags);
464         buffer = global_trace.trace_buffer.buffer;
465         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
466                                           irq_flags, preempt_count());
467         if (!event)
468                 return 0;
469
470         entry = ring_buffer_event_data(event);
471         entry->ip = ip;
472
473         memcpy(&entry->buf, str, size);
474
475         /* Add a newline if necessary */
476         if (entry->buf[size - 1] != '\n') {
477                 entry->buf[size] = '\n';
478                 entry->buf[size + 1] = '\0';
479         } else
480                 entry->buf[size] = '\0';
481
482         __buffer_unlock_commit(buffer, event);
483
484         return size;
485 }
486 EXPORT_SYMBOL_GPL(__trace_puts);
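/*
 * A minimal sketch of calling the export above from other kernel code
 * (hypothetical function; callers normally go through a wrapper macro in a
 * header rather than calling __trace_puts() directly).
 */
static void __maybe_unused example_trace_puts(void)
{
	static const char msg[] = "example: writing a constant string\n";

	__trace_puts(_THIS_IP_, msg, sizeof(msg) - 1);
}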
487
488 /**
489  * __trace_bputs - write the pointer to a constant string into trace buffer
490  * @ip:    The address of the caller
491  * @str:   The constant string to write to the buffer
492  */
493 int __trace_bputs(unsigned long ip, const char *str)
494 {
495         struct ring_buffer_event *event;
496         struct ring_buffer *buffer;
497         struct bputs_entry *entry;
498         unsigned long irq_flags;
499         int size = sizeof(struct bputs_entry);
500
501         if (unlikely(tracing_selftest_running || tracing_disabled))
502                 return 0;
503
504         local_save_flags(irq_flags);
505         buffer = global_trace.trace_buffer.buffer;
506         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
507                                           irq_flags, preempt_count());
508         if (!event)
509                 return 0;
510
511         entry = ring_buffer_event_data(event);
512         entry->ip                       = ip;
513         entry->str                      = str;
514
515         __buffer_unlock_commit(buffer, event);
516
517         return 1;
518 }
519 EXPORT_SYMBOL_GPL(__trace_bputs);
520
521 #ifdef CONFIG_TRACER_SNAPSHOT
522 /**
523  * tracing_snapshot - take a snapshot of the current buffer.
524  *
525  * This causes a swap between the snapshot buffer and the current live
526  * tracing buffer. You can use this to take snapshots of the live
527  * trace when some condition is triggered, but continue to trace.
528  *
529  * Note, make sure to allocate the snapshot beforehand, either with
530  * tracing_snapshot_alloc(), or manually with:
531  *   echo 1 > /sys/kernel/debug/tracing/snapshot
532  *
533  * If the snapshot buffer is not allocated, this will stop tracing,
534  * basically making a permanent snapshot.
535  */
536 void tracing_snapshot(void)
537 {
538         struct trace_array *tr = &global_trace;
539         struct tracer *tracer = tr->current_trace;
540         unsigned long flags;
541
542         if (in_nmi()) {
543                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
544                 internal_trace_puts("*** snapshot is being ignored        ***\n");
545                 return;
546         }
547
548         if (!tr->allocated_snapshot) {
549                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
550                 internal_trace_puts("*** stopping trace here!   ***\n");
551                 tracing_off();
552                 return;
553         }
554
555         /* Note, the snapshot cannot be used while the current tracer uses it */
556         if (tracer->use_max_tr) {
557                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
558                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
559                 return;
560         }
561
562         local_irq_save(flags);
563         update_max_tr(tr, current, smp_processor_id());
564         local_irq_restore(flags);
565 }
566 EXPORT_SYMBOL_GPL(tracing_snapshot);
567
568 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
569                                         struct trace_buffer *size_buf, int cpu_id);
570 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
571
572 static int alloc_snapshot(struct trace_array *tr)
573 {
574         int ret;
575
576         if (!tr->allocated_snapshot) {
577
578                 /* allocate spare buffer */
579                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
580                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
581                 if (ret < 0)
582                         return ret;
583
584                 tr->allocated_snapshot = true;
585         }
586
587         return 0;
588 }
589
590 void free_snapshot(struct trace_array *tr)
591 {
592         /*
593          * We don't free the ring buffer; instead, we resize it, because
594          * the max_tr ring buffer has some state (e.g. ring->clock) that
595          * we want to preserve.
596          */
597         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
598         set_buffer_entries(&tr->max_buffer, 1);
599         tracing_reset_online_cpus(&tr->max_buffer);
600         tr->allocated_snapshot = false;
601 }
602
603 /**
604  * tracing_alloc_snapshot - allocate snapshot buffer.
605  *
606  * This only allocates the snapshot buffer if it isn't already
607  * allocated - it doesn't also take a snapshot.
608  *
609  * This is meant to be used in cases where the snapshot buffer needs
610  * to be set up for events that can't sleep but need to be able to
611  * trigger a snapshot.
612  */
613 int tracing_alloc_snapshot(void)
614 {
615         struct trace_array *tr = &global_trace;
616         int ret;
617
618         ret = alloc_snapshot(tr);
619         WARN_ON(ret < 0);
620
621         return ret;
622 }
623 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
624
625 /**
626  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
627  *
628  * This is similar to tracing_snapshot(), but it will allocate the
629  * snapshot buffer if it isn't already allocated. Use this only
630  * where it is safe to sleep, as the allocation may sleep.
631  *
632  * This causes a swap between the snapshot buffer and the current live
633  * tracing buffer. You can use this to take snapshots of the live
634  * trace when some condition is triggered, but continue to trace.
635  */
636 void tracing_snapshot_alloc(void)
637 {
638         int ret;
639
640         ret = tracing_alloc_snapshot();
641         if (ret < 0)
642                 return;
643
644         tracing_snapshot();
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
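/*
 * A minimal sketch (hypothetical caller) of how other kernel code might use
 * the exports above: allocate the spare buffer from sleepable context, then
 * trigger a snapshot when some condition of interest fires. The condition
 * itself is made up for illustration.
 */
static void __maybe_unused example_snapshot_on_condition(bool hit_condition)
{
	/* May sleep; only allocates the snapshot buffer on the first call. */
	if (tracing_alloc_snapshot() < 0)
		return;

	if (hit_condition)
		tracing_snapshot();
}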
647 #else
648 void tracing_snapshot(void)
649 {
650         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
651 }
652 EXPORT_SYMBOL_GPL(tracing_snapshot);
653 int tracing_alloc_snapshot(void)
654 {
655         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
656         return -ENODEV;
657 }
658 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
659 void tracing_snapshot_alloc(void)
660 {
661         /* Give warning */
662         tracing_snapshot();
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
665 #endif /* CONFIG_TRACER_SNAPSHOT */
666
667 static void tracer_tracing_off(struct trace_array *tr)
668 {
669         if (tr->trace_buffer.buffer)
670                 ring_buffer_record_off(tr->trace_buffer.buffer);
671         /*
672          * This flag is looked at when buffers haven't been allocated
673          * yet, or by some tracers (like irqsoff) that just want to
674          * know if the ring buffer has been disabled, but it can handle
675          * races where it gets disabled while we still do a record.
676          * As the check is in the fast path of the tracers, it is more
677          * important to be fast than accurate.
678          */
679         tr->buffer_disabled = 1;
680         /* Make the flag seen by readers */
681         smp_wmb();
682 }
683
684 /**
685  * tracing_off - turn off tracing buffers
686  *
687  * This function stops the tracing buffers from recording data.
688  * It does not disable any overhead the tracers themselves may
689  * be causing. This function simply causes all recording to
690  * the ring buffers to fail.
691  */
692 void tracing_off(void)
693 {
694         tracer_tracing_off(&global_trace);
695 }
696 EXPORT_SYMBOL_GPL(tracing_off);
697
698 void disable_trace_on_warning(void)
699 {
700         if (__disable_trace_on_warning)
701                 tracing_off();
702 }
703
704 /**
705  * tracer_tracing_is_on - show real state of ring buffer enabled
706  * @tr : the trace array to know if ring buffer is enabled
707  *
708  * Shows real state of the ring buffer if it is enabled or not.
709  */
710 static int tracer_tracing_is_on(struct trace_array *tr)
711 {
712         if (tr->trace_buffer.buffer)
713                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
714         return !tr->buffer_disabled;
715 }
716
717 /**
718  * tracing_is_on - show state of ring buffers enabled
719  */
720 int tracing_is_on(void)
721 {
722         return tracer_tracing_is_on(&global_trace);
723 }
724 EXPORT_SYMBOL_GPL(tracing_is_on);
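/*
 * A minimal sketch (hypothetical caller) of using the exports above to
 * capture a trace window around a region of interest: recording runs only
 * between tracing_on() and tracing_off(), leaving the buffer contents for
 * later inspection.
 */
static void __maybe_unused example_trace_window(void)
{
	tracing_on();
	/* ... the code whose trace should be captured runs here ... */
	tracing_off();

	if (!tracing_is_on())
		pr_debug("ring buffers stopped recording\n");
}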
725
726 static int __init set_buf_size(char *str)
727 {
728         unsigned long buf_size;
729
730         if (!str)
731                 return 0;
732         buf_size = memparse(str, &str);
733         /* nr_entries can not be zero */
734         if (buf_size == 0)
735                 return 0;
736         trace_buf_size = buf_size;
737         return 1;
738 }
739 __setup("trace_buf_size=", set_buf_size);
740
741 static int __init set_tracing_thresh(char *str)
742 {
743         unsigned long threshold;
744         int ret;
745
746         if (!str)
747                 return 0;
748         ret = kstrtoul(str, 0, &threshold);
749         if (ret < 0)
750                 return 0;
751         tracing_thresh = threshold * 1000;
752         return 1;
753 }
754 __setup("tracing_thresh=", set_tracing_thresh);
755
756 unsigned long nsecs_to_usecs(unsigned long nsecs)
757 {
758         return nsecs / 1000;
759 }
760
761 /* These must match the bit positions in trace_iterator_flags */
762 static const char *trace_options[] = {
763         "print-parent",
764         "sym-offset",
765         "sym-addr",
766         "verbose",
767         "raw",
768         "hex",
769         "bin",
770         "block",
771         "stacktrace",
772         "trace_printk",
773         "ftrace_preempt",
774         "branch",
775         "annotate",
776         "userstacktrace",
777         "sym-userobj",
778         "printk-msg-only",
779         "context-info",
780         "latency-format",
781         "sleep-time",
782         "graph-time",
783         "record-cmd",
784         "overwrite",
785         "disable_on_free",
786         "irq-info",
787         "markers",
788         "function-trace",
789         NULL
790 };
791
792 static struct {
793         u64 (*func)(void);
794         const char *name;
795         int in_ns;              /* is this clock in nanoseconds? */
796 } trace_clocks[] = {
797         { trace_clock_local,    "local",        1 },
798         { trace_clock_global,   "global",       1 },
799         { trace_clock_counter,  "counter",      0 },
800         { trace_clock_jiffies,  "uptime",       1 },
801         { trace_clock,          "perf",         1 },
802         ARCH_TRACE_CLOCKS
803 };
804
805 /*
806  * trace_parser_get_init - gets the buffer for trace parser
807  */
808 int trace_parser_get_init(struct trace_parser *parser, int size)
809 {
810         memset(parser, 0, sizeof(*parser));
811
812         parser->buffer = kmalloc(size, GFP_KERNEL);
813         if (!parser->buffer)
814                 return 1;
815
816         parser->size = size;
817         return 0;
818 }
819
820 /*
821  * trace_parser_put - frees the buffer for trace parser
822  */
823 void trace_parser_put(struct trace_parser *parser)
824 {
825         kfree(parser->buffer);
826 }
827
828 /*
829  * trace_get_user - reads the user input string separated by space
830  * (matched by isspace(ch))
831  *
832  * For each string found, the 'struct trace_parser' is updated,
833  * and the function returns.
834  *
835  * Returns number of bytes read.
836  *
837  * See kernel/trace/trace.h for 'struct trace_parser' details.
838  */
839 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
840         size_t cnt, loff_t *ppos)
841 {
842         char ch;
843         size_t read = 0;
844         ssize_t ret;
845
846         if (!*ppos)
847                 trace_parser_clear(parser);
848
849         ret = get_user(ch, ubuf++);
850         if (ret)
851                 goto out;
852
853         read++;
854         cnt--;
855
856         /*
857          * The parser is not finished with the last write,
858          * continue reading the user input without skipping spaces.
859          */
860         if (!parser->cont) {
861                 /* skip white space */
862                 while (cnt && isspace(ch)) {
863                         ret = get_user(ch, ubuf++);
864                         if (ret)
865                                 goto out;
866                         read++;
867                         cnt--;
868                 }
869
870                 /* only spaces were written */
871                 if (isspace(ch)) {
872                         *ppos += read;
873                         ret = read;
874                         goto out;
875                 }
876
877                 parser->idx = 0;
878         }
879
880         /* read the non-space input */
881         while (cnt && !isspace(ch)) {
882                 if (parser->idx < parser->size - 1)
883                         parser->buffer[parser->idx++] = ch;
884                 else {
885                         ret = -EINVAL;
886                         goto out;
887                 }
888                 ret = get_user(ch, ubuf++);
889                 if (ret)
890                         goto out;
891                 read++;
892                 cnt--;
893         }
894
895         /* We either got finished input or we have to wait for another call. */
896         if (isspace(ch)) {
897                 parser->buffer[parser->idx] = 0;
898                 parser->cont = false;
899         } else if (parser->idx < parser->size - 1) {
900                 parser->cont = true;
901                 parser->buffer[parser->idx++] = ch;
902         } else {
903                 ret = -EINVAL;
904                 goto out;
905         }
906
907         *ppos += read;
908         ret = read;
909
910 out:
911         return ret;
912 }
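/*
 * A minimal sketch (hypothetical ->write handler, not from this file) of the
 * intended use of the parser helpers above: allocate the parser, feed user
 * data through trace_get_user(), act on parser.buffer once a full token has
 * been read (parser.cont is false), and release it with trace_parser_put().
 */
static ssize_t __maybe_unused
example_parser_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t ret;

	if (trace_parser_get_init(&parser, 128))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret >= 0 && parser.idx && !parser.cont) {
		/* parser.buffer now holds one NUL-terminated token */
	}

	trace_parser_put(&parser);
	return ret;
}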
913
914 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
915 {
916         int len;
917         int ret;
918
919         if (!cnt)
920                 return 0;
921
922         if (s->len <= s->readpos)
923                 return -EBUSY;
924
925         len = s->len - s->readpos;
926         if (cnt > len)
927                 cnt = len;
928         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
929         if (ret == cnt)
930                 return -EFAULT;
931
932         cnt -= ret;
933
934         s->readpos += cnt;
935         return cnt;
936 }
937
938 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
939 {
940         int len;
941
942         if (s->len <= s->readpos)
943                 return -EBUSY;
944
945         len = s->len - s->readpos;
946         if (cnt > len)
947                 cnt = len;
948         memcpy(buf, s->buffer + s->readpos, cnt);
949
950         s->readpos += cnt;
951         return cnt;
952 }
953
954 /*
955  * ftrace_max_lock is used to protect the swapping of buffers
956  * when taking a max snapshot. The buffers themselves are
957  * protected by per_cpu spinlocks. But the action of the swap
958  * needs its own lock.
959  *
960  * This is defined as an arch_spinlock_t in order to help
961  * with performance when lockdep debugging is enabled.
962  *
963  * It is also used in other places outside of update_max_tr(),
964  * so it needs to be defined outside of the
965  * CONFIG_TRACER_MAX_TRACE block.
966  */
967 static arch_spinlock_t ftrace_max_lock =
968         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
969
970 unsigned long __read_mostly     tracing_thresh;
971
972 #ifdef CONFIG_TRACER_MAX_TRACE
973 unsigned long __read_mostly     tracing_max_latency;
974
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tracing_max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&ftrace_max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&ftrace_max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr - tracer
1052  * @tsk - task with the latency
1053  * @cpu - the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&ftrace_max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&ftrace_max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static void default_wait_pipe(struct trace_iterator *iter)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return;
1099
1100         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1101 }
1102
1103 #ifdef CONFIG_FTRACE_STARTUP_TEST
1104 static int run_tracer_selftest(struct tracer *type)
1105 {
1106         struct trace_array *tr = &global_trace;
1107         struct tracer *saved_tracer = tr->current_trace;
1108         int ret;
1109
1110         if (!type->selftest || tracing_selftest_disabled)
1111                 return 0;
1112
1113         /*
1114          * Run a selftest on this tracer.
1115          * Here we reset the trace buffer, and set the current
1116          * tracer to be this tracer. The tracer can then run some
1117          * internal tracing to verify that everything is in order.
1118          * If we fail, we do not register this tracer.
1119          */
1120         tracing_reset_online_cpus(&tr->trace_buffer);
1121
1122         tr->current_trace = type;
1123
1124 #ifdef CONFIG_TRACER_MAX_TRACE
1125         if (type->use_max_tr) {
1126                 /* If we expanded the buffers, make sure the max is expanded too */
1127                 if (ring_buffer_expanded)
1128                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1129                                            RING_BUFFER_ALL_CPUS);
1130                 tr->allocated_snapshot = true;
1131         }
1132 #endif
1133
1134         /* the test is responsible for initializing and enabling */
1135         pr_info("Testing tracer %s: ", type->name);
1136         ret = type->selftest(type, tr);
1137         /* the test is responsible for resetting too */
1138         tr->current_trace = saved_tracer;
1139         if (ret) {
1140                 printk(KERN_CONT "FAILED!\n");
1141                 /* Add the warning after printing 'FAILED' */
1142                 WARN_ON(1);
1143                 return -1;
1144         }
1145         /* Only reset on passing, to avoid touching corrupted buffers */
1146         tracing_reset_online_cpus(&tr->trace_buffer);
1147
1148 #ifdef CONFIG_TRACER_MAX_TRACE
1149         if (type->use_max_tr) {
1150                 tr->allocated_snapshot = false;
1151
1152                 /* Shrink the max buffer again */
1153                 if (ring_buffer_expanded)
1154                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1155                                            RING_BUFFER_ALL_CPUS);
1156         }
1157 #endif
1158
1159         printk(KERN_CONT "PASSED\n");
1160         return 0;
1161 }
1162 #else
1163 static inline int run_tracer_selftest(struct tracer *type)
1164 {
1165         return 0;
1166 }
1167 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1168
1169 /**
1170  * register_tracer - register a tracer with the ftrace system.
1171  * @type - the plugin for the tracer
1172  *
1173  * Register a new plugin tracer.
1174  */
1175 int register_tracer(struct tracer *type)
1176 {
1177         struct tracer *t;
1178         int ret = 0;
1179
1180         if (!type->name) {
1181                 pr_info("Tracer must have a name\n");
1182                 return -1;
1183         }
1184
1185         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1186                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1187                 return -1;
1188         }
1189
1190         mutex_lock(&trace_types_lock);
1191
1192         tracing_selftest_running = true;
1193
1194         for (t = trace_types; t; t = t->next) {
1195                 if (strcmp(type->name, t->name) == 0) {
1196                         /* already found */
1197                         pr_info("Tracer %s already registered\n",
1198                                 type->name);
1199                         ret = -1;
1200                         goto out;
1201                 }
1202         }
1203
1204         if (!type->set_flag)
1205                 type->set_flag = &dummy_set_flag;
1206         if (!type->flags)
1207                 type->flags = &dummy_tracer_flags;
1208         else
1209                 if (!type->flags->opts)
1210                         type->flags->opts = dummy_tracer_opt;
1211         if (!type->wait_pipe)
1212                 type->wait_pipe = default_wait_pipe;
1213
1214         ret = run_tracer_selftest(type);
1215         if (ret < 0)
1216                 goto out;
1217
1218         type->next = trace_types;
1219         trace_types = type;
1220
1221  out:
1222         tracing_selftest_running = false;
1223         mutex_unlock(&trace_types_lock);
1224
1225         if (ret || !default_bootup_tracer)
1226                 goto out_unlock;
1227
1228         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1229                 goto out_unlock;
1230
1231         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1232         /* Do we want this tracer to start on bootup? */
1233         tracing_set_tracer(type->name);
1234         default_bootup_tracer = NULL;
1235         /* disable other selftests, since this will break them. */
1236         tracing_selftest_disabled = true;
1237 #ifdef CONFIG_FTRACE_STARTUP_TEST
1238         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1239                type->name);
1240 #endif
1241
1242  out_unlock:
1243         return ret;
1244 }
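/*
 * A minimal registration sketch (hypothetical tracer, not part of this
 * file). Only ->name is mandatory for register_tracer(); missing
 * set_flag/flags/wait_pipe callbacks are filled in with the defaults
 * above. A real tracer would also provide callbacks such as ->reset,
 * and would register itself from an __init function:
 *
 *	register_tracer(&example_tracer);
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static struct tracer example_tracer __maybe_unused = {
	.name	= "example",
	.init	= example_tracer_init,
};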
1245
1246 void tracing_reset(struct trace_buffer *buf, int cpu)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257         ring_buffer_reset_cpu(buffer, cpu);
1258
1259         ring_buffer_record_enable(buffer);
1260 }
1261
1262 void tracing_reset_online_cpus(struct trace_buffer *buf)
1263 {
1264         struct ring_buffer *buffer = buf->buffer;
1265         int cpu;
1266
1267         if (!buffer)
1268                 return;
1269
1270         ring_buffer_record_disable(buffer);
1271
1272         /* Make sure all commits have finished */
1273         synchronize_sched();
1274
1275         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1276
1277         for_each_online_cpu(cpu)
1278                 ring_buffer_reset_cpu(buffer, cpu);
1279
1280         ring_buffer_record_enable(buffer);
1281 }
1282
1283 /* Must have trace_types_lock held */
1284 void tracing_reset_all_online_cpus(void)
1285 {
1286         struct trace_array *tr;
1287
1288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1289                 tracing_reset_online_cpus(&tr->trace_buffer);
1290 #ifdef CONFIG_TRACER_MAX_TRACE
1291                 tracing_reset_online_cpus(&tr->max_buffer);
1292 #endif
1293         }
1294 }
1295
1296 #define SAVED_CMDLINES 128
1297 #define NO_CMDLINE_MAP UINT_MAX
1298 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1299 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1300 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1301 static int cmdline_idx;
1302 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1303
1304 /* temporarily disable recording */
1305 static atomic_t trace_record_cmdline_disabled __read_mostly;
1306
1307 static void trace_init_cmdlines(void)
1308 {
1309         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1310         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1311         cmdline_idx = 0;
1312 }
1313
1314 int is_tracing_stopped(void)
1315 {
1316         return global_trace.stop_count;
1317 }
1318
1319 /**
1320  * tracing_start - quick start of the tracer
1321  *
1322  * If tracing is enabled but was stopped by tracing_stop,
1323  * this will start the tracer back up.
1324  */
1325 void tracing_start(void)
1326 {
1327         struct ring_buffer *buffer;
1328         unsigned long flags;
1329
1330         if (tracing_disabled)
1331                 return;
1332
1333         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1334         if (--global_trace.stop_count) {
1335                 if (global_trace.stop_count < 0) {
1336                         /* Someone screwed up their debugging */
1337                         WARN_ON_ONCE(1);
1338                         global_trace.stop_count = 0;
1339                 }
1340                 goto out;
1341         }
1342
1343         /* Prevent the buffers from switching */
1344         arch_spin_lock(&ftrace_max_lock);
1345
1346         buffer = global_trace.trace_buffer.buffer;
1347         if (buffer)
1348                 ring_buffer_record_enable(buffer);
1349
1350 #ifdef CONFIG_TRACER_MAX_TRACE
1351         buffer = global_trace.max_buffer.buffer;
1352         if (buffer)
1353                 ring_buffer_record_enable(buffer);
1354 #endif
1355
1356         arch_spin_unlock(&ftrace_max_lock);
1357
1358  out:
1359         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1360 }
1361
1362 static void tracing_start_tr(struct trace_array *tr)
1363 {
1364         struct ring_buffer *buffer;
1365         unsigned long flags;
1366
1367         if (tracing_disabled)
1368                 return;
1369
1370         /* If global, we need to also start the max tracer */
1371         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1372                 return tracing_start();
1373
1374         raw_spin_lock_irqsave(&tr->start_lock, flags);
1375
1376         if (--tr->stop_count) {
1377                 if (tr->stop_count < 0) {
1378                         /* Someone screwed up their debugging */
1379                         WARN_ON_ONCE(1);
1380                         tr->stop_count = 0;
1381                 }
1382                 goto out;
1383         }
1384
1385         buffer = tr->trace_buffer.buffer;
1386         if (buffer)
1387                 ring_buffer_record_enable(buffer);
1388
1389  out:
1390         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1391 }
1392
1393 /**
1394  * tracing_stop - quick stop of the tracer
1395  *
1396  * Light weight way to stop tracing. Use in conjunction with
1397  * tracing_start.
1398  */
1399 void tracing_stop(void)
1400 {
1401         struct ring_buffer *buffer;
1402         unsigned long flags;
1403
1404         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1405         if (global_trace.stop_count++)
1406                 goto out;
1407
1408         /* Prevent the buffers from switching */
1409         arch_spin_lock(&ftrace_max_lock);
1410
1411         buffer = global_trace.trace_buffer.buffer;
1412         if (buffer)
1413                 ring_buffer_record_disable(buffer);
1414
1415 #ifdef CONFIG_TRACER_MAX_TRACE
1416         buffer = global_trace.max_buffer.buffer;
1417         if (buffer)
1418                 ring_buffer_record_disable(buffer);
1419 #endif
1420
1421         arch_spin_unlock(&ftrace_max_lock);
1422
1423  out:
1424         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1425 }
1426
1427 static void tracing_stop_tr(struct trace_array *tr)
1428 {
1429         struct ring_buffer *buffer;
1430         unsigned long flags;
1431
1432         /* If global, we need to also stop the max tracer */
1433         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1434                 return tracing_stop();
1435
1436         raw_spin_lock_irqsave(&tr->start_lock, flags);
1437         if (tr->stop_count++)
1438                 goto out;
1439
1440         buffer = tr->trace_buffer.buffer;
1441         if (buffer)
1442                 ring_buffer_record_disable(buffer);
1443
1444  out:
1445         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1446 }
1447
1448 void trace_stop_cmdline_recording(void);
1449
1450 static int trace_save_cmdline(struct task_struct *tsk)
1451 {
1452         unsigned pid, idx;
1453
1454         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1455                 return 0;
1456
1457         /*
1458          * It's not the end of the world if we don't get
1459          * the lock, but we also don't want to spin
1460          * nor do we want to disable interrupts,
1461          * so if we miss here, then better luck next time.
1462          */
1463         if (!arch_spin_trylock(&trace_cmdline_lock))
1464                 return 0;
1465
1466         idx = map_pid_to_cmdline[tsk->pid];
1467         if (idx == NO_CMDLINE_MAP) {
1468                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1469
1470                 /*
1471                  * Check whether the cmdline buffer at idx has a pid
1472                  * mapped. We are going to overwrite that entry so we
1473                  * need to clear the map_pid_to_cmdline. Otherwise we
1474                  * would read the new comm for the old pid.
1475                  */
1476                 pid = map_cmdline_to_pid[idx];
1477                 if (pid != NO_CMDLINE_MAP)
1478                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1479
1480                 map_cmdline_to_pid[idx] = tsk->pid;
1481                 map_pid_to_cmdline[tsk->pid] = idx;
1482
1483                 cmdline_idx = idx;
1484         }
1485
1486         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1487
1488         arch_spin_unlock(&trace_cmdline_lock);
1489
1490         return 1;
1491 }
1492
1493 void trace_find_cmdline(int pid, char comm[])
1494 {
1495         unsigned map;
1496
1497         if (!pid) {
1498                 strcpy(comm, "<idle>");
1499                 return;
1500         }
1501
1502         if (WARN_ON_ONCE(pid < 0)) {
1503                 strcpy(comm, "<XXX>");
1504                 return;
1505         }
1506
1507         if (pid > PID_MAX_DEFAULT) {
1508                 strcpy(comm, "<...>");
1509                 return;
1510         }
1511
1512         preempt_disable();
1513         arch_spin_lock(&trace_cmdline_lock);
1514         map = map_pid_to_cmdline[pid];
1515         if (map != NO_CMDLINE_MAP)
1516                 strcpy(comm, saved_cmdlines[map]);
1517         else
1518                 strcpy(comm, "<...>");
1519
1520         arch_spin_unlock(&trace_cmdline_lock);
1521         preempt_enable();
1522 }
1523
1524 void tracing_record_cmdline(struct task_struct *tsk)
1525 {
1526         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1527                 return;
1528
1529         if (!__this_cpu_read(trace_cmdline_save))
1530                 return;
1531
1532         if (trace_save_cmdline(tsk))
1533                 __this_cpu_write(trace_cmdline_save, false);
1534 }
1535
1536 void
1537 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1538                              int pc)
1539 {
1540         struct task_struct *tsk = current;
1541
1542         entry->preempt_count            = pc & 0xff;
1543         entry->pid                      = (tsk) ? tsk->pid : 0;
1544         entry->flags =
1545 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1546                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1547 #else
1548                 TRACE_FLAG_IRQS_NOSUPPORT |
1549 #endif
1550                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1551                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1552                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1553                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1554 }
1555 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1556
1557 struct ring_buffer_event *
1558 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1559                           int type,
1560                           unsigned long len,
1561                           unsigned long flags, int pc)
1562 {
1563         struct ring_buffer_event *event;
1564
1565         event = ring_buffer_lock_reserve(buffer, len);
1566         if (event != NULL) {
1567                 struct trace_entry *ent = ring_buffer_event_data(event);
1568
1569                 tracing_generic_entry_update(ent, flags, pc);
1570                 ent->type = type;
1571         }
1572
1573         return event;
1574 }
1575
1576 void
1577 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1578 {
1579         __this_cpu_write(trace_cmdline_save, true);
1580         ring_buffer_unlock_commit(buffer, event);
1581 }
1582
1583 static inline void
1584 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1585                              struct ring_buffer_event *event,
1586                              unsigned long flags, int pc)
1587 {
1588         __buffer_unlock_commit(buffer, event);
1589
1590         ftrace_trace_stack(buffer, flags, 6, pc);
1591         ftrace_trace_userstack(buffer, flags, pc);
1592 }
1593
1594 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1595                                 struct ring_buffer_event *event,
1596                                 unsigned long flags, int pc)
1597 {
1598         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1599 }
1600 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1601
1602 static struct ring_buffer *temp_buffer;
1603
1604 struct ring_buffer_event *
1605 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1606                           struct ftrace_event_file *ftrace_file,
1607                           int type, unsigned long len,
1608                           unsigned long flags, int pc)
1609 {
1610         struct ring_buffer_event *entry;
1611
1612         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1613         entry = trace_buffer_lock_reserve(*current_rb,
1614                                          type, len, flags, pc);
1615         /*
1616          * If tracing is off, but we have triggers enabled,
1617          * we still need to look at the event data. Use the temp_buffer
1618          * to store the trace event for the trigger to use. It's recursion
1619          * safe and will not be recorded anywhere.
1620          */
1621         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1622                 *current_rb = temp_buffer;
1623                 entry = trace_buffer_lock_reserve(*current_rb,
1624                                                   type, len, flags, pc);
1625         }
1626         return entry;
1627 }
1628 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1629
1630 struct ring_buffer_event *
1631 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1632                                   int type, unsigned long len,
1633                                   unsigned long flags, int pc)
1634 {
1635         *current_rb = global_trace.trace_buffer.buffer;
1636         return trace_buffer_lock_reserve(*current_rb,
1637                                          type, len, flags, pc);
1638 }
1639 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1640
1641 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1642                                         struct ring_buffer_event *event,
1643                                         unsigned long flags, int pc)
1644 {
1645         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1646 }
1647 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1648
1649 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1650                                      struct ring_buffer_event *event,
1651                                      unsigned long flags, int pc,
1652                                      struct pt_regs *regs)
1653 {
1654         __buffer_unlock_commit(buffer, event);
1655
1656         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1657         ftrace_trace_userstack(buffer, flags, pc);
1658 }
1659 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1660
1661 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1662                                          struct ring_buffer_event *event)
1663 {
1664         ring_buffer_discard_commit(buffer, event);
1665 }
1666 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1667
1668 void
1669 trace_function(struct trace_array *tr,
1670                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1671                int pc)
1672 {
1673         struct ftrace_event_call *call = &event_function;
1674         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1675         struct ring_buffer_event *event;
1676         struct ftrace_entry *entry;
1677
1678         /* If we are reading the ring buffer, don't trace */
1679         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1680                 return;
1681
1682         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1683                                           flags, pc);
1684         if (!event)
1685                 return;
1686         entry   = ring_buffer_event_data(event);
1687         entry->ip                       = ip;
1688         entry->parent_ip                = parent_ip;
1689
1690         if (!call_filter_check_discard(call, entry, buffer, event))
1691                 __buffer_unlock_commit(buffer, event);
1692 }
1693
1694 #ifdef CONFIG_STACKTRACE
1695
1696 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1697 struct ftrace_stack {
1698         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1699 };
1700
1701 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1702 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1703
1704 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1705                                  unsigned long flags,
1706                                  int skip, int pc, struct pt_regs *regs)
1707 {
1708         struct ftrace_event_call *call = &event_kernel_stack;
1709         struct ring_buffer_event *event;
1710         struct stack_entry *entry;
1711         struct stack_trace trace;
1712         int use_stack;
1713         int size = FTRACE_STACK_ENTRIES;
1714
1715         trace.nr_entries        = 0;
1716         trace.skip              = skip;
1717
1718         /*
1719          * Since events can happen in NMIs, there's no safe way to
1720          * use the per-cpu ftrace_stacks reentrantly. We reserve the stack,
1721          * and if an interrupt or NMI comes in while it is held, that
1722          * context will just have to use the default FTRACE_STACK_SIZE.
1723          */
1724         preempt_disable_notrace();
1725
1726         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1727         /*
1728          * We don't need any atomic variables, just a barrier.
1729          * If an interrupt comes in, we don't care, because it would
1730          * have exited and put the counter back to what we want.
1731          * We just need a barrier to keep gcc from moving things
1732          * around.
1733          */
1734         barrier();
1735         if (use_stack == 1) {
1736                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1737                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1738
1739                 if (regs)
1740                         save_stack_trace_regs(regs, &trace);
1741                 else
1742                         save_stack_trace(&trace);
1743
1744                 if (trace.nr_entries > size)
1745                         size = trace.nr_entries;
1746         } else
1747                 /* From now on, use_stack is a boolean */
1748                 use_stack = 0;
1749
1750         size *= sizeof(unsigned long);
1751
1752         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1753                                           sizeof(*entry) + size, flags, pc);
1754         if (!event)
1755                 goto out;
1756         entry = ring_buffer_event_data(event);
1757
1758         memset(&entry->caller, 0, size);
1759
1760         if (use_stack)
1761                 memcpy(&entry->caller, trace.entries,
1762                        trace.nr_entries * sizeof(unsigned long));
1763         else {
1764                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1765                 trace.entries           = entry->caller;
1766                 if (regs)
1767                         save_stack_trace_regs(regs, &trace);
1768                 else
1769                         save_stack_trace(&trace);
1770         }
1771
1772         entry->size = trace.nr_entries;
1773
1774         if (!call_filter_check_discard(call, entry, buffer, event))
1775                 __buffer_unlock_commit(buffer, event);
1776
1777  out:
1778         /* Again, don't let gcc optimize things here */
1779         barrier();
1780         __this_cpu_dec(ftrace_stack_reserve);
1781         preempt_enable_notrace();
1782
1783 }
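/*
 * Note on sizing: when the per-cpu ftrace_stack was claimed above, up to
 * FTRACE_STACK_MAX_ENTRIES frames can be captured and the ring-buffer
 * event is sized to hold all of them; otherwise the stack is saved
 * directly into the event and is limited to FTRACE_STACK_ENTRIES frames.
 */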
1784
1785 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1786                              int skip, int pc, struct pt_regs *regs)
1787 {
1788         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1789                 return;
1790
1791         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1792 }
1793
1794 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1795                         int skip, int pc)
1796 {
1797         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1798                 return;
1799
1800         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1801 }
1802
1803 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1804                    int pc)
1805 {
1806         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1807 }
1808
1809 /**
1810  * trace_dump_stack - record a stack back trace in the trace buffer
1811  * @skip: Number of functions to skip (helper handlers)
1812  */
1813 void trace_dump_stack(int skip)
1814 {
1815         unsigned long flags;
1816
1817         if (tracing_disabled || tracing_selftest_running)
1818                 return;
1819
1820         local_save_flags(flags);
1821
1822         /*
1823          * Skip 3 more frames; that seems to get us to the caller
1824          * of this function.
1825          */
1826         skip += 3;
1827         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1828                              flags, skip, preempt_count(), NULL);
1829 }
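/*
 * Illustrative use (placeholder call site, not taken from this file):
 *
 *	trace_dump_stack(0);
 *
 * records the caller's current kernel stack in the trace buffer; the
 * argument only adds to the frames already skipped above.
 */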
1830
1831 static DEFINE_PER_CPU(int, user_stack_count);
1832
1833 void
1834 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1835 {
1836         struct ftrace_event_call *call = &event_user_stack;
1837         struct ring_buffer_event *event;
1838         struct userstack_entry *entry;
1839         struct stack_trace trace;
1840
1841         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1842                 return;
1843
1844         /*
1845          * NMIs can not handle page faults, even with fixups.
1846          * Saving the user stack can (and often does) fault.
1847          */
1848         if (unlikely(in_nmi()))
1849                 return;
1850
1851         /*
1852          * prevent recursion, since the user stack tracing may
1853          * trigger other kernel events.
1854          */
1855         preempt_disable();
1856         if (__this_cpu_read(user_stack_count))
1857                 goto out;
1858
1859         __this_cpu_inc(user_stack_count);
1860
1861         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1862                                           sizeof(*entry), flags, pc);
1863         if (!event)
1864                 goto out_drop_count;
1865         entry   = ring_buffer_event_data(event);
1866
1867         entry->tgid             = current->tgid;
1868         memset(&entry->caller, 0, sizeof(entry->caller));
1869
1870         trace.nr_entries        = 0;
1871         trace.max_entries       = FTRACE_STACK_ENTRIES;
1872         trace.skip              = 0;
1873         trace.entries           = entry->caller;
1874
1875         save_stack_trace_user(&trace);
1876         if (!call_filter_check_discard(call, entry, buffer, event))
1877                 __buffer_unlock_commit(buffer, event);
1878
1879  out_drop_count:
1880         __this_cpu_dec(user_stack_count);
1881  out:
1882         preempt_enable();
1883 }
1884
1885 #ifdef UNUSED
1886 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1887 {
1888         ftrace_trace_userstack(tr, flags, preempt_count());
1889 }
1890 #endif /* UNUSED */
1891
1892 #endif /* CONFIG_STACKTRACE */
1893
1894 /* created for use with alloc_percpu */
1895 struct trace_buffer_struct {
1896         char buffer[TRACE_BUF_SIZE];
1897 };
1898
1899 static struct trace_buffer_struct *trace_percpu_buffer;
1900 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1901 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1902 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1903
1904 /*
1905  * The buffer used depends on the context. There is a per-cpu
1906  * buffer for normal context, softirq context, hard irq context and
1907  * NMI context. This allows for lockless recording.
1908  *
1909  * Note, if the buffers failed to be allocated, then this returns NULL.
1910  */
1911 static char *get_trace_buf(void)
1912 {
1913         struct trace_buffer_struct *percpu_buffer;
1914
1915         /*
1916          * If we have allocated per cpu buffers, then we do not
1917          * need to do any locking.
1918          */
1919         if (in_nmi())
1920                 percpu_buffer = trace_percpu_nmi_buffer;
1921         else if (in_irq())
1922                 percpu_buffer = trace_percpu_irq_buffer;
1923         else if (in_softirq())
1924                 percpu_buffer = trace_percpu_sirq_buffer;
1925         else
1926                 percpu_buffer = trace_percpu_buffer;
1927
1928         if (!percpu_buffer)
1929                 return NULL;
1930
1931         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1932 }
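/*
 * Note: the callers below disable preemption (preempt_disable_notrace())
 * before calling get_trace_buf(), since the returned pointer is into a
 * per-cpu buffer and must not be used after migrating to another CPU.
 */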
1933
1934 static int alloc_percpu_trace_buffer(void)
1935 {
1936         struct trace_buffer_struct *buffers;
1937         struct trace_buffer_struct *sirq_buffers;
1938         struct trace_buffer_struct *irq_buffers;
1939         struct trace_buffer_struct *nmi_buffers;
1940
1941         buffers = alloc_percpu(struct trace_buffer_struct);
1942         if (!buffers)
1943                 goto err_warn;
1944
1945         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1946         if (!sirq_buffers)
1947                 goto err_sirq;
1948
1949         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1950         if (!irq_buffers)
1951                 goto err_irq;
1952
1953         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1954         if (!nmi_buffers)
1955                 goto err_nmi;
1956
1957         trace_percpu_buffer = buffers;
1958         trace_percpu_sirq_buffer = sirq_buffers;
1959         trace_percpu_irq_buffer = irq_buffers;
1960         trace_percpu_nmi_buffer = nmi_buffers;
1961
1962         return 0;
1963
1964  err_nmi:
1965         free_percpu(irq_buffers);
1966  err_irq:
1967         free_percpu(sirq_buffers);
1968  err_sirq:
1969         free_percpu(buffers);
1970  err_warn:
1971         WARN(1, "Could not allocate percpu trace_printk buffer");
1972         return -ENOMEM;
1973 }
1974
1975 static int buffers_allocated;
1976
1977 void trace_printk_init_buffers(void)
1978 {
1979         if (buffers_allocated)
1980                 return;
1981
1982         if (alloc_percpu_trace_buffer())
1983                 return;
1984
1985         pr_info("ftrace: Allocated trace_printk buffers\n");
1986
1987         /* Expand the buffers to the size that was set */
1988         tracing_update_buffers();
1989
1990         buffers_allocated = 1;
1991
1992         /*
1993          * trace_printk_init_buffers() can be called by modules.
1994          * If that happens, then we need to start cmdline recording
1995          * directly here. If the global_trace.trace_buffer.buffer is
1996          * already allocated, then this was called by module code.
1997          */
1998         if (global_trace.trace_buffer.buffer)
1999                 tracing_start_cmdline_record();
2000 }
2001
2002 void trace_printk_start_comm(void)
2003 {
2004         /* Start tracing comms if trace printk is set */
2005         if (!buffers_allocated)
2006                 return;
2007         tracing_start_cmdline_record();
2008 }
2009
2010 static void trace_printk_start_stop_comm(int enabled)
2011 {
2012         if (!buffers_allocated)
2013                 return;
2014
2015         if (enabled)
2016                 tracing_start_cmdline_record();
2017         else
2018                 tracing_stop_cmdline_record();
2019 }
2020
2021 /**
2022  * trace_vbprintk - write binary msg to tracing buffer
2023  *
2024  */
2025 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2026 {
2027         struct ftrace_event_call *call = &event_bprint;
2028         struct ring_buffer_event *event;
2029         struct ring_buffer *buffer;
2030         struct trace_array *tr = &global_trace;
2031         struct bprint_entry *entry;
2032         unsigned long flags;
2033         char *tbuffer;
2034         int len = 0, size, pc;
2035
2036         if (unlikely(tracing_selftest_running || tracing_disabled))
2037                 return 0;
2038
2039         /* Don't pollute graph traces with trace_vprintk internals */
2040         pause_graph_tracing();
2041
2042         pc = preempt_count();
2043         preempt_disable_notrace();
2044
2045         tbuffer = get_trace_buf();
2046         if (!tbuffer) {
2047                 len = 0;
2048                 goto out;
2049         }
2050
2051         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2052
2053         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2054                 goto out;
2055
2056         local_save_flags(flags);
2057         size = sizeof(*entry) + sizeof(u32) * len;
2058         buffer = tr->trace_buffer.buffer;
2059         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2060                                           flags, pc);
2061         if (!event)
2062                 goto out;
2063         entry = ring_buffer_event_data(event);
2064         entry->ip                       = ip;
2065         entry->fmt                      = fmt;
2066
2067         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2068         if (!call_filter_check_discard(call, entry, buffer, event)) {
2069                 __buffer_unlock_commit(buffer, event);
2070                 ftrace_trace_stack(buffer, flags, 6, pc);
2071         }
2072
2073 out:
2074         preempt_enable_notrace();
2075         unpause_graph_tracing();
2076
2077         return len;
2078 }
2079 EXPORT_SYMBOL_GPL(trace_vbprintk);
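/*
 * Note: only the format pointer and the vbin_printf()-encoded arguments
 * are stored in the ring buffer; the string is expanded when the trace is
 * read. trace_printk() is normally expected to land here for constant
 * format strings, e.g. (hypothetical call site):
 *
 *	trace_printk("x=%d y=%d\n", x, y);
 */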
2080
2081 static int
2082 __trace_array_vprintk(struct ring_buffer *buffer,
2083                       unsigned long ip, const char *fmt, va_list args)
2084 {
2085         struct ftrace_event_call *call = &event_print;
2086         struct ring_buffer_event *event;
2087         int len = 0, size, pc;
2088         struct print_entry *entry;
2089         unsigned long flags;
2090         char *tbuffer;
2091
2092         if (tracing_disabled || tracing_selftest_running)
2093                 return 0;
2094
2095         /* Don't pollute graph traces with trace_vprintk internals */
2096         pause_graph_tracing();
2097
2098         pc = preempt_count();
2099         preempt_disable_notrace();
2100
2101
2102         tbuffer = get_trace_buf();
2103         if (!tbuffer) {
2104                 len = 0;
2105                 goto out;
2106         }
2107
2108         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2109         if (len > TRACE_BUF_SIZE)
2110                 goto out;
2111
2112         local_save_flags(flags);
2113         size = sizeof(*entry) + len + 1;
2114         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2115                                           flags, pc);
2116         if (!event)
2117                 goto out;
2118         entry = ring_buffer_event_data(event);
2119         entry->ip = ip;
2120
2121         memcpy(&entry->buf, tbuffer, len);
2122         entry->buf[len] = '\0';
2123         if (!call_filter_check_discard(call, entry, buffer, event)) {
2124                 __buffer_unlock_commit(buffer, event);
2125                 ftrace_trace_stack(buffer, flags, 6, pc);
2126         }
2127  out:
2128         preempt_enable_notrace();
2129         unpause_graph_tracing();
2130
2131         return len;
2132 }
2133
2134 int trace_array_vprintk(struct trace_array *tr,
2135                         unsigned long ip, const char *fmt, va_list args)
2136 {
2137         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2138 }
2139
2140 int trace_array_printk(struct trace_array *tr,
2141                        unsigned long ip, const char *fmt, ...)
2142 {
2143         int ret;
2144         va_list ap;
2145
2146         if (!(trace_flags & TRACE_ITER_PRINTK))
2147                 return 0;
2148
2149         va_start(ap, fmt);
2150         ret = trace_array_vprintk(tr, ip, fmt, ap);
2151         va_end(ap);
2152         return ret;
2153 }
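/*
 * Illustrative use (the variables are placeholders, not from this file):
 *
 *	trace_array_printk(tr, _THIS_IP_, "hit count %d\n", count);
 *
 * Note that nothing is written unless the TRACE_ITER_PRINTK flag is set.
 */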
2154
2155 int trace_array_printk_buf(struct ring_buffer *buffer,
2156                            unsigned long ip, const char *fmt, ...)
2157 {
2158         int ret;
2159         va_list ap;
2160
2161         if (!(trace_flags & TRACE_ITER_PRINTK))
2162                 return 0;
2163
2164         va_start(ap, fmt);
2165         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2166         va_end(ap);
2167         return ret;
2168 }
2169
2170 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2171 {
2172         return trace_array_vprintk(&global_trace, ip, fmt, args);
2173 }
2174 EXPORT_SYMBOL_GPL(trace_vprintk);
2175
2176 static void trace_iterator_increment(struct trace_iterator *iter)
2177 {
2178         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2179
2180         iter->idx++;
2181         if (buf_iter)
2182                 ring_buffer_read(buf_iter, NULL);
2183 }
2184
2185 static struct trace_entry *
2186 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2187                 unsigned long *lost_events)
2188 {
2189         struct ring_buffer_event *event;
2190         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2191
2192         if (buf_iter)
2193                 event = ring_buffer_iter_peek(buf_iter, ts);
2194         else
2195                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2196                                          lost_events);
2197
2198         if (event) {
2199                 iter->ent_size = ring_buffer_event_length(event);
2200                 return ring_buffer_event_data(event);
2201         }
2202         iter->ent_size = 0;
2203         return NULL;
2204 }
2205
2206 static struct trace_entry *
2207 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2208                   unsigned long *missing_events, u64 *ent_ts)
2209 {
2210         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2211         struct trace_entry *ent, *next = NULL;
2212         unsigned long lost_events = 0, next_lost = 0;
2213         int cpu_file = iter->cpu_file;
2214         u64 next_ts = 0, ts;
2215         int next_cpu = -1;
2216         int next_size = 0;
2217         int cpu;
2218
2219         /*
2220          * If we are in a per_cpu trace file, don't bother iterating over
2221          * all cpus; peek at that cpu directly.
2222          */
2223         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2224                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2225                         return NULL;
2226                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2227                 if (ent_cpu)
2228                         *ent_cpu = cpu_file;
2229
2230                 return ent;
2231         }
2232
2233         for_each_tracing_cpu(cpu) {
2234
2235                 if (ring_buffer_empty_cpu(buffer, cpu))
2236                         continue;
2237
2238                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2239
2240                 /*
2241                  * Pick the entry with the smallest timestamp:
2242                  */
2243                 if (ent && (!next || ts < next_ts)) {
2244                         next = ent;
2245                         next_cpu = cpu;
2246                         next_ts = ts;
2247                         next_lost = lost_events;
2248                         next_size = iter->ent_size;
2249                 }
2250         }
2251
2252         iter->ent_size = next_size;
2253
2254         if (ent_cpu)
2255                 *ent_cpu = next_cpu;
2256
2257         if (ent_ts)
2258                 *ent_ts = next_ts;
2259
2260         if (missing_events)
2261                 *missing_events = next_lost;
2262
2263         return next;
2264 }
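/*
 * Note: the loop above is effectively a merge of the per-cpu buffers: it
 * peeks at the oldest entry of every cpu and returns the one with the
 * smallest timestamp, so output is in global time order even though each
 * cpu records into its own buffer.
 */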
2265
2266 /* Find the next real entry, without updating the iterator itself */
2267 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2268                                           int *ent_cpu, u64 *ent_ts)
2269 {
2270         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2271 }
2272
2273 /* Find the next real entry, and increment the iterator to the next entry */
2274 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2275 {
2276         iter->ent = __find_next_entry(iter, &iter->cpu,
2277                                       &iter->lost_events, &iter->ts);
2278
2279         if (iter->ent)
2280                 trace_iterator_increment(iter);
2281
2282         return iter->ent ? iter : NULL;
2283 }
2284
2285 static void trace_consume(struct trace_iterator *iter)
2286 {
2287         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2288                             &iter->lost_events);
2289 }
2290
2291 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2292 {
2293         struct trace_iterator *iter = m->private;
2294         int i = (int)*pos;
2295         void *ent;
2296
2297         WARN_ON_ONCE(iter->leftover);
2298
2299         (*pos)++;
2300
2301         /* can't go backwards */
2302         if (iter->idx > i)
2303                 return NULL;
2304
2305         if (iter->idx < 0)
2306                 ent = trace_find_next_entry_inc(iter);
2307         else
2308                 ent = iter;
2309
2310         while (ent && iter->idx < i)
2311                 ent = trace_find_next_entry_inc(iter);
2312
2313         iter->pos = *pos;
2314
2315         return ent;
2316 }
2317
2318 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2319 {
2320         struct ring_buffer_event *event;
2321         struct ring_buffer_iter *buf_iter;
2322         unsigned long entries = 0;
2323         u64 ts;
2324
2325         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2326
2327         buf_iter = trace_buffer_iter(iter, cpu);
2328         if (!buf_iter)
2329                 return;
2330
2331         ring_buffer_iter_reset(buf_iter);
2332
2333         /*
2334          * With the max latency tracers, we could have the case that
2335          * a reset never took place on a cpu. This is evident from
2336          * the timestamp being before the start of the buffer.
2337          */
2338         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2339                 if (ts >= iter->trace_buffer->time_start)
2340                         break;
2341                 entries++;
2342                 ring_buffer_read(buf_iter, NULL);
2343         }
2344
2345         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2346 }
2347
2348 /*
2349  * The current tracer is copied to avoid taking a global lock
2350  * all around.
2351  */
2352 static void *s_start(struct seq_file *m, loff_t *pos)
2353 {
2354         struct trace_iterator *iter = m->private;
2355         struct trace_array *tr = iter->tr;
2356         int cpu_file = iter->cpu_file;
2357         void *p = NULL;
2358         loff_t l = 0;
2359         int cpu;
2360
2361         /*
2362          * Copy the tracer to avoid using a global lock all around.
2363          * iter->trace is a copy of current_trace; the name pointer may
2364          * be compared instead of using strcmp(), as iter->trace->name
2365          * will point to the same string as current_trace->name.
2366          */
2367         mutex_lock(&trace_types_lock);
2368         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2369                 *iter->trace = *tr->current_trace;
2370         mutex_unlock(&trace_types_lock);
2371
2372 #ifdef CONFIG_TRACER_MAX_TRACE
2373         if (iter->snapshot && iter->trace->use_max_tr)
2374                 return ERR_PTR(-EBUSY);
2375 #endif
2376
2377         if (!iter->snapshot)
2378                 atomic_inc(&trace_record_cmdline_disabled);
2379
2380         if (*pos != iter->pos) {
2381                 iter->ent = NULL;
2382                 iter->cpu = 0;
2383                 iter->idx = -1;
2384
2385                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2386                         for_each_tracing_cpu(cpu)
2387                                 tracing_iter_reset(iter, cpu);
2388                 } else
2389                         tracing_iter_reset(iter, cpu_file);
2390
2391                 iter->leftover = 0;
2392                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2393                         ;
2394
2395         } else {
2396                 /*
2397                  * If we overflowed the seq_file before, then we want
2398                  * to just reuse the trace_seq buffer again.
2399                  */
2400                 if (iter->leftover)
2401                         p = iter;
2402                 else {
2403                         l = *pos - 1;
2404                         p = s_next(m, p, &l);
2405                 }
2406         }
2407
2408         trace_event_read_lock();
2409         trace_access_lock(cpu_file);
2410         return p;
2411 }
2412
2413 static void s_stop(struct seq_file *m, void *p)
2414 {
2415         struct trace_iterator *iter = m->private;
2416
2417 #ifdef CONFIG_TRACER_MAX_TRACE
2418         if (iter->snapshot && iter->trace->use_max_tr)
2419                 return;
2420 #endif
2421
2422         if (!iter->snapshot)
2423                 atomic_dec(&trace_record_cmdline_disabled);
2424
2425         trace_access_unlock(iter->cpu_file);
2426         trace_event_read_unlock();
2427 }
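/*
 * s_start/s_next/s_stop above, together with s_show() further below, form
 * the seq_file iterator used for reads of the "trace" file; they are
 * wired together in tracer_seq_ops.
 */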
2428
2429 static void
2430 get_total_entries(struct trace_buffer *buf,
2431                   unsigned long *total, unsigned long *entries)
2432 {
2433         unsigned long count;
2434         int cpu;
2435
2436         *total = 0;
2437         *entries = 0;
2438
2439         for_each_tracing_cpu(cpu) {
2440                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2441                 /*
2442                  * If this buffer has skipped entries, then we hold all
2443                  * entries for the trace and we need to ignore the
2444                  * ones before the time stamp.
2445                  */
2446                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2447                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2448                         /* total is the same as the entries */
2449                         *total += count;
2450                 } else
2451                         *total += count +
2452                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2453                 *entries += count;
2454         }
2455 }
2456
2457 static void print_lat_help_header(struct seq_file *m)
2458 {
2459         seq_puts(m, "#                  _------=> CPU#            \n");
2460         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2461         seq_puts(m, "#                | / _----=> need-resched    \n");
2462         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2463         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2464         seq_puts(m, "#                |||| /     delay             \n");
2465         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2466         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2467 }
2468
2469 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2470 {
2471         unsigned long total;
2472         unsigned long entries;
2473
2474         get_total_entries(buf, &total, &entries);
2475         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2476                    entries, total, num_online_cpus());
2477         seq_puts(m, "#\n");
2478 }
2479
2480 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2481 {
2482         print_event_info(buf, m);
2483         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2484         seq_puts(m, "#              | |       |          |         |\n");
2485 }
2486
2487 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2488 {
2489         print_event_info(buf, m);
2490         seq_puts(m, "#                              _-----=> irqs-off\n");
2491         seq_puts(m, "#                             / _----=> need-resched\n");
2492         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2493         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2494         seq_puts(m, "#                            ||| /     delay\n");
2495         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2496         seq_puts(m, "#              | |       |   ||||       |         |\n");
2497 }
2498
2499 void
2500 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2501 {
2502         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2503         struct trace_buffer *buf = iter->trace_buffer;
2504         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2505         struct tracer *type = iter->trace;
2506         unsigned long entries;
2507         unsigned long total;
2508         const char *name = "preemption";
2509
2510         name = type->name;
2511
2512         get_total_entries(buf, &total, &entries);
2513
2514         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2515                    name, UTS_RELEASE);
2516         seq_puts(m, "# -----------------------------------"
2517                  "---------------------------------\n");
2518         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2519                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2520                    nsecs_to_usecs(data->saved_latency),
2521                    entries,
2522                    total,
2523                    buf->cpu,
2524 #if defined(CONFIG_PREEMPT_NONE)
2525                    "server",
2526 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2527                    "desktop",
2528 #elif defined(CONFIG_PREEMPT)
2529                    "preempt",
2530 #else
2531                    "unknown",
2532 #endif
2533                    /* These are reserved for later use */
2534                    0, 0, 0, 0);
2535 #ifdef CONFIG_SMP
2536         seq_printf(m, " #P:%d)\n", num_online_cpus());
2537 #else
2538         seq_puts(m, ")\n");
2539 #endif
2540         seq_puts(m, "#    -----------------\n");
2541         seq_printf(m, "#    | task: %.16s-%d "
2542                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2543                    data->comm, data->pid,
2544                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2545                    data->policy, data->rt_priority);
2546         seq_puts(m, "#    -----------------\n");
2547
2548         if (data->critical_start) {
2549                 seq_puts(m, "#  => started at: ");
2550                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2551                 trace_print_seq(m, &iter->seq);
2552                 seq_puts(m, "\n#  => ended at:   ");
2553                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2554                 trace_print_seq(m, &iter->seq);
2555                 seq_puts(m, "\n#\n");
2556         }
2557
2558         seq_puts(m, "#\n");
2559 }
2560
2561 static void test_cpu_buff_start(struct trace_iterator *iter)
2562 {
2563         struct trace_seq *s = &iter->seq;
2564
2565         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2566                 return;
2567
2568         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2569                 return;
2570
2571         if (cpumask_test_cpu(iter->cpu, iter->started))
2572                 return;
2573
2574         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2575                 return;
2576
2577         cpumask_set_cpu(iter->cpu, iter->started);
2578
2579         /* Don't print started cpu buffer for the first entry of the trace */
2580         if (iter->idx > 1)
2581                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2582                                 iter->cpu);
2583 }
2584
2585 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2586 {
2587         struct trace_seq *s = &iter->seq;
2588         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2589         struct trace_entry *entry;
2590         struct trace_event *event;
2591
2592         entry = iter->ent;
2593
2594         test_cpu_buff_start(iter);
2595
2596         event = ftrace_find_event(entry->type);
2597
2598         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2599                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2600                         if (!trace_print_lat_context(iter))
2601                                 goto partial;
2602                 } else {
2603                         if (!trace_print_context(iter))
2604                                 goto partial;
2605                 }
2606         }
2607
2608         if (event)
2609                 return event->funcs->trace(iter, sym_flags, event);
2610
2611         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2612                 goto partial;
2613
2614         return TRACE_TYPE_HANDLED;
2615 partial:
2616         return TRACE_TYPE_PARTIAL_LINE;
2617 }
2618
2619 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2620 {
2621         struct trace_seq *s = &iter->seq;
2622         struct trace_entry *entry;
2623         struct trace_event *event;
2624
2625         entry = iter->ent;
2626
2627         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2628                 if (!trace_seq_printf(s, "%d %d %llu ",
2629                                       entry->pid, iter->cpu, iter->ts))
2630                         goto partial;
2631         }
2632
2633         event = ftrace_find_event(entry->type);
2634         if (event)
2635                 return event->funcs->raw(iter, 0, event);
2636
2637         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2638                 goto partial;
2639
2640         return TRACE_TYPE_HANDLED;
2641 partial:
2642         return TRACE_TYPE_PARTIAL_LINE;
2643 }
2644
2645 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2646 {
2647         struct trace_seq *s = &iter->seq;
2648         unsigned char newline = '\n';
2649         struct trace_entry *entry;
2650         struct trace_event *event;
2651
2652         entry = iter->ent;
2653
2654         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2655                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2656                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2657                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2658         }
2659
2660         event = ftrace_find_event(entry->type);
2661         if (event) {
2662                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2663                 if (ret != TRACE_TYPE_HANDLED)
2664                         return ret;
2665         }
2666
2667         SEQ_PUT_FIELD_RET(s, newline);
2668
2669         return TRACE_TYPE_HANDLED;
2670 }
2671
2672 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2673 {
2674         struct trace_seq *s = &iter->seq;
2675         struct trace_entry *entry;
2676         struct trace_event *event;
2677
2678         entry = iter->ent;
2679
2680         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2681                 SEQ_PUT_FIELD_RET(s, entry->pid);
2682                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2683                 SEQ_PUT_FIELD_RET(s, iter->ts);
2684         }
2685
2686         event = ftrace_find_event(entry->type);
2687         return event ? event->funcs->binary(iter, 0, event) :
2688                 TRACE_TYPE_HANDLED;
2689 }
2690
2691 int trace_empty(struct trace_iterator *iter)
2692 {
2693         struct ring_buffer_iter *buf_iter;
2694         int cpu;
2695
2696         /* If we are looking at one CPU buffer, only check that one */
2697         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2698                 cpu = iter->cpu_file;
2699                 buf_iter = trace_buffer_iter(iter, cpu);
2700                 if (buf_iter) {
2701                         if (!ring_buffer_iter_empty(buf_iter))
2702                                 return 0;
2703                 } else {
2704                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2705                                 return 0;
2706                 }
2707                 return 1;
2708         }
2709
2710         for_each_tracing_cpu(cpu) {
2711                 buf_iter = trace_buffer_iter(iter, cpu);
2712                 if (buf_iter) {
2713                         if (!ring_buffer_iter_empty(buf_iter))
2714                                 return 0;
2715                 } else {
2716                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2717                                 return 0;
2718                 }
2719         }
2720
2721         return 1;
2722 }
2723
2724 /*  Called with trace_event_read_lock() held. */
2725 enum print_line_t print_trace_line(struct trace_iterator *iter)
2726 {
2727         enum print_line_t ret;
2728
2729         if (iter->lost_events &&
2730             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2731                                  iter->cpu, iter->lost_events))
2732                 return TRACE_TYPE_PARTIAL_LINE;
2733
2734         if (iter->trace && iter->trace->print_line) {
2735                 ret = iter->trace->print_line(iter);
2736                 if (ret != TRACE_TYPE_UNHANDLED)
2737                         return ret;
2738         }
2739
2740         if (iter->ent->type == TRACE_BPUTS &&
2741                         trace_flags & TRACE_ITER_PRINTK &&
2742                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2743                 return trace_print_bputs_msg_only(iter);
2744
2745         if (iter->ent->type == TRACE_BPRINT &&
2746                         trace_flags & TRACE_ITER_PRINTK &&
2747                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2748                 return trace_print_bprintk_msg_only(iter);
2749
2750         if (iter->ent->type == TRACE_PRINT &&
2751                         trace_flags & TRACE_ITER_PRINTK &&
2752                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2753                 return trace_print_printk_msg_only(iter);
2754
2755         if (trace_flags & TRACE_ITER_BIN)
2756                 return print_bin_fmt(iter);
2757
2758         if (trace_flags & TRACE_ITER_HEX)
2759                 return print_hex_fmt(iter);
2760
2761         if (trace_flags & TRACE_ITER_RAW)
2762                 return print_raw_fmt(iter);
2763
2764         return print_trace_fmt(iter);
2765 }
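/*
 * Note: the checks above give the output formats a fixed precedence: a
 * tracer's own print_line() callback first, then the printk msg-only
 * cases, then bin, hex, raw, and finally the default human-readable
 * format.
 */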
2766
2767 void trace_latency_header(struct seq_file *m)
2768 {
2769         struct trace_iterator *iter = m->private;
2770
2771         /* print nothing if the buffers are empty */
2772         if (trace_empty(iter))
2773                 return;
2774
2775         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2776                 print_trace_header(m, iter);
2777
2778         if (!(trace_flags & TRACE_ITER_VERBOSE))
2779                 print_lat_help_header(m);
2780 }
2781
2782 void trace_default_header(struct seq_file *m)
2783 {
2784         struct trace_iterator *iter = m->private;
2785
2786         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2787                 return;
2788
2789         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2790                 /* print nothing if the buffers are empty */
2791                 if (trace_empty(iter))
2792                         return;
2793                 print_trace_header(m, iter);
2794                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2795                         print_lat_help_header(m);
2796         } else {
2797                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2798                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2799                                 print_func_help_header_irq(iter->trace_buffer, m);
2800                         else
2801                                 print_func_help_header(iter->trace_buffer, m);
2802                 }
2803         }
2804 }
2805
2806 static void test_ftrace_alive(struct seq_file *m)
2807 {
2808         if (!ftrace_is_dead())
2809                 return;
2810         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2811         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2812 }
2813
2814 #ifdef CONFIG_TRACER_MAX_TRACE
2815 static void show_snapshot_main_help(struct seq_file *m)
2816 {
2817         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2818         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2819         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2820         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2821         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2822         seq_printf(m, "#                       is not a '0' or '1')\n");
2823 }
2824
2825 static void show_snapshot_percpu_help(struct seq_file *m)
2826 {
2827         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2828 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2829         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2830         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2831 #else
2832         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2833         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2834 #endif
2835         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2836         seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n");
2837         seq_printf(m, "#                       is not a '0' or '1')\n");
2838 }
2839
2840 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2841 {
2842         if (iter->tr->allocated_snapshot)
2843                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2844         else
2845                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2846
2847         seq_printf(m, "# Snapshot commands:\n");
2848         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2849                 show_snapshot_main_help(m);
2850         else
2851                 show_snapshot_percpu_help(m);
2852 }
2853 #else
2854 /* Should never be called */
2855 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2856 #endif
2857
2858 static int s_show(struct seq_file *m, void *v)
2859 {
2860         struct trace_iterator *iter = v;
2861         int ret;
2862
2863         if (iter->ent == NULL) {
2864                 if (iter->tr) {
2865                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2866                         seq_puts(m, "#\n");
2867                         test_ftrace_alive(m);
2868                 }
2869                 if (iter->snapshot && trace_empty(iter))
2870                         print_snapshot_help(m, iter);
2871                 else if (iter->trace && iter->trace->print_header)
2872                         iter->trace->print_header(m);
2873                 else
2874                         trace_default_header(m);
2875
2876         } else if (iter->leftover) {
2877                 /*
2878                  * If we filled the seq_file buffer earlier, we
2879                  * want to just show it now.
2880                  */
2881                 ret = trace_print_seq(m, &iter->seq);
2882
2883                 /* ret should this time be zero, but you never know */
2884                 iter->leftover = ret;
2885
2886         } else {
2887                 print_trace_line(iter);
2888                 ret = trace_print_seq(m, &iter->seq);
2889                 /*
2890                  * If we overflow the seq_file buffer, then it will
2891                  * ask us for this data again at start up.
2892                  * Use that instead.
2893                  *  ret is 0 if seq_file write succeeded.
2894                  *        -1 otherwise.
2895                  */
2896                 iter->leftover = ret;
2897         }
2898
2899         return 0;
2900 }
2901
2902 /*
2903  * Should be used after trace_array_get(); trace_types_lock
2904  * ensures that i_cdev was already initialized.
2905  */
2906 static inline int tracing_get_cpu(struct inode *inode)
2907 {
2908         if (inode->i_cdev) /* See trace_create_cpu_file() */
2909                 return (long)inode->i_cdev - 1;
2910         return RING_BUFFER_ALL_CPUS;
2911 }
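/*
 * The per-cpu trace files are expected to store cpu + 1 in i_cdev when
 * they are created (see trace_create_cpu_file()), so a NULL i_cdev means
 * "all cpus" and, for example, an i_cdev value of 3 decodes to cpu 2.
 */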
2912
2913 static const struct seq_operations tracer_seq_ops = {
2914         .start          = s_start,
2915         .next           = s_next,
2916         .stop           = s_stop,
2917         .show           = s_show,
2918 };
2919
2920 static struct trace_iterator *
2921 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2922 {
2923         struct trace_array *tr = inode->i_private;
2924         struct trace_iterator *iter;
2925         int cpu;
2926
2927         if (tracing_disabled)
2928                 return ERR_PTR(-ENODEV);
2929
2930         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2931         if (!iter)
2932                 return ERR_PTR(-ENOMEM);
2933
2934         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2935                                     GFP_KERNEL);
2936         if (!iter->buffer_iter)
2937                 goto release;
2938
2939         /*
2940          * We make a copy of the current tracer to avoid concurrent
2941          * changes on it while we are reading.
2942          */
2943         mutex_lock(&trace_types_lock);
2944         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2945         if (!iter->trace)
2946                 goto fail;
2947
2948         *iter->trace = *tr->current_trace;
2949
2950         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2951                 goto fail;
2952
2953         iter->tr = tr;
2954
2955 #ifdef CONFIG_TRACER_MAX_TRACE
2956         /* Currently only the top directory has a snapshot */
2957         if (tr->current_trace->print_max || snapshot)
2958                 iter->trace_buffer = &tr->max_buffer;
2959         else
2960 #endif
2961                 iter->trace_buffer = &tr->trace_buffer;
2962         iter->snapshot = snapshot;
2963         iter->pos = -1;
2964         iter->cpu_file = tracing_get_cpu(inode);
2965         mutex_init(&iter->mutex);
2966
2967         /* Notify the tracer early; before we stop tracing. */
2968         if (iter->trace && iter->trace->open)
2969                 iter->trace->open(iter);
2970
2971         /* Annotate start of buffers if we had overruns */
2972         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2973                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2974
2975         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2976         if (trace_clocks[tr->clock_id].in_ns)
2977                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2978
2979         /* stop the trace while dumping if we are not opening "snapshot" */
2980         if (!iter->snapshot)
2981                 tracing_stop_tr(tr);
2982
2983         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2984                 for_each_tracing_cpu(cpu) {
2985                         iter->buffer_iter[cpu] =
2986                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2987                 }
2988                 ring_buffer_read_prepare_sync();
2989                 for_each_tracing_cpu(cpu) {
2990                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2991                         tracing_iter_reset(iter, cpu);
2992                 }
2993         } else {
2994                 cpu = iter->cpu_file;
2995                 iter->buffer_iter[cpu] =
2996                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2997                 ring_buffer_read_prepare_sync();
2998                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2999                 tracing_iter_reset(iter, cpu);
3000         }
3001
3002         mutex_unlock(&trace_types_lock);
3003
3004         return iter;
3005
3006  fail:
3007         mutex_unlock(&trace_types_lock);
3008         kfree(iter->trace);
3009         kfree(iter->buffer_iter);
3010 release:
3011         seq_release_private(inode, file);
3012         return ERR_PTR(-ENOMEM);
3013 }
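/*
 * Note: unless the "snapshot" file is being opened, __tracing_open()
 * stops tracing on the trace array (tracing_stop_tr()) while the iterator
 * exists; tracing_release() below restarts it when the file is closed.
 */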
3014
3015 int tracing_open_generic(struct inode *inode, struct file *filp)
3016 {
3017         if (tracing_disabled)
3018                 return -ENODEV;
3019
3020         filp->private_data = inode->i_private;
3021         return 0;
3022 }
3023
3024 bool tracing_is_disabled(void)
3025 {
3026         return (tracing_disabled) ? true : false;
3027 }
3028
3029 /*
3030  * Open and update trace_array ref count.
3031  * Must have the current trace_array passed to it.
3032  */
3033 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3034 {
3035         struct trace_array *tr = inode->i_private;
3036
3037         if (tracing_disabled)
3038                 return -ENODEV;
3039
3040         if (trace_array_get(tr) < 0)
3041                 return -ENODEV;
3042
3043         filp->private_data = inode->i_private;
3044
3045         return 0;
3046 }
3047
3048 static int tracing_release(struct inode *inode, struct file *file)
3049 {
3050         struct trace_array *tr = inode->i_private;
3051         struct seq_file *m = file->private_data;
3052         struct trace_iterator *iter;
3053         int cpu;
3054
3055         if (!(file->f_mode & FMODE_READ)) {
3056                 trace_array_put(tr);
3057                 return 0;
3058         }
3059
3060         /* Writes do not use seq_file */
3061         iter = m->private;
3062         mutex_lock(&trace_types_lock);
3063
3064         for_each_tracing_cpu(cpu) {
3065                 if (iter->buffer_iter[cpu])
3066                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3067         }
3068
3069         if (iter->trace && iter->trace->close)
3070                 iter->trace->close(iter);
3071
3072         if (!iter->snapshot)
3073                 /* reenable tracing if it was previously enabled */
3074                 tracing_start_tr(tr);
3075
3076         __trace_array_put(tr);
3077
3078         mutex_unlock(&trace_types_lock);
3079
3080         mutex_destroy(&iter->mutex);
3081         free_cpumask_var(iter->started);
3082         kfree(iter->trace);
3083         kfree(iter->buffer_iter);
3084         seq_release_private(inode, file);
3085
3086         return 0;
3087 }
3088
3089 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3090 {
3091         struct trace_array *tr = inode->i_private;
3092
3093         trace_array_put(tr);
3094         return 0;
3095 }
3096
3097 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3098 {
3099         struct trace_array *tr = inode->i_private;
3100
3101         trace_array_put(tr);
3102
3103         return single_release(inode, file);
3104 }
3105
3106 static int tracing_open(struct inode *inode, struct file *file)
3107 {
3108         struct trace_array *tr = inode->i_private;
3109         struct trace_iterator *iter;
3110         int ret = 0;
3111
3112         if (trace_array_get(tr) < 0)
3113                 return -ENODEV;
3114
3115         /* If this file was opened for write, then erase its contents */
3116         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3117                 int cpu = tracing_get_cpu(inode);
3118
3119                 if (cpu == RING_BUFFER_ALL_CPUS)
3120                         tracing_reset_online_cpus(&tr->trace_buffer);
3121                 else
3122                         tracing_reset(&tr->trace_buffer, cpu);
3123         }
3124
3125         if (file->f_mode & FMODE_READ) {
3126                 iter = __tracing_open(inode, file, false);
3127                 if (IS_ERR(iter))
3128                         ret = PTR_ERR(iter);
3129                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3130                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3131         }
3132
3133         if (ret < 0)
3134                 trace_array_put(tr);
3135
3136         return ret;
3137 }
3138
3139 static void *
3140 t_next(struct seq_file *m, void *v, loff_t *pos)
3141 {
3142         struct tracer *t = v;
3143
3144         (*pos)++;
3145
3146         if (t)
3147                 t = t->next;
3148
3149         return t;
3150 }
3151
3152 static void *t_start(struct seq_file *m, loff_t *pos)
3153 {
3154         struct tracer *t;
3155         loff_t l = 0;
3156
3157         mutex_lock(&trace_types_lock);
3158         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3159                 ;
3160
3161         return t;
3162 }
3163
3164 static void t_stop(struct seq_file *m, void *p)
3165 {
3166         mutex_unlock(&trace_types_lock);
3167 }
3168
3169 static int t_show(struct seq_file *m, void *v)
3170 {
3171         struct tracer *t = v;
3172
3173         if (!t)
3174                 return 0;
3175
3176         seq_printf(m, "%s", t->name);
3177         if (t->next)
3178                 seq_putc(m, ' ');
3179         else
3180                 seq_putc(m, '\n');
3181
3182         return 0;
3183 }
3184
3185 static const struct seq_operations show_traces_seq_ops = {
3186         .start          = t_start,
3187         .next           = t_next,
3188         .stop           = t_stop,
3189         .show           = t_show,
3190 };
3191
3192 static int show_traces_open(struct inode *inode, struct file *file)
3193 {
3194         if (tracing_disabled)
3195                 return -ENODEV;
3196
3197         return seq_open(file, &show_traces_seq_ops);
3198 }
3199
3200 static ssize_t
3201 tracing_write_stub(struct file *filp, const char __user *ubuf,
3202                    size_t count, loff_t *ppos)
3203 {
3204         return count;
3205 }
3206
3207 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3208 {
3209         int ret;
3210
3211         if (file->f_mode & FMODE_READ)
3212                 ret = seq_lseek(file, offset, whence);
3213         else
3214                 file->f_pos = ret = 0;
3215
3216         return ret;
3217 }
3218
3219 static const struct file_operations tracing_fops = {
3220         .open           = tracing_open,
3221         .read           = seq_read,
3222         .write          = tracing_write_stub,
3223         .llseek         = tracing_lseek,
3224         .release        = tracing_release,
3225 };
3226
3227 static const struct file_operations show_traces_fops = {
3228         .open           = show_traces_open,
3229         .read           = seq_read,
3230         .release        = seq_release,
3231         .llseek         = seq_lseek,
3232 };
3233
3234 /*
3235  * The tracer itself will not take this lock, but still we want
3236  * to provide a consistent cpumask to user-space:
3237  */
3238 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3239
3240 /*
3241  * Temporary storage for the character representation of the
3242  * CPU bitmask (and one more byte for the newline):
3243  */
3244 static char mask_str[NR_CPUS + 1];
3245
3246 static ssize_t
3247 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3248                      size_t count, loff_t *ppos)
3249 {
3250         struct trace_array *tr = file_inode(filp)->i_private;
3251         int len;
3252
3253         mutex_lock(&tracing_cpumask_update_lock);
3254
3255         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3256         if (count - len < 2) {
3257                 count = -EINVAL;
3258                 goto out_err;
3259         }
3260         len += sprintf(mask_str + len, "\n");
3261         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3262
3263 out_err:
3264         mutex_unlock(&tracing_cpumask_update_lock);
3265
3266         return count;
3267 }
3268
3269 static ssize_t
3270 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3271                       size_t count, loff_t *ppos)
3272 {
3273         struct trace_array *tr = file_inode(filp)->i_private;
3274         cpumask_var_t tracing_cpumask_new;
3275         int err, cpu;
3276
3277         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3278                 return -ENOMEM;
3279
3280         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3281         if (err)
3282                 goto err_unlock;
3283
3284         mutex_lock(&tracing_cpumask_update_lock);
3285
3286         local_irq_disable();
3287         arch_spin_lock(&ftrace_max_lock);
3288         for_each_tracing_cpu(cpu) {
3289                 /*
3290                  * Increase/decrease the disabled counter if we are
3291                  * about to flip a bit in the cpumask:
3292                  */
3293                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3294                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3295                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3296                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3297                 }
3298                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3299                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3300                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3301                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3302                 }
3303         }
3304         arch_spin_unlock(&ftrace_max_lock);
3305         local_irq_enable();
3306
3307         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3308
3309         mutex_unlock(&tracing_cpumask_update_lock);
3310         free_cpumask_var(tracing_cpumask_new);
3311
3312         return count;
3313
3314 err_unlock:
3315         free_cpumask_var(tracing_cpumask_new);
3316
3317         return err;
3318 }
3319
3320 static const struct file_operations tracing_cpumask_fops = {
3321         .open           = tracing_open_generic_tr,
3322         .read           = tracing_cpumask_read,
3323         .write          = tracing_cpumask_write,
3324         .release        = tracing_release_generic_tr,
3325         .llseek         = generic_file_llseek,
3326 };
3327
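/*
 * tracing_cpumask_write() above parses a hex CPU mask from user space and
 * enables/disables per-cpu recording to match.  A minimal user-space sketch
 * (the tracefs path is an assumption; example only, never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* path is an assumption; adjust to where tracefs is mounted */
        int fd = open("/sys/kernel/debug/tracing/tracing_cpumask", O_WRONLY);

        if (fd < 0) {
                perror("open tracing_cpumask");
                return 1;
        }
        /* "3" == binary 11 -> trace CPUs 0 and 1 (parsed by cpumask_parse_user) */
        if (write(fd, "3\n", 2) != 2)
                perror("write tracing_cpumask");
        close(fd);
        return 0;
}
#endif
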
3328 static int tracing_trace_options_show(struct seq_file *m, void *v)
3329 {
3330         struct tracer_opt *trace_opts;
3331         struct trace_array *tr = m->private;
3332         u32 tracer_flags;
3333         int i;
3334
3335         mutex_lock(&trace_types_lock);
3336         tracer_flags = tr->current_trace->flags->val;
3337         trace_opts = tr->current_trace->flags->opts;
3338
3339         for (i = 0; trace_options[i]; i++) {
3340                 if (trace_flags & (1 << i))
3341                         seq_printf(m, "%s\n", trace_options[i]);
3342                 else
3343                         seq_printf(m, "no%s\n", trace_options[i]);
3344         }
3345
3346         for (i = 0; trace_opts[i].name; i++) {
3347                 if (tracer_flags & trace_opts[i].bit)
3348                         seq_printf(m, "%s\n", trace_opts[i].name);
3349                 else
3350                         seq_printf(m, "no%s\n", trace_opts[i].name);
3351         }
3352         mutex_unlock(&trace_types_lock);
3353
3354         return 0;
3355 }
3356
3357 static int __set_tracer_option(struct tracer *trace,
3358                                struct tracer_flags *tracer_flags,
3359                                struct tracer_opt *opts, int neg)
3360 {
3361         int ret;
3362
3363         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3364         if (ret)
3365                 return ret;
3366
3367         if (neg)
3368                 tracer_flags->val &= ~opts->bit;
3369         else
3370                 tracer_flags->val |= opts->bit;
3371         return 0;
3372 }
3373
3374 /* Try to assign a tracer specific option */
3375 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3376 {
3377         struct tracer_flags *tracer_flags = trace->flags;
3378         struct tracer_opt *opts = NULL;
3379         int i;
3380
3381         for (i = 0; tracer_flags->opts[i].name; i++) {
3382                 opts = &tracer_flags->opts[i];
3383
3384                 if (strcmp(cmp, opts->name) == 0)
3385                         return __set_tracer_option(trace, trace->flags,
3386                                                    opts, neg);
3387         }
3388
3389         return -EINVAL;
3390 }
3391
3392 /* Some tracers require overwrite to stay enabled */
3393 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3394 {
3395         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3396                 return -1;
3397
3398         return 0;
3399 }
3400
3401 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3402 {
3403         /* do nothing if flag is already set */
3404         if (!!(trace_flags & mask) == !!enabled)
3405                 return 0;
3406
3407         /* Give the tracer a chance to approve the change */
3408         if (tr->current_trace->flag_changed)
3409                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3410                         return -EINVAL;
3411
3412         if (enabled)
3413                 trace_flags |= mask;
3414         else
3415                 trace_flags &= ~mask;
3416
3417         if (mask == TRACE_ITER_RECORD_CMD)
3418                 trace_event_enable_cmd_record(enabled);
3419
3420         if (mask == TRACE_ITER_OVERWRITE) {
3421                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3422 #ifdef CONFIG_TRACER_MAX_TRACE
3423                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3424 #endif
3425         }
3426
3427         if (mask == TRACE_ITER_PRINTK)
3428                 trace_printk_start_stop_comm(enabled);
3429
3430         return 0;
3431 }
3432
3433 static int trace_set_options(struct trace_array *tr, char *option)
3434 {
3435         char *cmp;
3436         int neg = 0;
3437         int ret = -ENODEV;
3438         int i;
3439
3440         cmp = strstrip(option);
3441
3442         if (strncmp(cmp, "no", 2) == 0) {
3443                 neg = 1;
3444                 cmp += 2;
3445         }
3446
3447         mutex_lock(&trace_types_lock);
3448
3449         for (i = 0; trace_options[i]; i++) {
3450                 if (strcmp(cmp, trace_options[i]) == 0) {
3451                         ret = set_tracer_flag(tr, 1 << i, !neg);
3452                         break;
3453                 }
3454         }
3455
3456         /* If no option could be set, test the specific tracer options */
3457         if (!trace_options[i])
3458                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3459
3460         mutex_unlock(&trace_types_lock);
3461
3462         return ret;
3463 }
3464
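/*
 * trace_set_options() above accepts an option name to set it and the same
 * name with a "no" prefix to clear it, falling back to tracer-specific
 * options.  A minimal user-space sketch (the path and the "print-parent"
 * option are assumptions; example only, never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void set_trace_option(const char *opt)
{
        /* path is an assumption; adjust to where tracefs is mounted */
        int fd = open("/sys/kernel/debug/tracing/trace_options", O_WRONLY);

        if (fd < 0) {
                perror("open trace_options");
                return;
        }
        if (write(fd, opt, strlen(opt)) < 0)
                perror("write trace_options");
        close(fd);
}

int main(void)
{
        set_trace_option("print-parent");       /* set the option */
        set_trace_option("noprint-parent");     /* the "no" prefix clears it */
        return 0;
}
#endif
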
3465 static ssize_t
3466 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3467                         size_t cnt, loff_t *ppos)
3468 {
3469         struct seq_file *m = filp->private_data;
3470         struct trace_array *tr = m->private;
3471         char buf[64];
3472         int ret;
3473
3474         if (cnt >= sizeof(buf))
3475                 return -EINVAL;
3476
3477         if (copy_from_user(&buf, ubuf, cnt))
3478                 return -EFAULT;
3479
3480         buf[cnt] = 0;
3481
3482         ret = trace_set_options(tr, buf);
3483         if (ret < 0)
3484                 return ret;
3485
3486         *ppos += cnt;
3487
3488         return cnt;
3489 }
3490
3491 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3492 {
3493         struct trace_array *tr = inode->i_private;
3494         int ret;
3495
3496         if (tracing_disabled)
3497                 return -ENODEV;
3498
3499         if (trace_array_get(tr) < 0)
3500                 return -ENODEV;
3501
3502         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3503         if (ret < 0)
3504                 trace_array_put(tr);
3505
3506         return ret;
3507 }
3508
3509 static const struct file_operations tracing_iter_fops = {
3510         .open           = tracing_trace_options_open,
3511         .read           = seq_read,
3512         .llseek         = seq_lseek,
3513         .release        = tracing_single_release_tr,
3514         .write          = tracing_trace_options_write,
3515 };
3516
3517 static const char readme_msg[] =
3518         "tracing mini-HOWTO:\n\n"
3519         "# echo 0 > tracing_on : quick way to disable tracing\n"
3520         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3521         " Important files:\n"
3522         "  trace\t\t\t- The static contents of the buffer\n"
3523         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3524         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3525         "  current_tracer\t- Set or display the tracer that is in use\n"
3526         "  available_tracers\t- list of configured tracers for current_tracer\n"
3527         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3528         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3529         "  trace_clock\t\t- change the clock used to order events\n"
3530         "       local:   Per cpu clock but may not be synced across CPUs\n"
3531         "      global:   Synced across CPUs but slows tracing down.\n"
3532         "     counter:   Not a clock, but just an increment\n"
3533         "      uptime:   Jiffy counter from time of boot\n"
3534         "        perf:   Same clock that perf events use\n"
3535 #ifdef CONFIG_X86_64
3536         "     x86-tsc:   TSC cycle counter\n"
3537 #endif
3538         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3539         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3540         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3541         "\t\t\t  Remove sub-buffer with rmdir\n"
3542         "  trace_options\t\t- Set format or modify how tracing happens\n"
3543         "\t\t\t  Disable an option by prefixing the option name\n"
3544         "\t\t\t  with 'no'\n"
3545 #ifdef CONFIG_DYNAMIC_FTRACE
3546         "\n  available_filter_functions - list of functions that can be filtered on\n"
3547         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3548         "\t\t\t  functions\n"
3549         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3550         "\t     modules: Can select a group via module\n"
3551         "\t      Format: :mod:<module-name>\n"
3552         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3553         "\t    triggers: a command to perform when function is hit\n"
3554         "\t      Format: <function>:<trigger>[:count]\n"
3555         "\t     trigger: traceon, traceoff\n"
3556         "\t\t      enable_event:<system>:<event>\n"
3557         "\t\t      disable_event:<system>:<event>\n"
3558 #ifdef CONFIG_STACKTRACE
3559         "\t\t      stacktrace\n"
3560 #endif
3561 #ifdef CONFIG_TRACER_SNAPSHOT
3562         "\t\t      snapshot\n"
3563 #endif
3564         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3565         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3566         "\t     The first one will disable tracing every time do_fault is hit\n"
3567         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3568         "\t       The first time do_trap is hit and it disables tracing, the\n"
3569         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3570         "\t       the counter will not decrement. It only decrements when the\n"
3571         "\t       trigger did work\n"
3572         "\t     To remove trigger without count:\n"
3573         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3574         "\t     To remove trigger with a count:\n"
3575         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3576         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3577         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3578         "\t    modules: Can select a group via module command :mod:\n"
3579         "\t    Does not accept triggers\n"
3580 #endif /* CONFIG_DYNAMIC_FTRACE */
3581 #ifdef CONFIG_FUNCTION_TRACER
3582         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3583         "\t\t    (function)\n"
3584 #endif
3585 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3586         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3587         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3588 #endif
3589 #ifdef CONFIG_TRACER_SNAPSHOT
3590         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3591         "\t\t\t  snapshot buffer. Read the contents for more\n"
3592         "\t\t\t  information\n"
3593 #endif
3594 #ifdef CONFIG_STACK_TRACER
3595         "  stack_trace\t\t- Shows the max stack trace when active\n"
3596         "  stack_max_size\t- Shows current max stack size that was traced\n"
3597         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3598         "\t\t\t  new trace)\n"
3599 #ifdef CONFIG_DYNAMIC_FTRACE
3600         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3601         "\t\t\t  traces\n"
3602 #endif
3603 #endif /* CONFIG_STACK_TRACER */
3604         "  events/\t\t- Directory containing all trace event subsystems:\n"
3605         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3606         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3607         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3608         "\t\t\t  events\n"
3609         "      filter\t\t- If set, only events passing filter are traced\n"
3610         "  events/<system>/<event>/\t- Directory containing control files for\n"
3611         "\t\t\t  <event>:\n"
3612         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3613         "      filter\t\t- If set, only events passing filter are traced\n"
3614         "      trigger\t\t- If set, a command to perform when event is hit\n"
3615         "\t    Format: <trigger>[:count][if <filter>]\n"
3616         "\t   trigger: traceon, traceoff\n"
3617         "\t            enable_event:<system>:<event>\n"
3618         "\t            disable_event:<system>:<event>\n"
3619 #ifdef CONFIG_STACKTRACE
3620         "\t\t    stacktrace\n"
3621 #endif
3622 #ifdef CONFIG_TRACER_SNAPSHOT
3623         "\t\t    snapshot\n"
3624 #endif
3625         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3626         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3627         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3628         "\t                  events/block/block_unplug/trigger\n"
3629         "\t   The first disables tracing every time block_unplug is hit.\n"
3630         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3631         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3632         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3633         "\t   Like function triggers, the counter is only decremented if it\n"
3634         "\t    enabled or disabled tracing.\n"
3635         "\t   To remove a trigger without a count:\n"
3636         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3637         "\t   To remove a trigger with a count:\n"
3638         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3639         "\t   Filters can be ignored when removing a trigger.\n"
3640 ;
3641
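/*
 * The mini-HOWTO above is written in terms of echo from a shell; the same
 * files can be driven from C with plain open()/write().  A minimal sketch
 * that re-enables tracing and writes a marker into the ring buffer (the
 * mount point is an assumption; example only, never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* mount point is an assumption; /sys/kernel/debug/tracing is typical here */
#define TRACEFS "/sys/kernel/debug/tracing/"

static void write_tracefs(const char *name, const char *val)
{
        char path[256];
        int fd;

        snprintf(path, sizeof(path), TRACEFS "%s", name);
        fd = open(path, O_WRONLY);
        if (fd < 0) {
                perror(path);
                return;
        }
        if (write(fd, val, strlen(val)) < 0)
                perror(path);
        close(fd);
}

int main(void)
{
        write_tracefs("tracing_on", "1");       /* quick way to re-enable tracing */
        write_tracefs("trace_marker", "hello from user space");
        return 0;
}
#endif
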
3642 static ssize_t
3643 tracing_readme_read(struct file *filp, char __user *ubuf,
3644                        size_t cnt, loff_t *ppos)
3645 {
3646         return simple_read_from_buffer(ubuf, cnt, ppos,
3647                                         readme_msg, strlen(readme_msg));
3648 }
3649
3650 static const struct file_operations tracing_readme_fops = {
3651         .open           = tracing_open_generic,
3652         .read           = tracing_readme_read,
3653         .llseek         = generic_file_llseek,
3654 };
3655
3656 static ssize_t
3657 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3658                                 size_t cnt, loff_t *ppos)
3659 {
3660         char *buf_comm;
3661         char *file_buf;
3662         char *buf;
3663         int len = 0;
3664         int pid;
3665         int i;
3666
3667         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3668         if (!file_buf)
3669                 return -ENOMEM;
3670
3671         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3672         if (!buf_comm) {
3673                 kfree(file_buf);
3674                 return -ENOMEM;
3675         }
3676
3677         buf = file_buf;
3678
3679         for (i = 0; i < SAVED_CMDLINES; i++) {
3680                 int r;
3681
3682                 pid = map_cmdline_to_pid[i];
3683                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3684                         continue;
3685
3686                 trace_find_cmdline(pid, buf_comm);
3687                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3688                 buf += r;
3689                 len += r;
3690         }
3691
3692         len = simple_read_from_buffer(ubuf, cnt, ppos,
3693                                       file_buf, len);
3694
3695         kfree(file_buf);
3696         kfree(buf_comm);
3697
3698         return len;
3699 }
3700
3701 static const struct file_operations tracing_saved_cmdlines_fops = {
3702     .open       = tracing_open_generic,
3703     .read       = tracing_saved_cmdlines_read,
3704     .llseek     = generic_file_llseek,
3705 };
3706
3707 static ssize_t
3708 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3709                        size_t cnt, loff_t *ppos)
3710 {
3711         struct trace_array *tr = filp->private_data;
3712         char buf[MAX_TRACER_SIZE+2];
3713         int r;
3714
3715         mutex_lock(&trace_types_lock);
3716         r = sprintf(buf, "%s\n", tr->current_trace->name);
3717         mutex_unlock(&trace_types_lock);
3718
3719         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3720 }
3721
3722 int tracer_init(struct tracer *t, struct trace_array *tr)
3723 {
3724         tracing_reset_online_cpus(&tr->trace_buffer);
3725         return t->init(tr);
3726 }
3727
3728 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3729 {
3730         int cpu;
3731
3732         for_each_tracing_cpu(cpu)
3733                 per_cpu_ptr(buf->data, cpu)->entries = val;
3734 }
3735
3736 #ifdef CONFIG_TRACER_MAX_TRACE
3737 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3738 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3739                                         struct trace_buffer *size_buf, int cpu_id)
3740 {
3741         int cpu, ret = 0;
3742
3743         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3744                 for_each_tracing_cpu(cpu) {
3745                         ret = ring_buffer_resize(trace_buf->buffer,
3746                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3747                         if (ret < 0)
3748                                 break;
3749                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3750                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3751                 }
3752         } else {
3753                 ret = ring_buffer_resize(trace_buf->buffer,
3754                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3755                 if (ret == 0)
3756                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3757                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3758         }
3759
3760         return ret;
3761 }
3762 #endif /* CONFIG_TRACER_MAX_TRACE */
3763
3764 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3765                                         unsigned long size, int cpu)
3766 {
3767         int ret;
3768
3769         /*
3770          * If kernel or user changes the size of the ring buffer
3771          * we use the size that was given, and we can forget about
3772          * expanding it later.
3773          */
3774         ring_buffer_expanded = true;
3775
3776         /* May be called before buffers are initialized */
3777         if (!tr->trace_buffer.buffer)
3778                 return 0;
3779
3780         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3781         if (ret < 0)
3782                 return ret;
3783
3784 #ifdef CONFIG_TRACER_MAX_TRACE
3785         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3786             !tr->current_trace->use_max_tr)
3787                 goto out;
3788
3789         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3790         if (ret < 0) {
3791                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3792                                                      &tr->trace_buffer, cpu);
3793                 if (r < 0) {
3794                         /*
3795                          * AARGH! We are left with different
3796                          * size max buffer!!!!
3797                          * The max buffer is our "snapshot" buffer.
3798                          * When a tracer needs a snapshot (one of the
3799                          * latency tracers), it swaps the max buffer
3800                  * with the saved snapshot. We succeeded in
3801                  * updating the size of the main buffer, but failed to
3802                          * update the size of the max buffer. But when we tried
3803                          * to reset the main buffer to the original size, we
3804                          * failed there too. This is very unlikely to
3805                          * happen, but if it does, warn and kill all
3806                          * tracing.
3807                          */
3808                         WARN_ON(1);
3809                         tracing_disabled = 1;
3810                 }
3811                 return ret;
3812         }
3813
3814         if (cpu == RING_BUFFER_ALL_CPUS)
3815                 set_buffer_entries(&tr->max_buffer, size);
3816         else
3817                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3818
3819  out:
3820 #endif /* CONFIG_TRACER_MAX_TRACE */
3821
3822         if (cpu == RING_BUFFER_ALL_CPUS)
3823                 set_buffer_entries(&tr->trace_buffer, size);
3824         else
3825                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3826
3827         return ret;
3828 }
3829
3830 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3831                                           unsigned long size, int cpu_id)
3832 {
3833         int ret = size;
3834
3835         mutex_lock(&trace_types_lock);
3836
3837         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3838                 /* make sure this cpu is enabled in the mask */
3839                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3840                         ret = -EINVAL;
3841                         goto out;
3842                 }
3843         }
3844
3845         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3846         if (ret < 0)
3847                 ret = -ENOMEM;
3848
3849 out:
3850         mutex_unlock(&trace_types_lock);
3851
3852         return ret;
3853 }
3854
3855
3856 /**
3857  * tracing_update_buffers - used by tracing facility to expand ring buffers
3858  *
3859  * To save memory when tracing is never used on a system that has it
3860  * configured in, the ring buffers are set to a minimum size. But once
3861  * a user starts to use the tracing facility, they need to grow
3862  * to their default size.
3863  *
3864  * This function is to be called when a tracer is about to be used.
3865  */
3866 int tracing_update_buffers(void)
3867 {
3868         int ret = 0;
3869
3870         mutex_lock(&trace_types_lock);
3871         if (!ring_buffer_expanded)
3872                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3873                                                 RING_BUFFER_ALL_CPUS);
3874         mutex_unlock(&trace_types_lock);
3875
3876         return ret;
3877 }
3878
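/*
 * A minimal sketch of a hypothetical in-kernel caller of
 * tracing_update_buffers(): a facility that is about to emit trace data
 * expands the initially minimal ring buffers first and backs out on failure.
 * my_tracing_feature_enable() is made up for illustration; example only,
 * never built here.
 */
#if 0
static int my_tracing_feature_enable(void)
{
        int ret;

        /* grow the per-cpu ring buffers to their default size if needed */
        ret = tracing_update_buffers();
        if (ret < 0)
                return ret;

        /* ... start emitting events ... */
        return 0;
}
#endif
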
3879 struct trace_option_dentry;
3880
3881 static struct trace_option_dentry *
3882 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3883
3884 static void
3885 destroy_trace_option_files(struct trace_option_dentry *topts);
3886
3887 static int tracing_set_tracer(const char *buf)
3888 {
3889         static struct trace_option_dentry *topts;
3890         struct trace_array *tr = &global_trace;
3891         struct tracer *t;
3892 #ifdef CONFIG_TRACER_MAX_TRACE
3893         bool had_max_tr;
3894 #endif
3895         int ret = 0;
3896
3897         mutex_lock(&trace_types_lock);
3898
3899         if (!ring_buffer_expanded) {
3900                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3901                                                 RING_BUFFER_ALL_CPUS);
3902                 if (ret < 0)
3903                         goto out;
3904                 ret = 0;
3905         }
3906
3907         for (t = trace_types; t; t = t->next) {
3908                 if (strcmp(t->name, buf) == 0)
3909                         break;
3910         }
3911         if (!t) {
3912                 ret = -EINVAL;
3913                 goto out;
3914         }
3915         if (t == tr->current_trace)
3916                 goto out;
3917
3918         trace_branch_disable();
3919
3920         tr->current_trace->enabled = false;
3921
3922         if (tr->current_trace->reset)
3923                 tr->current_trace->reset(tr);
3924
3925         /* Current trace needs to be nop_trace before synchronize_sched */
3926         tr->current_trace = &nop_trace;
3927
3928 #ifdef CONFIG_TRACER_MAX_TRACE
3929         had_max_tr = tr->allocated_snapshot;
3930
3931         if (had_max_tr && !t->use_max_tr) {
3932                 /*
3933                  * We need to make sure that the update_max_tr sees that
3934                  * current_trace changed to nop_trace to keep it from
3935                  * swapping the buffers after we resize it.
3936                  * The update_max_tr is called with interrupts disabled,
3937                  * so a synchronize_sched() is sufficient.
3938                  */
3939                 synchronize_sched();
3940                 free_snapshot(tr);
3941         }
3942 #endif
3943         destroy_trace_option_files(topts);
3944
3945         topts = create_trace_option_files(tr, t);
3946
3947 #ifdef CONFIG_TRACER_MAX_TRACE
3948         if (t->use_max_tr && !had_max_tr) {
3949                 ret = alloc_snapshot(tr);
3950                 if (ret < 0)
3951                         goto out;
3952         }
3953 #endif
3954
3955         if (t->init) {
3956                 ret = tracer_init(t, tr);
3957                 if (ret)
3958                         goto out;
3959         }
3960
3961         tr->current_trace = t;
3962         tr->current_trace->enabled = true;
3963         trace_branch_enable(tr);
3964  out:
3965         mutex_unlock(&trace_types_lock);
3966
3967         return ret;
3968 }
3969
3970 static ssize_t
3971 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3972                         size_t cnt, loff_t *ppos)
3973 {
3974         char buf[MAX_TRACER_SIZE+1];
3975         int i;
3976         size_t ret;
3977         int err;
3978
3979         ret = cnt;
3980
3981         if (cnt > MAX_TRACER_SIZE)
3982                 cnt = MAX_TRACER_SIZE;
3983
3984         if (copy_from_user(&buf, ubuf, cnt))
3985                 return -EFAULT;
3986
3987         buf[cnt] = 0;
3988
3989         /* strip trailing whitespace. */
3990         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3991                 buf[i] = 0;
3992
3993         err = tracing_set_tracer(buf);
3994         if (err)
3995                 return err;
3996
3997         *ppos += ret;
3998
3999         return ret;
4000 }
4001
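/*
 * tracing_set_trace_write() above strips trailing whitespace and looks the
 * name up in trace_types, so selecting a tracer from user space is a single
 * write to current_tracer.  A minimal sketch (the path is an assumption;
 * "nop" is used because it is always registered; example only, never built
 * here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        /* tracer name and path are assumptions; see available_tracers */
        const char *tracer = "nop";
        int fd = open("/sys/kernel/debug/tracing/current_tracer", O_WRONLY);

        if (fd < 0) {
                perror("open current_tracer");
                return 1;
        }
        if (write(fd, tracer, strlen(tracer)) < 0)
                perror("write current_tracer");
        close(fd);
        return 0;
}
#endif
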
4002 static ssize_t
4003 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4004                      size_t cnt, loff_t *ppos)
4005 {
4006         unsigned long *ptr = filp->private_data;
4007         char buf[64];
4008         int r;
4009
4010         r = snprintf(buf, sizeof(buf), "%ld\n",
4011                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4012         if (r > sizeof(buf))
4013                 r = sizeof(buf);
4014         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4015 }
4016
4017 static ssize_t
4018 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4019                       size_t cnt, loff_t *ppos)
4020 {
4021         unsigned long *ptr = filp->private_data;
4022         unsigned long val;
4023         int ret;
4024
4025         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4026         if (ret)
4027                 return ret;
4028
4029         *ptr = val * 1000;
4030
4031         return cnt;
4032 }
4033
4034 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4035 {
4036         struct trace_array *tr = inode->i_private;
4037         struct trace_iterator *iter;
4038         int ret = 0;
4039
4040         if (tracing_disabled)
4041                 return -ENODEV;
4042
4043         if (trace_array_get(tr) < 0)
4044                 return -ENODEV;
4045
4046         mutex_lock(&trace_types_lock);
4047
4048         /* create a buffer to store the information to pass to userspace */
4049         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4050         if (!iter) {
4051                 ret = -ENOMEM;
4052                 __trace_array_put(tr);
4053                 goto out;
4054         }
4055
4056         /*
4057          * We make a copy of the current tracer to avoid concurrent
4058          * changes to it while we are reading.
4059          */
4060         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4061         if (!iter->trace) {
4062                 ret = -ENOMEM;
4063                 goto fail;
4064         }
4065         *iter->trace = *tr->current_trace;
4066
4067         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4068                 ret = -ENOMEM;
4069                 goto fail;
4070         }
4071
4072         /* trace pipe does not show start of buffer */
4073         cpumask_setall(iter->started);
4074
4075         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4076                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4077
4078         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4079         if (trace_clocks[tr->clock_id].in_ns)
4080                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4081
4082         iter->tr = tr;
4083         iter->trace_buffer = &tr->trace_buffer;
4084         iter->cpu_file = tracing_get_cpu(inode);
4085         mutex_init(&iter->mutex);
4086         filp->private_data = iter;
4087
4088         if (iter->trace->pipe_open)
4089                 iter->trace->pipe_open(iter);
4090
4091         nonseekable_open(inode, filp);
4092 out:
4093         mutex_unlock(&trace_types_lock);
4094         return ret;
4095
4096 fail:
4097         kfree(iter->trace);
4098         kfree(iter);
4099         __trace_array_put(tr);
4100         mutex_unlock(&trace_types_lock);
4101         return ret;
4102 }
4103
4104 static int tracing_release_pipe(struct inode *inode, struct file *file)
4105 {
4106         struct trace_iterator *iter = file->private_data;
4107         struct trace_array *tr = inode->i_private;
4108
4109         mutex_lock(&trace_types_lock);
4110
4111         if (iter->trace->pipe_close)
4112                 iter->trace->pipe_close(iter);
4113
4114         mutex_unlock(&trace_types_lock);
4115
4116         free_cpumask_var(iter->started);
4117         mutex_destroy(&iter->mutex);
4118         kfree(iter->trace);
4119         kfree(iter);
4120
4121         trace_array_put(tr);
4122
4123         return 0;
4124 }
4125
4126 static unsigned int
4127 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4128 {
4129         /* Iterators are static, they should be filled or empty */
4130         if (trace_buffer_iter(iter, iter->cpu_file))
4131                 return POLLIN | POLLRDNORM;
4132
4133         if (trace_flags & TRACE_ITER_BLOCK)
4134                 /*
4135                  * Always select as readable when in blocking mode
4136                  */
4137                 return POLLIN | POLLRDNORM;
4138         else
4139                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4140                                              filp, poll_table);
4141 }
4142
4143 static unsigned int
4144 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4145 {
4146         struct trace_iterator *iter = filp->private_data;
4147
4148         return trace_poll(iter, filp, poll_table);
4149 }
4150
4151 /*
4152  * This is a makeshift waitqueue.
4153  * A tracer might use this callback in some rare cases:
4154  *
4155  *  1) the current tracer might hold the runqueue lock when it wakes up
4156  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4157  *  2) the function tracers trace all functions, and we don't want
4158  *     the overhead of calling wake_up and friends
4159  *     (and tracing them too)
4160  *
4161  *     Anyway, this really is a very primitive wakeup.
4162  */
4163 void poll_wait_pipe(struct trace_iterator *iter)
4164 {
4165         set_current_state(TASK_INTERRUPTIBLE);
4166         /* sleep for 100 msecs, and try again. */
4167         schedule_timeout(HZ / 10);
4168 }
4169
4170 /* Must be called with trace_types_lock mutex held. */
4171 static int tracing_wait_pipe(struct file *filp)
4172 {
4173         struct trace_iterator *iter = filp->private_data;
4174
4175         while (trace_empty(iter)) {
4176
4177                 if ((filp->f_flags & O_NONBLOCK)) {
4178                         return -EAGAIN;
4179                 }
4180
4181                 mutex_unlock(&iter->mutex);
4182
4183                 iter->trace->wait_pipe(iter);
4184
4185                 mutex_lock(&iter->mutex);
4186
4187                 if (signal_pending(current))
4188                         return -EINTR;
4189
4190                 /*
4191                  * We block until we read something and tracing is disabled.
4192                  * We still block if tracing is disabled, but we have never
4193                  * read anything. This allows a user to cat this file, and
4194                  * then enable tracing. But after we have read something,
4195                  * we give an EOF when tracing is again disabled.
4196                  *
4197                  * iter->pos will be 0 if we haven't read anything.
4198                  */
4199                 if (!tracing_is_on() && iter->pos)
4200                         break;
4201         }
4202
4203         return 1;
4204 }
4205
4206 /*
4207  * Consumer reader.
4208  */
4209 static ssize_t
4210 tracing_read_pipe(struct file *filp, char __user *ubuf,
4211                   size_t cnt, loff_t *ppos)
4212 {
4213         struct trace_iterator *iter = filp->private_data;
4214         struct trace_array *tr = iter->tr;
4215         ssize_t sret;
4216
4217         /* return any leftover data */
4218         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4219         if (sret != -EBUSY)
4220                 return sret;
4221
4222         trace_seq_init(&iter->seq);
4223
4224         /* copy the tracer to avoid using a global lock all around */
4225         mutex_lock(&trace_types_lock);
4226         if (unlikely(iter->trace->name != tr->current_trace->name))
4227                 *iter->trace = *tr->current_trace;
4228         mutex_unlock(&trace_types_lock);
4229
4230         /*
4231          * Avoid more than one consumer on a single file descriptor
4232          * This is just a matter of trace coherency; the ring buffer itself
4233          * is protected.
4234          */
4235         mutex_lock(&iter->mutex);
4236         if (iter->trace->read) {
4237                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4238                 if (sret)
4239                         goto out;
4240         }
4241
4242 waitagain:
4243         sret = tracing_wait_pipe(filp);
4244         if (sret <= 0)
4245                 goto out;
4246
4247         /* stop when tracing is finished */
4248         if (trace_empty(iter)) {
4249                 sret = 0;
4250                 goto out;
4251         }
4252
4253         if (cnt >= PAGE_SIZE)
4254                 cnt = PAGE_SIZE - 1;
4255
4256         /* reset all but tr, trace, and overruns */
4257         memset(&iter->seq, 0,
4258                sizeof(struct trace_iterator) -
4259                offsetof(struct trace_iterator, seq));
4260         cpumask_clear(iter->started);
4261         iter->pos = -1;
4262
4263         trace_event_read_lock();
4264         trace_access_lock(iter->cpu_file);
4265         while (trace_find_next_entry_inc(iter) != NULL) {
4266                 enum print_line_t ret;
4267                 int len = iter->seq.len;
4268
4269                 ret = print_trace_line(iter);
4270                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4271                         /* don't print partial lines */
4272                         iter->seq.len = len;
4273                         break;
4274                 }
4275                 if (ret != TRACE_TYPE_NO_CONSUME)
4276                         trace_consume(iter);
4277
4278                 if (iter->seq.len >= cnt)
4279                         break;
4280
4281                 /*
4282                  * Setting the full flag means we reached the trace_seq buffer
4283                  * size and we should have left via the partial output condition above.
4284                  * One of the trace_seq_* functions is not used properly.
4285                  */
4286                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4287                           iter->ent->type);
4288         }
4289         trace_access_unlock(iter->cpu_file);
4290         trace_event_read_unlock();
4291
4292         /* Now copy what we have to the user */
4293         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4294         if (iter->seq.readpos >= iter->seq.len)
4295                 trace_seq_init(&iter->seq);
4296
4297         /*
4298          * If there was nothing to send to user, in spite of consuming trace
4299          * entries, go back to wait for more entries.
4300          */
4301         if (sret == -EBUSY)
4302                 goto waitagain;
4303
4304 out:
4305         mutex_unlock(&iter->mutex);
4306
4307         return sret;
4308 }
4309
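/*
 * Reads on trace_pipe are consuming and, as tracing_wait_pipe() above shows,
 * block until entries arrive unless the file is opened O_NONBLOCK (in which
 * case -EAGAIN is returned).  A minimal user-space consumer sketch (the path
 * is an assumption; example only, never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        ssize_t n;
        /* path is an assumption; adjust to where tracefs is mounted */
        int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);

        if (fd < 0) {
                perror("open trace_pipe");
                return 1;
        }
        /* blocks until entries arrive; each read consumes what it returns */
        while ((n = read(fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
}
#endif
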
4310 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4311                                      unsigned int idx)
4312 {
4313         __free_page(spd->pages[idx]);
4314 }
4315
4316 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4317         .can_merge              = 0,
4318         .map                    = generic_pipe_buf_map,
4319         .unmap                  = generic_pipe_buf_unmap,
4320         .confirm                = generic_pipe_buf_confirm,
4321         .release                = generic_pipe_buf_release,
4322         .steal                  = generic_pipe_buf_steal,
4323         .get                    = generic_pipe_buf_get,
4324 };
4325
4326 static size_t
4327 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4328 {
4329         size_t count;
4330         int ret;
4331
4332         /* Seq buffer is page-sized, exactly what we need. */
4333         for (;;) {
4334                 count = iter->seq.len;
4335                 ret = print_trace_line(iter);
4336                 count = iter->seq.len - count;
4337                 if (rem < count) {
4338                         rem = 0;
4339                         iter->seq.len -= count;
4340                         break;
4341                 }
4342                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4343                         iter->seq.len -= count;
4344                         break;
4345                 }
4346
4347                 if (ret != TRACE_TYPE_NO_CONSUME)
4348                         trace_consume(iter);
4349                 rem -= count;
4350                 if (!trace_find_next_entry_inc(iter))   {
4351                         rem = 0;
4352                         iter->ent = NULL;
4353                         break;
4354                 }
4355         }
4356
4357         return rem;
4358 }
4359
4360 static ssize_t tracing_splice_read_pipe(struct file *filp,
4361                                         loff_t *ppos,
4362                                         struct pipe_inode_info *pipe,
4363                                         size_t len,
4364                                         unsigned int flags)
4365 {
4366         struct page *pages_def[PIPE_DEF_BUFFERS];
4367         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4368         struct trace_iterator *iter = filp->private_data;
4369         struct splice_pipe_desc spd = {
4370                 .pages          = pages_def,
4371                 .partial        = partial_def,
4372                 .nr_pages       = 0, /* This gets updated below. */
4373                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4374                 .flags          = flags,
4375                 .ops            = &tracing_pipe_buf_ops,
4376                 .spd_release    = tracing_spd_release_pipe,
4377         };
4378         struct trace_array *tr = iter->tr;
4379         ssize_t ret;
4380         size_t rem;
4381         unsigned int i;
4382
4383         if (splice_grow_spd(pipe, &spd))
4384                 return -ENOMEM;
4385
4386         /* copy the tracer to avoid using a global lock all around */
4387         mutex_lock(&trace_types_lock);
4388         if (unlikely(iter->trace->name != tr->current_trace->name))
4389                 *iter->trace = *tr->current_trace;
4390         mutex_unlock(&trace_types_lock);
4391
4392         mutex_lock(&iter->mutex);
4393
4394         if (iter->trace->splice_read) {
4395                 ret = iter->trace->splice_read(iter, filp,
4396                                                ppos, pipe, len, flags);
4397                 if (ret)
4398                         goto out_err;
4399         }
4400
4401         ret = tracing_wait_pipe(filp);
4402         if (ret <= 0)
4403                 goto out_err;
4404
4405         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4406                 ret = -EFAULT;
4407                 goto out_err;
4408         }
4409
4410         trace_event_read_lock();
4411         trace_access_lock(iter->cpu_file);
4412
4413         /* Fill as many pages as possible. */
4414         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4415                 spd.pages[i] = alloc_page(GFP_KERNEL);
4416                 if (!spd.pages[i])
4417                         break;
4418
4419                 rem = tracing_fill_pipe_page(rem, iter);
4420
4421                 /* Copy the data into the page, so we can start over. */
4422                 ret = trace_seq_to_buffer(&iter->seq,
4423                                           page_address(spd.pages[i]),
4424                                           iter->seq.len);
4425                 if (ret < 0) {
4426                         __free_page(spd.pages[i]);
4427                         break;
4428                 }
4429                 spd.partial[i].offset = 0;
4430                 spd.partial[i].len = iter->seq.len;
4431
4432                 trace_seq_init(&iter->seq);
4433         }
4434
4435         trace_access_unlock(iter->cpu_file);
4436         trace_event_read_unlock();
4437         mutex_unlock(&iter->mutex);
4438
4439         spd.nr_pages = i;
4440
4441         ret = splice_to_pipe(pipe, &spd);
4442 out:
4443         splice_shrink_spd(&spd);
4444         return ret;
4445
4446 out_err:
4447         mutex_unlock(&iter->mutex);
4448         goto out;
4449 }
4450
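/*
 * tracing_splice_read_pipe() above implements the splice_read side of
 * trace_pipe: trace data is rendered page by page into pipe buffers and
 * handed over without a user-space bounce buffer.  A minimal sketch of a
 * user-space consumer (paths and sizes are assumptions; example only, never
 * built here):
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int pfd[2];
        ssize_t n;
        /* paths are assumptions; adjust tracefs mount point and output file */
        int in = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
        int out = open("/tmp/trace.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);

        if (in < 0 || out < 0 || pipe(pfd) < 0) {
                perror("setup");
                return 1;
        }
        /* splice(2) needs a pipe on one side: trace_pipe -> pipe -> file */
        while ((n = splice(in, NULL, pfd[1], NULL, 65536, 0)) > 0) {
                if (splice(pfd[0], NULL, out, NULL, n, 0) < 0) {
                        perror("splice to file");
                        break;
                }
        }
        close(in);
        close(out);
        return 0;
}
#endif
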
4451 static ssize_t
4452 tracing_entries_read(struct file *filp, char __user *ubuf,
4453                      size_t cnt, loff_t *ppos)
4454 {
4455         struct inode *inode = file_inode(filp);
4456         struct trace_array *tr = inode->i_private;
4457         int cpu = tracing_get_cpu(inode);
4458         char buf[64];
4459         int r = 0;
4460         ssize_t ret;
4461
4462         mutex_lock(&trace_types_lock);
4463
4464         if (cpu == RING_BUFFER_ALL_CPUS) {
4465                 int cpu, buf_size_same;
4466                 unsigned long size;
4467
4468                 size = 0;
4469                 buf_size_same = 1;
4470                 /* check if all cpu sizes are same */
4471                 /* check if all cpu sizes are the same */
4472                 for_each_tracing_cpu(cpu) {
4473                         /* fill in the size from the first enabled cpu */
4474                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4475                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4476                                 buf_size_same = 0;
4477                                 break;
4478                         }
4479                 }
4480
4481                 if (buf_size_same) {
4482                         if (!ring_buffer_expanded)
4483                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4484                                             size >> 10,
4485                                             trace_buf_size >> 10);
4486                         else
4487                                 r = sprintf(buf, "%lu\n", size >> 10);
4488                 } else
4489                         r = sprintf(buf, "X\n");
4490         } else
4491                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4492
4493         mutex_unlock(&trace_types_lock);
4494
4495         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4496         return ret;
4497 }
4498
4499 static ssize_t
4500 tracing_entries_write(struct file *filp, const char __user *ubuf,
4501                       size_t cnt, loff_t *ppos)
4502 {
4503         struct inode *inode = file_inode(filp);
4504         struct trace_array *tr = inode->i_private;
4505         unsigned long val;
4506         int ret;
4507
4508         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4509         if (ret)
4510                 return ret;
4511
4512         /* must have at least 1 entry */
4513         if (!val)
4514                 return -EINVAL;
4515
4516         /* value is in KB */
4517         val <<= 10;
4518         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4519         if (ret < 0)
4520                 return ret;
4521
4522         *ppos += cnt;
4523
4524         return cnt;
4525 }
4526
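/*
 * tracing_entries_write() above interprets the written value as kilobytes
 * (val <<= 10) and resizes either a single CPU's buffer (through the
 * per_cpu/cpuN/buffer_size_kb files) or, through the top-level file, all of
 * them.  A minimal sketch (path and size are assumptions; example only,
 * never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *kb = "4096";        /* 4 MB per CPU; size is an assumption */
        /* path is an assumption; adjust to where tracefs is mounted */
        int fd = open("/sys/kernel/debug/tracing/buffer_size_kb", O_WRONLY);

        if (fd < 0) {
                perror("open buffer_size_kb");
                return 1;
        }
        if (write(fd, kb, strlen(kb)) < 0)
                perror("write buffer_size_kb");
        close(fd);
        return 0;
}
#endif
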
4527 static ssize_t
4528 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4529                                 size_t cnt, loff_t *ppos)
4530 {
4531         struct trace_array *tr = filp->private_data;
4532         char buf[64];
4533         int r, cpu;
4534         unsigned long size = 0, expanded_size = 0;
4535
4536         mutex_lock(&trace_types_lock);
4537         for_each_tracing_cpu(cpu) {
4538                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4539                 if (!ring_buffer_expanded)
4540                         expanded_size += trace_buf_size >> 10;
4541         }
4542         if (ring_buffer_expanded)
4543                 r = sprintf(buf, "%lu\n", size);
4544         else
4545                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4546         mutex_unlock(&trace_types_lock);
4547
4548         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4549 }
4550
4551 static ssize_t
4552 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4553                           size_t cnt, loff_t *ppos)
4554 {
4555         /*
4556          * There is no need to read what the user has written, this function
4557          * is just to make sure that there is no error when "echo" is used
4558          */
4559
4560         *ppos += cnt;
4561
4562         return cnt;
4563 }
4564
4565 static int
4566 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4567 {
4568         struct trace_array *tr = inode->i_private;
4569
4570         /* disable tracing? */
4571         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4572                 tracer_tracing_off(tr);
4573         /* resize the ring buffer to 0 */
4574         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4575
4576         trace_array_put(tr);
4577
4578         return 0;
4579 }
4580
4581 static ssize_t
4582 tracing_mark_write(struct file *filp, const char __user *ubuf,
4583                                         size_t cnt, loff_t *fpos)
4584 {
4585         unsigned long addr = (unsigned long)ubuf;
4586         struct trace_array *tr = filp->private_data;
4587         struct ring_buffer_event *event;
4588         struct ring_buffer *buffer;
4589         struct print_entry *entry;
4590         unsigned long irq_flags;
4591         struct page *pages[2];
4592         void *map_page[2];
4593         int nr_pages = 1;
4594         ssize_t written;
4595         int offset;
4596         int size;
4597         int len;
4598         int ret;
4599         int i;
4600
4601         if (tracing_disabled)
4602                 return -EINVAL;
4603
4604         if (!(trace_flags & TRACE_ITER_MARKERS))
4605                 return -EINVAL;
4606
4607         if (cnt > TRACE_BUF_SIZE)
4608                 cnt = TRACE_BUF_SIZE;
4609
4610         /*
4611          * Userspace is injecting traces into the kernel trace buffer.
4612          * We want to be as non-intrusive as possible.
4613          * To do so, we do not want to allocate any special buffers
4614          * or take any locks, but instead write the userspace data
4615          * straight into the ring buffer.
4616          *
4617          * First we need to pin the userspace buffer into memory.
4618          * Most likely it is already resident, because userspace just
4619          * referenced it, but there's no guarantee. By using get_user_pages_fast()
4620          * and kmap_atomic/kunmap_atomic() we can get access to the
4621          * pages directly. We then write the data directly into the
4622          * ring buffer.
4623          */
4624         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4625
4626         /* check if we cross pages */
4627         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4628                 nr_pages = 2;
4629
4630         offset = addr & (PAGE_SIZE - 1);
4631         addr &= PAGE_MASK;
4632
4633         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4634         if (ret < nr_pages) {
4635                 while (--ret >= 0)
4636                         put_page(pages[ret]);
4637                 written = -EFAULT;
4638                 goto out;
4639         }
4640
4641         for (i = 0; i < nr_pages; i++)
4642                 map_page[i] = kmap_atomic(pages[i]);
4643
4644         local_save_flags(irq_flags);
4645         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4646         buffer = tr->trace_buffer.buffer;
4647         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4648                                           irq_flags, preempt_count());
4649         if (!event) {
4650                 /* Ring buffer disabled, return as if not open for write */
4651                 written = -EBADF;
4652                 goto out_unlock;
4653         }
4654
4655         entry = ring_buffer_event_data(event);
4656         entry->ip = _THIS_IP_;
4657
4658         if (nr_pages == 2) {
4659                 len = PAGE_SIZE - offset;
4660                 memcpy(&entry->buf, map_page[0] + offset, len);
4661                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4662         } else
4663                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4664
4665         if (entry->buf[cnt - 1] != '\n') {
4666                 entry->buf[cnt] = '\n';
4667                 entry->buf[cnt + 1] = '\0';
4668         } else
4669                 entry->buf[cnt] = '\0';
4670
4671         __buffer_unlock_commit(buffer, event);
4672
4673         written = cnt;
4674
4675         *fpos += written;
4676
4677  out_unlock:
4678         for (i = 0; i < nr_pages; i++){
4679                 kunmap_atomic(map_page[i]);
4680                 put_page(pages[i]);
4681         }
4682  out:
4683         return written;
4684 }
4685
4686 static int tracing_clock_show(struct seq_file *m, void *v)
4687 {
4688         struct trace_array *tr = m->private;
4689         int i;
4690
4691         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4692                 seq_printf(m,
4693                         "%s%s%s%s", i ? " " : "",
4694                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4695                         i == tr->clock_id ? "]" : "");
4696         seq_putc(m, '\n');
4697
4698         return 0;
4699 }
4700
4701 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4702                                    size_t cnt, loff_t *fpos)
4703 {
4704         struct seq_file *m = filp->private_data;
4705         struct trace_array *tr = m->private;
4706         char buf[64];
4707         const char *clockstr;
4708         int i;
4709
4710         if (cnt >= sizeof(buf))
4711                 return -EINVAL;
4712
4713         if (copy_from_user(&buf, ubuf, cnt))
4714                 return -EFAULT;
4715
4716         buf[cnt] = 0;
4717
4718         clockstr = strstrip(buf);
4719
4720         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4721                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4722                         break;
4723         }
4724         if (i == ARRAY_SIZE(trace_clocks))
4725                 return -EINVAL;
4726
4727         mutex_lock(&trace_types_lock);
4728
4729         tr->clock_id = i;
4730
4731         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4732
4733         /*
4734          * New clock may not be consistent with the previous clock.
4735          * Reset the buffer so that it doesn't have incomparable timestamps.
4736          */
4737         tracing_reset_online_cpus(&tr->trace_buffer);
4738
4739 #ifdef CONFIG_TRACER_MAX_TRACE
4740         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4741                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4742         tracing_reset_online_cpus(&tr->max_buffer);
4743 #endif
4744
4745         mutex_unlock(&trace_types_lock);
4746
4747         *fpos += cnt;
4748
4749         return cnt;
4750 }
4751
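/*
 * tracing_clock_show()/tracing_clock_write() above list the available clocks
 * with the current one in brackets and switch clocks on write, resetting the
 * buffers since old and new timestamps are not comparable.  A minimal sketch
 * (the path is an assumption; example only, never built here):
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        char buf[256];
        ssize_t n;
        int rfd, wfd;

        /* path is an assumption; adjust to where tracefs is mounted */
        rfd = open("/sys/kernel/debug/tracing/trace_clock", O_RDONLY);
        if (rfd >= 0) {
                n = read(rfd, buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        /* prints e.g. "[local] global counter uptime perf" */
                        printf("%s", buf);
                }
                close(rfd);
        }

        wfd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
        if (wfd < 0) {
                perror("open trace_clock");
                return 1;
        }
        /* switch to the cross-CPU "global" clock; this resets the buffers */
        if (write(wfd, "global", 6) < 0)
                perror("write trace_clock");
        close(wfd);
        return 0;
}
#endif
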
4752 static int tracing_clock_open(struct inode *inode, struct file *file)
4753 {
4754         struct trace_array *tr = inode->i_private;
4755         int ret;
4756
4757         if (tracing_disabled)
4758                 return -ENODEV;
4759
4760         if (trace_array_get(tr))
4761                 return -ENODEV;
4762
4763         ret = single_open(file, tracing_clock_show, inode->i_private);
4764         if (ret < 0)
4765                 trace_array_put(tr);
4766
4767         return ret;
4768 }
4769
4770 struct ftrace_buffer_info {
4771         struct trace_iterator   iter;
4772         void                    *spare;
4773         unsigned int            read;
4774 };
4775
4776 #ifdef CONFIG_TRACER_SNAPSHOT
4777 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4778 {
4779         struct trace_array *tr = inode->i_private;
4780         struct trace_iterator *iter;
4781         struct seq_file *m;
4782         int ret = 0;
4783
4784         if (trace_array_get(tr) < 0)
4785                 return -ENODEV;
4786
4787         if (file->f_mode & FMODE_READ) {
4788                 iter = __tracing_open(inode, file, true);
4789                 if (IS_ERR(iter))
4790                         ret = PTR_ERR(iter);
4791         } else {
4792                 /* Writes still need the seq_file to hold the private data */
4793                 ret = -ENOMEM;
4794                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4795                 if (!m)
4796                         goto out;
4797                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4798                 if (!iter) {
4799                         kfree(m);
4800                         goto out;
4801                 }
4802                 ret = 0;
4803
4804                 iter->tr = tr;
4805                 iter->trace_buffer = &tr->max_buffer;
4806                 iter->cpu_file = tracing_get_cpu(inode);
4807                 m->private = iter;
4808                 file->private_data = m;
4809         }
4810 out:
4811         if (ret < 0)
4812                 trace_array_put(tr);
4813
4814         return ret;
4815 }
4816
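/*
 * Writes to the snapshot file control the max/snapshot buffer.  From the
 * switch below: "0" frees the snapshot buffer (only via the all-CPUs
 * file), "1" allocates it if needed and swaps it with the live buffer,
 * and any other number just clears the snapshot buffer.  A sketch of
 * typical use, assuming the usual debugfs layout:
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot    (take a snapshot)
 *   # cat /sys/kernel/debug/tracing/snapshot         (read it back)
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot    (free the buffer)
 */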
4817 static ssize_t
4818 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4819                        loff_t *ppos)
4820 {
4821         struct seq_file *m = filp->private_data;
4822         struct trace_iterator *iter = m->private;
4823         struct trace_array *tr = iter->tr;
4824         unsigned long val;
4825         int ret;
4826
4827         ret = tracing_update_buffers();
4828         if (ret < 0)
4829                 return ret;
4830
4831         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4832         if (ret)
4833                 return ret;
4834
4835         mutex_lock(&trace_types_lock);
4836
4837         if (tr->current_trace->use_max_tr) {
4838                 ret = -EBUSY;
4839                 goto out;
4840         }
4841
4842         switch (val) {
4843         case 0:
4844                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4845                         ret = -EINVAL;
4846                         break;
4847                 }
4848                 if (tr->allocated_snapshot)
4849                         free_snapshot(tr);
4850                 break;
4851         case 1:
4852 /* Only allow per-cpu swap if the ring buffer supports it */
4853 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4854                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4855                         ret = -EINVAL;
4856                         break;
4857                 }
4858 #endif
4859                 if (!tr->allocated_snapshot) {
4860                         ret = alloc_snapshot(tr);
4861                         if (ret < 0)
4862                                 break;
4863                 }
4864                 local_irq_disable();
4865                 /* Now, we're going to swap */
4866                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4867                         update_max_tr(tr, current, smp_processor_id());
4868                 else
4869                         update_max_tr_single(tr, current, iter->cpu_file);
4870                 local_irq_enable();
4871                 break;
4872         default:
4873                 if (tr->allocated_snapshot) {
4874                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4875                                 tracing_reset_online_cpus(&tr->max_buffer);
4876                         else
4877                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4878                 }
4879                 break;
4880         }
4881
4882         if (ret >= 0) {
4883                 *ppos += cnt;
4884                 ret = cnt;
4885         }
4886 out:
4887         mutex_unlock(&trace_types_lock);
4888         return ret;
4889 }
4890
4891 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4892 {
4893         struct seq_file *m = file->private_data;
4894         int ret;
4895
4896         ret = tracing_release(inode, file);
4897
4898         if (file->f_mode & FMODE_READ)
4899                 return ret;
4900
4901         /* If write only, the seq_file is just a stub */
4902         if (m)
4903                 kfree(m->private);
4904         kfree(m);
4905
4906         return 0;
4907 }
4908
4909 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4910 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4911                                     size_t count, loff_t *ppos);
4912 static int tracing_buffers_release(struct inode *inode, struct file *file);
4913 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4914                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4915
4916 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4917 {
4918         struct ftrace_buffer_info *info;
4919         int ret;
4920
4921         ret = tracing_buffers_open(inode, filp);
4922         if (ret < 0)
4923                 return ret;
4924
4925         info = filp->private_data;
4926
4927         if (info->iter.trace->use_max_tr) {
4928                 tracing_buffers_release(inode, filp);
4929                 return -EBUSY;
4930         }
4931
4932         info->iter.snapshot = true;
4933         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4934
4935         return ret;
4936 }
4937
4938 #endif /* CONFIG_TRACER_SNAPSHOT */
4939
4940
4941 static const struct file_operations tracing_max_lat_fops = {
4942         .open           = tracing_open_generic,
4943         .read           = tracing_max_lat_read,
4944         .write          = tracing_max_lat_write,
4945         .llseek         = generic_file_llseek,
4946 };
4947
4948 static const struct file_operations set_tracer_fops = {
4949         .open           = tracing_open_generic,
4950         .read           = tracing_set_trace_read,
4951         .write          = tracing_set_trace_write,
4952         .llseek         = generic_file_llseek,
4953 };
4954
4955 static const struct file_operations tracing_pipe_fops = {
4956         .open           = tracing_open_pipe,
4957         .poll           = tracing_poll_pipe,
4958         .read           = tracing_read_pipe,
4959         .splice_read    = tracing_splice_read_pipe,
4960         .release        = tracing_release_pipe,
4961         .llseek         = no_llseek,
4962 };
4963
4964 static const struct file_operations tracing_entries_fops = {
4965         .open           = tracing_open_generic_tr,
4966         .read           = tracing_entries_read,
4967         .write          = tracing_entries_write,
4968         .llseek         = generic_file_llseek,
4969         .release        = tracing_release_generic_tr,
4970 };
4971
4972 static const struct file_operations tracing_total_entries_fops = {
4973         .open           = tracing_open_generic_tr,
4974         .read           = tracing_total_entries_read,
4975         .llseek         = generic_file_llseek,
4976         .release        = tracing_release_generic_tr,
4977 };
4978
4979 static const struct file_operations tracing_free_buffer_fops = {
4980         .open           = tracing_open_generic_tr,
4981         .write          = tracing_free_buffer_write,
4982         .release        = tracing_free_buffer_release,
4983 };
4984
4985 static const struct file_operations tracing_mark_fops = {
4986         .open           = tracing_open_generic_tr,
4987         .write          = tracing_mark_write,
4988         .llseek         = generic_file_llseek,
4989         .release        = tracing_release_generic_tr,
4990 };
4991
4992 static const struct file_operations trace_clock_fops = {
4993         .open           = tracing_clock_open,
4994         .read           = seq_read,
4995         .llseek         = seq_lseek,
4996         .release        = tracing_single_release_tr,
4997         .write          = tracing_clock_write,
4998 };
4999
5000 #ifdef CONFIG_TRACER_SNAPSHOT
5001 static const struct file_operations snapshot_fops = {
5002         .open           = tracing_snapshot_open,
5003         .read           = seq_read,
5004         .write          = tracing_snapshot_write,
5005         .llseek         = tracing_lseek,
5006         .release        = tracing_snapshot_release,
5007 };
5008
5009 static const struct file_operations snapshot_raw_fops = {
5010         .open           = snapshot_raw_open,
5011         .read           = tracing_buffers_read,
5012         .release        = tracing_buffers_release,
5013         .splice_read    = tracing_buffers_splice_read,
5014         .llseek         = no_llseek,
5015 };
5016
5017 #endif /* CONFIG_TRACER_SNAPSHOT */
5018
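/*
 * The handlers below back the per-cpu "trace_pipe_raw" files (created in
 * tracing_init_debugfs_percpu() further down).  They hand whole ring
 * buffer pages to user space, either copied out with read() or
 * zero-copied with splice(); the latter is what binary ring buffer
 * consumers typically use.
 */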
5019 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         struct ftrace_buffer_info *info;
5023         int ret;
5024
5025         if (tracing_disabled)
5026                 return -ENODEV;
5027
5028         if (trace_array_get(tr) < 0)
5029                 return -ENODEV;
5030
5031         info = kzalloc(sizeof(*info), GFP_KERNEL);
5032         if (!info) {
5033                 trace_array_put(tr);
5034                 return -ENOMEM;
5035         }
5036
5037         mutex_lock(&trace_types_lock);
5038
5039         info->iter.tr           = tr;
5040         info->iter.cpu_file     = tracing_get_cpu(inode);
5041         info->iter.trace        = tr->current_trace;
5042         info->iter.trace_buffer = &tr->trace_buffer;
5043         info->spare             = NULL;
5044         /* Force reading ring buffer for first read */
5045         info->read              = (unsigned int)-1;
5046
5047         filp->private_data = info;
5048
5049         mutex_unlock(&trace_types_lock);
5050
5051         ret = nonseekable_open(inode, filp);
5052         if (ret < 0)
5053                 trace_array_put(tr);
5054
5055         return ret;
5056 }
5057
5058 static unsigned int
5059 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5060 {
5061         struct ftrace_buffer_info *info = filp->private_data;
5062         struct trace_iterator *iter = &info->iter;
5063
5064         return trace_poll(iter, filp, poll_table);
5065 }
5066
5067 static ssize_t
5068 tracing_buffers_read(struct file *filp, char __user *ubuf,
5069                      size_t count, loff_t *ppos)
5070 {
5071         struct ftrace_buffer_info *info = filp->private_data;
5072         struct trace_iterator *iter = &info->iter;
5073         ssize_t ret;
5074         ssize_t size;
5075
5076         if (!count)
5077                 return 0;
5078
5079         mutex_lock(&trace_types_lock);
5080
5081 #ifdef CONFIG_TRACER_MAX_TRACE
5082         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5083                 size = -EBUSY;
5084                 goto out_unlock;
5085         }
5086 #endif
5087
5088         if (!info->spare)
5089                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5090                                                           iter->cpu_file);
5091         size = -ENOMEM;
5092         if (!info->spare)
5093                 goto out_unlock;
5094
5095         /* Do we have previous read data to read? */
5096         if (info->read < PAGE_SIZE)
5097                 goto read;
5098
5099  again:
5100         trace_access_lock(iter->cpu_file);
5101         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5102                                     &info->spare,
5103                                     count,
5104                                     iter->cpu_file, 0);
5105         trace_access_unlock(iter->cpu_file);
5106
5107         if (ret < 0) {
5108                 if (trace_empty(iter)) {
5109                         if ((filp->f_flags & O_NONBLOCK)) {
5110                                 size = -EAGAIN;
5111                                 goto out_unlock;
5112                         }
5113                         mutex_unlock(&trace_types_lock);
5114                         iter->trace->wait_pipe(iter);
5115                         mutex_lock(&trace_types_lock);
5116                         if (signal_pending(current)) {
5117                                 size = -EINTR;
5118                                 goto out_unlock;
5119                         }
5120                         goto again;
5121                 }
5122                 size = 0;
5123                 goto out_unlock;
5124         }
5125
5126         info->read = 0;
5127  read:
5128         size = PAGE_SIZE - info->read;
5129         if (size > count)
5130                 size = count;
5131
5132         ret = copy_to_user(ubuf, info->spare + info->read, size);
5133         if (ret == size) {
5134                 size = -EFAULT;
5135                 goto out_unlock;
5136         }
5137         size -= ret;
5138
5139         *ppos += size;
5140         info->read += size;
5141
5142  out_unlock:
5143         mutex_unlock(&trace_types_lock);
5144
5145         return size;
5146 }
5147
5148 static int tracing_buffers_release(struct inode *inode, struct file *file)
5149 {
5150         struct ftrace_buffer_info *info = file->private_data;
5151         struct trace_iterator *iter = &info->iter;
5152
5153         mutex_lock(&trace_types_lock);
5154
5155         __trace_array_put(iter->tr);
5156
5157         if (info->spare)
5158                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5159         kfree(info);
5160
5161         mutex_unlock(&trace_types_lock);
5162
5163         return 0;
5164 }
5165
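/*
 * A buffer_ref pins one ring buffer read page while it sits in a pipe.
 * The reference starts at 1 when the page is placed in the splice
 * descriptor and is dropped in buffer_pipe_buf_release() (or in
 * buffer_spd_release() on error); when it hits zero the page is handed
 * back to the ring buffer.
 */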
5166 struct buffer_ref {
5167         struct ring_buffer      *buffer;
5168         void                    *page;
5169         int                     ref;
5170 };
5171
5172 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5173                                     struct pipe_buffer *buf)
5174 {
5175         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5176
5177         if (--ref->ref)
5178                 return;
5179
5180         ring_buffer_free_read_page(ref->buffer, ref->page);
5181         kfree(ref);
5182         buf->private = 0;
5183 }
5184
5185 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5186                                 struct pipe_buffer *buf)
5187 {
5188         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5189
5190         ref->ref++;
5191 }
5192
5193 /* Pipe buffer operations for a buffer. */
5194 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5195         .can_merge              = 0,
5196         .map                    = generic_pipe_buf_map,
5197         .unmap                  = generic_pipe_buf_unmap,
5198         .confirm                = generic_pipe_buf_confirm,
5199         .release                = buffer_pipe_buf_release,
5200         .steal                  = generic_pipe_buf_steal,
5201         .get                    = buffer_pipe_buf_get,
5202 };
5203
5204 /*
5205  * Callback from splice_to_pipe(): releases pages left in the spd in
5206  * case we errored out while filling the pipe.
5207  */
5208 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5209 {
5210         struct buffer_ref *ref =
5211                 (struct buffer_ref *)spd->partial[i].private;
5212
5213         if (--ref->ref)
5214                 return;
5215
5216         ring_buffer_free_read_page(ref->buffer, ref->page);
5217         kfree(ref);
5218         spd->partial[i].private = 0;
5219 }
5220
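/*
 * Splice ring buffer pages straight into a pipe.  The offset must be page
 * aligned; lengths smaller than a page are rejected and larger ones are
 * rounded down to whole pages.  Every page read is wrapped in a
 * buffer_ref so it stays pinned until the pipe reader releases it.
 */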
5221 static ssize_t
5222 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5223                             struct pipe_inode_info *pipe, size_t len,
5224                             unsigned int flags)
5225 {
5226         struct ftrace_buffer_info *info = file->private_data;
5227         struct trace_iterator *iter = &info->iter;
5228         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5229         struct page *pages_def[PIPE_DEF_BUFFERS];
5230         struct splice_pipe_desc spd = {
5231                 .pages          = pages_def,
5232                 .partial        = partial_def,
5233                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5234                 .flags          = flags,
5235                 .ops            = &buffer_pipe_buf_ops,
5236                 .spd_release    = buffer_spd_release,
5237         };
5238         struct buffer_ref *ref;
5239         int entries, size, i;
5240         ssize_t ret;
5241
5242         mutex_lock(&trace_types_lock);
5243
5244 #ifdef CONFIG_TRACER_MAX_TRACE
5245         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5246                 ret = -EBUSY;
5247                 goto out;
5248         }
5249 #endif
5250
5251         if (splice_grow_spd(pipe, &spd)) {
5252                 ret = -ENOMEM;
5253                 goto out;
5254         }
5255
5256         if (*ppos & (PAGE_SIZE - 1)) {
5257                 ret = -EINVAL;
5258                 goto out;
5259         }
5260
5261         if (len & (PAGE_SIZE - 1)) {
5262                 if (len < PAGE_SIZE) {
5263                         ret = -EINVAL;
5264                         goto out;
5265                 }
5266                 len &= PAGE_MASK;
5267         }
5268
5269  again:
5270         trace_access_lock(iter->cpu_file);
5271         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5272
5273         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5274                 struct page *page;
5275                 int r;
5276
5277                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5278                 if (!ref)
5279                         break;
5280
5281                 ref->ref = 1;
5282                 ref->buffer = iter->trace_buffer->buffer;
5283                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5284                 if (!ref->page) {
5285                         kfree(ref);
5286                         break;
5287                 }
5288
5289                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5290                                           len, iter->cpu_file, 1);
5291                 if (r < 0) {
5292                         ring_buffer_free_read_page(ref->buffer, ref->page);
5293                         kfree(ref);
5294                         break;
5295                 }
5296
5297                 /*
5298                  * Zero out any leftover data; this page is going
5299                  * to user land.
5300                  */
5301                 size = ring_buffer_page_len(ref->page);
5302                 if (size < PAGE_SIZE)
5303                         memset(ref->page + size, 0, PAGE_SIZE - size);
5304
5305                 page = virt_to_page(ref->page);
5306
5307                 spd.pages[i] = page;
5308                 spd.partial[i].len = PAGE_SIZE;
5309                 spd.partial[i].offset = 0;
5310                 spd.partial[i].private = (unsigned long)ref;
5311                 spd.nr_pages++;
5312                 *ppos += PAGE_SIZE;
5313
5314                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5315         }
5316
5317         trace_access_unlock(iter->cpu_file);
5318         spd.nr_pages = i;
5319
5320         /* did we read anything? */
5321         if (!spd.nr_pages) {
5322                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5323                         ret = -EAGAIN;
5324                         goto out;
5325                 }
5326                 mutex_unlock(&trace_types_lock);
5327                 iter->trace->wait_pipe(iter);
5328                 mutex_lock(&trace_types_lock);
5329                 if (signal_pending(current)) {
5330                         ret = -EINTR;
5331                         goto out;
5332                 }
5333                 goto again;
5334         }
5335
5336         ret = splice_to_pipe(pipe, &spd);
5337         splice_shrink_spd(&spd);
5338 out:
5339         mutex_unlock(&trace_types_lock);
5340
5341         return ret;
5342 }
5343
5344 static const struct file_operations tracing_buffers_fops = {
5345         .open           = tracing_buffers_open,
5346         .read           = tracing_buffers_read,
5347         .poll           = tracing_buffers_poll,
5348         .release        = tracing_buffers_release,
5349         .splice_read    = tracing_buffers_splice_read,
5350         .llseek         = no_llseek,
5351 };
5352
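/*
 * The per-cpu "stats" file reports ring buffer counters for one CPU as
 * "name: value" lines, e.g. (values purely illustrative):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts:  1234.000001
 *   now ts:  1234.567890
 *   dropped events: 0
 *   read events: 512
 */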
5353 static ssize_t
5354 tracing_stats_read(struct file *filp, char __user *ubuf,
5355                    size_t count, loff_t *ppos)
5356 {
5357         struct inode *inode = file_inode(filp);
5358         struct trace_array *tr = inode->i_private;
5359         struct trace_buffer *trace_buf = &tr->trace_buffer;
5360         int cpu = tracing_get_cpu(inode);
5361         struct trace_seq *s;
5362         unsigned long cnt;
5363         unsigned long long t;
5364         unsigned long usec_rem;
5365
5366         s = kmalloc(sizeof(*s), GFP_KERNEL);
5367         if (!s)
5368                 return -ENOMEM;
5369
5370         trace_seq_init(s);
5371
5372         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5373         trace_seq_printf(s, "entries: %ld\n", cnt);
5374
5375         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5376         trace_seq_printf(s, "overrun: %ld\n", cnt);
5377
5378         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5379         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5380
5381         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5382         trace_seq_printf(s, "bytes: %ld\n", cnt);
5383
5384         if (trace_clocks[tr->clock_id].in_ns) {
5385                 /* local or global for trace_clock */
5386                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5387                 usec_rem = do_div(t, USEC_PER_SEC);
5388                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5389                                                                 t, usec_rem);
5390
5391                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5392                 usec_rem = do_div(t, USEC_PER_SEC);
5393                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5394         } else {
5395                 /* counter or tsc mode for trace_clock */
5396                 trace_seq_printf(s, "oldest event ts: %llu\n",
5397                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5398
5399                 trace_seq_printf(s, "now ts: %llu\n",
5400                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5401         }
5402
5403         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5404         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5405
5406         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5407         trace_seq_printf(s, "read events: %ld\n", cnt);
5408
5409         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5410
5411         kfree(s);
5412
5413         return count;
5414 }
5415
5416 static const struct file_operations tracing_stats_fops = {
5417         .open           = tracing_open_generic_tr,
5418         .read           = tracing_stats_read,
5419         .llseek         = generic_file_llseek,
5420         .release        = tracing_release_generic_tr,
5421 };
5422
5423 #ifdef CONFIG_DYNAMIC_FTRACE
5424
5425 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5426 {
5427         return 0;
5428 }
5429
5430 static ssize_t
5431 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5432                   size_t cnt, loff_t *ppos)
5433 {
5434         static char ftrace_dyn_info_buffer[1024];
5435         static DEFINE_MUTEX(dyn_info_mutex);
5436         unsigned long *p = filp->private_data;
5437         char *buf = ftrace_dyn_info_buffer;
5438         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5439         int r;
5440
5441         mutex_lock(&dyn_info_mutex);
5442         r = sprintf(buf, "%ld ", *p);
5443
5444         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5445         buf[r++] = '\n';
5446
5447         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5448
5449         mutex_unlock(&dyn_info_mutex);
5450
5451         return r;
5452 }
5453
5454 static const struct file_operations tracing_dyn_info_fops = {
5455         .open           = tracing_open_generic,
5456         .read           = tracing_read_dyn_info,
5457         .llseek         = generic_file_llseek,
5458 };
5459 #endif /* CONFIG_DYNAMIC_FTRACE */
5460
5461 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5462 static void
5463 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5464 {
5465         tracing_snapshot();
5466 }
5467
5468 static void
5469 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5470 {
5471         unsigned long *count = (unsigned long *)data;
5472
5473         if (!*count)
5474                 return;
5475
5476         if (*count != -1)
5477                 (*count)--;
5478
5479         tracing_snapshot();
5480 }
5481
5482 static int
5483 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5484                       struct ftrace_probe_ops *ops, void *data)
5485 {
5486         long count = (long)data;
5487
5488         seq_printf(m, "%ps:", (void *)ip);
5489
5490         seq_puts(m, "snapshot");
5491
5492         if (count == -1)
5493                 seq_puts(m, ":unlimited\n");
5494         else
5495                 seq_printf(m, ":count=%ld\n", count);
5496
5497         return 0;
5498 }
5499
5500 static struct ftrace_probe_ops snapshot_probe_ops = {
5501         .func                   = ftrace_snapshot,
5502         .print                  = ftrace_snapshot_print,
5503 };
5504
5505 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5506         .func                   = ftrace_count_snapshot,
5507         .print                  = ftrace_snapshot_print,
5508 };
5509
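/*
 * This implements the "snapshot" command of set_ftrace_filter.  An
 * illustrative use (the function name is only an example):
 *
 *   # echo 'do_fork:snapshot' > set_ftrace_filter     (snapshot on every hit)
 *   # echo 'do_fork:snapshot:5' > set_ftrace_filter   (only the first 5 hits)
 *
 * Prefixing the glob with '!' removes the probe again.  The optional
 * count rides in the probe's data pointer, as noted in the callback
 * below.
 */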
5510 static int
5511 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5512                                char *glob, char *cmd, char *param, int enable)
5513 {
5514         struct ftrace_probe_ops *ops;
5515         void *count = (void *)-1;
5516         char *number;
5517         int ret;
5518
5519         /* hash funcs only work with set_ftrace_filter */
5520         if (!enable)
5521                 return -EINVAL;
5522
5523         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
5524
5525         if (glob[0] == '!') {
5526                 unregister_ftrace_function_probe_func(glob+1, ops);
5527                 return 0;
5528         }
5529
5530         if (!param)
5531                 goto out_reg;
5532
5533         number = strsep(&param, ":");
5534
5535         if (!strlen(number))
5536                 goto out_reg;
5537
5538         /*
5539          * We use the callback data field (which is a pointer)
5540          * as our counter.
5541          */
5542         ret = kstrtoul(number, 0, (unsigned long *)&count);
5543         if (ret)
5544                 return ret;
5545
5546  out_reg:
5547         ret = register_ftrace_function_probe(glob, ops, count);
5548
5549         if (ret >= 0)
5550                 alloc_snapshot(&global_trace);
5551
5552         return ret < 0 ? ret : 0;
5553 }
5554
5555 static struct ftrace_func_command ftrace_snapshot_cmd = {
5556         .name                   = "snapshot",
5557         .func                   = ftrace_trace_snapshot_callback,
5558 };
5559
5560 static __init int register_snapshot_cmd(void)
5561 {
5562         return register_ftrace_command(&ftrace_snapshot_cmd);
5563 }
5564 #else
5565 static inline __init int register_snapshot_cmd(void) { return 0; }
5566 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5567
5568 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5569 {
5570         if (tr->dir)
5571                 return tr->dir;
5572
5573         if (!debugfs_initialized())
5574                 return NULL;
5575
5576         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5577                 tr->dir = debugfs_create_dir("tracing", NULL);
5578
5579         if (!tr->dir)
5580                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5581
5582         return tr->dir;
5583 }
5584
5585 struct dentry *tracing_init_dentry(void)
5586 {
5587         return tracing_init_dentry_tr(&global_trace);
5588 }
5589
5590 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5591 {
5592         struct dentry *d_tracer;
5593
5594         if (tr->percpu_dir)
5595                 return tr->percpu_dir;
5596
5597         d_tracer = tracing_init_dentry_tr(tr);
5598         if (!d_tracer)
5599                 return NULL;
5600
5601         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5602
5603         WARN_ONCE(!tr->percpu_dir,
5604                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5605
5606         return tr->percpu_dir;
5607 }
5608
5609 static struct dentry *
5610 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5611                       void *data, long cpu, const struct file_operations *fops)
5612 {
5613         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5614
5615         if (ret) /* See tracing_get_cpu() */
5616                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5617         return ret;
5618 }
5619
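/*
 * Per-cpu debugfs layout: each CPU gets a per_cpu/cpuN directory holding
 * its own trace, trace_pipe, trace_pipe_raw, stats and buffer_size_kb
 * files, plus snapshot/snapshot_raw when CONFIG_TRACER_SNAPSHOT is set,
 * all created below.
 */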
5620 static void
5621 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5622 {
5623         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5624         struct dentry *d_cpu;
5625         char cpu_dir[30]; /* 30 characters should be more than enough */
5626
5627         if (!d_percpu)
5628                 return;
5629
5630         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5631         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5632         if (!d_cpu) {
5633                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5634                 return;
5635         }
5636
5637         /* per cpu trace_pipe */
5638         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5639                                 tr, cpu, &tracing_pipe_fops);
5640
5641         /* per cpu trace */
5642         trace_create_cpu_file("trace", 0644, d_cpu,
5643                                 tr, cpu, &tracing_fops);
5644
5645         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5646                                 tr, cpu, &tracing_buffers_fops);
5647
5648         trace_create_cpu_file("stats", 0444, d_cpu,
5649                                 tr, cpu, &tracing_stats_fops);
5650
5651         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5652                                 tr, cpu, &tracing_entries_fops);
5653
5654 #ifdef CONFIG_TRACER_SNAPSHOT
5655         trace_create_cpu_file("snapshot", 0644, d_cpu,
5656                                 tr, cpu, &snapshot_fops);
5657
5658         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5659                                 tr, cpu, &snapshot_raw_fops);
5660 #endif
5661 }
5662
5663 #ifdef CONFIG_FTRACE_SELFTEST
5664 /* Let selftest have access to static functions in this file */
5665 #include "trace_selftest.c"
5666 #endif
5667
5668 struct trace_option_dentry {
5669         struct tracer_opt               *opt;
5670         struct tracer_flags             *flags;
5671         struct trace_array              *tr;
5672         struct dentry                   *entry;
5673 };
5674
5675 static ssize_t
5676 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5677                         loff_t *ppos)
5678 {
5679         struct trace_option_dentry *topt = filp->private_data;
5680         char *buf;
5681
5682         if (topt->flags->val & topt->opt->bit)
5683                 buf = "1\n";
5684         else
5685                 buf = "0\n";
5686
5687         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5688 }
5689
5690 static ssize_t
5691 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5692                          loff_t *ppos)
5693 {
5694         struct trace_option_dentry *topt = filp->private_data;
5695         unsigned long val;
5696         int ret;
5697
5698         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5699         if (ret)
5700                 return ret;
5701
5702         if (val != 0 && val != 1)
5703                 return -EINVAL;
5704
5705         if (!!(topt->flags->val & topt->opt->bit) != val) {
5706                 mutex_lock(&trace_types_lock);
5707                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5708                                           topt->opt, !val);
5709                 mutex_unlock(&trace_types_lock);
5710                 if (ret)
5711                         return ret;
5712         }
5713
5714         *ppos += cnt;
5715
5716         return cnt;
5717 }
5718
5719
5720 static const struct file_operations trace_options_fops = {
5721         .open = tracing_open_generic,
5722         .read = trace_options_read,
5723         .write = trace_options_write,
5724         .llseek = generic_file_llseek,
5725 };
5726
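/*
 * The core trace options are exposed as individual files under the
 * "options" directory created in create_trace_options_dir() below.  Each
 * file reads back "0" or "1" and accepts the same values to clear or set
 * the corresponding bit in trace_flags.
 */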
5727 static ssize_t
5728 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5729                         loff_t *ppos)
5730 {
5731         long index = (long)filp->private_data;
5732         char *buf;
5733
5734         if (trace_flags & (1 << index))
5735                 buf = "1\n";
5736         else
5737                 buf = "0\n";
5738
5739         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5740 }
5741
5742 static ssize_t
5743 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5744                          loff_t *ppos)
5745 {
5746         struct trace_array *tr = &global_trace;
5747         long index = (long)filp->private_data;
5748         unsigned long val;
5749         int ret;
5750
5751         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5752         if (ret)
5753                 return ret;
5754
5755         if (val != 0 && val != 1)
5756                 return -EINVAL;
5757
5758         mutex_lock(&trace_types_lock);
5759         ret = set_tracer_flag(tr, 1 << index, val);
5760         mutex_unlock(&trace_types_lock);
5761
5762         if (ret < 0)
5763                 return ret;
5764
5765         *ppos += cnt;
5766
5767         return cnt;
5768 }
5769
5770 static const struct file_operations trace_options_core_fops = {
5771         .open = tracing_open_generic,
5772         .read = trace_options_core_read,
5773         .write = trace_options_core_write,
5774         .llseek = generic_file_llseek,
5775 };
5776
5777 struct dentry *trace_create_file(const char *name,
5778                                  umode_t mode,
5779                                  struct dentry *parent,
5780                                  void *data,
5781                                  const struct file_operations *fops)
5782 {
5783         struct dentry *ret;
5784
5785         ret = debugfs_create_file(name, mode, parent, data, fops);
5786         if (!ret)
5787                 pr_warning("Could not create debugfs '%s' entry\n", name);
5788
5789         return ret;
5790 }
5791
5792
5793 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5794 {
5795         struct dentry *d_tracer;
5796
5797         if (tr->options)
5798                 return tr->options;
5799
5800         d_tracer = tracing_init_dentry_tr(tr);
5801         if (!d_tracer)
5802                 return NULL;
5803
5804         tr->options = debugfs_create_dir("options", d_tracer);
5805         if (!tr->options) {
5806                 pr_warning("Could not create debugfs directory 'options'\n");
5807                 return NULL;
5808         }
5809
5810         return tr->options;
5811 }
5812
5813 static void
5814 create_trace_option_file(struct trace_array *tr,
5815                          struct trace_option_dentry *topt,
5816                          struct tracer_flags *flags,
5817                          struct tracer_opt *opt)
5818 {
5819         struct dentry *t_options;
5820
5821         t_options = trace_options_init_dentry(tr);
5822         if (!t_options)
5823                 return;
5824
5825         topt->flags = flags;
5826         topt->opt = opt;
5827         topt->tr = tr;
5828
5829         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5830                                     &trace_options_fops);
5831
5832 }
5833
5834 static struct trace_option_dentry *
5835 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5836 {
5837         struct trace_option_dentry *topts;
5838         struct tracer_flags *flags;
5839         struct tracer_opt *opts;
5840         int cnt;
5841
5842         if (!tracer)
5843                 return NULL;
5844
5845         flags = tracer->flags;
5846
5847         if (!flags || !flags->opts)
5848                 return NULL;
5849
5850         opts = flags->opts;
5851
5852         for (cnt = 0; opts[cnt].name; cnt++)
5853                 ;
5854
5855         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5856         if (!topts)
5857                 return NULL;
5858
5859         for (cnt = 0; opts[cnt].name; cnt++)
5860                 create_trace_option_file(tr, &topts[cnt], flags,
5861                                          &opts[cnt]);
5862
5863         return topts;
5864 }
5865
5866 static void
5867 destroy_trace_option_files(struct trace_option_dentry *topts)
5868 {
5869         int cnt;
5870
5871         if (!topts)
5872                 return;
5873
5874         for (cnt = 0; topts[cnt].opt; cnt++) {
5875                 if (topts[cnt].entry)
5876                         debugfs_remove(topts[cnt].entry);
5877         }
5878
5879         kfree(topts);
5880 }
5881
5882 static struct dentry *
5883 create_trace_option_core_file(struct trace_array *tr,
5884                               const char *option, long index)
5885 {
5886         struct dentry *t_options;
5887
5888         t_options = trace_options_init_dentry(tr);
5889         if (!t_options)
5890                 return NULL;
5891
5892         return trace_create_file(option, 0644, t_options, (void *)index,
5893                                     &trace_options_core_fops);
5894 }
5895
5896 static __init void create_trace_options_dir(struct trace_array *tr)
5897 {
5898         struct dentry *t_options;
5899         int i;
5900
5901         t_options = trace_options_init_dentry(tr);
5902         if (!t_options)
5903                 return;
5904
5905         for (i = 0; trace_options[i]; i++)
5906                 create_trace_option_core_file(tr, trace_options[i], i);
5907 }
5908
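/*
 * rb_simple_read()/rb_simple_write() back the "tracing_on" file.  Reading
 * reports whether the ring buffer is on; writing "0" turns it off and
 * calls the tracer's stop hook, writing "1" turns it back on and calls
 * the start hook, e.g.:
 *
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on
 */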
5909 static ssize_t
5910 rb_simple_read(struct file *filp, char __user *ubuf,
5911                size_t cnt, loff_t *ppos)
5912 {
5913         struct trace_array *tr = filp->private_data;
5914         char buf[64];
5915         int r;
5916
5917         r = tracer_tracing_is_on(tr);
5918         r = sprintf(buf, "%d\n", r);
5919
5920         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5921 }
5922
5923 static ssize_t
5924 rb_simple_write(struct file *filp, const char __user *ubuf,
5925                 size_t cnt, loff_t *ppos)
5926 {
5927         struct trace_array *tr = filp->private_data;
5928         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5929         unsigned long val;
5930         int ret;
5931
5932         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5933         if (ret)
5934                 return ret;
5935
5936         if (buffer) {
5937                 mutex_lock(&trace_types_lock);
5938                 if (val) {
5939                         tracer_tracing_on(tr);
5940                         if (tr->current_trace->start)
5941                                 tr->current_trace->start(tr);
5942                 } else {
5943                         tracer_tracing_off(tr);
5944                         if (tr->current_trace->stop)
5945                                 tr->current_trace->stop(tr);
5946                 }
5947                 mutex_unlock(&trace_types_lock);
5948         }
5949
5950         (*ppos)++;
5951
5952         return cnt;
5953 }
5954
5955 static const struct file_operations rb_simple_fops = {
5956         .open           = tracing_open_generic_tr,
5957         .read           = rb_simple_read,
5958         .write          = rb_simple_write,
5959         .release        = tracing_release_generic_tr,
5960         .llseek         = default_llseek,
5961 };
5962
5963 struct dentry *trace_instance_dir;
5964
5965 static void
5966 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5967
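/*
 * Buffer allocation helpers.  Every trace_array has a main trace_buffer;
 * with CONFIG_TRACER_MAX_TRACE a max_buffer is allocated as well, sized
 * to a single entry unless a snapshot was requested on the kernel command
 * line, so it costs next to nothing until it is actually used.
 */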
5968 static int
5969 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5970 {
5971         enum ring_buffer_flags rb_flags;
5972
5973         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5974
5975         buf->tr = tr;
5976
5977         buf->buffer = ring_buffer_alloc(size, rb_flags);
5978         if (!buf->buffer)
5979                 return -ENOMEM;
5980
5981         buf->data = alloc_percpu(struct trace_array_cpu);
5982         if (!buf->data) {
5983                 ring_buffer_free(buf->buffer);
5984                 return -ENOMEM;
5985         }
5986
5987         /* Allocate the first page for all buffers */
5988         set_buffer_entries(&tr->trace_buffer,
5989                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5990
5991         return 0;
5992 }
5993
5994 static int allocate_trace_buffers(struct trace_array *tr, int size)
5995 {
5996         int ret;
5997
5998         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
5999         if (ret)
6000                 return ret;
6001
6002 #ifdef CONFIG_TRACER_MAX_TRACE
6003         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6004                                     allocate_snapshot ? size : 1);
6005         if (WARN_ON(ret)) {
6006                 ring_buffer_free(tr->trace_buffer.buffer);
6007                 free_percpu(tr->trace_buffer.data);
6008                 return -ENOMEM;
6009         }
6010         tr->allocated_snapshot = allocate_snapshot;
6011
6012         /*
6013          * Only the top level trace array gets its snapshot allocated
6014          * from the kernel command line.
6015          */
6016         allocate_snapshot = false;
6017 #endif
6018         return 0;
6019 }
6020
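/*
 * Trace instances: every directory made under "instances" gets its own
 * trace_array with its own ring buffer and event files.  A minimal
 * sketch, assuming the usual debugfs path:
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # cat /sys/kernel/debug/tracing/instances/foo/trace
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance is still referenced, as
 * checked in instance_delete() below.
 */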
6021 static int new_instance_create(const char *name)
6022 {
6023         struct trace_array *tr;
6024         int ret;
6025
6026         mutex_lock(&trace_types_lock);
6027
6028         ret = -EEXIST;
6029         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6030                 if (tr->name && strcmp(tr->name, name) == 0)
6031                         goto out_unlock;
6032         }
6033
6034         ret = -ENOMEM;
6035         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6036         if (!tr)
6037                 goto out_unlock;
6038
6039         tr->name = kstrdup(name, GFP_KERNEL);
6040         if (!tr->name)
6041                 goto out_free_tr;
6042
6043         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6044                 goto out_free_tr;
6045
6046         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6047
6048         raw_spin_lock_init(&tr->start_lock);
6049
6050         tr->current_trace = &nop_trace;
6051
6052         INIT_LIST_HEAD(&tr->systems);
6053         INIT_LIST_HEAD(&tr->events);
6054
6055         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6056                 goto out_free_tr;
6057
6058         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6059         if (!tr->dir)
6060                 goto out_free_tr;
6061
6062         ret = event_trace_add_tracer(tr->dir, tr);
6063         if (ret) {
6064                 debugfs_remove_recursive(tr->dir);
6065                 goto out_free_tr;
6066         }
6067
6068         init_tracer_debugfs(tr, tr->dir);
6069
6070         list_add(&tr->list, &ftrace_trace_arrays);
6071
6072         mutex_unlock(&trace_types_lock);
6073
6074         return 0;
6075
6076  out_free_tr:
6077         if (tr->trace_buffer.buffer)
6078                 ring_buffer_free(tr->trace_buffer.buffer);
6079         free_cpumask_var(tr->tracing_cpumask);
6080         kfree(tr->name);
6081         kfree(tr);
6082
6083  out_unlock:
6084         mutex_unlock(&trace_types_lock);
6085
6086         return ret;
6087
6088 }
6089
6090 static int instance_delete(const char *name)
6091 {
6092         struct trace_array *tr;
6093         int found = 0;
6094         int ret;
6095
6096         mutex_lock(&trace_types_lock);
6097
6098         ret = -ENODEV;
6099         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6100                 if (tr->name && strcmp(tr->name, name) == 0) {
6101                         found = 1;
6102                         break;
6103                 }
6104         }
6105         if (!found)
6106                 goto out_unlock;
6107
6108         ret = -EBUSY;
6109         if (tr->ref)
6110                 goto out_unlock;
6111
6112         list_del(&tr->list);
6113
6114         event_trace_del_tracer(tr);
6115         debugfs_remove_recursive(tr->dir);
6116         free_percpu(tr->trace_buffer.data);
6117         ring_buffer_free(tr->trace_buffer.buffer);
6118
6119         kfree(tr->name);
6120         kfree(tr);
6121
6122         ret = 0;
6123
6124  out_unlock:
6125         mutex_unlock(&trace_types_lock);
6126
6127         return ret;
6128 }
6129
6130 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6131 {
6132         struct dentry *parent;
6133         int ret;
6134
6135         /* Paranoid: Make sure the parent is the "instances" directory */
6136         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6137         if (WARN_ON_ONCE(parent != trace_instance_dir))
6138                 return -ENOENT;
6139
6140         /*
6141          * The inode mutex is locked, but debugfs_create_dir() will also
6142          * take the mutex. As the instances directory can not be destroyed
6143          * or changed in any other way, it is safe to unlock it, and
6144          * let the dentry try. If two users try to make the same dir at
6145          * the same time, then the new_instance_create() will determine the
6146          * winner.
6147          */
6148         mutex_unlock(&inode->i_mutex);
6149
6150         ret = new_instance_create(dentry->d_iname);
6151
6152         mutex_lock(&inode->i_mutex);
6153
6154         return ret;
6155 }
6156
6157 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6158 {
6159         struct dentry *parent;
6160         int ret;
6161
6162         /* Paranoid: Make sure the parent is the "instances" directory */
6163         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6164         if (WARN_ON_ONCE(parent != trace_instance_dir))
6165                 return -ENOENT;
6166
6167         /* The caller did a dget() on dentry */
6168         mutex_unlock(&dentry->d_inode->i_mutex);
6169
6170         /*
6171          * The inode mutex is locked, but debugfs_remove_recursive() will
6172          * also take the mutex. As the instances directory cannot be
6173          * destroyed or changed in any other way, it is safe to unlock it,
6174          * and let the dentry try. If two users try to remove the same dir
6175          * at the same time, then instance_delete() will determine the
6176          * winner.
6177          */
6178         mutex_unlock(&inode->i_mutex);
6179
6180         ret = instance_delete(dentry->d_iname);
6181
6182         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6183         mutex_lock(&dentry->d_inode->i_mutex);
6184
6185         return ret;
6186 }
6187
6188 static const struct inode_operations instance_dir_inode_operations = {
6189         .lookup         = simple_lookup,
6190         .mkdir          = instance_mkdir,
6191         .rmdir          = instance_rmdir,
6192 };
6193
6194 static __init void create_trace_instances(struct dentry *d_tracer)
6195 {
6196         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6197         if (WARN_ON(!trace_instance_dir))
6198                 return;
6199
6200         /* Hijack the dir inode operations, to allow mkdir */
6201         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6202 }
6203
6204 static void
6205 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6206 {
6207         int cpu;
6208
6209         trace_create_file("tracing_cpumask", 0644, d_tracer,
6210                           tr, &tracing_cpumask_fops);
6211
6212         trace_create_file("trace_options", 0644, d_tracer,
6213                           tr, &tracing_iter_fops);
6214
6215         trace_create_file("trace", 0644, d_tracer,
6216                           tr, &tracing_fops);
6217
6218         trace_create_file("trace_pipe", 0444, d_tracer,
6219                           tr, &tracing_pipe_fops);
6220
6221         trace_create_file("buffer_size_kb", 0644, d_tracer,
6222                           tr, &tracing_entries_fops);
6223
6224         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6225                           tr, &tracing_total_entries_fops);
6226
6227         trace_create_file("free_buffer", 0200, d_tracer,
6228                           tr, &tracing_free_buffer_fops);
6229
6230         trace_create_file("trace_marker", 0220, d_tracer,
6231                           tr, &tracing_mark_fops);
6232
6233         trace_create_file("trace_clock", 0644, d_tracer, tr,
6234                           &trace_clock_fops);
6235
6236         trace_create_file("tracing_on", 0644, d_tracer,
6237                           tr, &rb_simple_fops);
6238
6239 #ifdef CONFIG_TRACER_SNAPSHOT
6240         trace_create_file("snapshot", 0644, d_tracer,
6241                           tr, &snapshot_fops);
6242 #endif
6243
6244         for_each_tracing_cpu(cpu)
6245                 tracing_init_debugfs_percpu(tr, cpu);
6246
6247 }
6248
6249 static __init int tracer_init_debugfs(void)
6250 {
6251         struct dentry *d_tracer;
6252
6253         trace_access_lock_init();
6254
6255         d_tracer = tracing_init_dentry();
6256         if (!d_tracer)
6257                 return 0;
6258
6259         init_tracer_debugfs(&global_trace, d_tracer);
6260
6261         trace_create_file("available_tracers", 0444, d_tracer,
6262                         &global_trace, &show_traces_fops);
6263
6264         trace_create_file("current_tracer", 0644, d_tracer,
6265                         &global_trace, &set_tracer_fops);
6266
6267 #ifdef CONFIG_TRACER_MAX_TRACE
6268         trace_create_file("tracing_max_latency", 0644, d_tracer,
6269                         &tracing_max_latency, &tracing_max_lat_fops);
6270 #endif
6271
6272         trace_create_file("tracing_thresh", 0644, d_tracer,
6273                         &tracing_thresh, &tracing_max_lat_fops);
6274
6275         trace_create_file("README", 0444, d_tracer,
6276                         NULL, &tracing_readme_fops);
6277
6278         trace_create_file("saved_cmdlines", 0444, d_tracer,
6279                         NULL, &tracing_saved_cmdlines_fops);
6280
6281 #ifdef CONFIG_DYNAMIC_FTRACE
6282         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6283                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6284 #endif
6285
6286         create_trace_instances(d_tracer);
6287
6288         create_trace_options_dir(&global_trace);
6289
6290         return 0;
6291 }
6292
6293 static int trace_panic_handler(struct notifier_block *this,
6294                                unsigned long event, void *unused)
6295 {
6296         if (ftrace_dump_on_oops)
6297                 ftrace_dump(ftrace_dump_on_oops);
6298         return NOTIFY_OK;
6299 }
6300
6301 static struct notifier_block trace_panic_notifier = {
6302         .notifier_call  = trace_panic_handler,
6303         .next           = NULL,
6304         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6305 };
6306
6307 static int trace_die_handler(struct notifier_block *self,
6308                              unsigned long val,
6309                              void *data)
6310 {
6311         switch (val) {
6312         case DIE_OOPS:
6313                 if (ftrace_dump_on_oops)
6314                         ftrace_dump(ftrace_dump_on_oops);
6315                 break;
6316         default:
6317                 break;
6318         }
6319         return NOTIFY_OK;
6320 }
6321
6322 static struct notifier_block trace_die_notifier = {
6323         .notifier_call = trace_die_handler,
6324         .priority = 200
6325 };
6326
6327 /*
6328  * printk is limited to a max of 1024 bytes; we really don't need it
6329  * that big. Nothing should be printing 1000 characters anyway.
6330  */
6331 #define TRACE_MAX_PRINT         1000
6332
6333 /*
6334  * Define here KERN_TRACE so that we have one place to modify
6335  * it if we decide to change what log level the ftrace dump
6336  * should be at.
6337  */
6338 #define KERN_TRACE              KERN_EMERG
6339
6340 void
6341 trace_printk_seq(struct trace_seq *s)
6342 {
6343         /* Probably should print a warning here. */
6344         if (s->len >= TRACE_MAX_PRINT)
6345                 s->len = TRACE_MAX_PRINT;
6346
6347         /* Should be NUL-terminated, but we are paranoid. */
6348         s->buffer[s->len] = 0;
6349
6350         printk(KERN_TRACE "%s", s->buffer);
6351
6352         trace_seq_init(s);
6353 }
6354
6355 void trace_init_global_iter(struct trace_iterator *iter)
6356 {
6357         iter->tr = &global_trace;
6358         iter->trace = iter->tr->current_trace;
6359         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6360         iter->trace_buffer = &global_trace.trace_buffer;
6361
6362         if (iter->trace && iter->trace->open)
6363                 iter->trace->open(iter);
6364
6365         /* Annotate start of buffers if we had overruns */
6366         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6367                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6368
6369         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6370         if (trace_clocks[iter->tr->clock_id].in_ns)
6371                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6372 }
6373
6374 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6375 {
6376         /* use static because iter can be a bit big for the stack */
6377         static struct trace_iterator iter;
6378         static atomic_t dump_running;
6379         unsigned int old_userobj;
6380         unsigned long flags;
6381         int cnt = 0, cpu;
6382
6383         /* Only allow one dump user at a time. */
6384         if (atomic_inc_return(&dump_running) != 1) {
6385                 atomic_dec(&dump_running);
6386                 return;
6387         }
6388
6389         /*
6390          * Always turn off tracing when we dump.
6391          * We don't need to show trace output of what happens
6392          * between multiple crashes.
6393          *
6394          * If the user does a sysrq-z, then they can re-enable
6395          * tracing with echo 1 > tracing_on.
6396          */
6397         tracing_off();
6398
6399         local_irq_save(flags);
6400
6401         /* Simulate the iterator */
6402         trace_init_global_iter(&iter);
6403
6404         for_each_tracing_cpu(cpu) {
6405                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6406         }
6407
6408         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6409
6410         /* don't look at user memory in panic mode */
6411         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6412
6413         switch (oops_dump_mode) {
6414         case DUMP_ALL:
6415                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6416                 break;
6417         case DUMP_ORIG:
6418                 iter.cpu_file = raw_smp_processor_id();
6419                 break;
6420         case DUMP_NONE:
6421                 goto out_enable;
6422         default:
6423                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6424                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6425         }
6426
6427         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6428
6429         /* Did function tracer already get disabled? */
6430         if (ftrace_is_dead()) {
6431                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6432                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6433         }
6434
6435         /*
6436          * We need to stop all tracing on all CPUs to read
6437          * the next buffer. This is a bit expensive, but is
6438          * not done often. We fill all that we can read,
6439          * and then release the locks again.
6440          */
6441
6442         while (!trace_empty(&iter)) {
6443
6444                 if (!cnt)
6445                         printk(KERN_TRACE "---------------------------------\n");
6446
6447                 cnt++;
6448
6449                 /* reset all but tr, trace, and overruns */
6450                 memset(&iter.seq, 0,
6451                        sizeof(struct trace_iterator) -
6452                        offsetof(struct trace_iterator, seq));
6453                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6454                 iter.pos = -1;
6455
6456                 if (trace_find_next_entry_inc(&iter) != NULL) {
6457                         int ret;
6458
6459                         ret = print_trace_line(&iter);
6460                         if (ret != TRACE_TYPE_NO_CONSUME)
6461                                 trace_consume(&iter);
6462                 }
6463                 touch_nmi_watchdog();
6464
6465                 trace_printk_seq(&iter.seq);
6466         }
6467
6468         if (!cnt)
6469                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6470         else
6471                 printk(KERN_TRACE "---------------------------------\n");
6472
6473  out_enable:
6474         trace_flags |= old_userobj;
6475
6476         for_each_tracing_cpu(cpu) {
6477                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6478         }
6479         atomic_dec(&dump_running);
6480         local_irq_restore(flags);
6481 }
6482 EXPORT_SYMBOL_GPL(ftrace_dump);
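/*
 * Since ftrace_dump() is exported, a module can call it from its own
 * failure path. A minimal sketch (my_driver_fatal_error() is hypothetical,
 * not part of this file):
 *
 *      static void my_driver_fatal_error(void)
 *      {
 *              pr_err("my_driver: fatal state, dumping ftrace buffer\n");
 *              ftrace_dump(DUMP_ALL);
 *      }
 *
 * DUMP_ORIG limits the dump to the CPU that hit the error, which keeps
 * the console output shorter on large machines.
 */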
6483
6484 __init static int tracer_alloc_buffers(void)
6485 {
6486         int ring_buf_size;
6487         int ret = -ENOMEM;
6488
6489
6490         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6491                 goto out;
6492
6493         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6494                 goto out_free_buffer_mask;
6495
6496         /* Only allocate trace_printk buffers if a trace_printk exists */
6497         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6498                 /* Must be called before global_trace.buffer is allocated */
6499                 trace_printk_init_buffers();
6500
6501         /* To save memory, keep the ring buffer size at its minimum */
6502         if (ring_buffer_expanded)
6503                 ring_buf_size = trace_buf_size;
6504         else
6505                 ring_buf_size = 1;
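        /*
         * The buffer is expanded later, either when a tracer or event is
         * enabled or when a size is written to buffer_size_kb in the
         * tracing directory, e.g. (assuming debugfs is mounted):
         *
         *      echo 1408 > /sys/kernel/debug/tracing/buffer_size_kb
         */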
6506
6507         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6508         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6509
6510         raw_spin_lock_init(&global_trace.start_lock);
6511
6512         /* Used for event triggers */
6513         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6514         if (!temp_buffer)
6515                 goto out_free_cpumask;
6516
6517         /* TODO: make the number of buffers hot-pluggable with CPUs */
6518         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6519                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6520                 WARN_ON(1);
6521                 goto out_free_temp_buffer;
6522         }
6523
6524         if (global_trace.buffer_disabled)
6525                 tracing_off();
6526
6527         trace_init_cmdlines();
6528
6529         /*
6530          * register_tracer() might reference current_trace, so it
6531          * needs to be set before we register anything. This is
6532          * just a bootstrap of current_trace anyway.
6533          */
6534         global_trace.current_trace = &nop_trace;
6535
6536         register_tracer(&nop_trace);
6537
6538         /* All seems OK, enable tracing */
6539         tracing_disabled = 0;
6540
6541         atomic_notifier_chain_register(&panic_notifier_list,
6542                                        &trace_panic_notifier);
6543
6544         register_die_notifier(&trace_die_notifier);
6545
6546         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6547
6548         INIT_LIST_HEAD(&global_trace.systems);
6549         INIT_LIST_HEAD(&global_trace.events);
6550         list_add(&global_trace.list, &ftrace_trace_arrays);
6551
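        /*
         * Apply any options passed on the kernel command line. For
         * example, booting with "trace_options=sym-addr,stacktrace"
         * leaves trace_boot_options pointing at "sym-addr,stacktrace",
         * and the loop below applies each comma-separated option.
         */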
6552         while (trace_boot_options) {
6553                 char *option;
6554
6555                 option = strsep(&trace_boot_options, ",");
6556                 trace_set_options(&global_trace, option);
6557         }
6558
6559         register_snapshot_cmd();
6560
6561         return 0;
6562
6563 out_free_temp_buffer:
6564         ring_buffer_free(temp_buffer);
6565 out_free_cpumask:
6566         free_percpu(global_trace.trace_buffer.data);
6567 #ifdef CONFIG_TRACER_MAX_TRACE
6568         free_percpu(global_trace.max_buffer.data);
6569 #endif
6570         free_cpumask_var(global_trace.tracing_cpumask);
6571 out_free_buffer_mask:
6572         free_cpumask_var(tracing_buffer_mask);
6573 out:
6574         return ret;
6575 }
6576
6577 __init static int clear_boot_tracer(void)
6578 {
6579         /*
6580          * The buffer holding the default boot-up tracer name is in
6581          * an init section. This function is called as a late
6582          * initcall: if the boot tracer was never found, clear the
6583          * pointer out, to prevent a later registration from
6584          * accessing the buffer that is about to be freed.
6585          */
6586         if (!default_bootup_tracer)
6587                 return 0;
6588
6589         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6590                default_bootup_tracer);
6591         default_bootup_tracer = NULL;
6592
6593         return 0;
6594 }
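/*
 * Example of the intended flow (a sketch): booting with "ftrace=function"
 * points default_bootup_tracer at the name "function", which lives in an
 * init-section buffer. If register_tracer() finds and starts that tracer
 * before this late initcall runs, it clears the pointer itself; otherwise
 * clear_boot_tracer() prints the warning above and clears it here, before
 * the init memory is freed.
 */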
6595
6596 early_initcall(tracer_alloc_buffers);
6597 fs_initcall(tracer_init_debugfs);
6598 late_initcall(clear_boot_tracer);