ring-buffer: Check if buffer exists before polling
[platform/adaptation/renesas_rcar/renesas_kernel.git] kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(),
57  * could occur at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but is set to zero if the initialization
91  * of the tracer is successful, and that is the only place that sets
92  * it back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptor of the pages in memory is used to hold
196  * the linked list by linking the lru item in the page descriptor
197  * to each of the pages in the buffer for that CPU.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show if global_trace has been disabled
287  *
288  * Shows if the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" to be used in fast paths such as for
290  * the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on() which is a little
292  * slower, but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to a low value of 16384:
311  * if a dump on oops happens, it is much appreciated not to
312  * have to wait for all that output. In any case, this is
313  * configurable at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a link list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low level protection.
331  * The validity of the events (returned by ring_buffer_peek() etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow another process to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the event producer.
339  *   B) The page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
408
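/*
 * A minimal sketch (not part of the original file) of how a reader path
 * is expected to pair the primitives above around a per-cpu consume.
 * example_consume_one() is hypothetical and only illustrates the lock
 * ordering described in the comment block above.
 */
static inline void example_consume_one(struct trace_buffer *buf, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	/* Serialize against whole-buffer readers and other users of @cpu. */
	trace_access_lock(cpu);
	event = ring_buffer_consume(buf->buffer, cpu, &ts, NULL);
	if (event)
		pr_debug("consumed an event stamped at %llu\n",
			 (unsigned long long)ts);
	trace_access_unlock(cpu);
}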
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races where it gets disabled but we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         if (unlikely(tracing_selftest_running || tracing_disabled))
459                 return 0;
460
461         alloc = sizeof(*entry) + size + 2; /* possible \n added */
462
463         local_save_flags(irq_flags);
464         buffer = global_trace.trace_buffer.buffer;
465         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
466                                           irq_flags, preempt_count());
467         if (!event)
468                 return 0;
469
470         entry = ring_buffer_event_data(event);
471         entry->ip = ip;
472
473         memcpy(&entry->buf, str, size);
474
475         /* Add a newline if necessary */
476         if (entry->buf[size - 1] != '\n') {
477                 entry->buf[size] = '\n';
478                 entry->buf[size + 1] = '\0';
479         } else
480                 entry->buf[size] = '\0';
481
482         __buffer_unlock_commit(buffer, event);
483
484         return size;
485 }
486 EXPORT_SYMBOL_GPL(__trace_puts);
487
488 /**
489  * __trace_bputs - write the pointer to a constant string into trace buffer
490  * @ip:    The address of the caller
491  * @str:   The constant string to write to the buffer
492  */
493 int __trace_bputs(unsigned long ip, const char *str)
494 {
495         struct ring_buffer_event *event;
496         struct ring_buffer *buffer;
497         struct bputs_entry *entry;
498         unsigned long irq_flags;
499         int size = sizeof(struct bputs_entry);
500
501         if (unlikely(tracing_selftest_running || tracing_disabled))
502                 return 0;
503
504         local_save_flags(irq_flags);
505         buffer = global_trace.trace_buffer.buffer;
506         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
507                                           irq_flags, preempt_count());
508         if (!event)
509                 return 0;
510
511         entry = ring_buffer_event_data(event);
512         entry->ip                       = ip;
513         entry->str                      = str;
514
515         __buffer_unlock_commit(buffer, event);
516
517         return 1;
518 }
519 EXPORT_SYMBOL_GPL(__trace_bputs);
520
521 #ifdef CONFIG_TRACER_SNAPSHOT
522 /**
523  * tracing_snapshot - take a snapshot of the current buffer.
524  *
525  * This causes a swap between the snapshot buffer and the current live
526  * tracing buffer. You can use this to take snapshots of the live
527  * trace when some condition is triggered, but continue to trace.
528  *
529  * Note, make sure to allocate the snapshot with either
530  * a tracing_snapshot_alloc(), or by doing it manually
531  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
532  *
533  * If the snapshot buffer is not allocated, it will stop tracing,
534  * basically making a permanent snapshot.
535  */
536 void tracing_snapshot(void)
537 {
538         struct trace_array *tr = &global_trace;
539         struct tracer *tracer = tr->current_trace;
540         unsigned long flags;
541
542         if (in_nmi()) {
543                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
544                 internal_trace_puts("*** snapshot is being ignored        ***\n");
545                 return;
546         }
547
548         if (!tr->allocated_snapshot) {
549                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
550                 internal_trace_puts("*** stopping trace here!   ***\n");
551                 tracing_off();
552                 return;
553         }
554
555         /* Note, snapshot can not be used when the tracer uses it */
556         if (tracer->use_max_tr) {
557                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
558                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
559                 return;
560         }
561
562         local_irq_save(flags);
563         update_max_tr(tr, current, smp_processor_id());
564         local_irq_restore(flags);
565 }
566 EXPORT_SYMBOL_GPL(tracing_snapshot);
567
568 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
569                                         struct trace_buffer *size_buf, int cpu_id);
570 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
571
572 static int alloc_snapshot(struct trace_array *tr)
573 {
574         int ret;
575
576         if (!tr->allocated_snapshot) {
577
578                 /* allocate spare buffer */
579                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
580                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
581                 if (ret < 0)
582                         return ret;
583
584                 tr->allocated_snapshot = true;
585         }
586
587         return 0;
588 }
589
590 void free_snapshot(struct trace_array *tr)
591 {
592         /*
593  * We don't free the ring buffer; instead, we resize it because
594  * the max_tr ring buffer has some state (e.g. ring->clock) and
595  * we want to preserve it.
596          */
597         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
598         set_buffer_entries(&tr->max_buffer, 1);
599         tracing_reset_online_cpus(&tr->max_buffer);
600         tr->allocated_snapshot = false;
601 }
602
603 /**
604  * tracing_alloc_snapshot - allocate snapshot buffer.
605  *
606  * This only allocates the snapshot buffer if it isn't already
607  * allocated - it doesn't also take a snapshot.
608  *
609  * This is meant to be used in cases where the snapshot buffer needs
610  * to be set up for events that can't sleep but need to be able to
611  * trigger a snapshot.
612  */
613 int tracing_alloc_snapshot(void)
614 {
615         struct trace_array *tr = &global_trace;
616         int ret;
617
618         ret = alloc_snapshot(tr);
619         WARN_ON(ret < 0);
620
621         return ret;
622 }
623 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
624
625 /**
626  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
627  *
628  * This is similar to tracing_snapshot(), but it will allocate the
629  * snapshot buffer if it isn't already allocated. Use this only
630  * where it is safe to sleep, as the allocation may sleep.
631  *
632  * This causes a swap between the snapshot buffer and the current live
633  * tracing buffer. You can use this to take snapshots of the live
634  * trace when some condition is triggered, but continue to trace.
635  */
636 void tracing_snapshot_alloc(void)
637 {
638         int ret;
639
640         ret = tracing_alloc_snapshot();
641         if (ret < 0)
642                 return;
643
644         tracing_snapshot();
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
647 #else
648 void tracing_snapshot(void)
649 {
650         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
651 }
652 EXPORT_SYMBOL_GPL(tracing_snapshot);
653 int tracing_alloc_snapshot(void)
654 {
655         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
656         return -ENODEV;
657 }
658 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
659 void tracing_snapshot_alloc(void)
660 {
661         /* Give warning */
662         tracing_snapshot();
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
665 #endif /* CONFIG_TRACER_SNAPSHOT */
666
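/*
 * A minimal sketch (not from the original file) of the alloc-then-snapshot
 * pattern the kernel-doc above describes. example_snapshot_on_error() is
 * hypothetical; it only shows how a debugging site might use the API.
 */
static inline void example_snapshot_on_error(int err)
{
	/* May sleep while allocating, so only call this from task context. */
	if (tracing_alloc_snapshot() < 0)
		return;

	if (err)
		tracing_snapshot();	/* swap the live buffer into the snapshot */
}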
667 static void tracer_tracing_off(struct trace_array *tr)
668 {
669         if (tr->trace_buffer.buffer)
670                 ring_buffer_record_off(tr->trace_buffer.buffer);
671         /*
672          * This flag is looked at when buffers haven't been allocated
673          * yet, or by some tracers (like irqsoff), that just want to
674          * know if the ring buffer has been disabled, but it can handle
675          * races where it gets disabled but we still do a record.
676          * As the check is in the fast path of the tracers, it is more
677          * important to be fast than accurate.
678          */
679         tr->buffer_disabled = 1;
680         /* Make the flag seen by readers */
681         smp_wmb();
682 }
683
684 /**
685  * tracing_off - turn off tracing buffers
686  *
687  * This function stops the tracing buffers from recording data.
688  * It does not disable any overhead the tracers themselves may
689  * be causing. This function simply causes all recording to
690  * the ring buffers to fail.
691  */
692 void tracing_off(void)
693 {
694         tracer_tracing_off(&global_trace);
695 }
696 EXPORT_SYMBOL_GPL(tracing_off);
697
698 void disable_trace_on_warning(void)
699 {
700         if (__disable_trace_on_warning)
701                 tracing_off();
702 }
703
704 /**
705  * tracer_tracing_is_on - show the real state of the ring buffer
706  * @tr: the trace array whose ring buffer to check
707  *
708  * Shows the real state of the ring buffer: whether it is enabled or not.
709  */
710 static int tracer_tracing_is_on(struct trace_array *tr)
711 {
712         if (tr->trace_buffer.buffer)
713                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
714         return !tr->buffer_disabled;
715 }
716
717 /**
718  * tracing_is_on - show state of ring buffers enabled
719  */
720 int tracing_is_on(void)
721 {
722         return tracer_tracing_is_on(&global_trace);
723 }
724 EXPORT_SYMBOL_GPL(tracing_is_on);
725
726 static int __init set_buf_size(char *str)
727 {
728         unsigned long buf_size;
729
730         if (!str)
731                 return 0;
732         buf_size = memparse(str, &str);
733         /* nr_entries can not be zero */
734         if (buf_size == 0)
735                 return 0;
736         trace_buf_size = buf_size;
737         return 1;
738 }
739 __setup("trace_buf_size=", set_buf_size);
740
741 static int __init set_tracing_thresh(char *str)
742 {
743         unsigned long threshold;
744         int ret;
745
746         if (!str)
747                 return 0;
748         ret = kstrtoul(str, 0, &threshold);
749         if (ret < 0)
750                 return 0;
751         tracing_thresh = threshold * 1000;
752         return 1;
753 }
754 __setup("tracing_thresh=", set_tracing_thresh);
755
756 unsigned long nsecs_to_usecs(unsigned long nsecs)
757 {
758         return nsecs / 1000;
759 }
760
761 /* These must match the bit positions in trace_iterator_flags */
762 static const char *trace_options[] = {
763         "print-parent",
764         "sym-offset",
765         "sym-addr",
766         "verbose",
767         "raw",
768         "hex",
769         "bin",
770         "block",
771         "stacktrace",
772         "trace_printk",
773         "ftrace_preempt",
774         "branch",
775         "annotate",
776         "userstacktrace",
777         "sym-userobj",
778         "printk-msg-only",
779         "context-info",
780         "latency-format",
781         "sleep-time",
782         "graph-time",
783         "record-cmd",
784         "overwrite",
785         "disable_on_free",
786         "irq-info",
787         "markers",
788         "function-trace",
789         NULL
790 };
791
792 static struct {
793         u64 (*func)(void);
794         const char *name;
795         int in_ns;              /* is this clock in nanoseconds? */
796 } trace_clocks[] = {
797         { trace_clock_local,    "local",        1 },
798         { trace_clock_global,   "global",       1 },
799         { trace_clock_counter,  "counter",      0 },
800         { trace_clock_jiffies,  "uptime",       1 },
801         { trace_clock,          "perf",         1 },
802         ARCH_TRACE_CLOCKS
803 };
804
805 /*
806  * trace_parser_get_init - gets the buffer for trace parser
807  */
808 int trace_parser_get_init(struct trace_parser *parser, int size)
809 {
810         memset(parser, 0, sizeof(*parser));
811
812         parser->buffer = kmalloc(size, GFP_KERNEL);
813         if (!parser->buffer)
814                 return 1;
815
816         parser->size = size;
817         return 0;
818 }
819
820 /*
821  * trace_parser_put - frees the buffer for trace parser
822  */
823 void trace_parser_put(struct trace_parser *parser)
824 {
825         kfree(parser->buffer);
826 }
827
828 /*
829  * trace_get_user - reads the user input string separated by space
830  * (matched by isspace(ch))
831  *
832  * For each string found the 'struct trace_parser' is updated,
833  * and the function returns.
834  *
835  * Returns number of bytes read.
836  *
837  * See kernel/trace/trace.h for 'struct trace_parser' details.
838  */
839 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
840         size_t cnt, loff_t *ppos)
841 {
842         char ch;
843         size_t read = 0;
844         ssize_t ret;
845
846         if (!*ppos)
847                 trace_parser_clear(parser);
848
849         ret = get_user(ch, ubuf++);
850         if (ret)
851                 goto out;
852
853         read++;
854         cnt--;
855
856         /*
857          * If the parser did not finish with the last write,
858          * continue reading the user input without skipping spaces.
859          */
860         if (!parser->cont) {
861                 /* skip white space */
862                 while (cnt && isspace(ch)) {
863                         ret = get_user(ch, ubuf++);
864                         if (ret)
865                                 goto out;
866                         read++;
867                         cnt--;
868                 }
869
870                 /* only spaces were written */
871                 if (isspace(ch)) {
872                         *ppos += read;
873                         ret = read;
874                         goto out;
875                 }
876
877                 parser->idx = 0;
878         }
879
880         /* read the non-space input */
881         while (cnt && !isspace(ch)) {
882                 if (parser->idx < parser->size - 1)
883                         parser->buffer[parser->idx++] = ch;
884                 else {
885                         ret = -EINVAL;
886                         goto out;
887                 }
888                 ret = get_user(ch, ubuf++);
889                 if (ret)
890                         goto out;
891                 read++;
892                 cnt--;
893         }
894
895         /* We either got finished input or we have to wait for another call. */
896         if (isspace(ch)) {
897                 parser->buffer[parser->idx] = 0;
898                 parser->cont = false;
899         } else if (parser->idx < parser->size - 1) {
900                 parser->cont = true;
901                 parser->buffer[parser->idx++] = ch;
902         } else {
903                 ret = -EINVAL;
904                 goto out;
905         }
906
907         *ppos += read;
908         ret = read;
909
910 out:
911         return ret;
912 }
913
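/*
 * Sketch of a typical caller (hypothetical, not part of this file): pull one
 * space-separated token per write(2) into a trace_parser, as described by the
 * trace_get_user() comment above.
 */
static inline ssize_t example_parse_one_token(const char __user *ubuf,
					      size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser) &&
	    !trace_parser_cont(&parser))
		pr_debug("token: %s\n", parser.buffer);

	trace_parser_put(&parser);
	return read;
}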
914 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
915 {
916         int len;
917         int ret;
918
919         if (!cnt)
920                 return 0;
921
922         if (s->len <= s->readpos)
923                 return -EBUSY;
924
925         len = s->len - s->readpos;
926         if (cnt > len)
927                 cnt = len;
928         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
929         if (ret == cnt)
930                 return -EFAULT;
931
932         cnt -= ret;
933
934         s->readpos += cnt;
935         return cnt;
936 }
937
938 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
939 {
940         int len;
941
942         if (s->len <= s->readpos)
943                 return -EBUSY;
944
945         len = s->len - s->readpos;
946         if (cnt > len)
947                 cnt = len;
948         memcpy(buf, s->buffer + s->readpos, cnt);
949
950         s->readpos += cnt;
951         return cnt;
952 }
953
954 /*
955  * ftrace_max_lock is used to protect the swapping of buffers
956  * when taking a max snapshot. The buffers themselves are
957  * protected by per_cpu spinlocks. But the action of the swap
958  * needs its own lock.
959  *
960  * This is defined as an arch_spinlock_t in order to help
961  * with performance when lockdep debugging is enabled.
962  *
963  * It is also used in other places outside of update_max_tr(),
964  * so it needs to be defined outside of the
965  * CONFIG_TRACER_MAX_TRACE section.
966  */
967 static arch_spinlock_t ftrace_max_lock =
968         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
969
970 unsigned long __read_mostly     tracing_thresh;
971
972 #ifdef CONFIG_TRACER_MAX_TRACE
973 unsigned long __read_mostly     tracing_max_latency;
974
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tracing_max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&ftrace_max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&ftrace_max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr: tracer
1052  * @tsk: task with the latency
1053  * @cpu: the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&ftrace_max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&ftrace_max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static int default_wait_pipe(struct trace_iterator *iter)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return 0;
1099
1100         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1101 }
1102
1103 #ifdef CONFIG_FTRACE_STARTUP_TEST
1104 static int run_tracer_selftest(struct tracer *type)
1105 {
1106         struct trace_array *tr = &global_trace;
1107         struct tracer *saved_tracer = tr->current_trace;
1108         int ret;
1109
1110         if (!type->selftest || tracing_selftest_disabled)
1111                 return 0;
1112
1113         /*
1114          * Run a selftest on this tracer.
1115          * Here we reset the trace buffer, and set the current
1116          * tracer to be this tracer. The tracer can then run some
1117          * internal tracing to verify that everything is in order.
1118          * If we fail, we do not register this tracer.
1119          */
1120         tracing_reset_online_cpus(&tr->trace_buffer);
1121
1122         tr->current_trace = type;
1123
1124 #ifdef CONFIG_TRACER_MAX_TRACE
1125         if (type->use_max_tr) {
1126                 /* If we expanded the buffers, make sure the max is expanded too */
1127                 if (ring_buffer_expanded)
1128                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1129                                            RING_BUFFER_ALL_CPUS);
1130                 tr->allocated_snapshot = true;
1131         }
1132 #endif
1133
1134         /* the test is responsible for initializing and enabling */
1135         pr_info("Testing tracer %s: ", type->name);
1136         ret = type->selftest(type, tr);
1137         /* the test is responsible for resetting too */
1138         tr->current_trace = saved_tracer;
1139         if (ret) {
1140                 printk(KERN_CONT "FAILED!\n");
1141                 /* Add the warning after printing 'FAILED' */
1142                 WARN_ON(1);
1143                 return -1;
1144         }
1145         /* Only reset on passing, to avoid touching corrupted buffers */
1146         tracing_reset_online_cpus(&tr->trace_buffer);
1147
1148 #ifdef CONFIG_TRACER_MAX_TRACE
1149         if (type->use_max_tr) {
1150                 tr->allocated_snapshot = false;
1151
1152                 /* Shrink the max buffer again */
1153                 if (ring_buffer_expanded)
1154                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1155                                            RING_BUFFER_ALL_CPUS);
1156         }
1157 #endif
1158
1159         printk(KERN_CONT "PASSED\n");
1160         return 0;
1161 }
1162 #else
1163 static inline int run_tracer_selftest(struct tracer *type)
1164 {
1165         return 0;
1166 }
1167 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1168
1169 /**
1170  * register_tracer - register a tracer with the ftrace system.
1171  * @type - the plugin for the tracer
1172  *
1173  * Register a new plugin tracer.
1174  */
1175 int register_tracer(struct tracer *type)
1176 {
1177         struct tracer *t;
1178         int ret = 0;
1179
1180         if (!type->name) {
1181                 pr_info("Tracer must have a name\n");
1182                 return -1;
1183         }
1184
1185         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1186                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1187                 return -1;
1188         }
1189
1190         mutex_lock(&trace_types_lock);
1191
1192         tracing_selftest_running = true;
1193
1194         for (t = trace_types; t; t = t->next) {
1195                 if (strcmp(type->name, t->name) == 0) {
1196                         /* already found */
1197                         pr_info("Tracer %s already registered\n",
1198                                 type->name);
1199                         ret = -1;
1200                         goto out;
1201                 }
1202         }
1203
1204         if (!type->set_flag)
1205                 type->set_flag = &dummy_set_flag;
1206         if (!type->flags)
1207                 type->flags = &dummy_tracer_flags;
1208         else
1209                 if (!type->flags->opts)
1210                         type->flags->opts = dummy_tracer_opt;
1211         if (!type->wait_pipe)
1212                 type->wait_pipe = default_wait_pipe;
1213
1214         ret = run_tracer_selftest(type);
1215         if (ret < 0)
1216                 goto out;
1217
1218         type->next = trace_types;
1219         trace_types = type;
1220
1221  out:
1222         tracing_selftest_running = false;
1223         mutex_unlock(&trace_types_lock);
1224
1225         if (ret || !default_bootup_tracer)
1226                 goto out_unlock;
1227
1228         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1229                 goto out_unlock;
1230
1231         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1232         /* Do we want this tracer to start on bootup? */
1233         tracing_set_tracer(type->name);
1234         default_bootup_tracer = NULL;
1235         /* disable other selftests, since this will break them. */
1236         tracing_selftest_disabled = true;
1237 #ifdef CONFIG_FTRACE_STARTUP_TEST
1238         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1239                type->name);
1240 #endif
1241
1242  out_unlock:
1243         return ret;
1244 }
1245
1246 void tracing_reset(struct trace_buffer *buf, int cpu)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257         ring_buffer_reset_cpu(buffer, cpu);
1258
1259         ring_buffer_record_enable(buffer);
1260 }
1261
1262 void tracing_reset_online_cpus(struct trace_buffer *buf)
1263 {
1264         struct ring_buffer *buffer = buf->buffer;
1265         int cpu;
1266
1267         if (!buffer)
1268                 return;
1269
1270         ring_buffer_record_disable(buffer);
1271
1272         /* Make sure all commits have finished */
1273         synchronize_sched();
1274
1275         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1276
1277         for_each_online_cpu(cpu)
1278                 ring_buffer_reset_cpu(buffer, cpu);
1279
1280         ring_buffer_record_enable(buffer);
1281 }
1282
1283 /* Must have trace_types_lock held */
1284 void tracing_reset_all_online_cpus(void)
1285 {
1286         struct trace_array *tr;
1287
1288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1289                 tracing_reset_online_cpus(&tr->trace_buffer);
1290 #ifdef CONFIG_TRACER_MAX_TRACE
1291                 tracing_reset_online_cpus(&tr->max_buffer);
1292 #endif
1293         }
1294 }
1295
1296 #define SAVED_CMDLINES 128
1297 #define NO_CMDLINE_MAP UINT_MAX
1298 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1299 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1300 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1301 static int cmdline_idx;
1302 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1303
1304 /* temporarily disable recording */
1305 static atomic_t trace_record_cmdline_disabled __read_mostly;
1306
1307 static void trace_init_cmdlines(void)
1308 {
1309         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1310         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1311         cmdline_idx = 0;
1312 }
1313
1314 int is_tracing_stopped(void)
1315 {
1316         return global_trace.stop_count;
1317 }
1318
1319 /**
1320  * tracing_start - quick start of the tracer
1321  *
1322  * If tracing is enabled but was stopped by tracing_stop,
1323  * this will start the tracer back up.
1324  */
1325 void tracing_start(void)
1326 {
1327         struct ring_buffer *buffer;
1328         unsigned long flags;
1329
1330         if (tracing_disabled)
1331                 return;
1332
1333         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1334         if (--global_trace.stop_count) {
1335                 if (global_trace.stop_count < 0) {
1336                         /* Someone screwed up their debugging */
1337                         WARN_ON_ONCE(1);
1338                         global_trace.stop_count = 0;
1339                 }
1340                 goto out;
1341         }
1342
1343         /* Prevent the buffers from switching */
1344         arch_spin_lock(&ftrace_max_lock);
1345
1346         buffer = global_trace.trace_buffer.buffer;
1347         if (buffer)
1348                 ring_buffer_record_enable(buffer);
1349
1350 #ifdef CONFIG_TRACER_MAX_TRACE
1351         buffer = global_trace.max_buffer.buffer;
1352         if (buffer)
1353                 ring_buffer_record_enable(buffer);
1354 #endif
1355
1356         arch_spin_unlock(&ftrace_max_lock);
1357
1358  out:
1359         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1360 }
1361
1362 static void tracing_start_tr(struct trace_array *tr)
1363 {
1364         struct ring_buffer *buffer;
1365         unsigned long flags;
1366
1367         if (tracing_disabled)
1368                 return;
1369
1370         /* If global, we need to also start the max tracer */
1371         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1372                 return tracing_start();
1373
1374         raw_spin_lock_irqsave(&tr->start_lock, flags);
1375
1376         if (--tr->stop_count) {
1377                 if (tr->stop_count < 0) {
1378                         /* Someone screwed up their debugging */
1379                         WARN_ON_ONCE(1);
1380                         tr->stop_count = 0;
1381                 }
1382                 goto out;
1383         }
1384
1385         buffer = tr->trace_buffer.buffer;
1386         if (buffer)
1387                 ring_buffer_record_enable(buffer);
1388
1389  out:
1390         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1391 }
1392
1393 /**
1394  * tracing_stop - quick stop of the tracer
1395  *
1396  * Light weight way to stop tracing. Use in conjunction with
1397  * tracing_start.
1398  */
1399 void tracing_stop(void)
1400 {
1401         struct ring_buffer *buffer;
1402         unsigned long flags;
1403
1404         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1405         if (global_trace.stop_count++)
1406                 goto out;
1407
1408         /* Prevent the buffers from switching */
1409         arch_spin_lock(&ftrace_max_lock);
1410
1411         buffer = global_trace.trace_buffer.buffer;
1412         if (buffer)
1413                 ring_buffer_record_disable(buffer);
1414
1415 #ifdef CONFIG_TRACER_MAX_TRACE
1416         buffer = global_trace.max_buffer.buffer;
1417         if (buffer)
1418                 ring_buffer_record_disable(buffer);
1419 #endif
1420
1421         arch_spin_unlock(&ftrace_max_lock);
1422
1423  out:
1424         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1425 }
1426
1427 static void tracing_stop_tr(struct trace_array *tr)
1428 {
1429         struct ring_buffer *buffer;
1430         unsigned long flags;
1431
1432         /* If global, we need to also stop the max tracer */
1433         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1434                 return tracing_stop();
1435
1436         raw_spin_lock_irqsave(&tr->start_lock, flags);
1437         if (tr->stop_count++)
1438                 goto out;
1439
1440         buffer = tr->trace_buffer.buffer;
1441         if (buffer)
1442                 ring_buffer_record_disable(buffer);
1443
1444  out:
1445         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1446 }
1447
1448 void trace_stop_cmdline_recording(void);
1449
1450 static int trace_save_cmdline(struct task_struct *tsk)
1451 {
1452         unsigned pid, idx;
1453
1454         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1455                 return 0;
1456
1457         /*
1458          * It's not the end of the world if we don't get
1459          * the lock, but we also don't want to spin
1460          * nor do we want to disable interrupts,
1461          * so if we miss here, then better luck next time.
1462          */
1463         if (!arch_spin_trylock(&trace_cmdline_lock))
1464                 return 0;
1465
1466         idx = map_pid_to_cmdline[tsk->pid];
1467         if (idx == NO_CMDLINE_MAP) {
1468                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1469
1470                 /*
1471                  * Check whether the cmdline buffer at idx has a pid
1472                  * mapped. We are going to overwrite that entry so we
1473                  * need to clear the map_pid_to_cmdline. Otherwise we
1474                  * would read the new comm for the old pid.
1475                  */
1476                 pid = map_cmdline_to_pid[idx];
1477                 if (pid != NO_CMDLINE_MAP)
1478                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1479
1480                 map_cmdline_to_pid[idx] = tsk->pid;
1481                 map_pid_to_cmdline[tsk->pid] = idx;
1482
1483                 cmdline_idx = idx;
1484         }
1485
1486         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1487
1488         arch_spin_unlock(&trace_cmdline_lock);
1489
1490         return 1;
1491 }
1492
1493 void trace_find_cmdline(int pid, char comm[])
1494 {
1495         unsigned map;
1496
1497         if (!pid) {
1498                 strcpy(comm, "<idle>");
1499                 return;
1500         }
1501
1502         if (WARN_ON_ONCE(pid < 0)) {
1503                 strcpy(comm, "<XXX>");
1504                 return;
1505         }
1506
1507         if (pid > PID_MAX_DEFAULT) {
1508                 strcpy(comm, "<...>");
1509                 return;
1510         }
1511
1512         preempt_disable();
1513         arch_spin_lock(&trace_cmdline_lock);
1514         map = map_pid_to_cmdline[pid];
1515         if (map != NO_CMDLINE_MAP)
1516                 strcpy(comm, saved_cmdlines[map]);
1517         else
1518                 strcpy(comm, "<...>");
1519
1520         arch_spin_unlock(&trace_cmdline_lock);
1521         preempt_enable();
1522 }
1523
1524 void tracing_record_cmdline(struct task_struct *tsk)
1525 {
1526         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1527                 return;
1528
1529         if (!__this_cpu_read(trace_cmdline_save))
1530                 return;
1531
1532         if (trace_save_cmdline(tsk))
1533                 __this_cpu_write(trace_cmdline_save, false);
1534 }
1535
1536 void
1537 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1538                              int pc)
1539 {
1540         struct task_struct *tsk = current;
1541
1542         entry->preempt_count            = pc & 0xff;
1543         entry->pid                      = (tsk) ? tsk->pid : 0;
1544         entry->flags =
1545 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1546                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1547 #else
1548                 TRACE_FLAG_IRQS_NOSUPPORT |
1549 #endif
1550                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1551                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1552                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1553                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1554 }
1555 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1556
1557 struct ring_buffer_event *
1558 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1559                           int type,
1560                           unsigned long len,
1561                           unsigned long flags, int pc)
1562 {
1563         struct ring_buffer_event *event;
1564
1565         event = ring_buffer_lock_reserve(buffer, len);
1566         if (event != NULL) {
1567                 struct trace_entry *ent = ring_buffer_event_data(event);
1568
1569                 tracing_generic_entry_update(ent, flags, pc);
1570                 ent->type = type;
1571         }
1572
1573         return event;
1574 }
1575
1576 void
1577 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1578 {
1579         __this_cpu_write(trace_cmdline_save, true);
1580         ring_buffer_unlock_commit(buffer, event);
1581 }
1582
1583 static inline void
1584 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1585                              struct ring_buffer_event *event,
1586                              unsigned long flags, int pc)
1587 {
1588         __buffer_unlock_commit(buffer, event);
1589
1590         ftrace_trace_stack(buffer, flags, 6, pc);
1591         ftrace_trace_userstack(buffer, flags, pc);
1592 }
1593
1594 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1595                                 struct ring_buffer_event *event,
1596                                 unsigned long flags, int pc)
1597 {
1598         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1599 }
1600 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1601
1602 static struct ring_buffer *temp_buffer;
1603
1604 struct ring_buffer_event *
1605 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1606                           struct ftrace_event_file *ftrace_file,
1607                           int type, unsigned long len,
1608                           unsigned long flags, int pc)
1609 {
1610         struct ring_buffer_event *entry;
1611
1612         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1613         entry = trace_buffer_lock_reserve(*current_rb,
1614                                          type, len, flags, pc);
1615         /*
1616          * If tracing is off, but we have triggers enabled,
1617          * we still need to look at the event data. Use the temp_buffer
1618          * to store the trace event for the trigger to use. It's recursion
1619          * safe and will not be recorded anywhere.
1620          */
1621         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1622                 *current_rb = temp_buffer;
1623                 entry = trace_buffer_lock_reserve(*current_rb,
1624                                                   type, len, flags, pc);
1625         }
1626         return entry;
1627 }
1628 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1629
1630 struct ring_buffer_event *
1631 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1632                                   int type, unsigned long len,
1633                                   unsigned long flags, int pc)
1634 {
1635         *current_rb = global_trace.trace_buffer.buffer;
1636         return trace_buffer_lock_reserve(*current_rb,
1637                                          type, len, flags, pc);
1638 }
1639 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1640
1641 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1642                                         struct ring_buffer_event *event,
1643                                         unsigned long flags, int pc)
1644 {
1645         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1646 }
1647 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1648
1649 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1650                                      struct ring_buffer_event *event,
1651                                      unsigned long flags, int pc,
1652                                      struct pt_regs *regs)
1653 {
1654         __buffer_unlock_commit(buffer, event);
1655
1656         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1657         ftrace_trace_userstack(buffer, flags, pc);
1658 }
1659 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1660
1661 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1662                                          struct ring_buffer_event *event)
1663 {
1664         ring_buffer_discard_commit(buffer, event);
1665 }
1666 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1667
1668 void
1669 trace_function(struct trace_array *tr,
1670                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1671                int pc)
1672 {
1673         struct ftrace_event_call *call = &event_function;
1674         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1675         struct ring_buffer_event *event;
1676         struct ftrace_entry *entry;
1677
1678         /* If we are reading the ring buffer, don't trace */
1679         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1680                 return;
1681
1682         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1683                                           flags, pc);
1684         if (!event)
1685                 return;
1686         entry   = ring_buffer_event_data(event);
1687         entry->ip                       = ip;
1688         entry->parent_ip                = parent_ip;
1689
1690         if (!call_filter_check_discard(call, entry, buffer, event))
1691                 __buffer_unlock_commit(buffer, event);
1692 }
1693
1694 #ifdef CONFIG_STACKTRACE
1695
1696 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1697 struct ftrace_stack {
1698         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1699 };
1700
1701 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1702 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1703
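/*
 * Record a kernel stack trace as a TRACE_STACK event in @buffer.
 * A per-cpu ftrace_stack is used as scratch space when available, so
 * that more than the default number of entries can be captured.
 */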
1704 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1705                                  unsigned long flags,
1706                                  int skip, int pc, struct pt_regs *regs)
1707 {
1708         struct ftrace_event_call *call = &event_kernel_stack;
1709         struct ring_buffer_event *event;
1710         struct stack_entry *entry;
1711         struct stack_trace trace;
1712         int use_stack;
1713         int size = FTRACE_STACK_ENTRIES;
1714
1715         trace.nr_entries        = 0;
1716         trace.skip              = skip;
1717
1718         /*
1719          * Since events can happen in NMIs, there's no safe way to
1720          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1721          * or NMI comes in, it will just have to use the default of
1722          * FTRACE_STACK_ENTRIES entries.
1723          */
1724         preempt_disable_notrace();
1725
1726         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1727         /*
1728          * We don't need any atomic variables, just a barrier.
1729          * If an interrupt comes in, we don't care, because it would
1730          * have exited and put the counter back to what we want.
1731          * We just need a barrier to keep gcc from moving things
1732          * around.
1733          */
1734         barrier();
1735         if (use_stack == 1) {
1736                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1737                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1738
1739                 if (regs)
1740                         save_stack_trace_regs(regs, &trace);
1741                 else
1742                         save_stack_trace(&trace);
1743
1744                 if (trace.nr_entries > size)
1745                         size = trace.nr_entries;
1746         } else
1747                 /* From now on, use_stack is a boolean */
1748                 use_stack = 0;
1749
1750         size *= sizeof(unsigned long);
1751
1752         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1753                                           sizeof(*entry) + size, flags, pc);
1754         if (!event)
1755                 goto out;
1756         entry = ring_buffer_event_data(event);
1757
1758         memset(&entry->caller, 0, size);
1759
1760         if (use_stack)
1761                 memcpy(&entry->caller, trace.entries,
1762                        trace.nr_entries * sizeof(unsigned long));
1763         else {
1764                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1765                 trace.entries           = entry->caller;
1766                 if (regs)
1767                         save_stack_trace_regs(regs, &trace);
1768                 else
1769                         save_stack_trace(&trace);
1770         }
1771
1772         entry->size = trace.nr_entries;
1773
1774         if (!call_filter_check_discard(call, entry, buffer, event))
1775                 __buffer_unlock_commit(buffer, event);
1776
1777  out:
1778         /* Again, don't let gcc optimize things here */
1779         barrier();
1780         __this_cpu_dec(ftrace_stack_reserve);
1781         preempt_enable_notrace();
1782
1783 }
1784
1785 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1786                              int skip, int pc, struct pt_regs *regs)
1787 {
1788         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1789                 return;
1790
1791         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1792 }
1793
1794 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1795                         int skip, int pc)
1796 {
1797         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1798                 return;
1799
1800         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1801 }
1802
1803 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1804                    int pc)
1805 {
1806         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1807 }
1808
1809 /**
1810  * trace_dump_stack - record a stack back trace in the trace buffer
1811  * @skip: Number of functions to skip (helper handlers)
1812  */
1813 void trace_dump_stack(int skip)
1814 {
1815         unsigned long flags;
1816
1817         if (tracing_disabled || tracing_selftest_running)
1818                 return;
1819
1820         local_save_flags(flags);
1821
1822         /*
1823          * Skip 3 more; that seems to get us to the caller of
1824          * this function.
1825          */
1826         skip += 3;
1827         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1828                              flags, skip, preempt_count(), NULL);
1829 }
1830
1831 static DEFINE_PER_CPU(int, user_stack_count);
1832
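/*
 * Record the current task's user-space stack as a TRACE_USER_STACK
 * event in @buffer.  Bails out in NMI context, since saving a user
 * stack may fault.
 */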
1833 void
1834 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1835 {
1836         struct ftrace_event_call *call = &event_user_stack;
1837         struct ring_buffer_event *event;
1838         struct userstack_entry *entry;
1839         struct stack_trace trace;
1840
1841         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1842                 return;
1843
1844         /*
1845          * NMIs can not handle page faults, even with fixups.
1846          * Saving the user stack can (and often does) fault.
1847          */
1848         if (unlikely(in_nmi()))
1849                 return;
1850
1851         /*
1852          * prevent recursion, since the user stack tracing may
1853          * trigger other kernel events.
1854          */
1855         preempt_disable();
1856         if (__this_cpu_read(user_stack_count))
1857                 goto out;
1858
1859         __this_cpu_inc(user_stack_count);
1860
1861         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1862                                           sizeof(*entry), flags, pc);
1863         if (!event)
1864                 goto out_drop_count;
1865         entry   = ring_buffer_event_data(event);
1866
1867         entry->tgid             = current->tgid;
1868         memset(&entry->caller, 0, sizeof(entry->caller));
1869
1870         trace.nr_entries        = 0;
1871         trace.max_entries       = FTRACE_STACK_ENTRIES;
1872         trace.skip              = 0;
1873         trace.entries           = entry->caller;
1874
1875         save_stack_trace_user(&trace);
1876         if (!call_filter_check_discard(call, entry, buffer, event))
1877                 __buffer_unlock_commit(buffer, event);
1878
1879  out_drop_count:
1880         __this_cpu_dec(user_stack_count);
1881  out:
1882         preempt_enable();
1883 }
1884
1885 #ifdef UNUSED
1886 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1887 {
1888         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
1889 }
1890 #endif /* UNUSED */
1891
1892 #endif /* CONFIG_STACKTRACE */
1893
1894 /* created for use with alloc_percpu */
1895 struct trace_buffer_struct {
1896         char buffer[TRACE_BUF_SIZE];
1897 };
1898
1899 static struct trace_buffer_struct *trace_percpu_buffer;
1900 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1901 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1902 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1903
1904 /*
1905  * The buffer used is dependent on the context. There is a per cpu
1906  * buffer for normal context, softirq context, hard irq context and
1907  * for NMI context. This allows for lockless recording.
1908  *
1909  * Note, if the buffers failed to be allocated, then this returns NULL.
1910  */
1911 static char *get_trace_buf(void)
1912 {
1913         struct trace_buffer_struct *percpu_buffer;
1914
1915         /*
1916          * If we have allocated per cpu buffers, then we do not
1917          * need to do any locking.
1918          */
1919         if (in_nmi())
1920                 percpu_buffer = trace_percpu_nmi_buffer;
1921         else if (in_irq())
1922                 percpu_buffer = trace_percpu_irq_buffer;
1923         else if (in_softirq())
1924                 percpu_buffer = trace_percpu_sirq_buffer;
1925         else
1926                 percpu_buffer = trace_percpu_buffer;
1927
1928         if (!percpu_buffer)
1929                 return NULL;
1930
1931         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1932 }
1933
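/*
 * Allocate the per-cpu trace_printk() scratch buffers for normal,
 * softirq, irq and NMI context.  Either all four are allocated, or
 * everything is freed again and -ENOMEM is returned.
 */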
1934 static int alloc_percpu_trace_buffer(void)
1935 {
1936         struct trace_buffer_struct *buffers;
1937         struct trace_buffer_struct *sirq_buffers;
1938         struct trace_buffer_struct *irq_buffers;
1939         struct trace_buffer_struct *nmi_buffers;
1940
1941         buffers = alloc_percpu(struct trace_buffer_struct);
1942         if (!buffers)
1943                 goto err_warn;
1944
1945         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1946         if (!sirq_buffers)
1947                 goto err_sirq;
1948
1949         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1950         if (!irq_buffers)
1951                 goto err_irq;
1952
1953         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1954         if (!nmi_buffers)
1955                 goto err_nmi;
1956
1957         trace_percpu_buffer = buffers;
1958         trace_percpu_sirq_buffer = sirq_buffers;
1959         trace_percpu_irq_buffer = irq_buffers;
1960         trace_percpu_nmi_buffer = nmi_buffers;
1961
1962         return 0;
1963
1964  err_nmi:
1965         free_percpu(irq_buffers);
1966  err_irq:
1967         free_percpu(sirq_buffers);
1968  err_sirq:
1969         free_percpu(buffers);
1970  err_warn:
1971         WARN(1, "Could not allocate percpu trace_printk buffer");
1972         return -ENOMEM;
1973 }
1974
1975 static int buffers_allocated;
1976
1977 void trace_printk_init_buffers(void)
1978 {
1979         if (buffers_allocated)
1980                 return;
1981
1982         if (alloc_percpu_trace_buffer())
1983                 return;
1984
1985         pr_info("ftrace: Allocated trace_printk buffers\n");
1986
1987         /* Expand the buffers to their configured size */
1988         tracing_update_buffers();
1989
1990         buffers_allocated = 1;
1991
1992         /*
1993          * trace_printk_init_buffers() can be called by modules.
1994          * If that happens, then we need to start cmdline recording
1995          * directly here. If the global_trace.buffer is already
1996          * allocated here, then this was called by module code.
1997          */
1998         if (global_trace.trace_buffer.buffer)
1999                 tracing_start_cmdline_record();
2000 }
2001
2002 void trace_printk_start_comm(void)
2003 {
2004         /* Start tracing comms if trace printk is set */
2005         if (!buffers_allocated)
2006                 return;
2007         tracing_start_cmdline_record();
2008 }
2009
2010 static void trace_printk_start_stop_comm(int enabled)
2011 {
2012         if (!buffers_allocated)
2013                 return;
2014
2015         if (enabled)
2016                 tracing_start_cmdline_record();
2017         else
2018                 tracing_stop_cmdline_record();
2019 }
2020
2021 /**
2022  * trace_vbprintk - write binary msg to tracing buffer
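 * @ip: the instruction pointer of the caller
 * @fmt: the printf format string
 * @args: the va_list holding the arguments for @fmt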
2023  *
2024  */
2025 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2026 {
2027         struct ftrace_event_call *call = &event_bprint;
2028         struct ring_buffer_event *event;
2029         struct ring_buffer *buffer;
2030         struct trace_array *tr = &global_trace;
2031         struct bprint_entry *entry;
2032         unsigned long flags;
2033         char *tbuffer;
2034         int len = 0, size, pc;
2035
2036         if (unlikely(tracing_selftest_running || tracing_disabled))
2037                 return 0;
2038
2039         /* Don't pollute graph traces with trace_vprintk internals */
2040         pause_graph_tracing();
2041
2042         pc = preempt_count();
2043         preempt_disable_notrace();
2044
2045         tbuffer = get_trace_buf();
2046         if (!tbuffer) {
2047                 len = 0;
2048                 goto out;
2049         }
2050
2051         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2052
2053         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2054                 goto out;
2055
2056         local_save_flags(flags);
2057         size = sizeof(*entry) + sizeof(u32) * len;
2058         buffer = tr->trace_buffer.buffer;
2059         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2060                                           flags, pc);
2061         if (!event)
2062                 goto out;
2063         entry = ring_buffer_event_data(event);
2064         entry->ip                       = ip;
2065         entry->fmt                      = fmt;
2066
2067         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2068         if (!call_filter_check_discard(call, entry, buffer, event)) {
2069                 __buffer_unlock_commit(buffer, event);
2070                 ftrace_trace_stack(buffer, flags, 6, pc);
2071         }
2072
2073 out:
2074         preempt_enable_notrace();
2075         unpause_graph_tracing();
2076
2077         return len;
2078 }
2079 EXPORT_SYMBOL_GPL(trace_vbprintk);
2080
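/*
 * Format the message into a per-cpu scratch buffer and record it in
 * @buffer as a TRACE_PRINT event.  Returns the length of the
 * formatted string.
 */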
2081 static int
2082 __trace_array_vprintk(struct ring_buffer *buffer,
2083                       unsigned long ip, const char *fmt, va_list args)
2084 {
2085         struct ftrace_event_call *call = &event_print;
2086         struct ring_buffer_event *event;
2087         int len = 0, size, pc;
2088         struct print_entry *entry;
2089         unsigned long flags;
2090         char *tbuffer;
2091
2092         if (tracing_disabled || tracing_selftest_running)
2093                 return 0;
2094
2095         /* Don't pollute graph traces with trace_vprintk internals */
2096         pause_graph_tracing();
2097
2098         pc = preempt_count();
2099         preempt_disable_notrace();
2100
2101
2102         tbuffer = get_trace_buf();
2103         if (!tbuffer) {
2104                 len = 0;
2105                 goto out;
2106         }
2107
2108         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2109         if (len > TRACE_BUF_SIZE)
2110                 goto out;
2111
2112         local_save_flags(flags);
2113         size = sizeof(*entry) + len + 1;
2114         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2115                                           flags, pc);
2116         if (!event)
2117                 goto out;
2118         entry = ring_buffer_event_data(event);
2119         entry->ip = ip;
2120
2121         memcpy(&entry->buf, tbuffer, len);
2122         entry->buf[len] = '\0';
2123         if (!call_filter_check_discard(call, entry, buffer, event)) {
2124                 __buffer_unlock_commit(buffer, event);
2125                 ftrace_trace_stack(buffer, flags, 6, pc);
2126         }
2127  out:
2128         preempt_enable_notrace();
2129         unpause_graph_tracing();
2130
2131         return len;
2132 }
2133
2134 int trace_array_vprintk(struct trace_array *tr,
2135                         unsigned long ip, const char *fmt, va_list args)
2136 {
2137         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2138 }
2139
2140 int trace_array_printk(struct trace_array *tr,
2141                        unsigned long ip, const char *fmt, ...)
2142 {
2143         int ret;
2144         va_list ap;
2145
2146         if (!(trace_flags & TRACE_ITER_PRINTK))
2147                 return 0;
2148
2149         va_start(ap, fmt);
2150         ret = trace_array_vprintk(tr, ip, fmt, ap);
2151         va_end(ap);
2152         return ret;
2153 }
2154
2155 int trace_array_printk_buf(struct ring_buffer *buffer,
2156                            unsigned long ip, const char *fmt, ...)
2157 {
2158         int ret;
2159         va_list ap;
2160
2161         if (!(trace_flags & TRACE_ITER_PRINTK))
2162                 return 0;
2163
2164         va_start(ap, fmt);
2165         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2166         va_end(ap);
2167         return ret;
2168 }
2169
2170 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2171 {
2172         return trace_array_vprintk(&global_trace, ip, fmt, args);
2173 }
2174 EXPORT_SYMBOL_GPL(trace_vprintk);
2175
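/*
 * Advance the iterator index and, if a ring buffer iterator exists
 * for the current cpu, step it past the entry just consumed.
 */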
2176 static void trace_iterator_increment(struct trace_iterator *iter)
2177 {
2178         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2179
2180         iter->idx++;
2181         if (buf_iter)
2182                 ring_buffer_read(buf_iter, NULL);
2183 }
2184
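/*
 * Peek at the next entry for @cpu without consuming it.  A ring
 * buffer iterator is used if one exists (non-consuming read),
 * otherwise the live buffer is peeked directly.  iter->ent_size is
 * updated as a side effect.
 */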
2185 static struct trace_entry *
2186 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2187                 unsigned long *lost_events)
2188 {
2189         struct ring_buffer_event *event;
2190         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2191
2192         if (buf_iter)
2193                 event = ring_buffer_iter_peek(buf_iter, ts);
2194         else
2195                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2196                                          lost_events);
2197
2198         if (event) {
2199                 iter->ent_size = ring_buffer_event_length(event);
2200                 return ring_buffer_event_data(event);
2201         }
2202         iter->ent_size = 0;
2203         return NULL;
2204 }
2205
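/*
 * Return the entry with the oldest timestamp across the cpus covered
 * by the iterator (all cpus, or just iter->cpu_file for a per_cpu
 * trace file), without advancing the iterator.
 */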
2206 static struct trace_entry *
2207 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2208                   unsigned long *missing_events, u64 *ent_ts)
2209 {
2210         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2211         struct trace_entry *ent, *next = NULL;
2212         unsigned long lost_events = 0, next_lost = 0;
2213         int cpu_file = iter->cpu_file;
2214         u64 next_ts = 0, ts;
2215         int next_cpu = -1;
2216         int next_size = 0;
2217         int cpu;
2218
2219         /*
2220          * If we are in a per_cpu trace file, don't bother iterating over
2221          * all cpus; peek at that cpu directly.
2222          */
2223         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2224                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2225                         return NULL;
2226                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2227                 if (ent_cpu)
2228                         *ent_cpu = cpu_file;
2229
2230                 return ent;
2231         }
2232
2233         for_each_tracing_cpu(cpu) {
2234
2235                 if (ring_buffer_empty_cpu(buffer, cpu))
2236                         continue;
2237
2238                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2239
2240                 /*
2241                  * Pick the entry with the smallest timestamp:
2242                  */
2243                 if (ent && (!next || ts < next_ts)) {
2244                         next = ent;
2245                         next_cpu = cpu;
2246                         next_ts = ts;
2247                         next_lost = lost_events;
2248                         next_size = iter->ent_size;
2249                 }
2250         }
2251
2252         iter->ent_size = next_size;
2253
2254         if (ent_cpu)
2255                 *ent_cpu = next_cpu;
2256
2257         if (ent_ts)
2258                 *ent_ts = next_ts;
2259
2260         if (missing_events)
2261                 *missing_events = next_lost;
2262
2263         return next;
2264 }
2265
2266 /* Find the next real entry, without updating the iterator itself */
2267 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2268                                           int *ent_cpu, u64 *ent_ts)
2269 {
2270         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2271 }
2272
2273 /* Find the next real entry, and increment the iterator to the next entry */
2274 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2275 {
2276         iter->ent = __find_next_entry(iter, &iter->cpu,
2277                                       &iter->lost_events, &iter->ts);
2278
2279         if (iter->ent)
2280                 trace_iterator_increment(iter);
2281
2282         return iter->ent ? iter : NULL;
2283 }
2284
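/* Consume (read and remove) the next entry on iter->cpu from the live buffer. */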
2285 static void trace_consume(struct trace_iterator *iter)
2286 {
2287         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2288                             &iter->lost_events);
2289 }
2290
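/*
 * seq_file ->next() callback: advance the trace iterator until it
 * reaches the entry at position *pos.
 */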
2291 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2292 {
2293         struct trace_iterator *iter = m->private;
2294         int i = (int)*pos;
2295         void *ent;
2296
2297         WARN_ON_ONCE(iter->leftover);
2298
2299         (*pos)++;
2300
2301         /* can't go backwards */
2302         if (iter->idx > i)
2303                 return NULL;
2304
2305         if (iter->idx < 0)
2306                 ent = trace_find_next_entry_inc(iter);
2307         else
2308                 ent = iter;
2309
2310         while (ent && iter->idx < i)
2311                 ent = trace_find_next_entry_inc(iter);
2312
2313         iter->pos = *pos;
2314
2315         return ent;
2316 }
2317
2318 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2319 {
2320         struct ring_buffer_event *event;
2321         struct ring_buffer_iter *buf_iter;
2322         unsigned long entries = 0;
2323         u64 ts;
2324
2325         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2326
2327         buf_iter = trace_buffer_iter(iter, cpu);
2328         if (!buf_iter)
2329                 return;
2330
2331         ring_buffer_iter_reset(buf_iter);
2332
2333         /*
2334          * With the max latency tracers, it is possible that a reset
2335          * never took place on a cpu. This is evident
2336          * by the timestamp being before the start of the buffer.
2337          */
2338         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2339                 if (ts >= iter->trace_buffer->time_start)
2340                         break;
2341                 entries++;
2342                 ring_buffer_read(buf_iter, NULL);
2343         }
2344
2345         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2346 }
2347
2348 /*
2349  * The current tracer is copied to avoid taking a global lock
2350  * all around.
2351  */
2352 static void *s_start(struct seq_file *m, loff_t *pos)
2353 {
2354         struct trace_iterator *iter = m->private;
2355         struct trace_array *tr = iter->tr;
2356         int cpu_file = iter->cpu_file;
2357         void *p = NULL;
2358         loff_t l = 0;
2359         int cpu;
2360
2361         /*
2362          * copy the tracer to avoid using a global lock all around.
2363          * iter->trace is a copy of current_trace, the pointer to the
2364          * name may be used instead of a strcmp(), as iter->trace->name
2365          * will point to the same string as current_trace->name.
2366          */
2367         mutex_lock(&trace_types_lock);
2368         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2369                 *iter->trace = *tr->current_trace;
2370         mutex_unlock(&trace_types_lock);
2371
2372 #ifdef CONFIG_TRACER_MAX_TRACE
2373         if (iter->snapshot && iter->trace->use_max_tr)
2374                 return ERR_PTR(-EBUSY);
2375 #endif
2376
2377         if (!iter->snapshot)
2378                 atomic_inc(&trace_record_cmdline_disabled);
2379
2380         if (*pos != iter->pos) {
2381                 iter->ent = NULL;
2382                 iter->cpu = 0;
2383                 iter->idx = -1;
2384
2385                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2386                         for_each_tracing_cpu(cpu)
2387                                 tracing_iter_reset(iter, cpu);
2388                 } else
2389                         tracing_iter_reset(iter, cpu_file);
2390
2391                 iter->leftover = 0;
2392                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2393                         ;
2394
2395         } else {
2396                 /*
2397                  * If we overflowed the seq_file before, then we want
2398                  * to just reuse the trace_seq buffer again.
2399                  */
2400                 if (iter->leftover)
2401                         p = iter;
2402                 else {
2403                         l = *pos - 1;
2404                         p = s_next(m, p, &l);
2405                 }
2406         }
2407
2408         trace_event_read_lock();
2409         trace_access_lock(cpu_file);
2410         return p;
2411 }
2412
2413 static void s_stop(struct seq_file *m, void *p)
2414 {
2415         struct trace_iterator *iter = m->private;
2416
2417 #ifdef CONFIG_TRACER_MAX_TRACE
2418         if (iter->snapshot && iter->trace->use_max_tr)
2419                 return;
2420 #endif
2421
2422         if (!iter->snapshot)
2423                 atomic_dec(&trace_record_cmdline_disabled);
2424
2425         trace_access_unlock(iter->cpu_file);
2426         trace_event_read_unlock();
2427 }
2428
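/*
 * Count the entries in @buf across all tracing cpus: *entries is the
 * number still readable, *total additionally includes entries lost
 * to ring buffer overruns.
 */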
2429 static void
2430 get_total_entries(struct trace_buffer *buf,
2431                   unsigned long *total, unsigned long *entries)
2432 {
2433         unsigned long count;
2434         int cpu;
2435
2436         *total = 0;
2437         *entries = 0;
2438
2439         for_each_tracing_cpu(cpu) {
2440                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2441                 /*
2442                  * If this buffer has skipped entries, then we hold all
2443                  * entries for the trace and we need to ignore the
2444                  * ones before the time stamp.
2445                  */
2446                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2447                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2448                         /* total is the same as the entries */
2449                         *total += count;
2450                 } else
2451                         *total += count +
2452                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2453                 *entries += count;
2454         }
2455 }
2456
2457 static void print_lat_help_header(struct seq_file *m)
2458 {
2459         seq_puts(m, "#                  _------=> CPU#            \n");
2460         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2461         seq_puts(m, "#                | / _----=> need-resched    \n");
2462         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2463         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2464         seq_puts(m, "#                |||| /     delay             \n");
2465         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2466         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2467 }
2468
2469 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2470 {
2471         unsigned long total;
2472         unsigned long entries;
2473
2474         get_total_entries(buf, &total, &entries);
2475         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2476                    entries, total, num_online_cpus());
2477         seq_puts(m, "#\n");
2478 }
2479
2480 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2481 {
2482         print_event_info(buf, m);
2483         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2484         seq_puts(m, "#              | |       |          |         |\n");
2485 }
2486
2487 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2488 {
2489         print_event_info(buf, m);
2490         seq_puts(m, "#                              _-----=> irqs-off\n");
2491         seq_puts(m, "#                             / _----=> need-resched\n");
2492         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2493         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2494         seq_puts(m, "#                            ||| /     delay\n");
2495         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2496         seq_puts(m, "#              | |       |   ||||       |         |\n");
2497 }
2498
2499 void
2500 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2501 {
2502         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2503         struct trace_buffer *buf = iter->trace_buffer;
2504         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2505         struct tracer *type = iter->trace;
2506         unsigned long entries;
2507         unsigned long total;
2508         const char *name = type->name;
2511
2512         get_total_entries(buf, &total, &entries);
2513
2514         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2515                    name, UTS_RELEASE);
2516         seq_puts(m, "# -----------------------------------"
2517                  "---------------------------------\n");
2518         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2519                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2520                    nsecs_to_usecs(data->saved_latency),
2521                    entries,
2522                    total,
2523                    buf->cpu,
2524 #if defined(CONFIG_PREEMPT_NONE)
2525                    "server",
2526 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2527                    "desktop",
2528 #elif defined(CONFIG_PREEMPT)
2529                    "preempt",
2530 #else
2531                    "unknown",
2532 #endif
2533                    /* These are reserved for later use */
2534                    0, 0, 0, 0);
2535 #ifdef CONFIG_SMP
2536         seq_printf(m, " #P:%d)\n", num_online_cpus());
2537 #else
2538         seq_puts(m, ")\n");
2539 #endif
2540         seq_puts(m, "#    -----------------\n");
2541         seq_printf(m, "#    | task: %.16s-%d "
2542                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2543                    data->comm, data->pid,
2544                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2545                    data->policy, data->rt_priority);
2546         seq_puts(m, "#    -----------------\n");
2547
2548         if (data->critical_start) {
2549                 seq_puts(m, "#  => started at: ");
2550                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2551                 trace_print_seq(m, &iter->seq);
2552                 seq_puts(m, "\n#  => ended at:   ");
2553                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2554                 trace_print_seq(m, &iter->seq);
2555                 seq_puts(m, "\n#\n");
2556         }
2557
2558         seq_puts(m, "#\n");
2559 }
2560
2561 static void test_cpu_buff_start(struct trace_iterator *iter)
2562 {
2563         struct trace_seq *s = &iter->seq;
2564
2565         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2566                 return;
2567
2568         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2569                 return;
2570
2571         if (cpumask_test_cpu(iter->cpu, iter->started))
2572                 return;
2573
2574         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2575                 return;
2576
2577         cpumask_set_cpu(iter->cpu, iter->started);
2578
2579         /* Don't print started cpu buffer for the first entry of the trace */
2580         if (iter->idx > 1)
2581                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2582                                 iter->cpu);
2583 }
2584
2585 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2586 {
2587         struct trace_seq *s = &iter->seq;
2588         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2589         struct trace_entry *entry;
2590         struct trace_event *event;
2591
2592         entry = iter->ent;
2593
2594         test_cpu_buff_start(iter);
2595
2596         event = ftrace_find_event(entry->type);
2597
2598         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2599                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2600                         if (!trace_print_lat_context(iter))
2601                                 goto partial;
2602                 } else {
2603                         if (!trace_print_context(iter))
2604                                 goto partial;
2605                 }
2606         }
2607
2608         if (event)
2609                 return event->funcs->trace(iter, sym_flags, event);
2610
2611         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2612                 goto partial;
2613
2614         return TRACE_TYPE_HANDLED;
2615 partial:
2616         return TRACE_TYPE_PARTIAL_LINE;
2617 }
2618
2619 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2620 {
2621         struct trace_seq *s = &iter->seq;
2622         struct trace_entry *entry;
2623         struct trace_event *event;
2624
2625         entry = iter->ent;
2626
2627         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2628                 if (!trace_seq_printf(s, "%d %d %llu ",
2629                                       entry->pid, iter->cpu, iter->ts))
2630                         goto partial;
2631         }
2632
2633         event = ftrace_find_event(entry->type);
2634         if (event)
2635                 return event->funcs->raw(iter, 0, event);
2636
2637         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2638                 goto partial;
2639
2640         return TRACE_TYPE_HANDLED;
2641 partial:
2642         return TRACE_TYPE_PARTIAL_LINE;
2643 }
2644
2645 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2646 {
2647         struct trace_seq *s = &iter->seq;
2648         unsigned char newline = '\n';
2649         struct trace_entry *entry;
2650         struct trace_event *event;
2651
2652         entry = iter->ent;
2653
2654         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2655                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2656                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2657                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2658         }
2659
2660         event = ftrace_find_event(entry->type);
2661         if (event) {
2662                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2663                 if (ret != TRACE_TYPE_HANDLED)
2664                         return ret;
2665         }
2666
2667         SEQ_PUT_FIELD_RET(s, newline);
2668
2669         return TRACE_TYPE_HANDLED;
2670 }
2671
2672 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2673 {
2674         struct trace_seq *s = &iter->seq;
2675         struct trace_entry *entry;
2676         struct trace_event *event;
2677
2678         entry = iter->ent;
2679
2680         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2681                 SEQ_PUT_FIELD_RET(s, entry->pid);
2682                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2683                 SEQ_PUT_FIELD_RET(s, iter->ts);
2684         }
2685
2686         event = ftrace_find_event(entry->type);
2687         return event ? event->funcs->binary(iter, 0, event) :
2688                 TRACE_TYPE_HANDLED;
2689 }
2690
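/*
 * Return 1 if there is nothing left to read in the buffer(s) covered
 * by the iterator, 0 otherwise.
 */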
2691 int trace_empty(struct trace_iterator *iter)
2692 {
2693         struct ring_buffer_iter *buf_iter;
2694         int cpu;
2695
2696         /* If we are looking at one CPU buffer, only check that one */
2697         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2698                 cpu = iter->cpu_file;
2699                 buf_iter = trace_buffer_iter(iter, cpu);
2700                 if (buf_iter) {
2701                         if (!ring_buffer_iter_empty(buf_iter))
2702                                 return 0;
2703                 } else {
2704                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2705                                 return 0;
2706                 }
2707                 return 1;
2708         }
2709
2710         for_each_tracing_cpu(cpu) {
2711                 buf_iter = trace_buffer_iter(iter, cpu);
2712                 if (buf_iter) {
2713                         if (!ring_buffer_iter_empty(buf_iter))
2714                                 return 0;
2715                 } else {
2716                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2717                                 return 0;
2718                 }
2719         }
2720
2721         return 1;
2722 }
2723
2724 /*  Called with trace_event_read_lock() held. */
2725 enum print_line_t print_trace_line(struct trace_iterator *iter)
2726 {
2727         enum print_line_t ret;
2728
2729         if (iter->lost_events &&
2730             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2731                                  iter->cpu, iter->lost_events))
2732                 return TRACE_TYPE_PARTIAL_LINE;
2733
2734         if (iter->trace && iter->trace->print_line) {
2735                 ret = iter->trace->print_line(iter);
2736                 if (ret != TRACE_TYPE_UNHANDLED)
2737                         return ret;
2738         }
2739
2740         if (iter->ent->type == TRACE_BPUTS &&
2741                         trace_flags & TRACE_ITER_PRINTK &&
2742                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2743                 return trace_print_bputs_msg_only(iter);
2744
2745         if (iter->ent->type == TRACE_BPRINT &&
2746                         trace_flags & TRACE_ITER_PRINTK &&
2747                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2748                 return trace_print_bprintk_msg_only(iter);
2749
2750         if (iter->ent->type == TRACE_PRINT &&
2751                         trace_flags & TRACE_ITER_PRINTK &&
2752                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2753                 return trace_print_printk_msg_only(iter);
2754
2755         if (trace_flags & TRACE_ITER_BIN)
2756                 return print_bin_fmt(iter);
2757
2758         if (trace_flags & TRACE_ITER_HEX)
2759                 return print_hex_fmt(iter);
2760
2761         if (trace_flags & TRACE_ITER_RAW)
2762                 return print_raw_fmt(iter);
2763
2764         return print_trace_fmt(iter);
2765 }
2766
2767 void trace_latency_header(struct seq_file *m)
2768 {
2769         struct trace_iterator *iter = m->private;
2770
2771         /* print nothing if the buffers are empty */
2772         if (trace_empty(iter))
2773                 return;
2774
2775         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2776                 print_trace_header(m, iter);
2777
2778         if (!(trace_flags & TRACE_ITER_VERBOSE))
2779                 print_lat_help_header(m);
2780 }
2781
2782 void trace_default_header(struct seq_file *m)
2783 {
2784         struct trace_iterator *iter = m->private;
2785
2786         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2787                 return;
2788
2789         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2790                 /* print nothing if the buffers are empty */
2791                 if (trace_empty(iter))
2792                         return;
2793                 print_trace_header(m, iter);
2794                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2795                         print_lat_help_header(m);
2796         } else {
2797                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2798                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2799                                 print_func_help_header_irq(iter->trace_buffer, m);
2800                         else
2801                                 print_func_help_header(iter->trace_buffer, m);
2802                 }
2803         }
2804 }
2805
2806 static void test_ftrace_alive(struct seq_file *m)
2807 {
2808         if (!ftrace_is_dead())
2809                 return;
2810         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2811         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2812 }
2813
2814 #ifdef CONFIG_TRACER_MAX_TRACE
2815 static void show_snapshot_main_help(struct seq_file *m)
2816 {
2817         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2818         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2819         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2820         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2821         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2822         seq_printf(m, "#                       is not a '0' or '1')\n");
2823 }
2824
2825 static void show_snapshot_percpu_help(struct seq_file *m)
2826 {
2827         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2828 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2829         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2830         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2831 #else
2832         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2833         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2834 #endif
2835         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2836         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2837         seq_printf(m, "#                       is not a '0' or '1')\n");
2838 }
2839
2840 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2841 {
2842         if (iter->tr->allocated_snapshot)
2843                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2844         else
2845                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2846
2847         seq_printf(m, "# Snapshot commands:\n");
2848         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2849                 show_snapshot_main_help(m);
2850         else
2851                 show_snapshot_percpu_help(m);
2852 }
2853 #else
2854 /* Should never be called */
2855 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2856 #endif
2857
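/*
 * seq_file ->show() callback: print the header lines at the start of
 * the trace, re-emit a line that previously overflowed the seq_file
 * buffer, or print the current trace entry.
 */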
2858 static int s_show(struct seq_file *m, void *v)
2859 {
2860         struct trace_iterator *iter = v;
2861         int ret;
2862
2863         if (iter->ent == NULL) {
2864                 if (iter->tr) {
2865                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2866                         seq_puts(m, "#\n");
2867                         test_ftrace_alive(m);
2868                 }
2869                 if (iter->snapshot && trace_empty(iter))
2870                         print_snapshot_help(m, iter);
2871                 else if (iter->trace && iter->trace->print_header)
2872                         iter->trace->print_header(m);
2873                 else
2874                         trace_default_header(m);
2875
2876         } else if (iter->leftover) {
2877                 /*
2878                  * If we filled the seq_file buffer earlier, we
2879                  * want to just show it now.
2880                  */
2881                 ret = trace_print_seq(m, &iter->seq);
2882
2883                 /* ret should this time be zero, but you never know */
2884                 iter->leftover = ret;
2885
2886         } else {
2887                 print_trace_line(iter);
2888                 ret = trace_print_seq(m, &iter->seq);
2889                 /*
2890                  * If we overflow the seq_file buffer, then it will
2891                  * ask us for this data again at start up.
2892                  * Use that instead.
2893                  *  ret is 0 if seq_file write succeeded.
2894                  *        -1 otherwise.
2895                  */
2896                 iter->leftover = ret;
2897         }
2898
2899         return 0;
2900 }
2901
2902 /*
2903  * Should be used after trace_array_get(); trace_types_lock
2904  * ensures that i_cdev was already initialized.
2905  */
2906 static inline int tracing_get_cpu(struct inode *inode)
2907 {
2908         if (inode->i_cdev) /* See trace_create_cpu_file() */
2909                 return (long)inode->i_cdev - 1;
2910         return RING_BUFFER_ALL_CPUS;
2911 }
2912
2913 static const struct seq_operations tracer_seq_ops = {
2914         .start          = s_start,
2915         .next           = s_next,
2916         .stop           = s_stop,
2917         .show           = s_show,
2918 };
2919
2920 static struct trace_iterator *
2921 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2922 {
2923         struct trace_array *tr = inode->i_private;
2924         struct trace_iterator *iter;
2925         int cpu;
2926
2927         if (tracing_disabled)
2928                 return ERR_PTR(-ENODEV);
2929
2930         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2931         if (!iter)
2932                 return ERR_PTR(-ENOMEM);
2933
2934         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2935                                     GFP_KERNEL);
2936         if (!iter->buffer_iter)
2937                 goto release;
2938
2939         /*
2940          * We make a copy of the current tracer to avoid concurrent
2941          * changes on it while we are reading.
2942          */
2943         mutex_lock(&trace_types_lock);
2944         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2945         if (!iter->trace)
2946                 goto fail;
2947
2948         *iter->trace = *tr->current_trace;
2949
2950         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2951                 goto fail;
2952
2953         iter->tr = tr;
2954
2955 #ifdef CONFIG_TRACER_MAX_TRACE
2956         /* Currently only the top directory has a snapshot */
2957         if (tr->current_trace->print_max || snapshot)
2958                 iter->trace_buffer = &tr->max_buffer;
2959         else
2960 #endif
2961                 iter->trace_buffer = &tr->trace_buffer;
2962         iter->snapshot = snapshot;
2963         iter->pos = -1;
2964         iter->cpu_file = tracing_get_cpu(inode);
2965         mutex_init(&iter->mutex);
2966
2967         /* Notify the tracer early, before we stop tracing. */
2968         if (iter->trace && iter->trace->open)
2969                 iter->trace->open(iter);
2970
2971         /* Annotate start of buffers if we had overruns */
2972         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2973                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2974
2975         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2976         if (trace_clocks[tr->clock_id].in_ns)
2977                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2978
2979         /* stop the trace while dumping if we are not opening "snapshot" */
2980         if (!iter->snapshot)
2981                 tracing_stop_tr(tr);
2982
2983         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2984                 for_each_tracing_cpu(cpu) {
2985                         iter->buffer_iter[cpu] =
2986                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2987                 }
2988                 ring_buffer_read_prepare_sync();
2989                 for_each_tracing_cpu(cpu) {
2990                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2991                         tracing_iter_reset(iter, cpu);
2992                 }
2993         } else {
2994                 cpu = iter->cpu_file;
2995                 iter->buffer_iter[cpu] =
2996                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2997                 ring_buffer_read_prepare_sync();
2998                 ring_buffer_read_start(iter->buffer_iter[cpu]);
2999                 tracing_iter_reset(iter, cpu);
3000         }
3001
3002         mutex_unlock(&trace_types_lock);
3003
3004         return iter;
3005
3006  fail:
3007         mutex_unlock(&trace_types_lock);
3008         kfree(iter->trace);
3009         kfree(iter->buffer_iter);
3010 release:
3011         seq_release_private(inode, file);
3012         return ERR_PTR(-ENOMEM);
3013 }
3014
3015 int tracing_open_generic(struct inode *inode, struct file *filp)
3016 {
3017         if (tracing_disabled)
3018                 return -ENODEV;
3019
3020         filp->private_data = inode->i_private;
3021         return 0;
3022 }
3023
3024 bool tracing_is_disabled(void)
3025 {
3026         return tracing_disabled ? true : false;
3027 }
3028
3029 /*
3030  * Open and update trace_array ref count.
3031  * Must have the current trace_array passed to it.
3032  */
3033 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3034 {
3035         struct trace_array *tr = inode->i_private;
3036
3037         if (tracing_disabled)
3038                 return -ENODEV;
3039
3040         if (trace_array_get(tr) < 0)
3041                 return -ENODEV;
3042
3043         filp->private_data = inode->i_private;
3044
3045         return 0;
3046 }
3047
3048 static int tracing_release(struct inode *inode, struct file *file)
3049 {
3050         struct trace_array *tr = inode->i_private;
3051         struct seq_file *m = file->private_data;
3052         struct trace_iterator *iter;
3053         int cpu;
3054
3055         if (!(file->f_mode & FMODE_READ)) {
3056                 trace_array_put(tr);
3057                 return 0;
3058         }
3059
3060         /* Writes do not use seq_file */
3061         iter = m->private;
3062         mutex_lock(&trace_types_lock);
3063
3064         for_each_tracing_cpu(cpu) {
3065                 if (iter->buffer_iter[cpu])
3066                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3067         }
3068
3069         if (iter->trace && iter->trace->close)
3070                 iter->trace->close(iter);
3071
3072         if (!iter->snapshot)
3073                 /* reenable tracing if it was previously enabled */
3074                 tracing_start_tr(tr);
3075
3076         __trace_array_put(tr);
3077
3078         mutex_unlock(&trace_types_lock);
3079
3080         mutex_destroy(&iter->mutex);
3081         free_cpumask_var(iter->started);
3082         kfree(iter->trace);
3083         kfree(iter->buffer_iter);
3084         seq_release_private(inode, file);
3085
3086         return 0;
3087 }
3088
3089 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3090 {
3091         struct trace_array *tr = inode->i_private;
3092
3093         trace_array_put(tr);
3094         return 0;
3095 }
3096
3097 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3098 {
3099         struct trace_array *tr = inode->i_private;
3100
3101         trace_array_put(tr);
3102
3103         return single_release(inode, file);
3104 }
3105
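/*
 * Open the "trace" file.  Opening for write with O_TRUNC clears the
 * buffer(s); opening for read sets up a full trace iterator.
 */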
3106 static int tracing_open(struct inode *inode, struct file *file)
3107 {
3108         struct trace_array *tr = inode->i_private;
3109         struct trace_iterator *iter;
3110         int ret = 0;
3111
3112         if (trace_array_get(tr) < 0)
3113                 return -ENODEV;
3114
3115         /* If this file was open for write, then erase contents */
3116         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3117                 int cpu = tracing_get_cpu(inode);
3118
3119                 if (cpu == RING_BUFFER_ALL_CPUS)
3120                         tracing_reset_online_cpus(&tr->trace_buffer);
3121                 else
3122                         tracing_reset(&tr->trace_buffer, cpu);
3123         }
3124
3125         if (file->f_mode & FMODE_READ) {
3126                 iter = __tracing_open(inode, file, false);
3127                 if (IS_ERR(iter))
3128                         ret = PTR_ERR(iter);
3129                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3130                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3131         }
3132
3133         if (ret < 0)
3134                 trace_array_put(tr);
3135
3136         return ret;
3137 }
3138
3139 static void *
3140 t_next(struct seq_file *m, void *v, loff_t *pos)
3141 {
3142         struct tracer *t = v;
3143
3144         (*pos)++;
3145
3146         if (t)
3147                 t = t->next;
3148
3149         return t;
3150 }
3151
3152 static void *t_start(struct seq_file *m, loff_t *pos)
3153 {
3154         struct tracer *t;
3155         loff_t l = 0;
3156
3157         mutex_lock(&trace_types_lock);
3158         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3159                 ;
3160
3161         return t;
3162 }
3163
3164 static void t_stop(struct seq_file *m, void *p)
3165 {
3166         mutex_unlock(&trace_types_lock);
3167 }
3168
3169 static int t_show(struct seq_file *m, void *v)
3170 {
3171         struct tracer *t = v;
3172
3173         if (!t)
3174                 return 0;
3175
3176         seq_printf(m, "%s", t->name);
3177         if (t->next)
3178                 seq_putc(m, ' ');
3179         else
3180                 seq_putc(m, '\n');
3181
3182         return 0;
3183 }
3184
3185 static const struct seq_operations show_traces_seq_ops = {
3186         .start          = t_start,
3187         .next           = t_next,
3188         .stop           = t_stop,
3189         .show           = t_show,
3190 };
3191
3192 static int show_traces_open(struct inode *inode, struct file *file)
3193 {
3194         if (tracing_disabled)
3195                 return -ENODEV;
3196
3197         return seq_open(file, &show_traces_seq_ops);
3198 }
3199
3200 static ssize_t
3201 tracing_write_stub(struct file *filp, const char __user *ubuf,
3202                    size_t count, loff_t *ppos)
3203 {
3204         return count;
3205 }
3206
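/*
 * lseek for tracing files: reads go through seq_lseek(), while files
 * opened write-only just have their position reset to zero.
 */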
3207 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3208 {
3209         int ret;
3210
3211         if (file->f_mode & FMODE_READ)
3212                 ret = seq_lseek(file, offset, whence);
3213         else
3214                 file->f_pos = ret = 0;
3215
3216         return ret;
3217 }
3218
3219 static const struct file_operations tracing_fops = {
3220         .open           = tracing_open,
3221         .read           = seq_read,
3222         .write          = tracing_write_stub,
3223         .llseek         = tracing_lseek,
3224         .release        = tracing_release,
3225 };
3226
3227 static const struct file_operations show_traces_fops = {
3228         .open           = show_traces_open,
3229         .read           = seq_read,
3230         .release        = seq_release,
3231         .llseek         = seq_lseek,
3232 };
3233
3234 /*
3235  * The tracer itself will not take this lock, but still we want
3236  * to provide a consistent cpumask to user-space:
3237  */
3238 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3239
3240 /*
3241  * Temporary storage for the character representation of the
3242  * CPU bitmask (and one more byte for the newline):
3243  */
3244 static char mask_str[NR_CPUS + 1];
3245
3246 static ssize_t
3247 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3248                      size_t count, loff_t *ppos)
3249 {
3250         struct trace_array *tr = file_inode(filp)->i_private;
3251         int len;
3252
3253         mutex_lock(&tracing_cpumask_update_lock);
3254
3255         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3256         if (count - len < 2) {
3257                 count = -EINVAL;
3258                 goto out_err;
3259         }
3260         len += sprintf(mask_str + len, "\n");
3261         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3262
3263 out_err:
3264         mutex_unlock(&tracing_cpumask_update_lock);
3265
3266         return count;
3267 }
3268
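/*
 * Write handler for "tracing_cpumask": parse a hex mask from user space
 * (e.g. "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1) and,
 * for every CPU whose bit flips, disable or re-enable recording on that
 * CPU's ring buffer before installing the new mask.
 */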
3269 static ssize_t
3270 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3271                       size_t count, loff_t *ppos)
3272 {
3273         struct trace_array *tr = file_inode(filp)->i_private;
3274         cpumask_var_t tracing_cpumask_new;
3275         int err, cpu;
3276
3277         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3278                 return -ENOMEM;
3279
3280         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3281         if (err)
3282                 goto err_unlock;
3283
3284         mutex_lock(&tracing_cpumask_update_lock);
3285
3286         local_irq_disable();
3287         arch_spin_lock(&ftrace_max_lock);
3288         for_each_tracing_cpu(cpu) {
3289                 /*
3290                  * Increase/decrease the disabled counter if we are
3291                  * about to flip a bit in the cpumask:
3292                  */
3293                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3294                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3295                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3296                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3297                 }
3298                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3299                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3300                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3301                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3302                 }
3303         }
3304         arch_spin_unlock(&ftrace_max_lock);
3305         local_irq_enable();
3306
3307         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3308
3309         mutex_unlock(&tracing_cpumask_update_lock);
3310         free_cpumask_var(tracing_cpumask_new);
3311
3312         return count;
3313
3314 err_unlock:
3315         free_cpumask_var(tracing_cpumask_new);
3316
3317         return err;
3318 }
3319
3320 static const struct file_operations tracing_cpumask_fops = {
3321         .open           = tracing_open_generic_tr,
3322         .read           = tracing_cpumask_read,
3323         .write          = tracing_cpumask_write,
3324         .release        = tracing_release_generic_tr,
3325         .llseek         = generic_file_llseek,
3326 };
3327
3328 static int tracing_trace_options_show(struct seq_file *m, void *v)
3329 {
3330         struct tracer_opt *trace_opts;
3331         struct trace_array *tr = m->private;
3332         u32 tracer_flags;
3333         int i;
3334
3335         mutex_lock(&trace_types_lock);
3336         tracer_flags = tr->current_trace->flags->val;
3337         trace_opts = tr->current_trace->flags->opts;
3338
3339         for (i = 0; trace_options[i]; i++) {
3340                 if (trace_flags & (1 << i))
3341                         seq_printf(m, "%s\n", trace_options[i]);
3342                 else
3343                         seq_printf(m, "no%s\n", trace_options[i]);
3344         }
3345
3346         for (i = 0; trace_opts[i].name; i++) {
3347                 if (tracer_flags & trace_opts[i].bit)
3348                         seq_printf(m, "%s\n", trace_opts[i].name);
3349                 else
3350                         seq_printf(m, "no%s\n", trace_opts[i].name);
3351         }
3352         mutex_unlock(&trace_types_lock);
3353
3354         return 0;
3355 }
3356
3357 static int __set_tracer_option(struct tracer *trace,
3358                                struct tracer_flags *tracer_flags,
3359                                struct tracer_opt *opts, int neg)
3360 {
3361         int ret;
3362
3363         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3364         if (ret)
3365                 return ret;
3366
3367         if (neg)
3368                 tracer_flags->val &= ~opts->bit;
3369         else
3370                 tracer_flags->val |= opts->bit;
3371         return 0;
3372 }
3373
3374 /* Try to assign a tracer specific option */
3375 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3376 {
3377         struct tracer_flags *tracer_flags = trace->flags;
3378         struct tracer_opt *opts = NULL;
3379         int i;
3380
3381         for (i = 0; tracer_flags->opts[i].name; i++) {
3382                 opts = &tracer_flags->opts[i];
3383
3384                 if (strcmp(cmp, opts->name) == 0)
3385                         return __set_tracer_option(trace, trace->flags,
3386                                                    opts, neg);
3387         }
3388
3389         return -EINVAL;
3390 }
3391
3392 /* Some tracers require overwrite to stay enabled */
3393 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3394 {
3395         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3396                 return -1;
3397
3398         return 0;
3399 }
3400
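/*
 * Set or clear one bit in the global trace_flags, after giving the current
 * tracer a chance to veto the change via ->flag_changed(). Some flags have
 * side effects: RECORD_CMD toggles comm recording, OVERWRITE switches the
 * ring buffer(s) overwrite mode, and PRINTK toggles trace_printk comm saving.
 */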
3401 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3402 {
3403         /* do nothing if flag is already set */
3404         if (!!(trace_flags & mask) == !!enabled)
3405                 return 0;
3406
3407         /* Give the tracer a chance to approve the change */
3408         if (tr->current_trace->flag_changed)
3409                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3410                         return -EINVAL;
3411
3412         if (enabled)
3413                 trace_flags |= mask;
3414         else
3415                 trace_flags &= ~mask;
3416
3417         if (mask == TRACE_ITER_RECORD_CMD)
3418                 trace_event_enable_cmd_record(enabled);
3419
3420         if (mask == TRACE_ITER_OVERWRITE) {
3421                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3422 #ifdef CONFIG_TRACER_MAX_TRACE
3423                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3424 #endif
3425         }
3426
3427         if (mask == TRACE_ITER_PRINTK)
3428                 trace_printk_start_stop_comm(enabled);
3429
3430         return 0;
3431 }
3432
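/*
 * Apply a single option token, optionally prefixed with "no" to clear it.
 * The core trace_options array is tried first; an unrecognized name is
 * handed to the current tracer as a tracer-specific option.
 */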
3433 static int trace_set_options(struct trace_array *tr, char *option)
3434 {
3435         char *cmp;
3436         int neg = 0;
3437         int ret = -ENODEV;
3438         int i;
3439
3440         cmp = strstrip(option);
3441
3442         if (strncmp(cmp, "no", 2) == 0) {
3443                 neg = 1;
3444                 cmp += 2;
3445         }
3446
3447         mutex_lock(&trace_types_lock);
3448
3449         for (i = 0; trace_options[i]; i++) {
3450                 if (strcmp(cmp, trace_options[i]) == 0) {
3451                         ret = set_tracer_flag(tr, 1 << i, !neg);
3452                         break;
3453                 }
3454         }
3455
3456         /* If no option could be set, test the specific tracer options */
3457         if (!trace_options[i])
3458                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3459
3460         mutex_unlock(&trace_types_lock);
3461
3462         return ret;
3463 }
3464
3465 static ssize_t
3466 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3467                         size_t cnt, loff_t *ppos)
3468 {
3469         struct seq_file *m = filp->private_data;
3470         struct trace_array *tr = m->private;
3471         char buf[64];
3472         int ret;
3473
3474         if (cnt >= sizeof(buf))
3475                 return -EINVAL;
3476
3477         if (copy_from_user(&buf, ubuf, cnt))
3478                 return -EFAULT;
3479
3480         buf[cnt] = 0;
3481
3482         ret = trace_set_options(tr, buf);
3483         if (ret < 0)
3484                 return ret;
3485
3486         *ppos += cnt;
3487
3488         return cnt;
3489 }
3490
3491 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3492 {
3493         struct trace_array *tr = inode->i_private;
3494         int ret;
3495
3496         if (tracing_disabled)
3497                 return -ENODEV;
3498
3499         if (trace_array_get(tr) < 0)
3500                 return -ENODEV;
3501
3502         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3503         if (ret < 0)
3504                 trace_array_put(tr);
3505
3506         return ret;
3507 }
3508
3509 static const struct file_operations tracing_iter_fops = {
3510         .open           = tracing_trace_options_open,
3511         .read           = seq_read,
3512         .llseek         = seq_lseek,
3513         .release        = tracing_single_release_tr,
3514         .write          = tracing_trace_options_write,
3515 };
3516
3517 static const char readme_msg[] =
3518         "tracing mini-HOWTO:\n\n"
3519         "# echo 0 > tracing_on : quick way to disable tracing\n"
3520         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3521         " Important files:\n"
3522         "  trace\t\t\t- The static contents of the buffer\n"
3523         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3524         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3525         "  current_tracer\t- function and latency tracers\n"
3526         "  available_tracers\t- list of configured tracers for current_tracer\n"
3527         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3528         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3529         "  trace_clock\t\t- change the clock used to order events\n"
3530         "       local:   Per cpu clock but may not be synced across CPUs\n"
3531         "      global:   Synced across CPUs but slows tracing down.\n"
3532         "     counter:   Not a clock, but just an increment\n"
3533         "      uptime:   Jiffy counter from time of boot\n"
3534         "        perf:   Same clock that perf events use\n"
3535 #ifdef CONFIG_X86_64
3536         "     x86-tsc:   TSC cycle counter\n"
3537 #endif
3538         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
3539         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3540         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3541         "\t\t\t  Remove sub-buffer with rmdir\n"
3542         "  trace_options\t\t- Set format or modify how tracing happens\n"
3543         "\t\t\t  Disable an option by adding the prefix 'no' to the\n"
3544         "\t\t\t  option name\n"
3545 #ifdef CONFIG_DYNAMIC_FTRACE
3546         "\n  available_filter_functions - list of functions that can be filtered on\n"
3547         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3548         "\t\t\t  functions\n"
3549         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3550         "\t     modules: Can select a group via module\n"
3551         "\t      Format: :mod:<module-name>\n"
3552         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3553         "\t    triggers: a command to perform when function is hit\n"
3554         "\t      Format: <function>:<trigger>[:count]\n"
3555         "\t     trigger: traceon, traceoff\n"
3556         "\t\t      enable_event:<system>:<event>\n"
3557         "\t\t      disable_event:<system>:<event>\n"
3558 #ifdef CONFIG_STACKTRACE
3559         "\t\t      stacktrace\n"
3560 #endif
3561 #ifdef CONFIG_TRACER_SNAPSHOT
3562         "\t\t      snapshot\n"
3563 #endif
3564         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3565         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3566         "\t     The first one will disable tracing every time do_fault is hit\n"
3567         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3568         "\t       The first time do_trap is hit and it disables tracing, the\n"
3569         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3570         "\t       the counter will not decrement. It only decrements when the\n"
3571         "\t       trigger did work\n"
3572         "\t     To remove trigger without count:\n"
3573         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3574         "\t     To remove trigger with a count:\n"
3575         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3576         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3577         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3578         "\t    modules: Can select a group via module command :mod:\n"
3579         "\t    Does not accept triggers\n"
3580 #endif /* CONFIG_DYNAMIC_FTRACE */
3581 #ifdef CONFIG_FUNCTION_TRACER
3582         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3583         "\t\t    (function)\n"
3584 #endif
3585 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3586         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3587         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3588 #endif
3589 #ifdef CONFIG_TRACER_SNAPSHOT
3590         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3591         "\t\t\t  snapshot buffer. Read the contents for more\n"
3592         "\t\t\t  information\n"
3593 #endif
3594 #ifdef CONFIG_STACK_TRACER
3595         "  stack_trace\t\t- Shows the max stack trace when active\n"
3596         "  stack_max_size\t- Shows current max stack size that was traced\n"
3597         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3598         "\t\t\t  new trace)\n"
3599 #ifdef CONFIG_DYNAMIC_FTRACE
3600         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3601         "\t\t\t  traces\n"
3602 #endif
3603 #endif /* CONFIG_STACK_TRACER */
3604         "  events/\t\t- Directory containing all trace event subsystems:\n"
3605         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3606         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3607         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3608         "\t\t\t  events\n"
3609         "      filter\t\t- If set, only events passing filter are traced\n"
3610         "  events/<system>/<event>/\t- Directory containing control files for\n"
3611         "\t\t\t  <event>:\n"
3612         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3613         "      filter\t\t- If set, only events passing filter are traced\n"
3614         "      trigger\t\t- If set, a command to perform when event is hit\n"
3615         "\t    Format: <trigger>[:count][if <filter>]\n"
3616         "\t   trigger: traceon, traceoff\n"
3617         "\t            enable_event:<system>:<event>\n"
3618         "\t            disable_event:<system>:<event>\n"
3619 #ifdef CONFIG_STACKTRACE
3620         "\t\t    stacktrace\n"
3621 #endif
3622 #ifdef CONFIG_TRACER_SNAPSHOT
3623         "\t\t    snapshot\n"
3624 #endif
3625         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3626         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3627         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3628         "\t                  events/block/block_unplug/trigger\n"
3629         "\t   The first disables tracing every time block_unplug is hit.\n"
3630         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3631         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3632         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3633         "\t   Like function triggers, the counter is only decremented if it\n"
3634         "\t    enabled or disabled tracing.\n"
3635         "\t   To remove a trigger without a count:\n"
3636         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3637         "\t   To remove a trigger with a count:\n"
3638         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3639         "\t   Filters can be ignored when removing a trigger.\n"
3640 ;
3641
3642 static ssize_t
3643 tracing_readme_read(struct file *filp, char __user *ubuf,
3644                        size_t cnt, loff_t *ppos)
3645 {
3646         return simple_read_from_buffer(ubuf, cnt, ppos,
3647                                         readme_msg, strlen(readme_msg));
3648 }
3649
3650 static const struct file_operations tracing_readme_fops = {
3651         .open           = tracing_open_generic,
3652         .read           = tracing_readme_read,
3653         .llseek         = generic_file_llseek,
3654 };
3655
3656 static ssize_t
3657 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3658                                 size_t cnt, loff_t *ppos)
3659 {
3660         char *buf_comm;
3661         char *file_buf;
3662         char *buf;
3663         int len = 0;
3664         int pid;
3665         int i;
3666
3667         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3668         if (!file_buf)
3669                 return -ENOMEM;
3670
3671         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3672         if (!buf_comm) {
3673                 kfree(file_buf);
3674                 return -ENOMEM;
3675         }
3676
3677         buf = file_buf;
3678
3679         for (i = 0; i < SAVED_CMDLINES; i++) {
3680                 int r;
3681
3682                 pid = map_cmdline_to_pid[i];
3683                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3684                         continue;
3685
3686                 trace_find_cmdline(pid, buf_comm);
3687                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3688                 buf += r;
3689                 len += r;
3690         }
3691
3692         len = simple_read_from_buffer(ubuf, cnt, ppos,
3693                                       file_buf, len);
3694
3695         kfree(file_buf);
3696         kfree(buf_comm);
3697
3698         return len;
3699 }
3700
3701 static const struct file_operations tracing_saved_cmdlines_fops = {
3702         .open           = tracing_open_generic,
3703         .read           = tracing_saved_cmdlines_read,
3704         .llseek         = generic_file_llseek,
3705 };
3706
3707 static ssize_t
3708 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3709                        size_t cnt, loff_t *ppos)
3710 {
3711         struct trace_array *tr = filp->private_data;
3712         char buf[MAX_TRACER_SIZE+2];
3713         int r;
3714
3715         mutex_lock(&trace_types_lock);
3716         r = sprintf(buf, "%s\n", tr->current_trace->name);
3717         mutex_unlock(&trace_types_lock);
3718
3719         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3720 }
3721
3722 int tracer_init(struct tracer *t, struct trace_array *tr)
3723 {
3724         tracing_reset_online_cpus(&tr->trace_buffer);
3725         return t->init(tr);
3726 }
3727
3728 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3729 {
3730         int cpu;
3731
3732         for_each_tracing_cpu(cpu)
3733                 per_cpu_ptr(buf->data, cpu)->entries = val;
3734 }
3735
3736 #ifdef CONFIG_TRACER_MAX_TRACE
3737 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3738 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3739                                         struct trace_buffer *size_buf, int cpu_id)
3740 {
3741         int cpu, ret = 0;
3742
3743         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3744                 for_each_tracing_cpu(cpu) {
3745                         ret = ring_buffer_resize(trace_buf->buffer,
3746                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3747                         if (ret < 0)
3748                                 break;
3749                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3750                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3751                 }
3752         } else {
3753                 ret = ring_buffer_resize(trace_buf->buffer,
3754                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3755                 if (ret == 0)
3756                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3757                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3758         }
3759
3760         return ret;
3761 }
3762 #endif /* CONFIG_TRACER_MAX_TRACE */
3763
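/*
 * Resize @tr's ring buffer (a single CPU or all of them) to @size bytes.
 * When the current tracer uses the max (snapshot) buffer, that buffer is
 * resized as well so the two stay in step; if the second resize fails,
 * the main buffer is put back to its previous per-cpu size.
 */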
3764 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3765                                         unsigned long size, int cpu)
3766 {
3767         int ret;
3768
3769         /*
3770          * If kernel or user changes the size of the ring buffer
3771          * we use the size that was given, and we can forget about
3772          * expanding it later.
3773          */
3774         ring_buffer_expanded = true;
3775
3776         /* May be called before buffers are initialized */
3777         if (!tr->trace_buffer.buffer)
3778                 return 0;
3779
3780         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3781         if (ret < 0)
3782                 return ret;
3783
3784 #ifdef CONFIG_TRACER_MAX_TRACE
3785         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3786             !tr->current_trace->use_max_tr)
3787                 goto out;
3788
3789         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3790         if (ret < 0) {
3791                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3792                                                      &tr->trace_buffer, cpu);
3793                 if (r < 0) {
3794                         /*
3795                          * AARGH! We are left with different
3796                          * size max buffer!!!!
3797                          * The max buffer is our "snapshot" buffer.
3798                          * When a tracer needs a snapshot (one of the
3799                          * latency tracers), it swaps the max buffer
3800                          * with the saved snap shot. We succeeded to
3801                  * with the saved snapshot. We succeeded in
3802                  * updating the size of the main buffer, but failed to
3803                          * to reset the main buffer to the original size, we
3804                          * failed there too. This is very unlikely to
3805                          * happen, but if it does, warn and kill all
3806                          * tracing.
3807                          */
3808                         WARN_ON(1);
3809                         tracing_disabled = 1;
3810                 }
3811                 return ret;
3812         }
3813
3814         if (cpu == RING_BUFFER_ALL_CPUS)
3815                 set_buffer_entries(&tr->max_buffer, size);
3816         else
3817                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3818
3819  out:
3820 #endif /* CONFIG_TRACER_MAX_TRACE */
3821
3822         if (cpu == RING_BUFFER_ALL_CPUS)
3823                 set_buffer_entries(&tr->trace_buffer, size);
3824         else
3825                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3826
3827         return ret;
3828 }
3829
3830 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3831                                           unsigned long size, int cpu_id)
3832 {
3833         int ret = size;
3834
3835         mutex_lock(&trace_types_lock);
3836
3837         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3838                 /* make sure this cpu is enabled in the mask */
3839                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3840                         ret = -EINVAL;
3841                         goto out;
3842                 }
3843         }
3844
3845         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3846         if (ret < 0)
3847                 ret = -ENOMEM;
3848
3849 out:
3850         mutex_unlock(&trace_types_lock);
3851
3852         return ret;
3853 }
3854
3855
3856 /**
3857  * tracing_update_buffers - used by tracing facility to expand ring buffers
3858  *
3859  * To save memory when tracing is never used on a system that has it
3860  * configured in, the ring buffers are set to a minimum size. But once
3861  * a user starts to use the tracing facility, they need to grow
3862  * to their default size.
3863  *
3864  * This function is to be called when a tracer is about to be used.
3865  */
3866 int tracing_update_buffers(void)
3867 {
3868         int ret = 0;
3869
3870         mutex_lock(&trace_types_lock);
3871         if (!ring_buffer_expanded)
3872                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3873                                                 RING_BUFFER_ALL_CPUS);
3874         mutex_unlock(&trace_types_lock);
3875
3876         return ret;
3877 }
3878
3879 struct trace_option_dentry;
3880
3881 static struct trace_option_dentry *
3882 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3883
3884 static void
3885 destroy_trace_option_files(struct trace_option_dentry *topts);
3886
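/*
 * Switch the current tracer to the one named @buf: make sure the ring
 * buffer is expanded, tear down the old tracer (running nop_trace in the
 * meantime), recreate the tracer option files, allocate or free the
 * snapshot buffer as needed, then initialize and enable the new tracer.
 */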
3887 static int tracing_set_tracer(const char *buf)
3888 {
3889         static struct trace_option_dentry *topts;
3890         struct trace_array *tr = &global_trace;
3891         struct tracer *t;
3892 #ifdef CONFIG_TRACER_MAX_TRACE
3893         bool had_max_tr;
3894 #endif
3895         int ret = 0;
3896
3897         mutex_lock(&trace_types_lock);
3898
3899         if (!ring_buffer_expanded) {
3900                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3901                                                 RING_BUFFER_ALL_CPUS);
3902                 if (ret < 0)
3903                         goto out;
3904                 ret = 0;
3905         }
3906
3907         for (t = trace_types; t; t = t->next) {
3908                 if (strcmp(t->name, buf) == 0)
3909                         break;
3910         }
3911         if (!t) {
3912                 ret = -EINVAL;
3913                 goto out;
3914         }
3915         if (t == tr->current_trace)
3916                 goto out;
3917
3918         trace_branch_disable();
3919
3920         tr->current_trace->enabled = false;
3921
3922         if (tr->current_trace->reset)
3923                 tr->current_trace->reset(tr);
3924
3925         /* Current trace needs to be nop_trace before synchronize_sched */
3926         tr->current_trace = &nop_trace;
3927
3928 #ifdef CONFIG_TRACER_MAX_TRACE
3929         had_max_tr = tr->allocated_snapshot;
3930
3931         if (had_max_tr && !t->use_max_tr) {
3932                 /*
3933                  * We need to make sure that the update_max_tr sees that
3934                  * current_trace changed to nop_trace to keep it from
3935                  * swapping the buffers after we resize it.
3936                  * The update_max_tr is called with interrupts disabled,
3937                  * so a synchronize_sched() is sufficient.
3938                  */
3939                 synchronize_sched();
3940                 free_snapshot(tr);
3941         }
3942 #endif
3943         destroy_trace_option_files(topts);
3944
3945         topts = create_trace_option_files(tr, t);
3946
3947 #ifdef CONFIG_TRACER_MAX_TRACE
3948         if (t->use_max_tr && !had_max_tr) {
3949                 ret = alloc_snapshot(tr);
3950                 if (ret < 0)
3951                         goto out;
3952         }
3953 #endif
3954
3955         if (t->init) {
3956                 ret = tracer_init(t, tr);
3957                 if (ret)
3958                         goto out;
3959         }
3960
3961         tr->current_trace = t;
3962         tr->current_trace->enabled = true;
3963         trace_branch_enable(tr);
3964  out:
3965         mutex_unlock(&trace_types_lock);
3966
3967         return ret;
3968 }
3969
3970 static ssize_t
3971 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3972                         size_t cnt, loff_t *ppos)
3973 {
3974         char buf[MAX_TRACER_SIZE+1];
3975         int i;
3976         size_t ret;
3977         int err;
3978
3979         ret = cnt;
3980
3981         if (cnt > MAX_TRACER_SIZE)
3982                 cnt = MAX_TRACER_SIZE;
3983
3984         if (copy_from_user(&buf, ubuf, cnt))
3985                 return -EFAULT;
3986
3987         buf[cnt] = 0;
3988
3989         /* strip ending whitespace. */
3990         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3991                 buf[i] = 0;
3992
3993         err = tracing_set_tracer(buf);
3994         if (err)
3995                 return err;
3996
3997         *ppos += ret;
3998
3999         return ret;
4000 }
4001
4002 static ssize_t
4003 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4004                      size_t cnt, loff_t *ppos)
4005 {
4006         unsigned long *ptr = filp->private_data;
4007         char buf[64];
4008         int r;
4009
4010         r = snprintf(buf, sizeof(buf), "%ld\n",
4011                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4012         if (r > sizeof(buf))
4013                 r = sizeof(buf);
4014         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4015 }
4016
4017 static ssize_t
4018 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4019                       size_t cnt, loff_t *ppos)
4020 {
4021         unsigned long *ptr = filp->private_data;
4022         unsigned long val;
4023         int ret;
4024
4025         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4026         if (ret)
4027                 return ret;
4028
4029         *ptr = val * 1000;
4030
4031         return cnt;
4032 }
4033
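/*
 * Open "trace_pipe": allocate a trace_iterator for a consuming read and
 * take a private copy of the current tracer so concurrent tracer changes
 * do not affect this reader.
 */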
4034 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4035 {
4036         struct trace_array *tr = inode->i_private;
4037         struct trace_iterator *iter;
4038         int ret = 0;
4039
4040         if (tracing_disabled)
4041                 return -ENODEV;
4042
4043         if (trace_array_get(tr) < 0)
4044                 return -ENODEV;
4045
4046         mutex_lock(&trace_types_lock);
4047
4048         /* create a buffer to store the information to pass to userspace */
4049         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4050         if (!iter) {
4051                 ret = -ENOMEM;
4052                 __trace_array_put(tr);
4053                 goto out;
4054         }
4055
4056         /*
4057          * We make a copy of the current tracer to avoid concurrent
4058          * changes on it while we are reading.
4059          */
4060         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4061         if (!iter->trace) {
4062                 ret = -ENOMEM;
4063                 goto fail;
4064         }
4065         *iter->trace = *tr->current_trace;
4066
4067         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4068                 ret = -ENOMEM;
4069                 goto fail;
4070         }
4071
4072         /* trace pipe does not show start of buffer */
4073         cpumask_setall(iter->started);
4074
4075         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4076                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4077
4078         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4079         if (trace_clocks[tr->clock_id].in_ns)
4080                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4081
4082         iter->tr = tr;
4083         iter->trace_buffer = &tr->trace_buffer;
4084         iter->cpu_file = tracing_get_cpu(inode);
4085         mutex_init(&iter->mutex);
4086         filp->private_data = iter;
4087
4088         if (iter->trace->pipe_open)
4089                 iter->trace->pipe_open(iter);
4090
4091         nonseekable_open(inode, filp);
4092 out:
4093         mutex_unlock(&trace_types_lock);
4094         return ret;
4095
4096 fail:
4097         kfree(iter->trace);
4098         kfree(iter);
4099         __trace_array_put(tr);
4100         mutex_unlock(&trace_types_lock);
4101         return ret;
4102 }
4103
4104 static int tracing_release_pipe(struct inode *inode, struct file *file)
4105 {
4106         struct trace_iterator *iter = file->private_data;
4107         struct trace_array *tr = inode->i_private;
4108
4109         mutex_lock(&trace_types_lock);
4110
4111         if (iter->trace->pipe_close)
4112                 iter->trace->pipe_close(iter);
4113
4114         mutex_unlock(&trace_types_lock);
4115
4116         free_cpumask_var(iter->started);
4117         mutex_destroy(&iter->mutex);
4118         kfree(iter->trace);
4119         kfree(iter);
4120
4121         trace_array_put(tr);
4122
4123         return 0;
4124 }
4125
4126 static unsigned int
4127 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4128 {
4129         /* Iterators are static, they should be filled or empty */
4130         if (trace_buffer_iter(iter, iter->cpu_file))
4131                 return POLLIN | POLLRDNORM;
4132
4133         if (trace_flags & TRACE_ITER_BLOCK)
4134                 /*
4135                  * Always select as readable when in blocking mode
4136                  */
4137                 return POLLIN | POLLRDNORM;
4138         else
4139                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4140                                              filp, poll_table);
4141 }
4142
4143 static unsigned int
4144 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4145 {
4146         struct trace_iterator *iter = filp->private_data;
4147
4148         return trace_poll(iter, filp, poll_table);
4149 }
4150
4151 /*
4152  * This is a make-shift waitqueue.
4153  * A tracer might use this callback in some rare cases:
4154  *
4155  *  1) the current tracer might hold the runqueue lock when it wakes up
4156  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4157  *  2) the function tracers trace all functions, and we don't want
4158  *     the overhead of calling wake_up and friends
4159  *     (and tracing them too)
4160  *
4161  *     Anyway, this is a very primitive form of wakeup.
4162  */
4163 int poll_wait_pipe(struct trace_iterator *iter)
4164 {
4165         set_current_state(TASK_INTERRUPTIBLE);
4166         /* sleep for 100 msecs, and try again. */
4167         schedule_timeout(HZ / 10);
4168         return 0;
4169 }
4170
4171 /* Must be called with trace_types_lock mutex held. */
4172 static int tracing_wait_pipe(struct file *filp)
4173 {
4174         struct trace_iterator *iter = filp->private_data;
4175         int ret;
4176
4177         while (trace_empty(iter)) {
4178
4179                 if ((filp->f_flags & O_NONBLOCK)) {
4180                         return -EAGAIN;
4181                 }
4182
4183                 mutex_unlock(&iter->mutex);
4184
4185                 ret = iter->trace->wait_pipe(iter);
4186
4187                 mutex_lock(&iter->mutex);
4188
4189                 if (ret)
4190                         return ret;
4191
4192                 if (signal_pending(current))
4193                         return -EINTR;
4194
4195                 /*
4196                  * We keep blocking until we have read something and tracing is disabled.
4197                  * We still block if tracing is disabled, but we have never
4198                  * read anything. This allows a user to cat this file, and
4199                  * then enable tracing. But after we have read something,
4200                  * we give an EOF when tracing is again disabled.
4201                  *
4202                  * iter->pos will be 0 if we haven't read anything.
4203                  */
4204                 if (!tracing_is_on() && iter->pos)
4205                         break;
4206         }
4207
4208         return 1;
4209 }
4210
4211 /*
4212  * Consumer reader.
4213  */
4214 static ssize_t
4215 tracing_read_pipe(struct file *filp, char __user *ubuf,
4216                   size_t cnt, loff_t *ppos)
4217 {
4218         struct trace_iterator *iter = filp->private_data;
4219         struct trace_array *tr = iter->tr;
4220         ssize_t sret;
4221
4222         /* return any leftover data */
4223         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4224         if (sret != -EBUSY)
4225                 return sret;
4226
4227         trace_seq_init(&iter->seq);
4228
4229         /* copy the tracer to avoid using a global lock all around */
4230         mutex_lock(&trace_types_lock);
4231         if (unlikely(iter->trace->name != tr->current_trace->name))
4232                 *iter->trace = *tr->current_trace;
4233         mutex_unlock(&trace_types_lock);
4234
4235         /*
4236          * Avoid more than one consumer on a single file descriptor
4237          * This is just a matter of trace coherency; the ring buffer itself
4238          * is protected.
4239          */
4240         mutex_lock(&iter->mutex);
4241         if (iter->trace->read) {
4242                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4243                 if (sret)
4244                         goto out;
4245         }
4246
4247 waitagain:
4248         sret = tracing_wait_pipe(filp);
4249         if (sret <= 0)
4250                 goto out;
4251
4252         /* stop when tracing is finished */
4253         if (trace_empty(iter)) {
4254                 sret = 0;
4255                 goto out;
4256         }
4257
4258         if (cnt >= PAGE_SIZE)
4259                 cnt = PAGE_SIZE - 1;
4260
4261         /* reset all but tr, trace, and overruns */
4262         memset(&iter->seq, 0,
4263                sizeof(struct trace_iterator) -
4264                offsetof(struct trace_iterator, seq));
4265         cpumask_clear(iter->started);
4266         iter->pos = -1;
4267
4268         trace_event_read_lock();
4269         trace_access_lock(iter->cpu_file);
4270         while (trace_find_next_entry_inc(iter) != NULL) {
4271                 enum print_line_t ret;
4272                 int len = iter->seq.len;
4273
4274                 ret = print_trace_line(iter);
4275                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4276                         /* don't print partial lines */
4277                         iter->seq.len = len;
4278                         break;
4279                 }
4280                 if (ret != TRACE_TYPE_NO_CONSUME)
4281                         trace_consume(iter);
4282
4283                 if (iter->seq.len >= cnt)
4284                         break;
4285
4286                 /*
4287                  * Setting the full flag means we reached the trace_seq buffer
4288                  * size and we should leave via the partial-output condition above.
4289                  * One of the trace_seq_* functions is not used properly.
4290                  */
4291                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4292                           iter->ent->type);
4293         }
4294         trace_access_unlock(iter->cpu_file);
4295         trace_event_read_unlock();
4296
4297         /* Now copy what we have to the user */
4298         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4299         if (iter->seq.readpos >= iter->seq.len)
4300                 trace_seq_init(&iter->seq);
4301
4302         /*
4303          * If there was nothing to send to user, in spite of consuming trace
4304          * entries, go back to wait for more entries.
4305          */
4306         if (sret == -EBUSY)
4307                 goto waitagain;
4308
4309 out:
4310         mutex_unlock(&iter->mutex);
4311
4312         return sret;
4313 }
4314
4315 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4316                                      unsigned int idx)
4317 {
4318         __free_page(spd->pages[idx]);
4319 }
4320
4321 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4322         .can_merge              = 0,
4323         .map                    = generic_pipe_buf_map,
4324         .unmap                  = generic_pipe_buf_unmap,
4325         .confirm                = generic_pipe_buf_confirm,
4326         .release                = generic_pipe_buf_release,
4327         .steal                  = generic_pipe_buf_steal,
4328         .get                    = generic_pipe_buf_get,
4329 };
4330
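/*
 * Render consumed trace entries into iter->seq until @rem bytes have been
 * produced, the page-sized seq buffer fills up, or the trace buffer runs
 * dry; returns how much of @rem is left.
 */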
4331 static size_t
4332 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4333 {
4334         size_t count;
4335         int ret;
4336
4337         /* Seq buffer is page-sized, exactly what we need. */
4338         for (;;) {
4339                 count = iter->seq.len;
4340                 ret = print_trace_line(iter);
4341                 count = iter->seq.len - count;
4342                 if (rem < count) {
4343                         rem = 0;
4344                         iter->seq.len -= count;
4345                         break;
4346                 }
4347                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4348                         iter->seq.len -= count;
4349                         break;
4350                 }
4351
4352                 if (ret != TRACE_TYPE_NO_CONSUME)
4353                         trace_consume(iter);
4354                 rem -= count;
4355                 if (!trace_find_next_entry_inc(iter))   {
4356                         rem = 0;
4357                         iter->ent = NULL;
4358                         break;
4359                 }
4360         }
4361
4362         return rem;
4363 }
4364
4365 static ssize_t tracing_splice_read_pipe(struct file *filp,
4366                                         loff_t *ppos,
4367                                         struct pipe_inode_info *pipe,
4368                                         size_t len,
4369                                         unsigned int flags)
4370 {
4371         struct page *pages_def[PIPE_DEF_BUFFERS];
4372         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4373         struct trace_iterator *iter = filp->private_data;
4374         struct splice_pipe_desc spd = {
4375                 .pages          = pages_def,
4376                 .partial        = partial_def,
4377                 .nr_pages       = 0, /* This gets updated below. */
4378                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4379                 .flags          = flags,
4380                 .ops            = &tracing_pipe_buf_ops,
4381                 .spd_release    = tracing_spd_release_pipe,
4382         };
4383         struct trace_array *tr = iter->tr;
4384         ssize_t ret;
4385         size_t rem;
4386         unsigned int i;
4387
4388         if (splice_grow_spd(pipe, &spd))
4389                 return -ENOMEM;
4390
4391         /* copy the tracer to avoid using a global lock all around */
4392         mutex_lock(&trace_types_lock);
4393         if (unlikely(iter->trace->name != tr->current_trace->name))
4394                 *iter->trace = *tr->current_trace;
4395         mutex_unlock(&trace_types_lock);
4396
4397         mutex_lock(&iter->mutex);
4398
4399         if (iter->trace->splice_read) {
4400                 ret = iter->trace->splice_read(iter, filp,
4401                                                ppos, pipe, len, flags);
4402                 if (ret)
4403                         goto out_err;
4404         }
4405
4406         ret = tracing_wait_pipe(filp);
4407         if (ret <= 0)
4408                 goto out_err;
4409
4410         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4411                 ret = -EFAULT;
4412                 goto out_err;
4413         }
4414
4415         trace_event_read_lock();
4416         trace_access_lock(iter->cpu_file);
4417
4418         /* Fill as many pages as possible. */
4419         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4420                 spd.pages[i] = alloc_page(GFP_KERNEL);
4421                 if (!spd.pages[i])
4422                         break;
4423
4424                 rem = tracing_fill_pipe_page(rem, iter);
4425
4426                 /* Copy the data into the page, so we can start over. */
4427                 ret = trace_seq_to_buffer(&iter->seq,
4428                                           page_address(spd.pages[i]),
4429                                           iter->seq.len);
4430                 if (ret < 0) {
4431                         __free_page(spd.pages[i]);
4432                         break;
4433                 }
4434                 spd.partial[i].offset = 0;
4435                 spd.partial[i].len = iter->seq.len;
4436
4437                 trace_seq_init(&iter->seq);
4438         }
4439
4440         trace_access_unlock(iter->cpu_file);
4441         trace_event_read_unlock();
4442         mutex_unlock(&iter->mutex);
4443
4444         spd.nr_pages = i;
4445
4446         ret = splice_to_pipe(pipe, &spd);
4447 out:
4448         splice_shrink_spd(&spd);
4449         return ret;
4450
4451 out_err:
4452         mutex_unlock(&iter->mutex);
4453         goto out;
4454 }
4455
4456 static ssize_t
4457 tracing_entries_read(struct file *filp, char __user *ubuf,
4458                      size_t cnt, loff_t *ppos)
4459 {
4460         struct inode *inode = file_inode(filp);
4461         struct trace_array *tr = inode->i_private;
4462         int cpu = tracing_get_cpu(inode);
4463         char buf[64];
4464         int r = 0;
4465         ssize_t ret;
4466
4467         mutex_lock(&trace_types_lock);
4468
4469         if (cpu == RING_BUFFER_ALL_CPUS) {
4470                 int cpu, buf_size_same;
4471                 unsigned long size;
4472
4473                 size = 0;
4474                 buf_size_same = 1;
4475                 /* check if all cpu sizes are same */
4476                 for_each_tracing_cpu(cpu) {
4477                         /* fill in the size from first enabled cpu */
4478                         if (size == 0)
4479                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4480                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4481                                 buf_size_same = 0;
4482                                 break;
4483                         }
4484                 }
4485
4486                 if (buf_size_same) {
4487                         if (!ring_buffer_expanded)
4488                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4489                                             size >> 10,
4490                                             trace_buf_size >> 10);
4491                         else
4492                                 r = sprintf(buf, "%lu\n", size >> 10);
4493                 } else
4494                         r = sprintf(buf, "X\n");
4495         } else
4496                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4497
4498         mutex_unlock(&trace_types_lock);
4499
4500         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4501         return ret;
4502 }
4503
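/*
 * Write handler for "buffer_size_kb": the value is taken in kilobytes,
 * e.g. "echo 2048 > buffer_size_kb" gives each per-cpu buffer (or the one
 * CPU selected through a per-cpu file) 2 MB of space.
 */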
4504 static ssize_t
4505 tracing_entries_write(struct file *filp, const char __user *ubuf,
4506                       size_t cnt, loff_t *ppos)
4507 {
4508         struct inode *inode = file_inode(filp);
4509         struct trace_array *tr = inode->i_private;
4510         unsigned long val;
4511         int ret;
4512
4513         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4514         if (ret)
4515                 return ret;
4516
4517         /* must have at least 1 entry */
4518         if (!val)
4519                 return -EINVAL;
4520
4521         /* value is in KB */
4522         val <<= 10;
4523         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4524         if (ret < 0)
4525                 return ret;
4526
4527         *ppos += cnt;
4528
4529         return cnt;
4530 }
4531
4532 static ssize_t
4533 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4534                                 size_t cnt, loff_t *ppos)
4535 {
4536         struct trace_array *tr = filp->private_data;
4537         char buf[64];
4538         int r, cpu;
4539         unsigned long size = 0, expanded_size = 0;
4540
4541         mutex_lock(&trace_types_lock);
4542         for_each_tracing_cpu(cpu) {
4543                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4544                 if (!ring_buffer_expanded)
4545                         expanded_size += trace_buf_size >> 10;
4546         }
4547         if (ring_buffer_expanded)
4548                 r = sprintf(buf, "%lu\n", size);
4549         else
4550                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4551         mutex_unlock(&trace_types_lock);
4552
4553         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4554 }
4555
4556 static ssize_t
4557 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4558                           size_t cnt, loff_t *ppos)
4559 {
4560         /*
4561          * There is no need to read what the user has written; this function
4562          * is just to make sure that there is no error when "echo" is used
4563          */
4564
4565         *ppos += cnt;
4566
4567         return cnt;
4568 }
4569
4570 static int
4571 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4572 {
4573         struct trace_array *tr = inode->i_private;
4574
4575         /* disable tracing ? */
4576         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4577                 tracer_tracing_off(tr);
4578         /* resize the ring buffer to 0 */
4579         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4580
4581         trace_array_put(tr);
4582
4583         return 0;
4584 }
4585
4586 static ssize_t
4587 tracing_mark_write(struct file *filp, const char __user *ubuf,
4588                                         size_t cnt, loff_t *fpos)
4589 {
4590         unsigned long addr = (unsigned long)ubuf;
4591         struct trace_array *tr = filp->private_data;
4592         struct ring_buffer_event *event;
4593         struct ring_buffer *buffer;
4594         struct print_entry *entry;
4595         unsigned long irq_flags;
4596         struct page *pages[2];
4597         void *map_page[2];
4598         int nr_pages = 1;
4599         ssize_t written;
4600         int offset;
4601         int size;
4602         int len;
4603         int ret;
4604         int i;
4605
4606         if (tracing_disabled)
4607                 return -EINVAL;
4608
4609         if (!(trace_flags & TRACE_ITER_MARKERS))
4610                 return -EINVAL;
4611
4612         if (cnt > TRACE_BUF_SIZE)
4613                 cnt = TRACE_BUF_SIZE;
4614
4615         /*
4616          * Userspace is injecting traces into the kernel trace buffer.
4617          * We want to be as non-intrusive as possible.
4618          * To do so, we do not want to allocate any special buffers
4619          * or take any locks, but instead write the userspace data
4620          * straight into the ring buffer.
4621          *
4622          * First we need to pin the userspace buffer into memory.
4623          * Most likely it already is, because userspace just referenced it,
4624          * but there's no guarantee. By using get_user_pages_fast()
4625          * and kmap_atomic/kunmap_atomic() we can get access to the
4626          * pages directly. We then write the data directly into the
4627          * ring buffer.
4628          */
4629         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4630
4631         /* check if we cross pages */
4632         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4633                 nr_pages = 2;
4634
4635         offset = addr & (PAGE_SIZE - 1);
4636         addr &= PAGE_MASK;
4637
4638         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4639         if (ret < nr_pages) {
4640                 while (--ret >= 0)
4641                         put_page(pages[ret]);
4642                 written = -EFAULT;
4643                 goto out;
4644         }
4645
4646         for (i = 0; i < nr_pages; i++)
4647                 map_page[i] = kmap_atomic(pages[i]);
4648
4649         local_save_flags(irq_flags);
4650         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4651         buffer = tr->trace_buffer.buffer;
4652         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4653                                           irq_flags, preempt_count());
4654         if (!event) {
4655                 /* Ring buffer disabled, return as if not open for write */
4656                 written = -EBADF;
4657                 goto out_unlock;
4658         }
4659
4660         entry = ring_buffer_event_data(event);
4661         entry->ip = _THIS_IP_;
4662
4663         if (nr_pages == 2) {
4664                 len = PAGE_SIZE - offset;
4665                 memcpy(&entry->buf, map_page[0] + offset, len);
4666                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4667         } else
4668                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4669
4670         if (entry->buf[cnt - 1] != '\n') {
4671                 entry->buf[cnt] = '\n';
4672                 entry->buf[cnt + 1] = '\0';
4673         } else
4674                 entry->buf[cnt] = '\0';
4675
4676         __buffer_unlock_commit(buffer, event);
4677
4678         written = cnt;
4679
4680         *fpos += written;
4681
4682  out_unlock:
4683         for (i = 0; i < nr_pages; i++){
4684                 kunmap_atomic(map_page[i]);
4685                 put_page(pages[i]);
4686         }
4687  out:
4688         return written;
4689 }
4690
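/*
 * The "trace_clock" file: reading lists the available clocks with the
 * active one in brackets; writing a clock name (e.g. "echo global >
 * trace_clock") switches clocks and resets the buffers, since timestamps
 * from different clocks are not comparable.
 */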
4691 static int tracing_clock_show(struct seq_file *m, void *v)
4692 {
4693         struct trace_array *tr = m->private;
4694         int i;
4695
4696         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4697                 seq_printf(m,
4698                         "%s%s%s%s", i ? " " : "",
4699                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4700                         i == tr->clock_id ? "]" : "");
4701         seq_putc(m, '\n');
4702
4703         return 0;
4704 }
4705
4706 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4707                                    size_t cnt, loff_t *fpos)
4708 {
4709         struct seq_file *m = filp->private_data;
4710         struct trace_array *tr = m->private;
4711         char buf[64];
4712         const char *clockstr;
4713         int i;
4714
4715         if (cnt >= sizeof(buf))
4716                 return -EINVAL;
4717
4718         if (copy_from_user(&buf, ubuf, cnt))
4719                 return -EFAULT;
4720
4721         buf[cnt] = 0;
4722
4723         clockstr = strstrip(buf);
4724
4725         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4726                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4727                         break;
4728         }
4729         if (i == ARRAY_SIZE(trace_clocks))
4730                 return -EINVAL;
4731
4732         mutex_lock(&trace_types_lock);
4733
4734         tr->clock_id = i;
4735
4736         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4737
4738         /*
4739          * New clock may not be consistent with the previous clock.
4740          * Reset the buffer so that it doesn't have incomparable timestamps.
4741          */
4742         tracing_reset_online_cpus(&tr->trace_buffer);
4743
4744 #ifdef CONFIG_TRACER_MAX_TRACE
4745         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4746                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4747         tracing_reset_online_cpus(&tr->max_buffer);
4748 #endif
4749
4750         mutex_unlock(&trace_types_lock);
4751
4752         *fpos += cnt;
4753
4754         return cnt;
4755 }
4756
4757 static int tracing_clock_open(struct inode *inode, struct file *file)
4758 {
4759         struct trace_array *tr = inode->i_private;
4760         int ret;
4761
4762         if (tracing_disabled)
4763                 return -ENODEV;
4764
4765         if (trace_array_get(tr))
4766                 return -ENODEV;
4767
4768         ret = single_open(file, tracing_clock_show, inode->i_private);
4769         if (ret < 0)
4770                 trace_array_put(tr);
4771
4772         return ret;
4773 }
4774
4775 struct ftrace_buffer_info {
4776         struct trace_iterator   iter;
4777         void                    *spare;
4778         unsigned int            read;
4779 };
4780
4781 #ifdef CONFIG_TRACER_SNAPSHOT
4782 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4783 {
4784         struct trace_array *tr = inode->i_private;
4785         struct trace_iterator *iter;
4786         struct seq_file *m;
4787         int ret = 0;
4788
4789         if (trace_array_get(tr) < 0)
4790                 return -ENODEV;
4791
4792         if (file->f_mode & FMODE_READ) {
4793                 iter = __tracing_open(inode, file, true);
4794                 if (IS_ERR(iter))
4795                         ret = PTR_ERR(iter);
4796         } else {
4797                 /* Writes still need the seq_file to hold the private data */
4798                 ret = -ENOMEM;
4799                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4800                 if (!m)
4801                         goto out;
4802                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4803                 if (!iter) {
4804                         kfree(m);
4805                         goto out;
4806                 }
4807                 ret = 0;
4808
4809                 iter->tr = tr;
4810                 iter->trace_buffer = &tr->max_buffer;
4811                 iter->cpu_file = tracing_get_cpu(inode);
4812                 m->private = iter;
4813                 file->private_data = m;
4814         }
4815 out:
4816         if (ret < 0)
4817                 trace_array_put(tr);
4818
4819         return ret;
4820 }
4821
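/*
 * Values written to the "snapshot" file are handled below as:
 *   0     - free the snapshot buffer if allocated (all-CPU file only)
 *   1     - allocate the snapshot buffer if needed and swap it with the
 *           live buffer (per-cpu swap needs CONFIG_RING_BUFFER_ALLOW_SWAP)
 *   other - clear the contents of the snapshot buffer
 *
 * Example usage, assuming debugfs is mounted at /sys/kernel/debug:
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot
 *   # cat /sys/kernel/debug/tracing/snapshot
 */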
4822 static ssize_t
4823 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4824                        loff_t *ppos)
4825 {
4826         struct seq_file *m = filp->private_data;
4827         struct trace_iterator *iter = m->private;
4828         struct trace_array *tr = iter->tr;
4829         unsigned long val;
4830         int ret;
4831
4832         ret = tracing_update_buffers();
4833         if (ret < 0)
4834                 return ret;
4835
4836         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4837         if (ret)
4838                 return ret;
4839
4840         mutex_lock(&trace_types_lock);
4841
4842         if (tr->current_trace->use_max_tr) {
4843                 ret = -EBUSY;
4844                 goto out;
4845         }
4846
4847         switch (val) {
4848         case 0:
4849                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4850                         ret = -EINVAL;
4851                         break;
4852                 }
4853                 if (tr->allocated_snapshot)
4854                         free_snapshot(tr);
4855                 break;
4856         case 1:
4857 /* Only allow per-cpu swap if the ring buffer supports it */
4858 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4859                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4860                         ret = -EINVAL;
4861                         break;
4862                 }
4863 #endif
4864                 if (!tr->allocated_snapshot) {
4865                         ret = alloc_snapshot(tr);
4866                         if (ret < 0)
4867                                 break;
4868                 }
4869                 local_irq_disable();
4870                 /* Now, we're going to swap */
4871                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4872                         update_max_tr(tr, current, smp_processor_id());
4873                 else
4874                         update_max_tr_single(tr, current, iter->cpu_file);
4875                 local_irq_enable();
4876                 break;
4877         default:
4878                 if (tr->allocated_snapshot) {
4879                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4880                                 tracing_reset_online_cpus(&tr->max_buffer);
4881                         else
4882                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4883                 }
4884                 break;
4885         }
4886
4887         if (ret >= 0) {
4888                 *ppos += cnt;
4889                 ret = cnt;
4890         }
4891 out:
4892         mutex_unlock(&trace_types_lock);
4893         return ret;
4894 }
4895
4896 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4897 {
4898         struct seq_file *m = file->private_data;
4899         int ret;
4900
4901         ret = tracing_release(inode, file);
4902
4903         if (file->f_mode & FMODE_READ)
4904                 return ret;
4905
4906         /* If write only, the seq_file is just a stub */
4907         if (m)
4908                 kfree(m->private);
4909         kfree(m);
4910
4911         return 0;
4912 }
4913
4914 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4915 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4916                                     size_t count, loff_t *ppos);
4917 static int tracing_buffers_release(struct inode *inode, struct file *file);
4918 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4919                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4920
4921 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4922 {
4923         struct ftrace_buffer_info *info;
4924         int ret;
4925
4926         ret = tracing_buffers_open(inode, filp);
4927         if (ret < 0)
4928                 return ret;
4929
4930         info = filp->private_data;
4931
4932         if (info->iter.trace->use_max_tr) {
4933                 tracing_buffers_release(inode, filp);
4934                 return -EBUSY;
4935         }
4936
4937         info->iter.snapshot = true;
4938         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4939
4940         return ret;
4941 }
4942
4943 #endif /* CONFIG_TRACER_SNAPSHOT */
4944
4945
4946 static const struct file_operations tracing_max_lat_fops = {
4947         .open           = tracing_open_generic,
4948         .read           = tracing_max_lat_read,
4949         .write          = tracing_max_lat_write,
4950         .llseek         = generic_file_llseek,
4951 };
4952
4953 static const struct file_operations set_tracer_fops = {
4954         .open           = tracing_open_generic,
4955         .read           = tracing_set_trace_read,
4956         .write          = tracing_set_trace_write,
4957         .llseek         = generic_file_llseek,
4958 };
4959
4960 static const struct file_operations tracing_pipe_fops = {
4961         .open           = tracing_open_pipe,
4962         .poll           = tracing_poll_pipe,
4963         .read           = tracing_read_pipe,
4964         .splice_read    = tracing_splice_read_pipe,
4965         .release        = tracing_release_pipe,
4966         .llseek         = no_llseek,
4967 };
4968
4969 static const struct file_operations tracing_entries_fops = {
4970         .open           = tracing_open_generic_tr,
4971         .read           = tracing_entries_read,
4972         .write          = tracing_entries_write,
4973         .llseek         = generic_file_llseek,
4974         .release        = tracing_release_generic_tr,
4975 };
4976
4977 static const struct file_operations tracing_total_entries_fops = {
4978         .open           = tracing_open_generic_tr,
4979         .read           = tracing_total_entries_read,
4980         .llseek         = generic_file_llseek,
4981         .release        = tracing_release_generic_tr,
4982 };
4983
4984 static const struct file_operations tracing_free_buffer_fops = {
4985         .open           = tracing_open_generic_tr,
4986         .write          = tracing_free_buffer_write,
4987         .release        = tracing_free_buffer_release,
4988 };
4989
4990 static const struct file_operations tracing_mark_fops = {
4991         .open           = tracing_open_generic_tr,
4992         .write          = tracing_mark_write,
4993         .llseek         = generic_file_llseek,
4994         .release        = tracing_release_generic_tr,
4995 };
4996
4997 static const struct file_operations trace_clock_fops = {
4998         .open           = tracing_clock_open,
4999         .read           = seq_read,
5000         .llseek         = seq_lseek,
5001         .release        = tracing_single_release_tr,
5002         .write          = tracing_clock_write,
5003 };
5004
5005 #ifdef CONFIG_TRACER_SNAPSHOT
5006 static const struct file_operations snapshot_fops = {
5007         .open           = tracing_snapshot_open,
5008         .read           = seq_read,
5009         .write          = tracing_snapshot_write,
5010         .llseek         = tracing_lseek,
5011         .release        = tracing_snapshot_release,
5012 };
5013
5014 static const struct file_operations snapshot_raw_fops = {
5015         .open           = snapshot_raw_open,
5016         .read           = tracing_buffers_read,
5017         .release        = tracing_buffers_release,
5018         .splice_read    = tracing_buffers_splice_read,
5019         .llseek         = no_llseek,
5020 };
5021
5022 #endif /* CONFIG_TRACER_SNAPSHOT */
5023
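/*
 * The operations below back the per-cpu "trace_pipe_raw" files (see
 * tracing_init_debugfs_percpu()). They hand out raw ring buffer pages,
 * either copied to user space by read() or passed to a pipe without
 * copying by splice_read().
 */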
5024 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5025 {
5026         struct trace_array *tr = inode->i_private;
5027         struct ftrace_buffer_info *info;
5028         int ret;
5029
5030         if (tracing_disabled)
5031                 return -ENODEV;
5032
5033         if (trace_array_get(tr) < 0)
5034                 return -ENODEV;
5035
5036         info = kzalloc(sizeof(*info), GFP_KERNEL);
5037         if (!info) {
5038                 trace_array_put(tr);
5039                 return -ENOMEM;
5040         }
5041
5042         mutex_lock(&trace_types_lock);
5043
5044         info->iter.tr           = tr;
5045         info->iter.cpu_file     = tracing_get_cpu(inode);
5046         info->iter.trace        = tr->current_trace;
5047         info->iter.trace_buffer = &tr->trace_buffer;
5048         info->spare             = NULL;
5049         /* Force reading ring buffer for first read */
5050         info->read              = (unsigned int)-1;
5051
5052         filp->private_data = info;
5053
5054         mutex_unlock(&trace_types_lock);
5055
5056         ret = nonseekable_open(inode, filp);
5057         if (ret < 0)
5058                 trace_array_put(tr);
5059
5060         return ret;
5061 }
5062
5063 static unsigned int
5064 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5065 {
5066         struct ftrace_buffer_info *info = filp->private_data;
5067         struct trace_iterator *iter = &info->iter;
5068
5069         return trace_poll(iter, filp, poll_table);
5070 }
5071
5072 static ssize_t
5073 tracing_buffers_read(struct file *filp, char __user *ubuf,
5074                      size_t count, loff_t *ppos)
5075 {
5076         struct ftrace_buffer_info *info = filp->private_data;
5077         struct trace_iterator *iter = &info->iter;
5078         ssize_t ret;
5079         ssize_t size;
5080
5081         if (!count)
5082                 return 0;
5083
5084         mutex_lock(&trace_types_lock);
5085
5086 #ifdef CONFIG_TRACER_MAX_TRACE
5087         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5088                 size = -EBUSY;
5089                 goto out_unlock;
5090         }
5091 #endif
5092
5093         if (!info->spare)
5094                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5095                                                           iter->cpu_file);
5096         size = -ENOMEM;
5097         if (!info->spare)
5098                 goto out_unlock;
5099
5100         /* Do we have previous read data to read? */
5101         if (info->read < PAGE_SIZE)
5102                 goto read;
5103
5104  again:
5105         trace_access_lock(iter->cpu_file);
5106         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5107                                     &info->spare,
5108                                     count,
5109                                     iter->cpu_file, 0);
5110         trace_access_unlock(iter->cpu_file);
5111
5112         if (ret < 0) {
5113                 if (trace_empty(iter)) {
5114                         if ((filp->f_flags & O_NONBLOCK)) {
5115                                 size = -EAGAIN;
5116                                 goto out_unlock;
5117                         }
5118                         mutex_unlock(&trace_types_lock);
5119                         ret = iter->trace->wait_pipe(iter);
5120                         mutex_lock(&trace_types_lock);
5121                         if (ret) {
5122                                 size = ret;
5123                                 goto out_unlock;
5124                         }
5125                         if (signal_pending(current)) {
5126                                 size = -EINTR;
5127                                 goto out_unlock;
5128                         }
5129                         goto again;
5130                 }
5131                 size = 0;
5132                 goto out_unlock;
5133         }
5134
5135         info->read = 0;
5136  read:
5137         size = PAGE_SIZE - info->read;
5138         if (size > count)
5139                 size = count;
5140
5141         ret = copy_to_user(ubuf, info->spare + info->read, size);
5142         if (ret == size) {
5143                 size = -EFAULT;
5144                 goto out_unlock;
5145         }
5146         size -= ret;
5147
5148         *ppos += size;
5149         info->read += size;
5150
5151  out_unlock:
5152         mutex_unlock(&trace_types_lock);
5153
5154         return size;
5155 }
5156
5157 static int tracing_buffers_release(struct inode *inode, struct file *file)
5158 {
5159         struct ftrace_buffer_info *info = file->private_data;
5160         struct trace_iterator *iter = &info->iter;
5161
5162         mutex_lock(&trace_types_lock);
5163
5164         __trace_array_put(iter->tr);
5165
5166         if (info->spare)
5167                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5168         kfree(info);
5169
5170         mutex_unlock(&trace_types_lock);
5171
5172         return 0;
5173 }
5174
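/*
 * A buffer_ref is a reference counted handle to a single ring buffer page
 * given out to the splice machinery. The page is returned to the ring
 * buffer only when the last pipe buffer referencing it is released.
 */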
5175 struct buffer_ref {
5176         struct ring_buffer      *buffer;
5177         void                    *page;
5178         int                     ref;
5179 };
5180
5181 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5182                                     struct pipe_buffer *buf)
5183 {
5184         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5185
5186         if (--ref->ref)
5187                 return;
5188
5189         ring_buffer_free_read_page(ref->buffer, ref->page);
5190         kfree(ref);
5191         buf->private = 0;
5192 }
5193
5194 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5195                                 struct pipe_buffer *buf)
5196 {
5197         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5198
5199         ref->ref++;
5200 }
5201
5202 /* Pipe buffer operations for a buffer. */
5203 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5204         .can_merge              = 0,
5205         .map                    = generic_pipe_buf_map,
5206         .unmap                  = generic_pipe_buf_unmap,
5207         .confirm                = generic_pipe_buf_confirm,
5208         .release                = buffer_pipe_buf_release,
5209         .steal                  = generic_pipe_buf_steal,
5210         .get                    = buffer_pipe_buf_get,
5211 };
5212
5213 /*
5214  * Callback from splice_to_pipe(), used to release any remaining pages
5215  * at the end of the spd in case we errored out while filling the pipe.
5216  */
5217 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5218 {
5219         struct buffer_ref *ref =
5220                 (struct buffer_ref *)spd->partial[i].private;
5221
5222         if (--ref->ref)
5223                 return;
5224
5225         ring_buffer_free_read_page(ref->buffer, ref->page);
5226         kfree(ref);
5227         spd->partial[i].private = 0;
5228 }
5229
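/*
 * Splice ring buffer pages straight into a pipe. *ppos must be page
 * aligned, and len is rounded down to whole pages (a length smaller than
 * one page that is not page aligned is rejected). The call blocks for
 * data unless O_NONBLOCK or SPLICE_F_NONBLOCK is set.
 */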
5230 static ssize_t
5231 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5232                             struct pipe_inode_info *pipe, size_t len,
5233                             unsigned int flags)
5234 {
5235         struct ftrace_buffer_info *info = file->private_data;
5236         struct trace_iterator *iter = &info->iter;
5237         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5238         struct page *pages_def[PIPE_DEF_BUFFERS];
5239         struct splice_pipe_desc spd = {
5240                 .pages          = pages_def,
5241                 .partial        = partial_def,
5242                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5243                 .flags          = flags,
5244                 .ops            = &buffer_pipe_buf_ops,
5245                 .spd_release    = buffer_spd_release,
5246         };
5247         struct buffer_ref *ref;
5248         int entries, size, i;
5249         ssize_t ret;
5250
5251         mutex_lock(&trace_types_lock);
5252
5253 #ifdef CONFIG_TRACER_MAX_TRACE
5254         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5255                 ret = -EBUSY;
5256                 goto out;
5257         }
5258 #endif
5259
5260         if (splice_grow_spd(pipe, &spd)) {
5261                 ret = -ENOMEM;
5262                 goto out;
5263         }
5264
5265         if (*ppos & (PAGE_SIZE - 1)) {
5266                 ret = -EINVAL;
5267                 goto out;
5268         }
5269
5270         if (len & (PAGE_SIZE - 1)) {
5271                 if (len < PAGE_SIZE) {
5272                         ret = -EINVAL;
5273                         goto out;
5274                 }
5275                 len &= PAGE_MASK;
5276         }
5277
5278  again:
5279         trace_access_lock(iter->cpu_file);
5280         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5281
5282         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5283                 struct page *page;
5284                 int r;
5285
5286                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5287                 if (!ref)
5288                         break;
5289
5290                 ref->ref = 1;
5291                 ref->buffer = iter->trace_buffer->buffer;
5292                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5293                 if (!ref->page) {
5294                         kfree(ref);
5295                         break;
5296                 }
5297
5298                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5299                                           len, iter->cpu_file, 1);
5300                 if (r < 0) {
5301                         ring_buffer_free_read_page(ref->buffer, ref->page);
5302                         kfree(ref);
5303                         break;
5304                 }
5305
5306                 /*
5307                  * Zero out any leftover data; this page is going
5308                  * to user land.
5309                  */
5310                 size = ring_buffer_page_len(ref->page);
5311                 if (size < PAGE_SIZE)
5312                         memset(ref->page + size, 0, PAGE_SIZE - size);
5313
5314                 page = virt_to_page(ref->page);
5315
5316                 spd.pages[i] = page;
5317                 spd.partial[i].len = PAGE_SIZE;
5318                 spd.partial[i].offset = 0;
5319                 spd.partial[i].private = (unsigned long)ref;
5320                 spd.nr_pages++;
5321                 *ppos += PAGE_SIZE;
5322
5323                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5324         }
5325
5326         trace_access_unlock(iter->cpu_file);
5327         spd.nr_pages = i;
5328
5329         /* did we read anything? */
5330         if (!spd.nr_pages) {
5331                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5332                         ret = -EAGAIN;
5333                         goto out;
5334                 }
5335                 mutex_unlock(&trace_types_lock);
5336                 ret = iter->trace->wait_pipe(iter);
5337                 mutex_lock(&trace_types_lock);
5338                 if (ret)
5339                         goto out;
5340                 if (signal_pending(current)) {
5341                         ret = -EINTR;
5342                         goto out;
5343                 }
5344                 goto again;
5345         }
5346
5347         ret = splice_to_pipe(pipe, &spd);
5348         splice_shrink_spd(&spd);
5349 out:
5350         mutex_unlock(&trace_types_lock);
5351
5352         return ret;
5353 }
5354
5355 static const struct file_operations tracing_buffers_fops = {
5356         .open           = tracing_buffers_open,
5357         .read           = tracing_buffers_read,
5358         .poll           = tracing_buffers_poll,
5359         .release        = tracing_buffers_release,
5360         .splice_read    = tracing_buffers_splice_read,
5361         .llseek         = no_llseek,
5362 };
5363
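/*
 * The per-cpu "stats" file reports ring buffer statistics for one CPU:
 * entries, overrun, commit overrun, bytes, oldest event ts, now ts,
 * dropped events and read events. The timestamps are printed in seconds
 * only when the selected trace clock counts in nanoseconds.
 */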
5364 static ssize_t
5365 tracing_stats_read(struct file *filp, char __user *ubuf,
5366                    size_t count, loff_t *ppos)
5367 {
5368         struct inode *inode = file_inode(filp);
5369         struct trace_array *tr = inode->i_private;
5370         struct trace_buffer *trace_buf = &tr->trace_buffer;
5371         int cpu = tracing_get_cpu(inode);
5372         struct trace_seq *s;
5373         unsigned long cnt;
5374         unsigned long long t;
5375         unsigned long usec_rem;
5376
5377         s = kmalloc(sizeof(*s), GFP_KERNEL);
5378         if (!s)
5379                 return -ENOMEM;
5380
5381         trace_seq_init(s);
5382
5383         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5384         trace_seq_printf(s, "entries: %ld\n", cnt);
5385
5386         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5387         trace_seq_printf(s, "overrun: %ld\n", cnt);
5388
5389         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5390         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5391
5392         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5393         trace_seq_printf(s, "bytes: %ld\n", cnt);
5394
5395         if (trace_clocks[tr->clock_id].in_ns) {
5396                 /* local or global for trace_clock */
5397                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5398                 usec_rem = do_div(t, USEC_PER_SEC);
5399                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5400                                                                 t, usec_rem);
5401
5402                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5403                 usec_rem = do_div(t, USEC_PER_SEC);
5404                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5405         } else {
5406                 /* counter or tsc mode for trace_clock */
5407                 trace_seq_printf(s, "oldest event ts: %llu\n",
5408                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5409
5410                 trace_seq_printf(s, "now ts: %llu\n",
5411                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5412         }
5413
5414         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5415         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5416
5417         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5418         trace_seq_printf(s, "read events: %ld\n", cnt);
5419
5420         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5421
5422         kfree(s);
5423
5424         return count;
5425 }
5426
5427 static const struct file_operations tracing_stats_fops = {
5428         .open           = tracing_open_generic_tr,
5429         .read           = tracing_stats_read,
5430         .llseek         = generic_file_llseek,
5431         .release        = tracing_release_generic_tr,
5432 };
5433
5434 #ifdef CONFIG_DYNAMIC_FTRACE
5435
5436 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5437 {
5438         return 0;
5439 }
5440
5441 static ssize_t
5442 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5443                   size_t cnt, loff_t *ppos)
5444 {
5445         static char ftrace_dyn_info_buffer[1024];
5446         static DEFINE_MUTEX(dyn_info_mutex);
5447         unsigned long *p = filp->private_data;
5448         char *buf = ftrace_dyn_info_buffer;
5449         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5450         int r;
5451
5452         mutex_lock(&dyn_info_mutex);
5453         r = sprintf(buf, "%ld ", *p);
5454
5455         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5456         buf[r++] = '\n';
5457
5458         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5459
5460         mutex_unlock(&dyn_info_mutex);
5461
5462         return r;
5463 }
5464
5465 static const struct file_operations tracing_dyn_info_fops = {
5466         .open           = tracing_open_generic,
5467         .read           = tracing_read_dyn_info,
5468         .llseek         = generic_file_llseek,
5469 };
5470 #endif /* CONFIG_DYNAMIC_FTRACE */
5471
5472 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5473 static void
5474 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5475 {
5476         tracing_snapshot();
5477 }
5478
5479 static void
5480 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5481 {
5482         unsigned long *count = (long *)data;
5483
5484         if (!*count)
5485                 return;
5486
5487         if (*count != -1)
5488                 (*count)--;
5489
5490         tracing_snapshot();
5491 }
5492
5493 static int
5494 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5495                       struct ftrace_probe_ops *ops, void *data)
5496 {
5497         long count = (long)data;
5498
5499         seq_printf(m, "%ps:", (void *)ip);
5500
5501         seq_printf(m, "snapshot");
5502
5503         if (count == -1)
5504                 seq_printf(m, ":unlimited\n");
5505         else
5506                 seq_printf(m, ":count=%ld\n", count);
5507
5508         return 0;
5509 }
5510
5511 static struct ftrace_probe_ops snapshot_probe_ops = {
5512         .func                   = ftrace_snapshot,
5513         .print                  = ftrace_snapshot_print,
5514 };
5515
5516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5517         .func                   = ftrace_count_snapshot,
5518         .print                  = ftrace_snapshot_print,
5519 };
5520
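/*
 * Handle the "snapshot" function command written to set_ftrace_filter,
 * optionally limited to a number of hits and removable with a '!' prefix.
 * A sketch of the expected usage, where some_function stands for any
 * traceable function and debugfs is mounted at /sys/kernel/debug:
 *   # cd /sys/kernel/debug/tracing
 *   # echo 'some_function:snapshot' > set_ftrace_filter
 *   # echo 'some_function:snapshot:3' > set_ftrace_filter
 *   # echo '!some_function:snapshot' > set_ftrace_filter
 */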
5521 static int
5522 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5523                                char *glob, char *cmd, char *param, int enable)
5524 {
5525         struct ftrace_probe_ops *ops;
5526         void *count = (void *)-1;
5527         char *number;
5528         int ret;
5529
5530         /* hash funcs only work with set_ftrace_filter */
5531         if (!enable)
5532                 return -EINVAL;
5533
5534         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5535
5536         if (glob[0] == '!') {
5537                 unregister_ftrace_function_probe_func(glob+1, ops);
5538                 return 0;
5539         }
5540
5541         if (!param)
5542                 goto out_reg;
5543
5544         number = strsep(&param, ":");
5545
5546         if (!strlen(number))
5547                 goto out_reg;
5548
5549         /*
5550          * We use the callback data field (which is a pointer)
5551          * as our counter.
5552          */
5553         ret = kstrtoul(number, 0, (unsigned long *)&count);
5554         if (ret)
5555                 return ret;
5556
5557  out_reg:
5558         ret = register_ftrace_function_probe(glob, ops, count);
5559
5560         if (ret >= 0)
5561                 alloc_snapshot(&global_trace);
5562
5563         return ret < 0 ? ret : 0;
5564 }
5565
5566 static struct ftrace_func_command ftrace_snapshot_cmd = {
5567         .name                   = "snapshot",
5568         .func                   = ftrace_trace_snapshot_callback,
5569 };
5570
5571 static __init int register_snapshot_cmd(void)
5572 {
5573         return register_ftrace_command(&ftrace_snapshot_cmd);
5574 }
5575 #else
5576 static inline __init int register_snapshot_cmd(void) { return 0; }
5577 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5578
5579 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5580 {
5581         if (tr->dir)
5582                 return tr->dir;
5583
5584         if (!debugfs_initialized())
5585                 return NULL;
5586
5587         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5588                 tr->dir = debugfs_create_dir("tracing", NULL);
5589
5590         if (!tr->dir)
5591                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5592
5593         return tr->dir;
5594 }
5595
5596 struct dentry *tracing_init_dentry(void)
5597 {
5598         return tracing_init_dentry_tr(&global_trace);
5599 }
5600
5601 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5602 {
5603         struct dentry *d_tracer;
5604
5605         if (tr->percpu_dir)
5606                 return tr->percpu_dir;
5607
5608         d_tracer = tracing_init_dentry_tr(tr);
5609         if (!d_tracer)
5610                 return NULL;
5611
5612         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5613
5614         WARN_ONCE(!tr->percpu_dir,
5615                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5616
5617         return tr->percpu_dir;
5618 }
5619
5620 static struct dentry *
5621 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5622                       void *data, long cpu, const struct file_operations *fops)
5623 {
5624         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5625
5626         if (ret) /* See tracing_get_cpu() */
5627                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5628         return ret;
5629 }
5630
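/*
 * Create the per_cpu/cpuN directory for one CPU and populate it with the
 * per-cpu variants of trace, trace_pipe, trace_pipe_raw, stats and
 * buffer_size_kb, plus snapshot and snapshot_raw when
 * CONFIG_TRACER_SNAPSHOT is enabled.
 */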
5631 static void
5632 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5633 {
5634         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5635         struct dentry *d_cpu;
5636         char cpu_dir[30]; /* 30 characters should be more than enough */
5637
5638         if (!d_percpu)
5639                 return;
5640
5641         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5642         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5643         if (!d_cpu) {
5644                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5645                 return;
5646         }
5647
5648         /* per cpu trace_pipe */
5649         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5650                                 tr, cpu, &tracing_pipe_fops);
5651
5652         /* per cpu trace */
5653         trace_create_cpu_file("trace", 0644, d_cpu,
5654                                 tr, cpu, &tracing_fops);
5655
5656         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5657                                 tr, cpu, &tracing_buffers_fops);
5658
5659         trace_create_cpu_file("stats", 0444, d_cpu,
5660                                 tr, cpu, &tracing_stats_fops);
5661
5662         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5663                                 tr, cpu, &tracing_entries_fops);
5664
5665 #ifdef CONFIG_TRACER_SNAPSHOT
5666         trace_create_cpu_file("snapshot", 0644, d_cpu,
5667                                 tr, cpu, &snapshot_fops);
5668
5669         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5670                                 tr, cpu, &snapshot_raw_fops);
5671 #endif
5672 }
5673
5674 #ifdef CONFIG_FTRACE_SELFTEST
5675 /* Let selftest have access to static functions in this file */
5676 #include "trace_selftest.c"
5677 #endif
5678
5679 struct trace_option_dentry {
5680         struct tracer_opt               *opt;
5681         struct tracer_flags             *flags;
5682         struct trace_array              *tr;
5683         struct dentry                   *entry;
5684 };
5685
5686 static ssize_t
5687 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5688                         loff_t *ppos)
5689 {
5690         struct trace_option_dentry *topt = filp->private_data;
5691         char *buf;
5692
5693         if (topt->flags->val & topt->opt->bit)
5694                 buf = "1\n";
5695         else
5696                 buf = "0\n";
5697
5698         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5699 }
5700
5701 static ssize_t
5702 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5703                          loff_t *ppos)
5704 {
5705         struct trace_option_dentry *topt = filp->private_data;
5706         unsigned long val;
5707         int ret;
5708
5709         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5710         if (ret)
5711                 return ret;
5712
5713         if (val != 0 && val != 1)
5714                 return -EINVAL;
5715
5716         if (!!(topt->flags->val & topt->opt->bit) != val) {
5717                 mutex_lock(&trace_types_lock);
5718                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5719                                           topt->opt, !val);
5720                 mutex_unlock(&trace_types_lock);
5721                 if (ret)
5722                         return ret;
5723         }
5724
5725         *ppos += cnt;
5726
5727         return cnt;
5728 }
5729
5730
5731 static const struct file_operations trace_options_fops = {
5732         .open = tracing_open_generic,
5733         .read = trace_options_read,
5734         .write = trace_options_write,
5735         .llseek = generic_file_llseek,
5736 };
5737
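/*
 * The options/<flag> files created by create_trace_options_dir() map to
 * bits of the global trace_flags word: reading returns "0" or "1", and
 * writing either value toggles the flag through set_tracer_flag().
 */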
5738 static ssize_t
5739 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5740                         loff_t *ppos)
5741 {
5742         long index = (long)filp->private_data;
5743         char *buf;
5744
5745         if (trace_flags & (1 << index))
5746                 buf = "1\n";
5747         else
5748                 buf = "0\n";
5749
5750         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5751 }
5752
5753 static ssize_t
5754 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5755                          loff_t *ppos)
5756 {
5757         struct trace_array *tr = &global_trace;
5758         long index = (long)filp->private_data;
5759         unsigned long val;
5760         int ret;
5761
5762         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5763         if (ret)
5764                 return ret;
5765
5766         if (val != 0 && val != 1)
5767                 return -EINVAL;
5768
5769         mutex_lock(&trace_types_lock);
5770         ret = set_tracer_flag(tr, 1 << index, val);
5771         mutex_unlock(&trace_types_lock);
5772
5773         if (ret < 0)
5774                 return ret;
5775
5776         *ppos += cnt;
5777
5778         return cnt;
5779 }
5780
5781 static const struct file_operations trace_options_core_fops = {
5782         .open = tracing_open_generic,
5783         .read = trace_options_core_read,
5784         .write = trace_options_core_write,
5785         .llseek = generic_file_llseek,
5786 };
5787
5788 struct dentry *trace_create_file(const char *name,
5789                                  umode_t mode,
5790                                  struct dentry *parent,
5791                                  void *data,
5792                                  const struct file_operations *fops)
5793 {
5794         struct dentry *ret;
5795
5796         ret = debugfs_create_file(name, mode, parent, data, fops);
5797         if (!ret)
5798                 pr_warning("Could not create debugfs '%s' entry\n", name);
5799
5800         return ret;
5801 }
5802
5803
5804 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5805 {
5806         struct dentry *d_tracer;
5807
5808         if (tr->options)
5809                 return tr->options;
5810
5811         d_tracer = tracing_init_dentry_tr(tr);
5812         if (!d_tracer)
5813                 return NULL;
5814
5815         tr->options = debugfs_create_dir("options", d_tracer);
5816         if (!tr->options) {
5817                 pr_warning("Could not create debugfs directory 'options'\n");
5818                 return NULL;
5819         }
5820
5821         return tr->options;
5822 }
5823
5824 static void
5825 create_trace_option_file(struct trace_array *tr,
5826                          struct trace_option_dentry *topt,
5827                          struct tracer_flags *flags,
5828                          struct tracer_opt *opt)
5829 {
5830         struct dentry *t_options;
5831
5832         t_options = trace_options_init_dentry(tr);
5833         if (!t_options)
5834                 return;
5835
5836         topt->flags = flags;
5837         topt->opt = opt;
5838         topt->tr = tr;
5839
5840         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5841                                     &trace_options_fops);
5842
5843 }
5844
5845 static struct trace_option_dentry *
5846 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5847 {
5848         struct trace_option_dentry *topts;
5849         struct tracer_flags *flags;
5850         struct tracer_opt *opts;
5851         int cnt;
5852
5853         if (!tracer)
5854                 return NULL;
5855
5856         flags = tracer->flags;
5857
5858         if (!flags || !flags->opts)
5859                 return NULL;
5860
5861         opts = flags->opts;
5862
5863         for (cnt = 0; opts[cnt].name; cnt++)
5864                 ;
5865
5866         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5867         if (!topts)
5868                 return NULL;
5869
5870         for (cnt = 0; opts[cnt].name; cnt++)
5871                 create_trace_option_file(tr, &topts[cnt], flags,
5872                                          &opts[cnt]);
5873
5874         return topts;
5875 }
5876
5877 static void
5878 destroy_trace_option_files(struct trace_option_dentry *topts)
5879 {
5880         int cnt;
5881
5882         if (!topts)
5883                 return;
5884
5885         for (cnt = 0; topts[cnt].opt; cnt++) {
5886                 if (topts[cnt].entry)
5887                         debugfs_remove(topts[cnt].entry);
5888         }
5889
5890         kfree(topts);
5891 }
5892
5893 static struct dentry *
5894 create_trace_option_core_file(struct trace_array *tr,
5895                               const char *option, long index)
5896 {
5897         struct dentry *t_options;
5898
5899         t_options = trace_options_init_dentry(tr);
5900         if (!t_options)
5901                 return NULL;
5902
5903         return trace_create_file(option, 0644, t_options, (void *)index,
5904                                     &trace_options_core_fops);
5905 }
5906
5907 static __init void create_trace_options_dir(struct trace_array *tr)
5908 {
5909         struct dentry *t_options;
5910         int i;
5911
5912         t_options = trace_options_init_dentry(tr);
5913         if (!t_options)
5914                 return;
5915
5916         for (i = 0; trace_options[i]; i++)
5917                 create_trace_option_core_file(tr, trace_options[i], i);
5918 }
5919
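/*
 * These handlers back the "tracing_on" file: reading reports whether the
 * ring buffer is recording, writing 0 stops recording and a non-zero
 * value restarts it (calling the current tracer's stop/start hooks).
 *
 * Example usage, assuming debugfs is mounted at /sys/kernel/debug:
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on
 */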
5920 static ssize_t
5921 rb_simple_read(struct file *filp, char __user *ubuf,
5922                size_t cnt, loff_t *ppos)
5923 {
5924         struct trace_array *tr = filp->private_data;
5925         char buf[64];
5926         int r;
5927
5928         r = tracer_tracing_is_on(tr);
5929         r = sprintf(buf, "%d\n", r);
5930
5931         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5932 }
5933
5934 static ssize_t
5935 rb_simple_write(struct file *filp, const char __user *ubuf,
5936                 size_t cnt, loff_t *ppos)
5937 {
5938         struct trace_array *tr = filp->private_data;
5939         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5940         unsigned long val;
5941         int ret;
5942
5943         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5944         if (ret)
5945                 return ret;
5946
5947         if (buffer) {
5948                 mutex_lock(&trace_types_lock);
5949                 if (val) {
5950                         tracer_tracing_on(tr);
5951                         if (tr->current_trace->start)
5952                                 tr->current_trace->start(tr);
5953                 } else {
5954                         tracer_tracing_off(tr);
5955                         if (tr->current_trace->stop)
5956                                 tr->current_trace->stop(tr);
5957                 }
5958                 mutex_unlock(&trace_types_lock);
5959         }
5960
5961         (*ppos)++;
5962
5963         return cnt;
5964 }
5965
5966 static const struct file_operations rb_simple_fops = {
5967         .open           = tracing_open_generic_tr,
5968         .read           = rb_simple_read,
5969         .write          = rb_simple_write,
5970         .release        = tracing_release_generic_tr,
5971         .llseek         = default_llseek,
5972 };
5973
5974 struct dentry *trace_instance_dir;
5975
5976 static void
5977 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5978
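/*
 * Allocate the ring buffer and per-cpu data for one trace buffer. With
 * CONFIG_TRACER_MAX_TRACE, allocate_trace_buffers() also sets up the
 * max/snapshot buffer, kept at minimal size unless a snapshot was
 * requested on the kernel command line.
 */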
5979 static int
5980 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5981 {
5982         enum ring_buffer_flags rb_flags;
5983
5984         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5985
5986         buf->tr = tr;
5987
5988         buf->buffer = ring_buffer_alloc(size, rb_flags);
5989         if (!buf->buffer)
5990                 return -ENOMEM;
5991
5992         buf->data = alloc_percpu(struct trace_array_cpu);
5993         if (!buf->data) {
5994                 ring_buffer_free(buf->buffer);
5995                 return -ENOMEM;
5996         }
5997
5998         /* Allocate the first page for all buffers */
5999         set_buffer_entries(&tr->trace_buffer,
6000                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6001
6002         return 0;
6003 }
6004
6005 static int allocate_trace_buffers(struct trace_array *tr, int size)
6006 {
6007         int ret;
6008
6009         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6010         if (ret)
6011                 return ret;
6012
6013 #ifdef CONFIG_TRACER_MAX_TRACE
6014         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6015                                     allocate_snapshot ? size : 1);
6016         if (WARN_ON(ret)) {
6017                 ring_buffer_free(tr->trace_buffer.buffer);
6018                 free_percpu(tr->trace_buffer.data);
6019                 return -ENOMEM;
6020         }
6021         tr->allocated_snapshot = allocate_snapshot;
6022
6023         /*
6024          * Only the top level trace array gets its snapshot allocated
6025          * from the kernel command line.
6026          */
6027         allocate_snapshot = false;
6028 #endif
6029         return 0;
6030 }
6031
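/*
 * Create a new trace instance: a separate trace_array with its own ring
 * buffers, event directory and debugfs files under instances/<name>.
 */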
6032 static int new_instance_create(const char *name)
6033 {
6034         struct trace_array *tr;
6035         int ret;
6036
6037         mutex_lock(&trace_types_lock);
6038
6039         ret = -EEXIST;
6040         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6041                 if (tr->name && strcmp(tr->name, name) == 0)
6042                         goto out_unlock;
6043         }
6044
6045         ret = -ENOMEM;
6046         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6047         if (!tr)
6048                 goto out_unlock;
6049
6050         tr->name = kstrdup(name, GFP_KERNEL);
6051         if (!tr->name)
6052                 goto out_free_tr;
6053
6054         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6055                 goto out_free_tr;
6056
6057         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6058
6059         raw_spin_lock_init(&tr->start_lock);
6060
6061         tr->current_trace = &nop_trace;
6062
6063         INIT_LIST_HEAD(&tr->systems);
6064         INIT_LIST_HEAD(&tr->events);
6065
6066         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6067                 goto out_free_tr;
6068
6069         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6070         if (!tr->dir)
6071                 goto out_free_tr;
6072
6073         ret = event_trace_add_tracer(tr->dir, tr);
6074         if (ret) {
6075                 debugfs_remove_recursive(tr->dir);
6076                 goto out_free_tr;
6077         }
6078
6079         init_tracer_debugfs(tr, tr->dir);
6080
6081         list_add(&tr->list, &ftrace_trace_arrays);
6082
6083         mutex_unlock(&trace_types_lock);
6084
6085         return 0;
6086
6087  out_free_tr:
6088         if (tr->trace_buffer.buffer)
6089                 ring_buffer_free(tr->trace_buffer.buffer);
6090         free_cpumask_var(tr->tracing_cpumask);
6091         kfree(tr->name);
6092         kfree(tr);
6093
6094  out_unlock:
6095         mutex_unlock(&trace_types_lock);
6096
6097         return ret;
6098
6099 }
6100
6101 static int instance_delete(const char *name)
6102 {
6103         struct trace_array *tr;
6104         int found = 0;
6105         int ret;
6106
6107         mutex_lock(&trace_types_lock);
6108
6109         ret = -ENODEV;
6110         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6111                 if (tr->name && strcmp(tr->name, name) == 0) {
6112                         found = 1;
6113                         break;
6114                 }
6115         }
6116         if (!found)
6117                 goto out_unlock;
6118
6119         ret = -EBUSY;
6120         if (tr->ref)
6121                 goto out_unlock;
6122
6123         list_del(&tr->list);
6124
6125         event_trace_del_tracer(tr);
6126         debugfs_remove_recursive(tr->dir);
6127         free_percpu(tr->trace_buffer.data);
6128         ring_buffer_free(tr->trace_buffer.buffer);
6129
6130         kfree(tr->name);
6131         kfree(tr);
6132
6133         ret = 0;
6134
6135  out_unlock:
6136         mutex_unlock(&trace_types_lock);
6137
6138         return ret;
6139 }
6140
6141 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6142 {
6143         struct dentry *parent;
6144         int ret;
6145
6146         /* Paranoid: Make sure the parent is the "instances" directory */
6147         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6148         if (WARN_ON_ONCE(parent != trace_instance_dir))
6149                 return -ENOENT;
6150
6151         /*
6152          * The inode mutex is locked, but debugfs_create_dir() will also
6153          * take the mutex. As the instances directory can not be destroyed
6154          * or changed in any other way, it is safe to unlock it, and
6155          * let the dentry try. If two users try to make the same dir at
6156          * the same time, then the new_instance_create() will determine the
6157          * winner.
6158          */
6159         mutex_unlock(&inode->i_mutex);
6160
6161         ret = new_instance_create(dentry->d_iname);
6162
6163         mutex_lock(&inode->i_mutex);
6164
6165         return ret;
6166 }
6167
6168 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6169 {
6170         struct dentry *parent;
6171         int ret;
6172
6173         /* Paranoid: Make sure the parent is the "instances" directory */
6174         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6175         if (WARN_ON_ONCE(parent != trace_instance_dir))
6176                 return -ENOENT;
6177
6178         /* The caller did a dget() on dentry */
6179         mutex_unlock(&dentry->d_inode->i_mutex);
6180
6181         /*
6182          * The inode mutex is locked, but debugfs_remove_recursive() will
6183          * also take the mutex. As the instances directory can not be
6184          * destroyed or changed in any other way, it is safe to unlock it,
6185          * and let the dentry try. If two users try to remove the same dir
6186          * at the same time, then instance_delete() will determine the
6187          * winner.
6188          */
6189         mutex_unlock(&inode->i_mutex);
6190
6191         ret = instance_delete(dentry->d_iname);
6192
6193         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6194         mutex_lock(&dentry->d_inode->i_mutex);
6195
6196         return ret;
6197 }
6198
6199 static const struct inode_operations instance_dir_inode_operations = {
6200         .lookup         = simple_lookup,
6201         .mkdir          = instance_mkdir,
6202         .rmdir          = instance_rmdir,
6203 };
6204
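/*
 * The "instances" directory lets user space create and remove trace
 * instances with plain mkdir and rmdir. Example usage, assuming debugfs
 * is mounted at /sys/kernel/debug and "foo" is an arbitrary name:
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 */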
6205 static __init void create_trace_instances(struct dentry *d_tracer)
6206 {
6207         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6208         if (WARN_ON(!trace_instance_dir))
6209                 return;
6210
6211         /* Hijack the dir inode operations, to allow mkdir and rmdir */
6212         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6213 }
6214
6215 static void
6216 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6217 {
6218         int cpu;
6219
6220         trace_create_file("tracing_cpumask", 0644, d_tracer,
6221                           tr, &tracing_cpumask_fops);
6222
6223         trace_create_file("trace_options", 0644, d_tracer,
6224                           tr, &tracing_iter_fops);
6225
6226         trace_create_file("trace", 0644, d_tracer,
6227                           tr, &tracing_fops);
6228
6229         trace_create_file("trace_pipe", 0444, d_tracer,
6230                           tr, &tracing_pipe_fops);
6231
6232         trace_create_file("buffer_size_kb", 0644, d_tracer,
6233                           tr, &tracing_entries_fops);
6234
6235         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6236                           tr, &tracing_total_entries_fops);
6237
6238         trace_create_file("free_buffer", 0200, d_tracer,
6239                           tr, &tracing_free_buffer_fops);
6240
6241         trace_create_file("trace_marker", 0220, d_tracer,
6242                           tr, &tracing_mark_fops);
6243
6244         trace_create_file("trace_clock", 0644, d_tracer, tr,
6245                           &trace_clock_fops);
6246
6247         trace_create_file("tracing_on", 0644, d_tracer,
6248                           tr, &rb_simple_fops);
6249
6250 #ifdef CONFIG_TRACER_SNAPSHOT
6251         trace_create_file("snapshot", 0644, d_tracer,
6252                           tr, &snapshot_fops);
6253 #endif
6254
6255         for_each_tracing_cpu(cpu)
6256                 tracing_init_debugfs_percpu(tr, cpu);
6257
6258 }
6259
6260 static __init int tracer_init_debugfs(void)
6261 {
6262         struct dentry *d_tracer;
6263
6264         trace_access_lock_init();
6265
6266         d_tracer = tracing_init_dentry();
6267         if (!d_tracer)
6268                 return 0;
6269
6270         init_tracer_debugfs(&global_trace, d_tracer);
6271
6272         trace_create_file("available_tracers", 0444, d_tracer,
6273                         &global_trace, &show_traces_fops);
6274
6275         trace_create_file("current_tracer", 0644, d_tracer,
6276                         &global_trace, &set_tracer_fops);
6277
6278 #ifdef CONFIG_TRACER_MAX_TRACE
6279         trace_create_file("tracing_max_latency", 0644, d_tracer,
6280                         &tracing_max_latency, &tracing_max_lat_fops);
6281 #endif
6282
6283         trace_create_file("tracing_thresh", 0644, d_tracer,
6284                         &tracing_thresh, &tracing_max_lat_fops);
6285
6286         trace_create_file("README", 0444, d_tracer,
6287                         NULL, &tracing_readme_fops);
6288
6289         trace_create_file("saved_cmdlines", 0444, d_tracer,
6290                         NULL, &tracing_saved_cmdlines_fops);
6291
6292 #ifdef CONFIG_DYNAMIC_FTRACE
6293         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6294                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6295 #endif
6296
6297         create_trace_instances(d_tracer);
6298
6299         create_trace_options_dir(&global_trace);
6300
6301         return 0;
6302 }
6303
6304 static int trace_panic_handler(struct notifier_block *this,
6305                                unsigned long event, void *unused)
6306 {
6307         if (ftrace_dump_on_oops)
6308                 ftrace_dump(ftrace_dump_on_oops);
6309         return NOTIFY_OK;
6310 }
6311
6312 static struct notifier_block trace_panic_notifier = {
6313         .notifier_call  = trace_panic_handler,
6314         .next           = NULL,
6315         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6316 };
6317
6318 static int trace_die_handler(struct notifier_block *self,
6319                              unsigned long val,
6320                              void *data)
6321 {
6322         switch (val) {
6323         case DIE_OOPS:
6324                 if (ftrace_dump_on_oops)
6325                         ftrace_dump(ftrace_dump_on_oops);
6326                 break;
6327         default:
6328                 break;
6329         }
6330         return NOTIFY_OK;
6331 }
6332
6333 static struct notifier_block trace_die_notifier = {
6334         .notifier_call = trace_die_handler,
6335         .priority = 200
6336 };
6337
6338 /*
6339  * printk is limited to a maximum of 1024 characters; we really don't need
6340  * it that big. Nothing should be printing 1000 characters anyway.
6341  */
6342 #define TRACE_MAX_PRINT         1000
6343
6344 /*
6345  * Define KERN_TRACE here so that we have one place to modify
6346  * it if we decide to change the log level at which the ftrace
6347  * dump should be printed.
6348  */
6349 #define KERN_TRACE              KERN_EMERG
6350
6351 void
6352 trace_printk_seq(struct trace_seq *s)
6353 {
6354         /* Probably should print a warning here. */
6355         if (s->len >= TRACE_MAX_PRINT)
6356                 s->len = TRACE_MAX_PRINT;
6357
6358         /* Should be zero terminated, but we are paranoid. */
6359         s->buffer[s->len] = 0;
6360
6361         printk(KERN_TRACE "%s", s->buffer);
6362
6363         trace_seq_init(s);
6364 }
6365
6366 void trace_init_global_iter(struct trace_iterator *iter)
6367 {
6368         iter->tr = &global_trace;
6369         iter->trace = iter->tr->current_trace;
6370         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6371         iter->trace_buffer = &global_trace.trace_buffer;
6372
6373         if (iter->trace && iter->trace->open)
6374                 iter->trace->open(iter);
6375
6376         /* Annotate start of buffers if we had overruns */
6377         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6378                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6379
6380         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6381         if (trace_clocks[iter->tr->clock_id].in_ns)
6382                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6383 }
6384
6385 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6386 {
6387         /* use static because iter can be a bit big for the stack */
6388         static struct trace_iterator iter;
6389         static atomic_t dump_running;
6390         unsigned int old_userobj;
6391         unsigned long flags;
6392         int cnt = 0, cpu;
6393
6394         /* Only allow one dump user at a time. */
6395         if (atomic_inc_return(&dump_running) != 1) {
6396                 atomic_dec(&dump_running);
6397                 return;
6398         }
6399
6400         /*
6401          * Always turn off tracing when we dump.
6402          * We don't need to show trace output of what happens
6403          * between multiple crashes.
6404          *
6405          * If the user does a sysrq-z, then they can re-enable
6406          * tracing with echo 1 > tracing_on.
6407          */
6408         tracing_off();
6409
6410         local_irq_save(flags);
6411
6412         /* Simulate the iterator */
6413         trace_init_global_iter(&iter);
6414
6415         for_each_tracing_cpu(cpu) {
6416                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6417         }
6418
6419         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6420
6421         /* don't look at user memory in panic mode */
6422         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6423
6424         switch (oops_dump_mode) {
6425         case DUMP_ALL:
6426                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6427                 break;
6428         case DUMP_ORIG:
6429                 iter.cpu_file = raw_smp_processor_id();
6430                 break;
6431         case DUMP_NONE:
6432                 goto out_enable;
6433         default:
6434                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6435                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6436         }
6437
6438         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6439
6440         /* Did function tracer already get disabled? */
6441         if (ftrace_is_dead()) {
6442                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6443                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6444         }
6445
6446         /*
6447          * We need to stop all tracing on all CPUs to read
6448          * the next buffer. This is a bit expensive, but is
6449          * not done often. We print whatever we can read,
6450          * and then release the locks again.
6451          */
6452
6453         while (!trace_empty(&iter)) {
6454
6455                 if (!cnt)
6456                         printk(KERN_TRACE "---------------------------------\n");
6457
6458                 cnt++;
6459
6460                 /* reset all but tr, trace, and overruns */
6461                 memset(&iter.seq, 0,
6462                        sizeof(struct trace_iterator) -
6463                        offsetof(struct trace_iterator, seq));
6464                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6465                 iter.pos = -1;
6466
6467                 if (trace_find_next_entry_inc(&iter) != NULL) {
6468                         int ret;
6469
6470                         ret = print_trace_line(&iter);
6471                         if (ret != TRACE_TYPE_NO_CONSUME)
6472                                 trace_consume(&iter);
6473                 }
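                     /*
                      * Dumping to a slow (e.g. serial) console can take a
                      * while; keep the NMI watchdog from firing mid-dump.
                      */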
6474                 touch_nmi_watchdog();
6475
6476                 trace_printk_seq(&iter.seq);
6477         }
6478
6479         if (!cnt)
6480                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6481         else
6482                 printk(KERN_TRACE "---------------------------------\n");
6483
6484  out_enable:
6485         trace_flags |= old_userobj;
6486
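             /* Undo the earlier ->disabled bumps so the buffers can record again. */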
6487         for_each_tracing_cpu(cpu) {
6488                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6489         }
6490         atomic_dec(&dump_running);
6491         local_irq_restore(flags);
6492 }
6493 EXPORT_SYMBOL_GPL(ftrace_dump);
6494
6495 __init static int tracer_alloc_buffers(void)
6496 {
6497         int ring_buf_size;
6498         int ret = -ENOMEM;
6499
6500
6501         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6502                 goto out;
6503
6504         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6505                 goto out_free_buffer_mask;
6506
6507         /* Only allocate the trace_printk buffers if a trace_printk() user exists */
6508         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6509                 /* Must be called before global_trace's buffers are allocated */
6510                 trace_printk_init_buffers();
6511
6512         /* To save memory, keep the ring buffer size to its minimum */
6513         if (ring_buffer_expanded)
6514                 ring_buf_size = trace_buf_size;
6515         else
6516                 ring_buf_size = 1;
6517
6518         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6519         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6520
6521         raw_spin_lock_init(&global_trace.start_lock);
6522
6523         /* Used for event triggers */
6524         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6525         if (!temp_buffer)
6526                 goto out_free_cpumask;
6527
6528         /* TODO: make the number of buffers hot pluggable with CPUs */
6529         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6530                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6531                 WARN_ON(1);
6532                 goto out_free_temp_buffer;
6533         }
6534
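             /*
              * If tracing was switched off before the buffer existed,
              * buffer_disabled remembers that; apply it to the buffer
              * we just allocated.
              */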
6535         if (global_trace.buffer_disabled)
6536                 tracing_off();
6537
6538         trace_init_cmdlines();
6539
6540         /*
6541          * register_tracer() might reference current_trace, so it
6542          * needs to be set before we register anything. This is
6543          * just a bootstrap of current_trace anyway.
6544          */
6545         global_trace.current_trace = &nop_trace;
6546
6547         register_tracer(&nop_trace);
6548
6549         /* All seems OK, enable tracing */
6550         tracing_disabled = 0;
6551
6552         atomic_notifier_chain_register(&panic_notifier_list,
6553                                        &trace_panic_notifier);
6554
6555         register_die_notifier(&trace_die_notifier);
6556
6557         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6558
6559         INIT_LIST_HEAD(&global_trace.systems);
6560         INIT_LIST_HEAD(&global_trace.events);
6561         list_add(&global_trace.list, &ftrace_trace_arrays);
6562
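             /*
              * Apply any options passed on the kernel command line via
              * trace_options=, one comma-separated option at a time.
              */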
6563         while (trace_boot_options) {
6564                 char *option;
6565
6566                 option = strsep(&trace_boot_options, ",");
6567                 trace_set_options(&global_trace, option);
6568         }
6569
6570         register_snapshot_cmd();
6571
6572         return 0;
6573
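     /* Error unwinding: free things in the reverse order they were set up. */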
6574 out_free_temp_buffer:
6575         ring_buffer_free(temp_buffer);
6576 out_free_cpumask:
6577         free_percpu(global_trace.trace_buffer.data);
6578 #ifdef CONFIG_TRACER_MAX_TRACE
6579         free_percpu(global_trace.max_buffer.data);
6580 #endif
6581         free_cpumask_var(global_trace.tracing_cpumask);
6582 out_free_buffer_mask:
6583         free_cpumask_var(tracing_buffer_mask);
6584 out:
6585         return ret;
6586 }
6587
6588 __init static int clear_boot_tracer(void)
6589 {
6590         /*
6591          * The default bootup tracer name is kept in a buffer that
6592          * lives in an init section. This function runs at late_initcall
6593          * time; if the boot tracer was never registered, clear the
6594          * pointer out, to prevent a later registration from accessing
6595          * the buffer that is about to be freed.
6596          */
6597         if (!default_bootup_tracer)
6598                 return 0;
6599
6600         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6601                default_bootup_tracer);
6602         default_bootup_tracer = NULL;
6603
6604         return 0;
6605 }
6606
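     /*
      * Initcall ordering: allocate the ring buffers early, create the
      * debugfs interface once filesystems are available, and clear any
      * unregistered bootup tracer late in init.
      */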
6607 early_initcall(tracer_alloc_buffers);
6608 fs_initcall(tracer_init_debugfs);
6609 late_initcall(clear_boot_tracer);