kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although some concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
77 {
78         return 0;
79 }
80
81 /*
82  * To prevent the comm cache from being overwritten when no
83  * tracing is active, only save the comm when a trace event
84  * occurred.
85  */
86 static DEFINE_PER_CPU(bool, trace_cmdline_save);
87
88 /*
89  * Kill all tracing for good (never come back).
90  * It is initialized to 1 but will turn to zero if the initialization
91  * of the tracer is successful. But that is the only place that sets
92  * this back to zero.
93  */
94 static int tracing_disabled = 1;
95
96 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 if you want to dump the buffers of all CPUs.
113  * Set it to 2 if you want to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 static int tracing_set_tracer(const char *buf);
122
123 #define MAX_TRACER_SIZE         100
124 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static bool allocate_snapshot;
128
129 static int __init set_cmdline_ftrace(char *str)
130 {
131         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
132         default_bootup_tracer = bootup_tracer_buf;
133         /* We are using ftrace early, expand it */
134         ring_buffer_expanded = true;
135         return 1;
136 }
137 __setup("ftrace=", set_cmdline_ftrace);
138
139 static int __init set_ftrace_dump_on_oops(char *str)
140 {
141         if (*str++ != '=' || !*str) {
142                 ftrace_dump_on_oops = DUMP_ALL;
143                 return 1;
144         }
145
146         if (!strcmp("orig_cpu", str)) {
147                 ftrace_dump_on_oops = DUMP_ORIG;
148                 return 1;
149         }
150
151         return 0;
152 }
153 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
154
155 static int __init stop_trace_on_warning(char *str)
156 {
157         __disable_trace_on_warning = 1;
158         return 1;
159 }
160 __setup("traceoff_on_warning=", stop_trace_on_warning);
161
162 static int __init boot_alloc_snapshot(char *str)
163 {
164         allocate_snapshot = true;
165         /* We also need the main ring buffer expanded */
166         ring_buffer_expanded = true;
167         return 1;
168 }
169 __setup("alloc_snapshot", boot_alloc_snapshot);
170
171
172 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
173 static char *trace_boot_options __initdata;
174
175 static int __init set_trace_boot_options(char *str)
176 {
177         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
178         trace_boot_options = trace_boot_options_buf;
179         return 0;
180 }
181 __setup("trace_options=", set_trace_boot_options);
182
183
184 unsigned long long ns2usecs(cycle_t nsec)
185 {
186         nsec += 500;
187         do_div(nsec, 1000);
188         return nsec;
189 }
190
191 /*
192  * The global_trace is the descriptor that holds the tracing
193  * buffers for the live tracing. For each CPU, it contains
194  * a linked list of pages that will store trace entries. The
195  * page descriptor of the pages in memory is used to hold
196  * the linked list by linking the lru item in the page descriptor
197  * to each of the pages in the buffer per CPU.
198  *
199  * For each active CPU there is a data field that holds the
200  * pages for the buffer for that CPU. Each CPU has the same number
201  * of pages allocated for its buffer.
202  */
203 static struct trace_array       global_trace;
204
205 LIST_HEAD(ftrace_trace_arrays);
206
207 int trace_array_get(struct trace_array *this_tr)
208 {
209         struct trace_array *tr;
210         int ret = -ENODEV;
211
212         mutex_lock(&trace_types_lock);
213         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
214                 if (tr == this_tr) {
215                         tr->ref++;
216                         ret = 0;
217                         break;
218                 }
219         }
220         mutex_unlock(&trace_types_lock);
221
222         return ret;
223 }
224
225 static void __trace_array_put(struct trace_array *this_tr)
226 {
227         WARN_ON(!this_tr->ref);
228         this_tr->ref--;
229 }
230
231 void trace_array_put(struct trace_array *this_tr)
232 {
233         mutex_lock(&trace_types_lock);
234         __trace_array_put(this_tr);
235         mutex_unlock(&trace_types_lock);
236 }
237
238 int filter_check_discard(struct ftrace_event_file *file, void *rec,
239                          struct ring_buffer *buffer,
240                          struct ring_buffer_event *event)
241 {
242         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
243             !filter_match_preds(file->filter, rec)) {
244                 ring_buffer_discard_commit(buffer, event);
245                 return 1;
246         }
247
248         return 0;
249 }
250 EXPORT_SYMBOL_GPL(filter_check_discard);
251
252 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
253                               struct ring_buffer *buffer,
254                               struct ring_buffer_event *event)
255 {
256         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
257             !filter_match_preds(call->filter, rec)) {
258                 ring_buffer_discard_commit(buffer, event);
259                 return 1;
260         }
261
262         return 0;
263 }
264 EXPORT_SYMBOL_GPL(call_filter_check_discard);
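/*
 * Rough sketch of how the discard helpers are meant to be used by an event
 * writer: reserve an event, fill it in, and only commit it if the filter
 * does not throw it away (see trace_function() below for a real user):
 *
 *	event = trace_buffer_lock_reserve(buffer, type, sizeof(*entry),
 *					  irq_flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	... fill in *entry ...
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */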
265
266 cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
267 {
268         u64 ts;
269
270         /* Early boot up does not have a buffer yet */
271         if (!buf->buffer)
272                 return trace_clock_local();
273
274         ts = ring_buffer_time_stamp(buf->buffer, cpu);
275         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
276
277         return ts;
278 }
279
280 cycle_t ftrace_now(int cpu)
281 {
282         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
283 }
284
285 /**
286  * tracing_is_enabled - Show if global_trace has been enabled
287  *
288  * Shows if the global trace has been enabled or not. It uses the
289  * mirror flag "buffer_disabled" to be used in fast paths such as for
290  * the irqsoff tracer. But it may be inaccurate due to races. If you
291  * need to know the accurate state, use tracing_is_on() which is a little
292  * slower, but accurate.
293  */
294 int tracing_is_enabled(void)
295 {
296         /*
297          * For quick access (irqsoff uses this in fast path), just
298          * return the mirror variable of the state of the ring buffer.
299          * It's a little racy, but we don't really care.
300          */
301         smp_rmb();
302         return !global_trace.buffer_disabled;
303 }
304
305 /*
306  * trace_buf_size is the size in bytes that is allocated
307  * for a buffer. Note, the number of bytes is always rounded
308  * to page size.
309  *
310  * This number is purposely set to a low value of 16384.
311  * If a dump on oops happens, it will be much appreciated
312  * not to have to wait for all that output. Anyway, this is
313  * configurable at both boot time and run time.
314  */
315 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
316
317 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
318
319 /* trace_types holds a linked list of available tracers. */
320 static struct tracer            *trace_types __read_mostly;
321
322 /*
323  * trace_types_lock is used to protect the trace_types list.
324  */
325 DEFINE_MUTEX(trace_types_lock);
326
327 /*
328  * serialize the access of the ring buffer
329  *
330  * The ring buffer serializes readers, but that is only low-level protection.
331  * The validity of the events (which are returned by ring_buffer_peek() etc.)
332  * is not protected by the ring buffer.
333  *
334  * The content of events may become garbage if we allow another process to
335  * consume these events concurrently:
336  *   A) the page of the consumed events may become a normal page
337  *      (not a reader page) in the ring buffer, and this page will be
338  *      rewritten by the events producer.
339  *   B) the page of the consumed events may become a page for splice_read,
340  *      and this page will be returned to the system.
341  *
342  * These primitives allow multiple processes to access different per-cpu
343  * ring buffers concurrently.
344  *
345  * These primitives don't distinguish read-only and read-consume access.
346  * Multiple read-only accesses are also serialized.
347  */
348
349 #ifdef CONFIG_SMP
350 static DECLARE_RWSEM(all_cpu_access_lock);
351 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
352
353 static inline void trace_access_lock(int cpu)
354 {
355         if (cpu == RING_BUFFER_ALL_CPUS) {
356                 /* gain it for accessing the whole ring buffer. */
357                 down_write(&all_cpu_access_lock);
358         } else {
359                 /* gain it for accessing a cpu ring buffer. */
360
361                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
362                 down_read(&all_cpu_access_lock);
363
364                 /* Secondly block other access to this @cpu ring buffer. */
365                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
366         }
367 }
368
369 static inline void trace_access_unlock(int cpu)
370 {
371         if (cpu == RING_BUFFER_ALL_CPUS) {
372                 up_write(&all_cpu_access_lock);
373         } else {
374                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
375                 up_read(&all_cpu_access_lock);
376         }
377 }
378
379 static inline void trace_access_lock_init(void)
380 {
381         int cpu;
382
383         for_each_possible_cpu(cpu)
384                 mutex_init(&per_cpu(cpu_access_lock, cpu));
385 }
386
387 #else
388
389 static DEFINE_MUTEX(access_lock);
390
391 static inline void trace_access_lock(int cpu)
392 {
393         (void)cpu;
394         mutex_lock(&access_lock);
395 }
396
397 static inline void trace_access_unlock(int cpu)
398 {
399         (void)cpu;
400         mutex_unlock(&access_lock);
401 }
402
403 static inline void trace_access_lock_init(void)
404 {
405 }
406
407 #endif
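/*
 * A consuming reader is expected to bracket its buffer accesses with these
 * helpers, roughly (sketch only; @cpu may also be RING_BUFFER_ALL_CPUS):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from that cpu's buffer ...
 *	trace_access_unlock(cpu);
 */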
408
409 /* trace_flags holds trace_options default values */
410 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
411         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
412         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
413         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
414
415 static void tracer_tracing_on(struct trace_array *tr)
416 {
417         if (tr->trace_buffer.buffer)
418                 ring_buffer_record_on(tr->trace_buffer.buffer);
419         /*
420          * This flag is looked at when buffers haven't been allocated
421          * yet, or by some tracers (like irqsoff), that just want to
422          * know if the ring buffer has been disabled, but it can handle
423          * races where it gets disabled while we still do a record.
424          * As the check is in the fast path of the tracers, it is more
425          * important to be fast than accurate.
426          */
427         tr->buffer_disabled = 0;
428         /* Make the flag seen by readers */
429         smp_wmb();
430 }
431
432 /**
433  * tracing_on - enable tracing buffers
434  *
435  * This function enables tracing buffers that may have been
436  * disabled with tracing_off.
437  */
438 void tracing_on(void)
439 {
440         tracer_tracing_on(&global_trace);
441 }
442 EXPORT_SYMBOL_GPL(tracing_on);
443
444 /**
445  * __trace_puts - write a constant string into the trace buffer.
446  * @ip:    The address of the caller
447  * @str:   The constant string to write
448  * @size:  The size of the string.
449  */
450 int __trace_puts(unsigned long ip, const char *str, int size)
451 {
452         struct ring_buffer_event *event;
453         struct ring_buffer *buffer;
454         struct print_entry *entry;
455         unsigned long irq_flags;
456         int alloc;
457
458         if (unlikely(tracing_selftest_running || tracing_disabled))
459                 return 0;
460
461         alloc = sizeof(*entry) + size + 2; /* possible \n added */
462
463         local_save_flags(irq_flags);
464         buffer = global_trace.trace_buffer.buffer;
465         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
466                                           irq_flags, preempt_count());
467         if (!event)
468                 return 0;
469
470         entry = ring_buffer_event_data(event);
471         entry->ip = ip;
472
473         memcpy(&entry->buf, str, size);
474
475         /* Add a newline if necessary */
476         if (entry->buf[size - 1] != '\n') {
477                 entry->buf[size] = '\n';
478                 entry->buf[size + 1] = '\0';
479         } else
480                 entry->buf[size] = '\0';
481
482         __buffer_unlock_commit(buffer, event);
483
484         return size;
485 }
486 EXPORT_SYMBOL_GPL(__trace_puts);
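/*
 * Note that callers normally use the trace_puts() macro (see linux/kernel.h)
 * rather than calling __trace_puts()/__trace_bputs() directly, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */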
487
488 /**
489  * __trace_bputs - write the pointer to a constant string into trace buffer
490  * @ip:    The address of the caller
491  * @str:   The constant string to write to the buffer
492  */
493 int __trace_bputs(unsigned long ip, const char *str)
494 {
495         struct ring_buffer_event *event;
496         struct ring_buffer *buffer;
497         struct bputs_entry *entry;
498         unsigned long irq_flags;
499         int size = sizeof(struct bputs_entry);
500
501         if (unlikely(tracing_selftest_running || tracing_disabled))
502                 return 0;
503
504         local_save_flags(irq_flags);
505         buffer = global_trace.trace_buffer.buffer;
506         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
507                                           irq_flags, preempt_count());
508         if (!event)
509                 return 0;
510
511         entry = ring_buffer_event_data(event);
512         entry->ip                       = ip;
513         entry->str                      = str;
514
515         __buffer_unlock_commit(buffer, event);
516
517         return 1;
518 }
519 EXPORT_SYMBOL_GPL(__trace_bputs);
520
521 #ifdef CONFIG_TRACER_SNAPSHOT
522 /**
523  * tracing_snapshot - take a snapshot of the current buffer.
524  *
525  * This causes a swap between the snapshot buffer and the current live
526  * tracing buffer. You can use this to take snapshots of the live
527  * trace when some condition is triggered, but continue to trace.
528  *
529  * Note, make sure to allocate the snapshot with either
530  * a tracing_snapshot_alloc(), or by doing it manually
531  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
532  *
533  * If the snapshot buffer is not allocated, this will stop tracing,
534  * basically making a permanent snapshot.
535  */
536 void tracing_snapshot(void)
537 {
538         struct trace_array *tr = &global_trace;
539         struct tracer *tracer = tr->current_trace;
540         unsigned long flags;
541
542         if (in_nmi()) {
543                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
544                 internal_trace_puts("*** snapshot is being ignored        ***\n");
545                 return;
546         }
547
548         if (!tr->allocated_snapshot) {
549                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
550                 internal_trace_puts("*** stopping trace here!   ***\n");
551                 tracing_off();
552                 return;
553         }
554
555         /* Note, snapshot can not be used when the tracer uses it */
556         if (tracer->use_max_tr) {
557                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
558                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
559                 return;
560         }
561
562         local_irq_save(flags);
563         update_max_tr(tr, current, smp_processor_id());
564         local_irq_restore(flags);
565 }
566 EXPORT_SYMBOL_GPL(tracing_snapshot);
567
568 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
569                                         struct trace_buffer *size_buf, int cpu_id);
570 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
571
572 static int alloc_snapshot(struct trace_array *tr)
573 {
574         int ret;
575
576         if (!tr->allocated_snapshot) {
577
578                 /* allocate spare buffer */
579                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
580                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
581                 if (ret < 0)
582                         return ret;
583
584                 tr->allocated_snapshot = true;
585         }
586
587         return 0;
588 }
589
590 void free_snapshot(struct trace_array *tr)
591 {
592         /*
593          * We don't free the ring buffer; instead, we resize it because
594          * the max_tr ring buffer has some state (e.g. ring->clock) and
595          * we want to preserve it.
596          */
597         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
598         set_buffer_entries(&tr->max_buffer, 1);
599         tracing_reset_online_cpus(&tr->max_buffer);
600         tr->allocated_snapshot = false;
601 }
602
603 /**
604  * tracing_alloc_snapshot - allocate snapshot buffer.
605  *
606  * This only allocates the snapshot buffer if it isn't already
607  * allocated - it doesn't also take a snapshot.
608  *
609  * This is meant to be used in cases where the snapshot buffer needs
610  * to be set up for events that can't sleep but need to be able to
611  * trigger a snapshot.
612  */
613 int tracing_alloc_snapshot(void)
614 {
615         struct trace_array *tr = &global_trace;
616         int ret;
617
618         ret = alloc_snapshot(tr);
619         WARN_ON(ret < 0);
620
621         return ret;
622 }
623 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
624
625 /**
626  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
627  *
628  * This is similar to tracing_snapshot(), but it will allocate the
629  * snapshot buffer if it isn't already allocated. Use this only
630  * where it is safe to sleep, as the allocation may sleep.
631  *
632  * This causes a swap between the snapshot buffer and the current live
633  * tracing buffer. You can use this to take snapshots of the live
634  * trace when some condition is triggered, but continue to trace.
635  */
636 void tracing_snapshot_alloc(void)
637 {
638         int ret;
639
640         ret = tracing_alloc_snapshot();
641         if (ret < 0)
642                 return;
643
644         tracing_snapshot();
645 }
646 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
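/*
 * A rough usage sketch: allocate the snapshot buffer once from a context
 * that may sleep, then trigger snapshots from atomic context later on
 * (the condition name below is only illustrative):
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (suspect_condition)
 *		tracing_snapshot();
 */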
647 #else
648 void tracing_snapshot(void)
649 {
650         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
651 }
652 EXPORT_SYMBOL_GPL(tracing_snapshot);
653 int tracing_alloc_snapshot(void)
654 {
655         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
656         return -ENODEV;
657 }
658 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
659 void tracing_snapshot_alloc(void)
660 {
661         /* Give warning */
662         tracing_snapshot();
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
665 #endif /* CONFIG_TRACER_SNAPSHOT */
666
667 static void tracer_tracing_off(struct trace_array *tr)
668 {
669         if (tr->trace_buffer.buffer)
670                 ring_buffer_record_off(tr->trace_buffer.buffer);
671         /*
672          * This flag is looked at when buffers haven't been allocated
673          * yet, or by some tracers (like irqsoff), that just want to
674          * know if the ring buffer has been disabled, but it can handle
675          * races where it gets disabled while we still do a record.
676          * As the check is in the fast path of the tracers, it is more
677          * important to be fast than accurate.
678          */
679         tr->buffer_disabled = 1;
680         /* Make the flag seen by readers */
681         smp_wmb();
682 }
683
684 /**
685  * tracing_off - turn off tracing buffers
686  *
687  * This function stops the tracing buffers from recording data.
688  * It does not disable any overhead the tracers themselves may
689  * be causing. This function simply causes all recording to
690  * the ring buffers to fail.
691  */
692 void tracing_off(void)
693 {
694         tracer_tracing_off(&global_trace);
695 }
696 EXPORT_SYMBOL_GPL(tracing_off);
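/*
 * A common debugging pattern (sketch only) is to stop the ring buffers as
 * soon as a problem is detected, so that the trace leading up to it is
 * preserved:
 *
 *	if (something_went_wrong)	(illustrative condition)
 *		tracing_off();
 */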
697
698 void disable_trace_on_warning(void)
699 {
700         if (__disable_trace_on_warning)
701                 tracing_off();
702 }
703
704 /**
705  * tracer_tracing_is_on - show the real state of the ring buffer
706  * @tr: the trace array whose ring buffer state is checked
707  *
708  * Shows whether the ring buffer of @tr is really enabled or not.
709  */
710 static int tracer_tracing_is_on(struct trace_array *tr)
711 {
712         if (tr->trace_buffer.buffer)
713                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
714         return !tr->buffer_disabled;
715 }
716
717 /**
718  * tracing_is_on - show state of ring buffers enabled
719  */
720 int tracing_is_on(void)
721 {
722         return tracer_tracing_is_on(&global_trace);
723 }
724 EXPORT_SYMBOL_GPL(tracing_is_on);
725
726 static int __init set_buf_size(char *str)
727 {
728         unsigned long buf_size;
729
730         if (!str)
731                 return 0;
732         buf_size = memparse(str, &str);
733         /* nr_entries can not be zero */
734         if (buf_size == 0)
735                 return 0;
736         trace_buf_size = buf_size;
737         return 1;
738 }
739 __setup("trace_buf_size=", set_buf_size);
740
741 static int __init set_tracing_thresh(char *str)
742 {
743         unsigned long threshold;
744         int ret;
745
746         if (!str)
747                 return 0;
748         ret = kstrtoul(str, 0, &threshold);
749         if (ret < 0)
750                 return 0;
751         tracing_thresh = threshold * 1000;
752         return 1;
753 }
754 __setup("tracing_thresh=", set_tracing_thresh);
755
756 unsigned long nsecs_to_usecs(unsigned long nsecs)
757 {
758         return nsecs / 1000;
759 }
760
761 /* These must match the bit positions in trace_iterator_flags */
762 static const char *trace_options[] = {
763         "print-parent",
764         "sym-offset",
765         "sym-addr",
766         "verbose",
767         "raw",
768         "hex",
769         "bin",
770         "block",
771         "stacktrace",
772         "trace_printk",
773         "ftrace_preempt",
774         "branch",
775         "annotate",
776         "userstacktrace",
777         "sym-userobj",
778         "printk-msg-only",
779         "context-info",
780         "latency-format",
781         "sleep-time",
782         "graph-time",
783         "record-cmd",
784         "overwrite",
785         "disable_on_free",
786         "irq-info",
787         "markers",
788         "function-trace",
789         NULL
790 };
791
792 static struct {
793         u64 (*func)(void);
794         const char *name;
795         int in_ns;              /* is this clock in nanoseconds? */
796 } trace_clocks[] = {
797         { trace_clock_local,    "local",        1 },
798         { trace_clock_global,   "global",       1 },
799         { trace_clock_counter,  "counter",      0 },
800         { trace_clock_jiffies,  "uptime",       1 },
801         { trace_clock,          "perf",         1 },
802         ARCH_TRACE_CLOCKS
803 };
804
805 /*
806  * trace_parser_get_init - gets the buffer for trace parser
807  */
808 int trace_parser_get_init(struct trace_parser *parser, int size)
809 {
810         memset(parser, 0, sizeof(*parser));
811
812         parser->buffer = kmalloc(size, GFP_KERNEL);
813         if (!parser->buffer)
814                 return 1;
815
816         parser->size = size;
817         return 0;
818 }
819
820 /*
821  * trace_parser_put - frees the buffer for trace parser
822  */
823 void trace_parser_put(struct trace_parser *parser)
824 {
825         kfree(parser->buffer);
826 }
827
828 /*
829  * trace_get_user - reads the user input string separated by space
830  * (matched by isspace(ch))
831  *
832  * For each string found the 'struct trace_parser' is updated,
833  * and the function returns.
834  *
835  * Returns number of bytes read.
836  *
837  * See kernel/trace/trace.h for 'struct trace_parser' details.
838  */
839 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
840         size_t cnt, loff_t *ppos)
841 {
842         char ch;
843         size_t read = 0;
844         ssize_t ret;
845
846         if (!*ppos)
847                 trace_parser_clear(parser);
848
849         ret = get_user(ch, ubuf++);
850         if (ret)
851                 goto out;
852
853         read++;
854         cnt--;
855
856         /*
857          * The parser is not finished with the last write,
858          * continue reading the user input without skipping spaces.
859          */
860         if (!parser->cont) {
861                 /* skip white space */
862                 while (cnt && isspace(ch)) {
863                         ret = get_user(ch, ubuf++);
864                         if (ret)
865                                 goto out;
866                         read++;
867                         cnt--;
868                 }
869
870                 /* only spaces were written */
871                 if (isspace(ch)) {
872                         *ppos += read;
873                         ret = read;
874                         goto out;
875                 }
876
877                 parser->idx = 0;
878         }
879
880         /* read the non-space input */
881         while (cnt && !isspace(ch)) {
882                 if (parser->idx < parser->size - 1)
883                         parser->buffer[parser->idx++] = ch;
884                 else {
885                         ret = -EINVAL;
886                         goto out;
887                 }
888                 ret = get_user(ch, ubuf++);
889                 if (ret)
890                         goto out;
891                 read++;
892                 cnt--;
893         }
894
895         /* We either got finished input or we have to wait for another call. */
896         if (isspace(ch)) {
897                 parser->buffer[parser->idx] = 0;
898                 parser->cont = false;
899         } else if (parser->idx < parser->size - 1) {
900                 parser->cont = true;
901                 parser->buffer[parser->idx++] = ch;
902         } else {
903                 ret = -EINVAL;
904                 goto out;
905         }
906
907         *ppos += read;
908         ret = read;
909
910 out:
911         return ret;
912 }
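/*
 * A typical ->write() handler drives the parser roughly like this
 * (sketch only; do_something_with() is just a placeholder):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		do_something_with(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */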
913
914 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
915 {
916         int len;
917         int ret;
918
919         if (!cnt)
920                 return 0;
921
922         if (s->len <= s->readpos)
923                 return -EBUSY;
924
925         len = s->len - s->readpos;
926         if (cnt > len)
927                 cnt = len;
928         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
929         if (ret == cnt)
930                 return -EFAULT;
931
932         cnt -= ret;
933
934         s->readpos += cnt;
935         return cnt;
936 }
937
938 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
939 {
940         int len;
941
942         if (s->len <= s->readpos)
943                 return -EBUSY;
944
945         len = s->len - s->readpos;
946         if (cnt > len)
947                 cnt = len;
948         memcpy(buf, s->buffer + s->readpos, cnt);
949
950         s->readpos += cnt;
951         return cnt;
952 }
953
954 /*
955  * ftrace_max_lock is used to protect the swapping of buffers
956  * when taking a max snapshot. The buffers themselves are
957  * protected by per_cpu spinlocks. But the action of the swap
958  * needs its own lock.
959  *
960  * This is defined as an arch_spinlock_t in order to help
961  * with performance when lockdep debugging is enabled.
962  *
963  * It is also used in other places outside the update_max_tr
964  * so it needs to be defined outside of the
965  * CONFIG_TRACER_MAX_TRACE.
966  */
967 static arch_spinlock_t ftrace_max_lock =
968         (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
969
970 unsigned long __read_mostly     tracing_thresh;
971
972 #ifdef CONFIG_TRACER_MAX_TRACE
973 unsigned long __read_mostly     tracing_max_latency;
974
975 /*
976  * Copy the new maximum trace into the separate maximum-trace
977  * structure. (this way the maximum trace is permanently saved,
978  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
979  */
980 static void
981 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
982 {
983         struct trace_buffer *trace_buf = &tr->trace_buffer;
984         struct trace_buffer *max_buf = &tr->max_buffer;
985         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
986         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
987
988         max_buf->cpu = cpu;
989         max_buf->time_start = data->preempt_timestamp;
990
991         max_data->saved_latency = tracing_max_latency;
992         max_data->critical_start = data->critical_start;
993         max_data->critical_end = data->critical_end;
994
995         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
996         max_data->pid = tsk->pid;
997         /*
998          * If tsk == current, then use current_uid(), as that does not use
999          * RCU. The irq tracer can be called out of RCU scope.
1000          */
1001         if (tsk == current)
1002                 max_data->uid = current_uid();
1003         else
1004                 max_data->uid = task_uid(tsk);
1005
1006         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1007         max_data->policy = tsk->policy;
1008         max_data->rt_priority = tsk->rt_priority;
1009
1010         /* record this task's comm */
1011         tracing_record_cmdline(tsk);
1012 }
1013
1014 /**
1015  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1016  * @tr: tracer
1017  * @tsk: the task with the latency
1018  * @cpu: The cpu that initiated the trace.
1019  *
1020  * Flip the buffers between the @tr and the max_tr and record information
1021  * about which task was the cause of this latency.
1022  */
1023 void
1024 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1025 {
1026         struct ring_buffer *buf;
1027
1028         if (tr->stop_count)
1029                 return;
1030
1031         WARN_ON_ONCE(!irqs_disabled());
1032
1033         if (!tr->allocated_snapshot) {
1034                 /* Only the nop tracer should hit this when disabling */
1035                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1036                 return;
1037         }
1038
1039         arch_spin_lock(&ftrace_max_lock);
1040
1041         buf = tr->trace_buffer.buffer;
1042         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1043         tr->max_buffer.buffer = buf;
1044
1045         __update_max_tr(tr, tsk, cpu);
1046         arch_spin_unlock(&ftrace_max_lock);
1047 }
1048
1049 /**
1050  * update_max_tr_single - only copy one trace over, and reset the rest
1051  * @tr: tracer
1052  * @tsk: task with the latency
1053  * @cpu: the cpu of the buffer to copy.
1054  *
1055  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1056  */
1057 void
1058 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1059 {
1060         int ret;
1061
1062         if (tr->stop_count)
1063                 return;
1064
1065         WARN_ON_ONCE(!irqs_disabled());
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&ftrace_max_lock);
1073
1074         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1075
1076         if (ret == -EBUSY) {
1077                 /*
1078                  * We failed to swap the buffer due to a commit taking
1079                  * place on this CPU. We fail to record, but we reset
1080                  * the max trace buffer (no one writes directly to it)
1081                  * and flag that it failed.
1082                  */
1083                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1084                         "Failed to swap buffers due to commit in progress\n");
1085         }
1086
1087         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1088
1089         __update_max_tr(tr, tsk, cpu);
1090         arch_spin_unlock(&ftrace_max_lock);
1091 }
1092 #endif /* CONFIG_TRACER_MAX_TRACE */
1093
1094 static void default_wait_pipe(struct trace_iterator *iter)
1095 {
1096         /* Iterators are static, they should be filled or empty */
1097         if (trace_buffer_iter(iter, iter->cpu_file))
1098                 return;
1099
1100         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1101 }
1102
1103 #ifdef CONFIG_FTRACE_STARTUP_TEST
1104 static int run_tracer_selftest(struct tracer *type)
1105 {
1106         struct trace_array *tr = &global_trace;
1107         struct tracer *saved_tracer = tr->current_trace;
1108         int ret;
1109
1110         if (!type->selftest || tracing_selftest_disabled)
1111                 return 0;
1112
1113         /*
1114          * Run a selftest on this tracer.
1115          * Here we reset the trace buffer, and set the current
1116          * tracer to be this tracer. The tracer can then run some
1117          * internal tracing to verify that everything is in order.
1118          * If we fail, we do not register this tracer.
1119          */
1120         tracing_reset_online_cpus(&tr->trace_buffer);
1121
1122         tr->current_trace = type;
1123
1124 #ifdef CONFIG_TRACER_MAX_TRACE
1125         if (type->use_max_tr) {
1126                 /* If we expanded the buffers, make sure the max is expanded too */
1127                 if (ring_buffer_expanded)
1128                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1129                                            RING_BUFFER_ALL_CPUS);
1130                 tr->allocated_snapshot = true;
1131         }
1132 #endif
1133
1134         /* the test is responsible for initializing and enabling */
1135         pr_info("Testing tracer %s: ", type->name);
1136         ret = type->selftest(type, tr);
1137         /* the test is responsible for resetting too */
1138         tr->current_trace = saved_tracer;
1139         if (ret) {
1140                 printk(KERN_CONT "FAILED!\n");
1141                 /* Add the warning after printing 'FAILED' */
1142                 WARN_ON(1);
1143                 return -1;
1144         }
1145         /* Only reset on passing, to avoid touching corrupted buffers */
1146         tracing_reset_online_cpus(&tr->trace_buffer);
1147
1148 #ifdef CONFIG_TRACER_MAX_TRACE
1149         if (type->use_max_tr) {
1150                 tr->allocated_snapshot = false;
1151
1152                 /* Shrink the max buffer again */
1153                 if (ring_buffer_expanded)
1154                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1155                                            RING_BUFFER_ALL_CPUS);
1156         }
1157 #endif
1158
1159         printk(KERN_CONT "PASSED\n");
1160         return 0;
1161 }
1162 #else
1163 static inline int run_tracer_selftest(struct tracer *type)
1164 {
1165         return 0;
1166 }
1167 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1168
1169 /**
1170  * register_tracer - register a tracer with the ftrace system.
1171  * @type - the plugin for the tracer
1172  *
1173  * Register a new plugin tracer.
1174  */
1175 int register_tracer(struct tracer *type)
1176 {
1177         struct tracer *t;
1178         int ret = 0;
1179
1180         if (!type->name) {
1181                 pr_info("Tracer must have a name\n");
1182                 return -1;
1183         }
1184
1185         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1186                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1187                 return -1;
1188         }
1189
1190         mutex_lock(&trace_types_lock);
1191
1192         tracing_selftest_running = true;
1193
1194         for (t = trace_types; t; t = t->next) {
1195                 if (strcmp(type->name, t->name) == 0) {
1196                         /* already found */
1197                         pr_info("Tracer %s already registered\n",
1198                                 type->name);
1199                         ret = -1;
1200                         goto out;
1201                 }
1202         }
1203
1204         if (!type->set_flag)
1205                 type->set_flag = &dummy_set_flag;
1206         if (!type->flags)
1207                 type->flags = &dummy_tracer_flags;
1208         else
1209                 if (!type->flags->opts)
1210                         type->flags->opts = dummy_tracer_opt;
1211         if (!type->wait_pipe)
1212                 type->wait_pipe = default_wait_pipe;
1213
1214         ret = run_tracer_selftest(type);
1215         if (ret < 0)
1216                 goto out;
1217
1218         type->next = trace_types;
1219         trace_types = type;
1220
1221  out:
1222         tracing_selftest_running = false;
1223         mutex_unlock(&trace_types_lock);
1224
1225         if (ret || !default_bootup_tracer)
1226                 goto out_unlock;
1227
1228         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1229                 goto out_unlock;
1230
1231         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1232         /* Do we want this tracer to start on bootup? */
1233         tracing_set_tracer(type->name);
1234         default_bootup_tracer = NULL;
1235         /* disable other selftests, since this will break it. */
1236         tracing_selftest_disabled = true;
1237 #ifdef CONFIG_FTRACE_STARTUP_TEST
1238         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1239                type->name);
1240 #endif
1241
1242  out_unlock:
1243         return ret;
1244 }
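/*
 * A minimal tracer registration could look roughly like the sketch below
 * (the example_* names are only illustrative; a real tracer fills in more
 * callbacks):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */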
1245
1246 void tracing_reset(struct trace_buffer *buf, int cpu)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257         ring_buffer_reset_cpu(buffer, cpu);
1258
1259         ring_buffer_record_enable(buffer);
1260 }
1261
1262 void tracing_reset_online_cpus(struct trace_buffer *buf)
1263 {
1264         struct ring_buffer *buffer = buf->buffer;
1265         int cpu;
1266
1267         if (!buffer)
1268                 return;
1269
1270         ring_buffer_record_disable(buffer);
1271
1272         /* Make sure all commits have finished */
1273         synchronize_sched();
1274
1275         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1276
1277         for_each_online_cpu(cpu)
1278                 ring_buffer_reset_cpu(buffer, cpu);
1279
1280         ring_buffer_record_enable(buffer);
1281 }
1282
1283 /* Must have trace_types_lock held */
1284 void tracing_reset_all_online_cpus(void)
1285 {
1286         struct trace_array *tr;
1287
1288         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1289                 tracing_reset_online_cpus(&tr->trace_buffer);
1290 #ifdef CONFIG_TRACER_MAX_TRACE
1291                 tracing_reset_online_cpus(&tr->max_buffer);
1292 #endif
1293         }
1294 }
1295
1296 #define SAVED_CMDLINES 128
1297 #define NO_CMDLINE_MAP UINT_MAX
1298 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1299 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
1300 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
1301 static int cmdline_idx;
1302 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1303
1304 /* temporarily disable recording */
1305 static atomic_t trace_record_cmdline_disabled __read_mostly;
1306
1307 static void trace_init_cmdlines(void)
1308 {
1309         memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
1310         memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
1311         cmdline_idx = 0;
1312 }
1313
1314 int is_tracing_stopped(void)
1315 {
1316         return global_trace.stop_count;
1317 }
1318
1319 /**
1320  * tracing_start - quick start of the tracer
1321  *
1322  * If tracing is enabled but was stopped by tracing_stop,
1323  * this will start the tracer back up.
1324  */
1325 void tracing_start(void)
1326 {
1327         struct ring_buffer *buffer;
1328         unsigned long flags;
1329
1330         if (tracing_disabled)
1331                 return;
1332
1333         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1334         if (--global_trace.stop_count) {
1335                 if (global_trace.stop_count < 0) {
1336                         /* Someone screwed up their debugging */
1337                         WARN_ON_ONCE(1);
1338                         global_trace.stop_count = 0;
1339                 }
1340                 goto out;
1341         }
1342
1343         /* Prevent the buffers from switching */
1344         arch_spin_lock(&ftrace_max_lock);
1345
1346         buffer = global_trace.trace_buffer.buffer;
1347         if (buffer)
1348                 ring_buffer_record_enable(buffer);
1349
1350 #ifdef CONFIG_TRACER_MAX_TRACE
1351         buffer = global_trace.max_buffer.buffer;
1352         if (buffer)
1353                 ring_buffer_record_enable(buffer);
1354 #endif
1355
1356         arch_spin_unlock(&ftrace_max_lock);
1357
1358         ftrace_start();
1359  out:
1360         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1361 }
1362
1363 static void tracing_start_tr(struct trace_array *tr)
1364 {
1365         struct ring_buffer *buffer;
1366         unsigned long flags;
1367
1368         if (tracing_disabled)
1369                 return;
1370
1371         /* If global, we need to also start the max tracer */
1372         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1373                 return tracing_start();
1374
1375         raw_spin_lock_irqsave(&tr->start_lock, flags);
1376
1377         if (--tr->stop_count) {
1378                 if (tr->stop_count < 0) {
1379                         /* Someone screwed up their debugging */
1380                         WARN_ON_ONCE(1);
1381                         tr->stop_count = 0;
1382                 }
1383                 goto out;
1384         }
1385
1386         buffer = tr->trace_buffer.buffer;
1387         if (buffer)
1388                 ring_buffer_record_enable(buffer);
1389
1390  out:
1391         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1392 }
1393
1394 /**
1395  * tracing_stop - quick stop of the tracer
1396  *
1397  * Light weight way to stop tracing. Use in conjunction with
1398  * tracing_start.
1399  */
1400 void tracing_stop(void)
1401 {
1402         struct ring_buffer *buffer;
1403         unsigned long flags;
1404
1405         ftrace_stop();
1406         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1407         if (global_trace.stop_count++)
1408                 goto out;
1409
1410         /* Prevent the buffers from switching */
1411         arch_spin_lock(&ftrace_max_lock);
1412
1413         buffer = global_trace.trace_buffer.buffer;
1414         if (buffer)
1415                 ring_buffer_record_disable(buffer);
1416
1417 #ifdef CONFIG_TRACER_MAX_TRACE
1418         buffer = global_trace.max_buffer.buffer;
1419         if (buffer)
1420                 ring_buffer_record_disable(buffer);
1421 #endif
1422
1423         arch_spin_unlock(&ftrace_max_lock);
1424
1425  out:
1426         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1427 }
1428
1429 static void tracing_stop_tr(struct trace_array *tr)
1430 {
1431         struct ring_buffer *buffer;
1432         unsigned long flags;
1433
1434         /* If global, we need to also stop the max tracer */
1435         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1436                 return tracing_stop();
1437
1438         raw_spin_lock_irqsave(&tr->start_lock, flags);
1439         if (tr->stop_count++)
1440                 goto out;
1441
1442         buffer = tr->trace_buffer.buffer;
1443         if (buffer)
1444                 ring_buffer_record_disable(buffer);
1445
1446  out:
1447         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1448 }
1449
1450 void trace_stop_cmdline_recording(void);
1451
1452 static int trace_save_cmdline(struct task_struct *tsk)
1453 {
1454         unsigned pid, idx;
1455
1456         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1457                 return 0;
1458
1459         /*
1460          * It's not the end of the world if we don't get
1461          * the lock, but we also don't want to spin
1462          * nor do we want to disable interrupts,
1463          * so if we miss here, then better luck next time.
1464          */
1465         if (!arch_spin_trylock(&trace_cmdline_lock))
1466                 return 0;
1467
1468         idx = map_pid_to_cmdline[tsk->pid];
1469         if (idx == NO_CMDLINE_MAP) {
1470                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
1471
1472                 /*
1473                  * Check whether the cmdline buffer at idx has a pid
1474                  * mapped. We are going to overwrite that entry so we
1475                  * need to clear the map_pid_to_cmdline. Otherwise we
1476                  * would read the new comm for the old pid.
1477                  */
1478                 pid = map_cmdline_to_pid[idx];
1479                 if (pid != NO_CMDLINE_MAP)
1480                         map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1481
1482                 map_cmdline_to_pid[idx] = tsk->pid;
1483                 map_pid_to_cmdline[tsk->pid] = idx;
1484
1485                 cmdline_idx = idx;
1486         }
1487
1488         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
1489
1490         arch_spin_unlock(&trace_cmdline_lock);
1491
1492         return 1;
1493 }
1494
1495 void trace_find_cmdline(int pid, char comm[])
1496 {
1497         unsigned map;
1498
1499         if (!pid) {
1500                 strcpy(comm, "<idle>");
1501                 return;
1502         }
1503
1504         if (WARN_ON_ONCE(pid < 0)) {
1505                 strcpy(comm, "<XXX>");
1506                 return;
1507         }
1508
1509         if (pid > PID_MAX_DEFAULT) {
1510                 strcpy(comm, "<...>");
1511                 return;
1512         }
1513
1514         preempt_disable();
1515         arch_spin_lock(&trace_cmdline_lock);
1516         map = map_pid_to_cmdline[pid];
1517         if (map != NO_CMDLINE_MAP)
1518                 strcpy(comm, saved_cmdlines[map]);
1519         else
1520                 strcpy(comm, "<...>");
1521
1522         arch_spin_unlock(&trace_cmdline_lock);
1523         preempt_enable();
1524 }
1525
1526 void tracing_record_cmdline(struct task_struct *tsk)
1527 {
1528         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1529                 return;
1530
1531         if (!__this_cpu_read(trace_cmdline_save))
1532                 return;
1533
1534         if (trace_save_cmdline(tsk))
1535                 __this_cpu_write(trace_cmdline_save, false);
1536 }
1537
1538 void
1539 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1540                              int pc)
1541 {
1542         struct task_struct *tsk = current;
1543
1544         entry->preempt_count            = pc & 0xff;
1545         entry->pid                      = (tsk) ? tsk->pid : 0;
1546         entry->flags =
1547 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1548                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1549 #else
1550                 TRACE_FLAG_IRQS_NOSUPPORT |
1551 #endif
1552                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1553                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1554                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1555                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1556 }
1557 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1558
1559 struct ring_buffer_event *
1560 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1561                           int type,
1562                           unsigned long len,
1563                           unsigned long flags, int pc)
1564 {
1565         struct ring_buffer_event *event;
1566
1567         event = ring_buffer_lock_reserve(buffer, len);
1568         if (event != NULL) {
1569                 struct trace_entry *ent = ring_buffer_event_data(event);
1570
1571                 tracing_generic_entry_update(ent, flags, pc);
1572                 ent->type = type;
1573         }
1574
1575         return event;
1576 }
1577
1578 void
1579 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1580 {
1581         __this_cpu_write(trace_cmdline_save, true);
1582         ring_buffer_unlock_commit(buffer, event);
1583 }
1584
1585 static inline void
1586 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1587                              struct ring_buffer_event *event,
1588                              unsigned long flags, int pc)
1589 {
1590         __buffer_unlock_commit(buffer, event);
1591
1592         ftrace_trace_stack(buffer, flags, 6, pc);
1593         ftrace_trace_userstack(buffer, flags, pc);
1594 }
1595
1596 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1597                                 struct ring_buffer_event *event,
1598                                 unsigned long flags, int pc)
1599 {
1600         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1601 }
1602 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1603
1604 static struct ring_buffer *temp_buffer;
1605
1606 struct ring_buffer_event *
1607 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1608                           struct ftrace_event_file *ftrace_file,
1609                           int type, unsigned long len,
1610                           unsigned long flags, int pc)
1611 {
1612         struct ring_buffer_event *entry;
1613
1614         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1615         entry = trace_buffer_lock_reserve(*current_rb,
1616                                          type, len, flags, pc);
1617         /*
1618          * If tracing is off, but we have triggers enabled
1619          * we still need to look at the event data. Use the temp_buffer
1620          * to store the trace event for the tigger to use. It's recusive
1621          * safe and will not be recorded anywhere.
1622          */
1623         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1624                 *current_rb = temp_buffer;
1625                 entry = trace_buffer_lock_reserve(*current_rb,
1626                                                   type, len, flags, pc);
1627         }
1628         return entry;
1629 }
1630 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1631
1632 struct ring_buffer_event *
1633 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1634                                   int type, unsigned long len,
1635                                   unsigned long flags, int pc)
1636 {
1637         *current_rb = global_trace.trace_buffer.buffer;
1638         return trace_buffer_lock_reserve(*current_rb,
1639                                          type, len, flags, pc);
1640 }
1641 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1642
1643 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1644                                         struct ring_buffer_event *event,
1645                                         unsigned long flags, int pc)
1646 {
1647         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1648 }
1649 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1650
1651 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1652                                      struct ring_buffer_event *event,
1653                                      unsigned long flags, int pc,
1654                                      struct pt_regs *regs)
1655 {
1656         __buffer_unlock_commit(buffer, event);
1657
1658         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1659         ftrace_trace_userstack(buffer, flags, pc);
1660 }
1661 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1662
1663 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1664                                          struct ring_buffer_event *event)
1665 {
1666         ring_buffer_discard_commit(buffer, event);
1667 }
1668 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1669
1670 void
1671 trace_function(struct trace_array *tr,
1672                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1673                int pc)
1674 {
1675         struct ftrace_event_call *call = &event_function;
1676         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1677         struct ring_buffer_event *event;
1678         struct ftrace_entry *entry;
1679
1680         /* If we are reading the ring buffer, don't trace */
1681         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1682                 return;
1683
1684         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1685                                           flags, pc);
1686         if (!event)
1687                 return;
1688         entry   = ring_buffer_event_data(event);
1689         entry->ip                       = ip;
1690         entry->parent_ip                = parent_ip;
1691
1692         if (!call_filter_check_discard(call, entry, buffer, event))
1693                 __buffer_unlock_commit(buffer, event);
1694 }
1695
1696 #ifdef CONFIG_STACKTRACE
1697
1698 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1699 struct ftrace_stack {
1700         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1701 };
1702
1703 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1704 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1705
1706 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1707                                  unsigned long flags,
1708                                  int skip, int pc, struct pt_regs *regs)
1709 {
1710         struct ftrace_event_call *call = &event_kernel_stack;
1711         struct ring_buffer_event *event;
1712         struct stack_entry *entry;
1713         struct stack_trace trace;
1714         int use_stack;
1715         int size = FTRACE_STACK_ENTRIES;
1716
1717         trace.nr_entries        = 0;
1718         trace.skip              = skip;
1719
1720         /*
1721          * Since events can happen in NMIs there's no safe way to
1722          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1723          * or NMI comes in, it will just have to use the default
1724          * FTRACE_STACK_SIZE.
1725          * FTRACE_STACK_ENTRIES.
1726         preempt_disable_notrace();
1727
1728         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1729         /*
1730          * We don't need any atomic variables, just a barrier.
1731          * If an interrupt comes in, we don't care, because it would
1732          * have exited and put the counter back to what we want.
1733          * We just need a barrier to keep gcc from moving things
1734          * around.
1735          */
1736         barrier();
1737         if (use_stack == 1) {
1738                 trace.entries           = &__get_cpu_var(ftrace_stack).calls[0];
1739                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1740
1741                 if (regs)
1742                         save_stack_trace_regs(regs, &trace);
1743                 else
1744                         save_stack_trace(&trace);
1745
1746                 if (trace.nr_entries > size)
1747                         size = trace.nr_entries;
1748         } else
1749                 /* From now on, use_stack is a boolean */
1750                 use_stack = 0;
1751
1752         size *= sizeof(unsigned long);
1753
1754         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1755                                           sizeof(*entry) + size, flags, pc);
1756         if (!event)
1757                 goto out;
1758         entry = ring_buffer_event_data(event);
1759
1760         memset(&entry->caller, 0, size);
1761
1762         if (use_stack)
1763                 memcpy(&entry->caller, trace.entries,
1764                        trace.nr_entries * sizeof(unsigned long));
1765         else {
1766                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1767                 trace.entries           = entry->caller;
1768                 if (regs)
1769                         save_stack_trace_regs(regs, &trace);
1770                 else
1771                         save_stack_trace(&trace);
1772         }
1773
1774         entry->size = trace.nr_entries;
1775
1776         if (!call_filter_check_discard(call, entry, buffer, event))
1777                 __buffer_unlock_commit(buffer, event);
1778
1779  out:
1780         /* Again, don't let gcc optimize things here */
1781         barrier();
1782         __this_cpu_dec(ftrace_stack_reserve);
1783         preempt_enable_notrace();
1784
1785 }
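
/*
 * Illustrative sketch (hypothetical names, not part of the original file)
 * of the nesting trick used in __ftrace_trace_stack() above: the outermost
 * user on a CPU claims the large per-cpu scratch area, while a nested user
 * (an interrupt or NMI arriving in the middle) falls back to a small local
 * buffer.
 */
struct example_scratch {
        char buf[PAGE_SIZE];
};
static DEFINE_PER_CPU(struct example_scratch, example_scratch);
static DEFINE_PER_CPU(int, example_scratch_reserve);

static __maybe_unused void example_use_scratch(void)
{
        char small[64];
        char *buf = small;
        size_t len = sizeof(small);

        preempt_disable_notrace();
        if (__this_cpu_inc_return(example_scratch_reserve) == 1) {
                /* Outermost user on this CPU: safe to use the big buffer */
                buf = __get_cpu_var(example_scratch).buf;
                len = PAGE_SIZE;
        }
        barrier();      /* keep the compiler from reordering around the check */

        memset(buf, 0, len);    /* stand-in for the real work filling buf */

        barrier();
        __this_cpu_dec(example_scratch_reserve);
        preempt_enable_notrace();
}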
1786
1787 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1788                              int skip, int pc, struct pt_regs *regs)
1789 {
1790         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1791                 return;
1792
1793         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1794 }
1795
1796 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1797                         int skip, int pc)
1798 {
1799         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1800                 return;
1801
1802         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1803 }
1804
1805 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1806                    int pc)
1807 {
1808         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1809 }
1810
1811 /**
1812  * trace_dump_stack - record a stack back trace in the trace buffer
1813  * @skip: Number of functions to skip (helper handlers)
1814  */
1815 void trace_dump_stack(int skip)
1816 {
1817         unsigned long flags;
1818
1819         if (tracing_disabled || tracing_selftest_running)
1820                 return;
1821
1822         local_save_flags(flags);
1823
1824         /*
1825          * Skip 3 more, which seems to get us to the caller of
1826          * this function.
1827          */
1828         skip += 3;
1829         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1830                              flags, skip, preempt_count(), NULL);
1831 }
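
/*
 * Illustrative caller of trace_dump_stack() (hypothetical, not part of the
 * original file): record how an unexpected state was reached in the ring
 * buffer instead of printing to the console.
 */
static __maybe_unused void example_report_unexpected_state(void)
{
        trace_dump_stack(0);    /* 0: no extra frames to skip */
}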
1832
1833 static DEFINE_PER_CPU(int, user_stack_count);
1834
1835 void
1836 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1837 {
1838         struct ftrace_event_call *call = &event_user_stack;
1839         struct ring_buffer_event *event;
1840         struct userstack_entry *entry;
1841         struct stack_trace trace;
1842
1843         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1844                 return;
1845
1846         /*
1847          * NMIs cannot handle page faults, even with fixups.
1848          * Saving the user stack can (and often does) fault.
1849          */
1850         if (unlikely(in_nmi()))
1851                 return;
1852
1853         /*
1854          * prevent recursion, since the user stack tracing may
1855          * trigger other kernel events.
1856          */
1857         preempt_disable();
1858         if (__this_cpu_read(user_stack_count))
1859                 goto out;
1860
1861         __this_cpu_inc(user_stack_count);
1862
1863         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1864                                           sizeof(*entry), flags, pc);
1865         if (!event)
1866                 goto out_drop_count;
1867         entry   = ring_buffer_event_data(event);
1868
1869         entry->tgid             = current->tgid;
1870         memset(&entry->caller, 0, sizeof(entry->caller));
1871
1872         trace.nr_entries        = 0;
1873         trace.max_entries       = FTRACE_STACK_ENTRIES;
1874         trace.skip              = 0;
1875         trace.entries           = entry->caller;
1876
1877         save_stack_trace_user(&trace);
1878         if (!call_filter_check_discard(call, entry, buffer, event))
1879                 __buffer_unlock_commit(buffer, event);
1880
1881  out_drop_count:
1882         __this_cpu_dec(user_stack_count);
1883  out:
1884         preempt_enable();
1885 }
1886
1887 #ifdef UNUSED
1888 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1889 {
1890         ftrace_trace_userstack(tr, flags, preempt_count());
1891 }
1892 #endif /* UNUSED */
1893
1894 #endif /* CONFIG_STACKTRACE */
1895
1896 /* created for use with alloc_percpu */
1897 struct trace_buffer_struct {
1898         char buffer[TRACE_BUF_SIZE];
1899 };
1900
1901 static struct trace_buffer_struct *trace_percpu_buffer;
1902 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1903 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1904 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1905
1906 /*
1907  * The buffer used is dependent on the context. There is a per cpu
1908  * buffer for normal context, softirq context, hard irq context and
1909  * for NMI context. This allows for lockless recording.
1910  *
1911  * Note, if the buffers failed to be allocated, then this returns NULL
1912  */
1913 static char *get_trace_buf(void)
1914 {
1915         struct trace_buffer_struct *percpu_buffer;
1916
1917         /*
1918          * If we have allocated per cpu buffers, then we do not
1919          * need to do any locking.
1920          */
1921         if (in_nmi())
1922                 percpu_buffer = trace_percpu_nmi_buffer;
1923         else if (in_irq())
1924                 percpu_buffer = trace_percpu_irq_buffer;
1925         else if (in_softirq())
1926                 percpu_buffer = trace_percpu_sirq_buffer;
1927         else
1928                 percpu_buffer = trace_percpu_buffer;
1929
1930         if (!percpu_buffer)
1931                 return NULL;
1932
1933         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1934 }
1935
1936 static int alloc_percpu_trace_buffer(void)
1937 {
1938         struct trace_buffer_struct *buffers;
1939         struct trace_buffer_struct *sirq_buffers;
1940         struct trace_buffer_struct *irq_buffers;
1941         struct trace_buffer_struct *nmi_buffers;
1942
1943         buffers = alloc_percpu(struct trace_buffer_struct);
1944         if (!buffers)
1945                 goto err_warn;
1946
1947         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1948         if (!sirq_buffers)
1949                 goto err_sirq;
1950
1951         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1952         if (!irq_buffers)
1953                 goto err_irq;
1954
1955         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1956         if (!nmi_buffers)
1957                 goto err_nmi;
1958
1959         trace_percpu_buffer = buffers;
1960         trace_percpu_sirq_buffer = sirq_buffers;
1961         trace_percpu_irq_buffer = irq_buffers;
1962         trace_percpu_nmi_buffer = nmi_buffers;
1963
1964         return 0;
1965
1966  err_nmi:
1967         free_percpu(irq_buffers);
1968  err_irq:
1969         free_percpu(sirq_buffers);
1970  err_sirq:
1971         free_percpu(buffers);
1972  err_warn:
1973         WARN(1, "Could not allocate percpu trace_printk buffer");
1974         return -ENOMEM;
1975 }
1976
1977 static int buffers_allocated;
1978
1979 void trace_printk_init_buffers(void)
1980 {
1981         if (buffers_allocated)
1982                 return;
1983
1984         if (alloc_percpu_trace_buffer())
1985                 return;
1986
1987         pr_info("ftrace: Allocated trace_printk buffers\n");
1988
1989         /* Expand the buffers to set size */
1990         tracing_update_buffers();
1991
1992         buffers_allocated = 1;
1993
1994         /*
1995          * trace_printk_init_buffers() can be called by modules.
1996          * If that happens, then we need to start cmdline recording
1997          * directly here. If the global_trace.buffer is already
1998          * allocated here, then this was called by module code.
1999          */
2000         if (global_trace.trace_buffer.buffer)
2001                 tracing_start_cmdline_record();
2002 }
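
/*
 * Illustrative sketch (hypothetical call site, not part of the original
 * file): the buffers above back every trace_printk() user, built-in or
 * modular.  A typical debug call looks like:
 */
static __maybe_unused void example_debug_point(int cpu, u64 delta_ns)
{
        trace_printk("cpu %d saw a %llu ns gap\n",
                     cpu, (unsigned long long)delta_ns);
}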
2003
2004 void trace_printk_start_comm(void)
2005 {
2006         /* Start tracing comms if trace printk is set */
2007         if (!buffers_allocated)
2008                 return;
2009         tracing_start_cmdline_record();
2010 }
2011
2012 static void trace_printk_start_stop_comm(int enabled)
2013 {
2014         if (!buffers_allocated)
2015                 return;
2016
2017         if (enabled)
2018                 tracing_start_cmdline_record();
2019         else
2020                 tracing_stop_cmdline_record();
2021 }
2022
2023 /**
2024  * trace_vbprintk - write binary msg to tracing buffer
2025  *
2026  */
2027 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2028 {
2029         struct ftrace_event_call *call = &event_bprint;
2030         struct ring_buffer_event *event;
2031         struct ring_buffer *buffer;
2032         struct trace_array *tr = &global_trace;
2033         struct bprint_entry *entry;
2034         unsigned long flags;
2035         char *tbuffer;
2036         int len = 0, size, pc;
2037
2038         if (unlikely(tracing_selftest_running || tracing_disabled))
2039                 return 0;
2040
2041         /* Don't pollute graph traces with trace_vprintk internals */
2042         pause_graph_tracing();
2043
2044         pc = preempt_count();
2045         preempt_disable_notrace();
2046
2047         tbuffer = get_trace_buf();
2048         if (!tbuffer) {
2049                 len = 0;
2050                 goto out;
2051         }
2052
2053         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2054
2055         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2056                 goto out;
2057
2058         local_save_flags(flags);
2059         size = sizeof(*entry) + sizeof(u32) * len;
2060         buffer = tr->trace_buffer.buffer;
2061         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2062                                           flags, pc);
2063         if (!event)
2064                 goto out;
2065         entry = ring_buffer_event_data(event);
2066         entry->ip                       = ip;
2067         entry->fmt                      = fmt;
2068
2069         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2070         if (!call_filter_check_discard(call, entry, buffer, event)) {
2071                 __buffer_unlock_commit(buffer, event);
2072                 ftrace_trace_stack(buffer, flags, 6, pc);
2073         }
2074
2075 out:
2076         preempt_enable_notrace();
2077         unpause_graph_tracing();
2078
2079         return len;
2080 }
2081 EXPORT_SYMBOL_GPL(trace_vbprintk);
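
/*
 * Illustrative sketch of a varargs front end for trace_vbprintk() (the
 * wrapper name is hypothetical; the real ones live in trace_printk.c).
 */
static __maybe_unused int
example_bprintk(unsigned long ip, const char *fmt, ...)
{
        va_list ap;
        int ret;

        va_start(ap, fmt);
        ret = trace_vbprintk(ip, fmt, ap);
        va_end(ap);

        return ret;
}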
2082
2083 static int
2084 __trace_array_vprintk(struct ring_buffer *buffer,
2085                       unsigned long ip, const char *fmt, va_list args)
2086 {
2087         struct ftrace_event_call *call = &event_print;
2088         struct ring_buffer_event *event;
2089         int len = 0, size, pc;
2090         struct print_entry *entry;
2091         unsigned long flags;
2092         char *tbuffer;
2093
2094         if (tracing_disabled || tracing_selftest_running)
2095                 return 0;
2096
2097         /* Don't pollute graph traces with trace_vprintk internals */
2098         pause_graph_tracing();
2099
2100         pc = preempt_count();
2101         preempt_disable_notrace();
2102
2103
2104         tbuffer = get_trace_buf();
2105         if (!tbuffer) {
2106                 len = 0;
2107                 goto out;
2108         }
2109
2110         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2111         if (len > TRACE_BUF_SIZE)
2112                 goto out;
2113
2114         local_save_flags(flags);
2115         size = sizeof(*entry) + len + 1;
2116         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2117                                           flags, pc);
2118         if (!event)
2119                 goto out;
2120         entry = ring_buffer_event_data(event);
2121         entry->ip = ip;
2122
2123         memcpy(&entry->buf, tbuffer, len);
2124         entry->buf[len] = '\0';
2125         if (!call_filter_check_discard(call, entry, buffer, event)) {
2126                 __buffer_unlock_commit(buffer, event);
2127                 ftrace_trace_stack(buffer, flags, 6, pc);
2128         }
2129  out:
2130         preempt_enable_notrace();
2131         unpause_graph_tracing();
2132
2133         return len;
2134 }
2135
2136 int trace_array_vprintk(struct trace_array *tr,
2137                         unsigned long ip, const char *fmt, va_list args)
2138 {
2139         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2140 }
2141
2142 int trace_array_printk(struct trace_array *tr,
2143                        unsigned long ip, const char *fmt, ...)
2144 {
2145         int ret;
2146         va_list ap;
2147
2148         if (!(trace_flags & TRACE_ITER_PRINTK))
2149                 return 0;
2150
2151         va_start(ap, fmt);
2152         ret = trace_array_vprintk(tr, ip, fmt, ap);
2153         va_end(ap);
2154         return ret;
2155 }
2156
2157 int trace_array_printk_buf(struct ring_buffer *buffer,
2158                            unsigned long ip, const char *fmt, ...)
2159 {
2160         int ret;
2161         va_list ap;
2162
2163         if (!(trace_flags & TRACE_ITER_PRINTK))
2164                 return 0;
2165
2166         va_start(ap, fmt);
2167         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2168         va_end(ap);
2169         return ret;
2170 }
2171
2172 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2173 {
2174         return trace_array_vprintk(&global_trace, ip, fmt, args);
2175 }
2176 EXPORT_SYMBOL_GPL(trace_vprintk);
2177
2178 static void trace_iterator_increment(struct trace_iterator *iter)
2179 {
2180         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2181
2182         iter->idx++;
2183         if (buf_iter)
2184                 ring_buffer_read(buf_iter, NULL);
2185 }
2186
2187 static struct trace_entry *
2188 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2189                 unsigned long *lost_events)
2190 {
2191         struct ring_buffer_event *event;
2192         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2193
2194         if (buf_iter)
2195                 event = ring_buffer_iter_peek(buf_iter, ts);
2196         else
2197                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2198                                          lost_events);
2199
2200         if (event) {
2201                 iter->ent_size = ring_buffer_event_length(event);
2202                 return ring_buffer_event_data(event);
2203         }
2204         iter->ent_size = 0;
2205         return NULL;
2206 }
2207
2208 static struct trace_entry *
2209 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2210                   unsigned long *missing_events, u64 *ent_ts)
2211 {
2212         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2213         struct trace_entry *ent, *next = NULL;
2214         unsigned long lost_events = 0, next_lost = 0;
2215         int cpu_file = iter->cpu_file;
2216         u64 next_ts = 0, ts;
2217         int next_cpu = -1;
2218         int next_size = 0;
2219         int cpu;
2220
2221         /*
2222          * If we are in a per_cpu trace file, don't bother iterating over
2223          * all cpus, just peek at that cpu directly.
2224          */
2225         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2226                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2227                         return NULL;
2228                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2229                 if (ent_cpu)
2230                         *ent_cpu = cpu_file;
2231
2232                 return ent;
2233         }
2234
2235         for_each_tracing_cpu(cpu) {
2236
2237                 if (ring_buffer_empty_cpu(buffer, cpu))
2238                         continue;
2239
2240                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2241
2242                 /*
2243                  * Pick the entry with the smallest timestamp:
2244                  */
2245                 if (ent && (!next || ts < next_ts)) {
2246                         next = ent;
2247                         next_cpu = cpu;
2248                         next_ts = ts;
2249                         next_lost = lost_events;
2250                         next_size = iter->ent_size;
2251                 }
2252         }
2253
2254         iter->ent_size = next_size;
2255
2256         if (ent_cpu)
2257                 *ent_cpu = next_cpu;
2258
2259         if (ent_ts)
2260                 *ent_ts = next_ts;
2261
2262         if (missing_events)
2263                 *missing_events = next_lost;
2264
2265         return next;
2266 }
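
/*
 * Illustrative sketch (hypothetical helper, not part of the original file)
 * of the merge policy used by __find_next_entry() above, reduced to plain
 * arrays: among the per-cpu candidates, pick the one with the smallest
 * timestamp.
 */
static __maybe_unused int example_pick_next_cpu(const u64 *next_ts,
                                                const bool *has_next, int nr)
{
        int cpu, best = -1;

        for (cpu = 0; cpu < nr; cpu++) {
                if (!has_next[cpu])
                        continue;
                if (best < 0 || next_ts[cpu] < next_ts[best])
                        best = cpu;
        }
        return best;    /* -1 means every per-cpu buffer was empty */
}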
2267
2268 /* Find the next real entry, without updating the iterator itself */
2269 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2270                                           int *ent_cpu, u64 *ent_ts)
2271 {
2272         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2273 }
2274
2275 /* Find the next real entry, and increment the iterator to the next entry */
2276 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2277 {
2278         iter->ent = __find_next_entry(iter, &iter->cpu,
2279                                       &iter->lost_events, &iter->ts);
2280
2281         if (iter->ent)
2282                 trace_iterator_increment(iter);
2283
2284         return iter->ent ? iter : NULL;
2285 }
2286
2287 static void trace_consume(struct trace_iterator *iter)
2288 {
2289         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2290                             &iter->lost_events);
2291 }
2292
2293 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2294 {
2295         struct trace_iterator *iter = m->private;
2296         int i = (int)*pos;
2297         void *ent;
2298
2299         WARN_ON_ONCE(iter->leftover);
2300
2301         (*pos)++;
2302
2303         /* can't go backwards */
2304         if (iter->idx > i)
2305                 return NULL;
2306
2307         if (iter->idx < 0)
2308                 ent = trace_find_next_entry_inc(iter);
2309         else
2310                 ent = iter;
2311
2312         while (ent && iter->idx < i)
2313                 ent = trace_find_next_entry_inc(iter);
2314
2315         iter->pos = *pos;
2316
2317         return ent;
2318 }
2319
2320 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2321 {
2322         struct ring_buffer_event *event;
2323         struct ring_buffer_iter *buf_iter;
2324         unsigned long entries = 0;
2325         u64 ts;
2326
2327         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2328
2329         buf_iter = trace_buffer_iter(iter, cpu);
2330         if (!buf_iter)
2331                 return;
2332
2333         ring_buffer_iter_reset(buf_iter);
2334
2335         /*
2336          * With the max latency tracers, a reset may never have taken
2337          * place on a cpu. This is evident when the timestamp is
2338          * before the start of the buffer.
2339          */
2340         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2341                 if (ts >= iter->trace_buffer->time_start)
2342                         break;
2343                 entries++;
2344                 ring_buffer_read(buf_iter, NULL);
2345         }
2346
2347         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2348 }
2349
2350 /*
2351  * The current tracer is copied to avoid holding a global lock
2352  * all around.
2353  */
2354 static void *s_start(struct seq_file *m, loff_t *pos)
2355 {
2356         struct trace_iterator *iter = m->private;
2357         struct trace_array *tr = iter->tr;
2358         int cpu_file = iter->cpu_file;
2359         void *p = NULL;
2360         loff_t l = 0;
2361         int cpu;
2362
2363         /*
2364          * copy the tracer to avoid using a global lock all around.
2365          * iter->trace is a copy of current_trace, the pointer to the
2366          * name may be used instead of a strcmp(), as iter->trace->name
2367          * will point to the same string as current_trace->name.
2368          */
2369         mutex_lock(&trace_types_lock);
2370         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2371                 *iter->trace = *tr->current_trace;
2372         mutex_unlock(&trace_types_lock);
2373
2374 #ifdef CONFIG_TRACER_MAX_TRACE
2375         if (iter->snapshot && iter->trace->use_max_tr)
2376                 return ERR_PTR(-EBUSY);
2377 #endif
2378
2379         if (!iter->snapshot)
2380                 atomic_inc(&trace_record_cmdline_disabled);
2381
2382         if (*pos != iter->pos) {
2383                 iter->ent = NULL;
2384                 iter->cpu = 0;
2385                 iter->idx = -1;
2386
2387                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2388                         for_each_tracing_cpu(cpu)
2389                                 tracing_iter_reset(iter, cpu);
2390                 } else
2391                         tracing_iter_reset(iter, cpu_file);
2392
2393                 iter->leftover = 0;
2394                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2395                         ;
2396
2397         } else {
2398                 /*
2399                  * If we overflowed the seq_file before, then we want
2400                  * to just reuse the trace_seq buffer again.
2401                  */
2402                 if (iter->leftover)
2403                         p = iter;
2404                 else {
2405                         l = *pos - 1;
2406                         p = s_next(m, p, &l);
2407                 }
2408         }
2409
2410         trace_event_read_lock();
2411         trace_access_lock(cpu_file);
2412         return p;
2413 }
2414
2415 static void s_stop(struct seq_file *m, void *p)
2416 {
2417         struct trace_iterator *iter = m->private;
2418
2419 #ifdef CONFIG_TRACER_MAX_TRACE
2420         if (iter->snapshot && iter->trace->use_max_tr)
2421                 return;
2422 #endif
2423
2424         if (!iter->snapshot)
2425                 atomic_dec(&trace_record_cmdline_disabled);
2426
2427         trace_access_unlock(iter->cpu_file);
2428         trace_event_read_unlock();
2429 }
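
/*
 * For reference (simplified; the real loop in fs/seq_file.c also handles
 * buffer overflow and restart), the seq_file core drives the iterator
 * above roughly as:
 *
 *      p = s_start(m, &pos);
 *      while (p) {
 *              s_show(m, p);
 *              p = s_next(m, p, &pos);
 *      }
 *      s_stop(m, p);
 *
 * so every s_start() is balanced by an s_stop(), which is why the
 * lock/unlock pairs in the two functions above must match.
 */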
2430
2431 static void
2432 get_total_entries(struct trace_buffer *buf,
2433                   unsigned long *total, unsigned long *entries)
2434 {
2435         unsigned long count;
2436         int cpu;
2437
2438         *total = 0;
2439         *entries = 0;
2440
2441         for_each_tracing_cpu(cpu) {
2442                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2443                 /*
2444                  * If this buffer has skipped entries, then we hold all
2445                  * entries for the trace and we need to ignore the
2446                  * ones before the time stamp.
2447                  */
2448                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2449                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2450                         /* total is the same as the entries */
2451                         *total += count;
2452                 } else
2453                         *total += count +
2454                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2455                 *entries += count;
2456         }
2457 }
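
/*
 * Worked example for get_total_entries() (hypothetical numbers): a cpu
 * whose ring buffer holds 110 entries, 10 of which predate the latency
 * trace reset (skipped_entries == 10), contributes 100 to both *entries
 * and *total; a cpu with 50 entries, no skipped entries and 7 overruns
 * contributes 50 to *entries and 57 to *total.
 */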
2458
2459 static void print_lat_help_header(struct seq_file *m)
2460 {
2461         seq_puts(m, "#                  _------=> CPU#            \n");
2462         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2463         seq_puts(m, "#                | / _----=> need-resched    \n");
2464         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2465         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2466         seq_puts(m, "#                |||| /     delay             \n");
2467         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2468         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2469 }
2470
2471 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2472 {
2473         unsigned long total;
2474         unsigned long entries;
2475
2476         get_total_entries(buf, &total, &entries);
2477         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2478                    entries, total, num_online_cpus());
2479         seq_puts(m, "#\n");
2480 }
2481
2482 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2483 {
2484         print_event_info(buf, m);
2485         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2486         seq_puts(m, "#              | |       |          |         |\n");
2487 }
2488
2489 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2490 {
2491         print_event_info(buf, m);
2492         seq_puts(m, "#                              _-----=> irqs-off\n");
2493         seq_puts(m, "#                             / _----=> need-resched\n");
2494         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2495         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2496         seq_puts(m, "#                            ||| /     delay\n");
2497         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2498         seq_puts(m, "#              | |       |   ||||       |         |\n");
2499 }
2500
2501 void
2502 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2503 {
2504         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2505         struct trace_buffer *buf = iter->trace_buffer;
2506         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2507         struct tracer *type = iter->trace;
2508         unsigned long entries;
2509         unsigned long total;
2510         const char *name = "preemption";
2511
2512         name = type->name;
2513
2514         get_total_entries(buf, &total, &entries);
2515
2516         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2517                    name, UTS_RELEASE);
2518         seq_puts(m, "# -----------------------------------"
2519                  "---------------------------------\n");
2520         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2521                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2522                    nsecs_to_usecs(data->saved_latency),
2523                    entries,
2524                    total,
2525                    buf->cpu,
2526 #if defined(CONFIG_PREEMPT_NONE)
2527                    "server",
2528 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2529                    "desktop",
2530 #elif defined(CONFIG_PREEMPT)
2531                    "preempt",
2532 #else
2533                    "unknown",
2534 #endif
2535                    /* These are reserved for later use */
2536                    0, 0, 0, 0);
2537 #ifdef CONFIG_SMP
2538         seq_printf(m, " #P:%d)\n", num_online_cpus());
2539 #else
2540         seq_puts(m, ")\n");
2541 #endif
2542         seq_puts(m, "#    -----------------\n");
2543         seq_printf(m, "#    | task: %.16s-%d "
2544                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2545                    data->comm, data->pid,
2546                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2547                    data->policy, data->rt_priority);
2548         seq_puts(m, "#    -----------------\n");
2549
2550         if (data->critical_start) {
2551                 seq_puts(m, "#  => started at: ");
2552                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2553                 trace_print_seq(m, &iter->seq);
2554                 seq_puts(m, "\n#  => ended at:   ");
2555                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2556                 trace_print_seq(m, &iter->seq);
2557                 seq_puts(m, "\n#\n");
2558         }
2559
2560         seq_puts(m, "#\n");
2561 }
2562
2563 static void test_cpu_buff_start(struct trace_iterator *iter)
2564 {
2565         struct trace_seq *s = &iter->seq;
2566
2567         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2568                 return;
2569
2570         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2571                 return;
2572
2573         if (cpumask_test_cpu(iter->cpu, iter->started))
2574                 return;
2575
2576         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2577                 return;
2578
2579         cpumask_set_cpu(iter->cpu, iter->started);
2580
2581         /* Don't print started cpu buffer for the first entry of the trace */
2582         if (iter->idx > 1)
2583                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2584                                 iter->cpu);
2585 }
2586
2587 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2588 {
2589         struct trace_seq *s = &iter->seq;
2590         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2591         struct trace_entry *entry;
2592         struct trace_event *event;
2593
2594         entry = iter->ent;
2595
2596         test_cpu_buff_start(iter);
2597
2598         event = ftrace_find_event(entry->type);
2599
2600         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2601                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2602                         if (!trace_print_lat_context(iter))
2603                                 goto partial;
2604                 } else {
2605                         if (!trace_print_context(iter))
2606                                 goto partial;
2607                 }
2608         }
2609
2610         if (event)
2611                 return event->funcs->trace(iter, sym_flags, event);
2612
2613         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2614                 goto partial;
2615
2616         return TRACE_TYPE_HANDLED;
2617 partial:
2618         return TRACE_TYPE_PARTIAL_LINE;
2619 }
2620
2621 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2622 {
2623         struct trace_seq *s = &iter->seq;
2624         struct trace_entry *entry;
2625         struct trace_event *event;
2626
2627         entry = iter->ent;
2628
2629         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2630                 if (!trace_seq_printf(s, "%d %d %llu ",
2631                                       entry->pid, iter->cpu, iter->ts))
2632                         goto partial;
2633         }
2634
2635         event = ftrace_find_event(entry->type);
2636         if (event)
2637                 return event->funcs->raw(iter, 0, event);
2638
2639         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2640                 goto partial;
2641
2642         return TRACE_TYPE_HANDLED;
2643 partial:
2644         return TRACE_TYPE_PARTIAL_LINE;
2645 }
2646
2647 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2648 {
2649         struct trace_seq *s = &iter->seq;
2650         unsigned char newline = '\n';
2651         struct trace_entry *entry;
2652         struct trace_event *event;
2653
2654         entry = iter->ent;
2655
2656         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2657                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2658                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2659                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2660         }
2661
2662         event = ftrace_find_event(entry->type);
2663         if (event) {
2664                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2665                 if (ret != TRACE_TYPE_HANDLED)
2666                         return ret;
2667         }
2668
2669         SEQ_PUT_FIELD_RET(s, newline);
2670
2671         return TRACE_TYPE_HANDLED;
2672 }
2673
2674 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2675 {
2676         struct trace_seq *s = &iter->seq;
2677         struct trace_entry *entry;
2678         struct trace_event *event;
2679
2680         entry = iter->ent;
2681
2682         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2683                 SEQ_PUT_FIELD_RET(s, entry->pid);
2684                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2685                 SEQ_PUT_FIELD_RET(s, iter->ts);
2686         }
2687
2688         event = ftrace_find_event(entry->type);
2689         return event ? event->funcs->binary(iter, 0, event) :
2690                 TRACE_TYPE_HANDLED;
2691 }
2692
2693 int trace_empty(struct trace_iterator *iter)
2694 {
2695         struct ring_buffer_iter *buf_iter;
2696         int cpu;
2697
2698         /* If we are looking at one CPU buffer, only check that one */
2699         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2700                 cpu = iter->cpu_file;
2701                 buf_iter = trace_buffer_iter(iter, cpu);
2702                 if (buf_iter) {
2703                         if (!ring_buffer_iter_empty(buf_iter))
2704                                 return 0;
2705                 } else {
2706                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2707                                 return 0;
2708                 }
2709                 return 1;
2710         }
2711
2712         for_each_tracing_cpu(cpu) {
2713                 buf_iter = trace_buffer_iter(iter, cpu);
2714                 if (buf_iter) {
2715                         if (!ring_buffer_iter_empty(buf_iter))
2716                                 return 0;
2717                 } else {
2718                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2719                                 return 0;
2720                 }
2721         }
2722
2723         return 1;
2724 }
2725
2726 /*  Called with trace_event_read_lock() held. */
2727 enum print_line_t print_trace_line(struct trace_iterator *iter)
2728 {
2729         enum print_line_t ret;
2730
2731         if (iter->lost_events &&
2732             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2733                                  iter->cpu, iter->lost_events))
2734                 return TRACE_TYPE_PARTIAL_LINE;
2735
2736         if (iter->trace && iter->trace->print_line) {
2737                 ret = iter->trace->print_line(iter);
2738                 if (ret != TRACE_TYPE_UNHANDLED)
2739                         return ret;
2740         }
2741
2742         if (iter->ent->type == TRACE_BPUTS &&
2743                         trace_flags & TRACE_ITER_PRINTK &&
2744                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2745                 return trace_print_bputs_msg_only(iter);
2746
2747         if (iter->ent->type == TRACE_BPRINT &&
2748                         trace_flags & TRACE_ITER_PRINTK &&
2749                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2750                 return trace_print_bprintk_msg_only(iter);
2751
2752         if (iter->ent->type == TRACE_PRINT &&
2753                         trace_flags & TRACE_ITER_PRINTK &&
2754                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2755                 return trace_print_printk_msg_only(iter);
2756
2757         if (trace_flags & TRACE_ITER_BIN)
2758                 return print_bin_fmt(iter);
2759
2760         if (trace_flags & TRACE_ITER_HEX)
2761                 return print_hex_fmt(iter);
2762
2763         if (trace_flags & TRACE_ITER_RAW)
2764                 return print_raw_fmt(iter);
2765
2766         return print_trace_fmt(iter);
2767 }
2768
2769 void trace_latency_header(struct seq_file *m)
2770 {
2771         struct trace_iterator *iter = m->private;
2772
2773         /* print nothing if the buffers are empty */
2774         if (trace_empty(iter))
2775                 return;
2776
2777         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2778                 print_trace_header(m, iter);
2779
2780         if (!(trace_flags & TRACE_ITER_VERBOSE))
2781                 print_lat_help_header(m);
2782 }
2783
2784 void trace_default_header(struct seq_file *m)
2785 {
2786         struct trace_iterator *iter = m->private;
2787
2788         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2789                 return;
2790
2791         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2792                 /* print nothing if the buffers are empty */
2793                 if (trace_empty(iter))
2794                         return;
2795                 print_trace_header(m, iter);
2796                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2797                         print_lat_help_header(m);
2798         } else {
2799                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2800                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2801                                 print_func_help_header_irq(iter->trace_buffer, m);
2802                         else
2803                                 print_func_help_header(iter->trace_buffer, m);
2804                 }
2805         }
2806 }
2807
2808 static void test_ftrace_alive(struct seq_file *m)
2809 {
2810         if (!ftrace_is_dead())
2811                 return;
2812         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2813         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2814 }
2815
2816 #ifdef CONFIG_TRACER_MAX_TRACE
2817 static void show_snapshot_main_help(struct seq_file *m)
2818 {
2819         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2820         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2821         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2822         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2823         seq_printf(m, "#                      (Doesn't have to be '2', works with any number that\n");
2824         seq_printf(m, "#                       is not a '0' or '1')\n");
2825 }
2826
2827 static void show_snapshot_percpu_help(struct seq_file *m)
2828 {
2829         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2830 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2831         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2832         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2833 #else
2834         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2835         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2836 #endif
2837         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2838         seq_printf(m, "#                      (Doesn't have to be '2', works with any number that\n");
2839         seq_printf(m, "#                       is not a '0' or '1')\n");
2840 }
2841
2842 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2843 {
2844         if (iter->tr->allocated_snapshot)
2845                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2846         else
2847                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2848
2849         seq_printf(m, "# Snapshot commands:\n");
2850         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2851                 show_snapshot_main_help(m);
2852         else
2853                 show_snapshot_percpu_help(m);
2854 }
2855 #else
2856 /* Should never be called */
2857 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2858 #endif
2859
2860 static int s_show(struct seq_file *m, void *v)
2861 {
2862         struct trace_iterator *iter = v;
2863         int ret;
2864
2865         if (iter->ent == NULL) {
2866                 if (iter->tr) {
2867                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2868                         seq_puts(m, "#\n");
2869                         test_ftrace_alive(m);
2870                 }
2871                 if (iter->snapshot && trace_empty(iter))
2872                         print_snapshot_help(m, iter);
2873                 else if (iter->trace && iter->trace->print_header)
2874                         iter->trace->print_header(m);
2875                 else
2876                         trace_default_header(m);
2877
2878         } else if (iter->leftover) {
2879                 /*
2880                  * If we filled the seq_file buffer earlier, we
2881                  * want to just show it now.
2882                  */
2883                 ret = trace_print_seq(m, &iter->seq);
2884
2885                 /* ret should this time be zero, but you never know */
2886                 iter->leftover = ret;
2887
2888         } else {
2889                 print_trace_line(iter);
2890                 ret = trace_print_seq(m, &iter->seq);
2891                 /*
2892                  * If we overflow the seq_file buffer, then it will
2893                  * ask us for this data again at start up.
2894                  * Use that instead.
2895                  *  ret is 0 if seq_file write succeeded.
2896                  *        -1 otherwise.
2897                  */
2898                 iter->leftover = ret;
2899         }
2900
2901         return 0;
2902 }
2903
2904 /*
2905  * Should be used after trace_array_get(), trace_types_lock
2906  * ensures that i_cdev was already initialized.
2907  */
2908 static inline int tracing_get_cpu(struct inode *inode)
2909 {
2910         if (inode->i_cdev) /* See trace_create_cpu_file() */
2911                 return (long)inode->i_cdev - 1;
2912         return RING_BUFFER_ALL_CPUS;
2913 }
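
/*
 * Example of the encoding above (assuming trace_create_cpu_file() stores
 * cpu + 1 in i_cdev, as the "- 1" suggests): the per_cpu/cpu3/trace file
 * carries i_cdev == (void *)4 and yields cpu 3, while the top level trace
 * file leaves i_cdev NULL and yields RING_BUFFER_ALL_CPUS.
 */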
2914
2915 static const struct seq_operations tracer_seq_ops = {
2916         .start          = s_start,
2917         .next           = s_next,
2918         .stop           = s_stop,
2919         .show           = s_show,
2920 };
2921
2922 static struct trace_iterator *
2923 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2924 {
2925         struct trace_array *tr = inode->i_private;
2926         struct trace_iterator *iter;
2927         int cpu;
2928
2929         if (tracing_disabled)
2930                 return ERR_PTR(-ENODEV);
2931
2932         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2933         if (!iter)
2934                 return ERR_PTR(-ENOMEM);
2935
2936         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2937                                     GFP_KERNEL);
2938         if (!iter->buffer_iter)
2939                 goto release;
2940
2941         /*
2942          * We make a copy of the current tracer to avoid concurrent
2943          * changes on it while we are reading.
2944          */
2945         mutex_lock(&trace_types_lock);
2946         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2947         if (!iter->trace)
2948                 goto fail;
2949
2950         *iter->trace = *tr->current_trace;
2951
2952         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2953                 goto fail;
2954
2955         iter->tr = tr;
2956
2957 #ifdef CONFIG_TRACER_MAX_TRACE
2958         /* Currently only the top directory has a snapshot */
2959         if (tr->current_trace->print_max || snapshot)
2960                 iter->trace_buffer = &tr->max_buffer;
2961         else
2962 #endif
2963                 iter->trace_buffer = &tr->trace_buffer;
2964         iter->snapshot = snapshot;
2965         iter->pos = -1;
2966         iter->cpu_file = tracing_get_cpu(inode);
2967         mutex_init(&iter->mutex);
2968
2969         /* Notify the tracer early; before we stop tracing. */
2970         if (iter->trace && iter->trace->open)
2971                 iter->trace->open(iter);
2972
2973         /* Annotate start of buffers if we had overruns */
2974         if (ring_buffer_overruns(iter->trace_buffer->buffer))
2975                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2976
2977         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
2978         if (trace_clocks[tr->clock_id].in_ns)
2979                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
2980
2981         /* stop the trace while dumping if we are not opening "snapshot" */
2982         if (!iter->snapshot)
2983                 tracing_stop_tr(tr);
2984
2985         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
2986                 for_each_tracing_cpu(cpu) {
2987                         iter->buffer_iter[cpu] =
2988                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2989                 }
2990                 ring_buffer_read_prepare_sync();
2991                 for_each_tracing_cpu(cpu) {
2992                         ring_buffer_read_start(iter->buffer_iter[cpu]);
2993                         tracing_iter_reset(iter, cpu);
2994                 }
2995         } else {
2996                 cpu = iter->cpu_file;
2997                 iter->buffer_iter[cpu] =
2998                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
2999                 ring_buffer_read_prepare_sync();
3000                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3001                 tracing_iter_reset(iter, cpu);
3002         }
3003
3004         mutex_unlock(&trace_types_lock);
3005
3006         return iter;
3007
3008  fail:
3009         mutex_unlock(&trace_types_lock);
3010         kfree(iter->trace);
3011         kfree(iter->buffer_iter);
3012 release:
3013         seq_release_private(inode, file);
3014         return ERR_PTR(-ENOMEM);
3015 }
3016
3017 int tracing_open_generic(struct inode *inode, struct file *filp)
3018 {
3019         if (tracing_disabled)
3020                 return -ENODEV;
3021
3022         filp->private_data = inode->i_private;
3023         return 0;
3024 }
3025
3026 bool tracing_is_disabled(void)
3027 {
3028         return (tracing_disabled) ? true : false;
3029 }
3030
3031 /*
3032  * Open and update trace_array ref count.
3033  * Must have the current trace_array passed to it.
3034  */
3035 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3036 {
3037         struct trace_array *tr = inode->i_private;
3038
3039         if (tracing_disabled)
3040                 return -ENODEV;
3041
3042         if (trace_array_get(tr) < 0)
3043                 return -ENODEV;
3044
3045         filp->private_data = inode->i_private;
3046
3047         return 0;
3048 }
3049
3050 static int tracing_release(struct inode *inode, struct file *file)
3051 {
3052         struct trace_array *tr = inode->i_private;
3053         struct seq_file *m = file->private_data;
3054         struct trace_iterator *iter;
3055         int cpu;
3056
3057         if (!(file->f_mode & FMODE_READ)) {
3058                 trace_array_put(tr);
3059                 return 0;
3060         }
3061
3062         /* Writes do not use seq_file */
3063         iter = m->private;
3064         mutex_lock(&trace_types_lock);
3065
3066         for_each_tracing_cpu(cpu) {
3067                 if (iter->buffer_iter[cpu])
3068                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3069         }
3070
3071         if (iter->trace && iter->trace->close)
3072                 iter->trace->close(iter);
3073
3074         if (!iter->snapshot)
3075                 /* reenable tracing if it was previously enabled */
3076                 tracing_start_tr(tr);
3077
3078         __trace_array_put(tr);
3079
3080         mutex_unlock(&trace_types_lock);
3081
3082         mutex_destroy(&iter->mutex);
3083         free_cpumask_var(iter->started);
3084         kfree(iter->trace);
3085         kfree(iter->buffer_iter);
3086         seq_release_private(inode, file);
3087
3088         return 0;
3089 }
3090
3091 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3092 {
3093         struct trace_array *tr = inode->i_private;
3094
3095         trace_array_put(tr);
3096         return 0;
3097 }
3098
3099 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3100 {
3101         struct trace_array *tr = inode->i_private;
3102
3103         trace_array_put(tr);
3104
3105         return single_release(inode, file);
3106 }
3107
3108 static int tracing_open(struct inode *inode, struct file *file)
3109 {
3110         struct trace_array *tr = inode->i_private;
3111         struct trace_iterator *iter;
3112         int ret = 0;
3113
3114         if (trace_array_get(tr) < 0)
3115                 return -ENODEV;
3116
3117         /* If this file was open for write, then erase contents */
3118         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3119                 int cpu = tracing_get_cpu(inode);
3120
3121                 if (cpu == RING_BUFFER_ALL_CPUS)
3122                         tracing_reset_online_cpus(&tr->trace_buffer);
3123                 else
3124                         tracing_reset(&tr->trace_buffer, cpu);
3125         }
3126
3127         if (file->f_mode & FMODE_READ) {
3128                 iter = __tracing_open(inode, file, false);
3129                 if (IS_ERR(iter))
3130                         ret = PTR_ERR(iter);
3131                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3132                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3133         }
3134
3135         if (ret < 0)
3136                 trace_array_put(tr);
3137
3138         return ret;
3139 }
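
/*
 * Illustrative user-space view of the O_TRUNC handling above (paths
 * assume debugfs mounted at /sys/kernel/debug):
 *
 *      # truncating the file clears every cpu's buffer
 *      echo > /sys/kernel/debug/tracing/trace
 *      # truncating a per-cpu instance clears only that cpu
 *      echo > /sys/kernel/debug/tracing/per_cpu/cpu1/trace
 */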
3140
3141 static void *
3142 t_next(struct seq_file *m, void *v, loff_t *pos)
3143 {
3144         struct tracer *t = v;
3145
3146         (*pos)++;
3147
3148         if (t)
3149                 t = t->next;
3150
3151         return t;
3152 }
3153
3154 static void *t_start(struct seq_file *m, loff_t *pos)
3155 {
3156         struct tracer *t;
3157         loff_t l = 0;
3158
3159         mutex_lock(&trace_types_lock);
3160         for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
3161                 ;
3162
3163         return t;
3164 }
3165
3166 static void t_stop(struct seq_file *m, void *p)
3167 {
3168         mutex_unlock(&trace_types_lock);
3169 }
3170
3171 static int t_show(struct seq_file *m, void *v)
3172 {
3173         struct tracer *t = v;
3174
3175         if (!t)
3176                 return 0;
3177
3178         seq_printf(m, "%s", t->name);
3179         if (t->next)
3180                 seq_putc(m, ' ');
3181         else
3182                 seq_putc(m, '\n');
3183
3184         return 0;
3185 }
3186
3187 static const struct seq_operations show_traces_seq_ops = {
3188         .start          = t_start,
3189         .next           = t_next,
3190         .stop           = t_stop,
3191         .show           = t_show,
3192 };
3193
3194 static int show_traces_open(struct inode *inode, struct file *file)
3195 {
3196         if (tracing_disabled)
3197                 return -ENODEV;
3198
3199         return seq_open(file, &show_traces_seq_ops);
3200 }
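
/*
 * Illustrative user-space view of the iterator above (the exact list
 * depends on which tracers are compiled in):
 *
 *      # cat /sys/kernel/debug/tracing/available_tracers
 *      function_graph function nop
 */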
3201
3202 static ssize_t
3203 tracing_write_stub(struct file *filp, const char __user *ubuf,
3204                    size_t count, loff_t *ppos)
3205 {
3206         return count;
3207 }
3208
3209 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3210 {
3211         int ret;
3212
3213         if (file->f_mode & FMODE_READ)
3214                 ret = seq_lseek(file, offset, whence);
3215         else
3216                 file->f_pos = ret = 0;
3217
3218         return ret;
3219 }
3220
3221 static const struct file_operations tracing_fops = {
3222         .open           = tracing_open,
3223         .read           = seq_read,
3224         .write          = tracing_write_stub,
3225         .llseek         = tracing_lseek,
3226         .release        = tracing_release,
3227 };
3228
3229 static const struct file_operations show_traces_fops = {
3230         .open           = show_traces_open,
3231         .read           = seq_read,
3232         .release        = seq_release,
3233         .llseek         = seq_lseek,
3234 };
3235
3236 /*
3237  * The tracer itself will not take this lock, but still we want
3238  * to provide a consistent cpumask to user-space:
3239  */
3240 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3241
3242 /*
3243  * Temporary storage for the character representation of the
3244  * CPU bitmask (and one more byte for the newline):
3245  */
3246 static char mask_str[NR_CPUS + 1];
3247
3248 static ssize_t
3249 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3250                      size_t count, loff_t *ppos)
3251 {
3252         struct trace_array *tr = file_inode(filp)->i_private;
3253         int len;
3254
3255         mutex_lock(&tracing_cpumask_update_lock);
3256
3257         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3258         if (count - len < 2) {
3259                 count = -EINVAL;
3260                 goto out_err;
3261         }
3262         len += sprintf(mask_str + len, "\n");
3263         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3264
3265 out_err:
3266         mutex_unlock(&tracing_cpumask_update_lock);
3267
3268         return count;
3269 }
3270
3271 static ssize_t
3272 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3273                       size_t count, loff_t *ppos)
3274 {
3275         struct trace_array *tr = file_inode(filp)->i_private;
3276         cpumask_var_t tracing_cpumask_new;
3277         int err, cpu;
3278
3279         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3280                 return -ENOMEM;
3281
3282         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3283         if (err)
3284                 goto err_unlock;
3285
3286         mutex_lock(&tracing_cpumask_update_lock);
3287
3288         local_irq_disable();
3289         arch_spin_lock(&ftrace_max_lock);
3290         for_each_tracing_cpu(cpu) {
3291                 /*
3292                  * Increase/decrease the disabled counter if we are
3293                  * about to flip a bit in the cpumask:
3294                  */
3295                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3296                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3297                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3298                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3299                 }
3300                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3301                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3302                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3303                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3304                 }
3305         }
3306         arch_spin_unlock(&ftrace_max_lock);
3307         local_irq_enable();
3308
3309         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3310
3311         mutex_unlock(&tracing_cpumask_update_lock);
3312         free_cpumask_var(tracing_cpumask_new);
3313
3314         return count;
3315
3316 err_unlock:
3317         free_cpumask_var(tracing_cpumask_new);
3318
3319         return err;
3320 }
3321
3322 static const struct file_operations tracing_cpumask_fops = {
3323         .open           = tracing_open_generic_tr,
3324         .read           = tracing_cpumask_read,
3325         .write          = tracing_cpumask_write,
3326         .release        = tracing_release_generic_tr,
3327         .llseek         = generic_file_llseek,
3328 };
3329
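/*
 * Show all core trace options plus the current tracer's private options,
 * printing cleared options with a "no" prefix (the trace_options file).
 */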
3330 static int tracing_trace_options_show(struct seq_file *m, void *v)
3331 {
3332         struct tracer_opt *trace_opts;
3333         struct trace_array *tr = m->private;
3334         u32 tracer_flags;
3335         int i;
3336
3337         mutex_lock(&trace_types_lock);
3338         tracer_flags = tr->current_trace->flags->val;
3339         trace_opts = tr->current_trace->flags->opts;
3340
3341         for (i = 0; trace_options[i]; i++) {
3342                 if (trace_flags & (1 << i))
3343                         seq_printf(m, "%s\n", trace_options[i]);
3344                 else
3345                         seq_printf(m, "no%s\n", trace_options[i]);
3346         }
3347
3348         for (i = 0; trace_opts[i].name; i++) {
3349                 if (tracer_flags & trace_opts[i].bit)
3350                         seq_printf(m, "%s\n", trace_opts[i].name);
3351                 else
3352                         seq_printf(m, "no%s\n", trace_opts[i].name);
3353         }
3354         mutex_unlock(&trace_types_lock);
3355
3356         return 0;
3357 }
3358
3359 static int __set_tracer_option(struct tracer *trace,
3360                                struct tracer_flags *tracer_flags,
3361                                struct tracer_opt *opts, int neg)
3362 {
3363         int ret;
3364
3365         ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
3366         if (ret)
3367                 return ret;
3368
3369         if (neg)
3370                 tracer_flags->val &= ~opts->bit;
3371         else
3372                 tracer_flags->val |= opts->bit;
3373         return 0;
3374 }
3375
3376 /* Try to assign a tracer specific option */
3377 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
3378 {
3379         struct tracer_flags *tracer_flags = trace->flags;
3380         struct tracer_opt *opts = NULL;
3381         int i;
3382
3383         for (i = 0; tracer_flags->opts[i].name; i++) {
3384                 opts = &tracer_flags->opts[i];
3385
3386                 if (strcmp(cmp, opts->name) == 0)
3387                         return __set_tracer_option(trace, trace->flags,
3388                                                    opts, neg);
3389         }
3390
3391         return -EINVAL;
3392 }
3393
3394 /* Some tracers require overwrite to stay enabled */
3395 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3396 {
3397         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3398                 return -1;
3399
3400         return 0;
3401 }
3402
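/*
 * Set or clear a core trace flag. The current tracer may veto the change
 * via its flag_changed() callback; some flags have side effects
 * (cmdline recording, ring buffer overwrite mode, trace_printk comms).
 */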
3403 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3404 {
3405         /* do nothing if flag is already set */
3406         if (!!(trace_flags & mask) == !!enabled)
3407                 return 0;
3408
3409         /* Give the tracer a chance to approve the change */
3410         if (tr->current_trace->flag_changed)
3411                 if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
3412                         return -EINVAL;
3413
3414         if (enabled)
3415                 trace_flags |= mask;
3416         else
3417                 trace_flags &= ~mask;
3418
3419         if (mask == TRACE_ITER_RECORD_CMD)
3420                 trace_event_enable_cmd_record(enabled);
3421
3422         if (mask == TRACE_ITER_OVERWRITE) {
3423                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3424 #ifdef CONFIG_TRACER_MAX_TRACE
3425                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3426 #endif
3427         }
3428
3429         if (mask == TRACE_ITER_PRINTK)
3430                 trace_printk_start_stop_comm(enabled);
3431
3432         return 0;
3433 }
3434
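/*
 * Apply one option string from the trace_options file: a "no" prefix
 * clears the option; core options are tried first, then the current
 * tracer's own options.
 */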
3435 static int trace_set_options(struct trace_array *tr, char *option)
3436 {
3437         char *cmp;
3438         int neg = 0;
3439         int ret = -ENODEV;
3440         int i;
3441
3442         cmp = strstrip(option);
3443
3444         if (strncmp(cmp, "no", 2) == 0) {
3445                 neg = 1;
3446                 cmp += 2;
3447         }
3448
3449         mutex_lock(&trace_types_lock);
3450
3451         for (i = 0; trace_options[i]; i++) {
3452                 if (strcmp(cmp, trace_options[i]) == 0) {
3453                         ret = set_tracer_flag(tr, 1 << i, !neg);
3454                         break;
3455                 }
3456         }
3457
3458         /* If no option could be set, test the specific tracer options */
3459         if (!trace_options[i])
3460                 ret = set_tracer_option(tr->current_trace, cmp, neg);
3461
3462         mutex_unlock(&trace_types_lock);
3463
3464         return ret;
3465 }
3466
3467 static ssize_t
3468 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3469                         size_t cnt, loff_t *ppos)
3470 {
3471         struct seq_file *m = filp->private_data;
3472         struct trace_array *tr = m->private;
3473         char buf[64];
3474         int ret;
3475
3476         if (cnt >= sizeof(buf))
3477                 return -EINVAL;
3478
3479         if (copy_from_user(&buf, ubuf, cnt))
3480                 return -EFAULT;
3481
3482         buf[cnt] = 0;
3483
3484         ret = trace_set_options(tr, buf);
3485         if (ret < 0)
3486                 return ret;
3487
3488         *ppos += cnt;
3489
3490         return cnt;
3491 }
3492
3493 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3494 {
3495         struct trace_array *tr = inode->i_private;
3496         int ret;
3497
3498         if (tracing_disabled)
3499                 return -ENODEV;
3500
3501         if (trace_array_get(tr) < 0)
3502                 return -ENODEV;
3503
3504         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3505         if (ret < 0)
3506                 trace_array_put(tr);
3507
3508         return ret;
3509 }
3510
3511 static const struct file_operations tracing_iter_fops = {
3512         .open           = tracing_trace_options_open,
3513         .read           = seq_read,
3514         .llseek         = seq_lseek,
3515         .release        = tracing_single_release_tr,
3516         .write          = tracing_trace_options_write,
3517 };
3518
3519 static const char readme_msg[] =
3520         "tracing mini-HOWTO:\n\n"
3521         "# echo 0 > tracing_on : quick way to disable tracing\n"
3522         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3523         " Important files:\n"
3524         "  trace\t\t\t- The static contents of the buffer\n"
3525         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3526         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3527         "  current_tracer\t- function and latency tracers\n"
3528         "  available_tracers\t- list of configured tracers for current_tracer\n"
3529         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3530         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3531         "  trace_clock\t\t- change the clock used to order events\n"
3532         "       local:   Per cpu clock but may not be synced across CPUs\n"
3533         "      global:   Synced across CPUs but slows tracing down.\n"
3534         "     counter:   Not a clock, but just an increment\n"
3535         "      uptime:   Jiffy counter from time of boot\n"
3536         "        perf:   Same clock that perf events use\n"
3537 #ifdef CONFIG_X86_64
3538         "     x86-tsc:   TSC cycle counter\n"
3539 #endif
3540         "\n  trace_marker\t\t- Writes into this file go into the kernel buffer\n"
3541         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3542         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3543         "\t\t\t  Remove sub-buffer with rmdir\n"
3544         "  trace_options\t\t- Set format or modify how tracing happens\n"
3545         "\t\t\t  Disable an option by adding a 'no' prefix to the\n"
3546         "\t\t\t  option name\n"
3547 #ifdef CONFIG_DYNAMIC_FTRACE
3548         "\n  available_filter_functions - list of functions that can be filtered on\n"
3549         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3550         "\t\t\t  functions\n"
3551         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3552         "\t     modules: Can select a group via module\n"
3553         "\t      Format: :mod:<module-name>\n"
3554         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3555         "\t    triggers: a command to perform when function is hit\n"
3556         "\t      Format: <function>:<trigger>[:count]\n"
3557         "\t     trigger: traceon, traceoff\n"
3558         "\t\t      enable_event:<system>:<event>\n"
3559         "\t\t      disable_event:<system>:<event>\n"
3560 #ifdef CONFIG_STACKTRACE
3561         "\t\t      stacktrace\n"
3562 #endif
3563 #ifdef CONFIG_TRACER_SNAPSHOT
3564         "\t\t      snapshot\n"
3565 #endif
3566         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3567         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3568         "\t     The first one will disable tracing every time do_fault is hit\n"
3569         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3570         "\t       The first time do_trap is hit and it disables tracing, the\n"
3571         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3572         "\t       the counter will not decrement. It only decrements when the\n"
3573         "\t       trigger did work\n"
3574         "\t     To remove trigger without count:\n"
3575         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3576         "\t     To remove trigger with a count:\n"
3577         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3578         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3579         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3580         "\t    modules: Can select a group via module command :mod:\n"
3581         "\t    Does not accept triggers\n"
3582 #endif /* CONFIG_DYNAMIC_FTRACE */
3583 #ifdef CONFIG_FUNCTION_TRACER
3584         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3585         "\t\t    (function)\n"
3586 #endif
3587 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3588         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3589         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3590 #endif
3591 #ifdef CONFIG_TRACER_SNAPSHOT
3592         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3593         "\t\t\t  snapshot buffer. Read the contents for more\n"
3594         "\t\t\t  information\n"
3595 #endif
3596 #ifdef CONFIG_STACK_TRACER
3597         "  stack_trace\t\t- Shows the max stack trace when active\n"
3598         "  stack_max_size\t- Shows current max stack size that was traced\n"
3599         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3600         "\t\t\t  new trace)\n"
3601 #ifdef CONFIG_DYNAMIC_FTRACE
3602         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3603         "\t\t\t  traces\n"
3604 #endif
3605 #endif /* CONFIG_STACK_TRACER */
3606         "  events/\t\t- Directory containing all trace event subsystems:\n"
3607         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3608         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3609         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3610         "\t\t\t  events\n"
3611         "      filter\t\t- If set, only events passing filter are traced\n"
3612         "  events/<system>/<event>/\t- Directory containing control files for\n"
3613         "\t\t\t  <event>:\n"
3614         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3615         "      filter\t\t- If set, only events passing filter are traced\n"
3616         "      trigger\t\t- If set, a command to perform when event is hit\n"
3617         "\t    Format: <trigger>[:count][if <filter>]\n"
3618         "\t   trigger: traceon, traceoff\n"
3619         "\t            enable_event:<system>:<event>\n"
3620         "\t            disable_event:<system>:<event>\n"
3621 #ifdef CONFIG_STACKTRACE
3622         "\t\t    stacktrace\n"
3623 #endif
3624 #ifdef CONFIG_TRACER_SNAPSHOT
3625         "\t\t    snapshot\n"
3626 #endif
3627         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3628         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3629         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3630         "\t                  events/block/block_unplug/trigger\n"
3631         "\t   The first disables tracing every time block_unplug is hit.\n"
3632         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3633         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3634         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3635         "\t   Like function triggers, the counter is only decremented if it\n"
3636         "\t    enabled or disabled tracing.\n"
3637         "\t   To remove a trigger without a count:\n"
3638         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3639         "\t   To remove a trigger with a count:\n"
3640         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3641         "\t   Filters can be ignored when removing a trigger.\n"
3642 ;
3643
3644 static ssize_t
3645 tracing_readme_read(struct file *filp, char __user *ubuf,
3646                        size_t cnt, loff_t *ppos)
3647 {
3648         return simple_read_from_buffer(ubuf, cnt, ppos,
3649                                         readme_msg, strlen(readme_msg));
3650 }
3651
3652 static const struct file_operations tracing_readme_fops = {
3653         .open           = tracing_open_generic,
3654         .read           = tracing_readme_read,
3655         .llseek         = generic_file_llseek,
3656 };
3657
3658 static ssize_t
3659 tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
3660                                 size_t cnt, loff_t *ppos)
3661 {
3662         char *buf_comm;
3663         char *file_buf;
3664         char *buf;
3665         int len = 0;
3666         int pid;
3667         int i;
3668
3669         file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
3670         if (!file_buf)
3671                 return -ENOMEM;
3672
3673         buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
3674         if (!buf_comm) {
3675                 kfree(file_buf);
3676                 return -ENOMEM;
3677         }
3678
3679         buf = file_buf;
3680
3681         for (i = 0; i < SAVED_CMDLINES; i++) {
3682                 int r;
3683
3684                 pid = map_cmdline_to_pid[i];
3685                 if (pid == -1 || pid == NO_CMDLINE_MAP)
3686                         continue;
3687
3688                 trace_find_cmdline(pid, buf_comm);
3689                 r = sprintf(buf, "%d %s\n", pid, buf_comm);
3690                 buf += r;
3691                 len += r;
3692         }
3693
3694         len = simple_read_from_buffer(ubuf, cnt, ppos,
3695                                       file_buf, len);
3696
3697         kfree(file_buf);
3698         kfree(buf_comm);
3699
3700         return len;
3701 }
3702
3703 static const struct file_operations tracing_saved_cmdlines_fops = {
3704     .open       = tracing_open_generic,
3705     .read       = tracing_saved_cmdlines_read,
3706     .llseek     = generic_file_llseek,
3707 };
3708
3709 static ssize_t
3710 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3711                        size_t cnt, loff_t *ppos)
3712 {
3713         struct trace_array *tr = filp->private_data;
3714         char buf[MAX_TRACER_SIZE+2];
3715         int r;
3716
3717         mutex_lock(&trace_types_lock);
3718         r = sprintf(buf, "%s\n", tr->current_trace->name);
3719         mutex_unlock(&trace_types_lock);
3720
3721         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3722 }
3723
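/* Reset the online per-cpu buffers and call the tracer's init callback. */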
3724 int tracer_init(struct tracer *t, struct trace_array *tr)
3725 {
3726         tracing_reset_online_cpus(&tr->trace_buffer);
3727         return t->init(tr);
3728 }
3729
3730 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3731 {
3732         int cpu;
3733
3734         for_each_tracing_cpu(cpu)
3735                 per_cpu_ptr(buf->data, cpu)->entries = val;
3736 }
3737
3738 #ifdef CONFIG_TRACER_MAX_TRACE
3739 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3740 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3741                                         struct trace_buffer *size_buf, int cpu_id)
3742 {
3743         int cpu, ret = 0;
3744
3745         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3746                 for_each_tracing_cpu(cpu) {
3747                         ret = ring_buffer_resize(trace_buf->buffer,
3748                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3749                         if (ret < 0)
3750                                 break;
3751                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3752                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3753                 }
3754         } else {
3755                 ret = ring_buffer_resize(trace_buf->buffer,
3756                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3757                 if (ret == 0)
3758                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3759                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3760         }
3761
3762         return ret;
3763 }
3764 #endif /* CONFIG_TRACER_MAX_TRACE */
3765
3766 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3767                                         unsigned long size, int cpu)
3768 {
3769         int ret;
3770
3771         /*
3772          * If kernel or user changes the size of the ring buffer
3773          * we use the size that was given, and we can forget about
3774          * expanding it later.
3775          */
3776         ring_buffer_expanded = true;
3777
3778         /* May be called before buffers are initialized */
3779         if (!tr->trace_buffer.buffer)
3780                 return 0;
3781
3782         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3783         if (ret < 0)
3784                 return ret;
3785
3786 #ifdef CONFIG_TRACER_MAX_TRACE
3787         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3788             !tr->current_trace->use_max_tr)
3789                 goto out;
3790
3791         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3792         if (ret < 0) {
3793                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3794                                                      &tr->trace_buffer, cpu);
3795                 if (r < 0) {
3796                         /*
3797                          * AARGH! We are left with a different
3798                          * size max buffer!!!!
3799                          * The max buffer is our "snapshot" buffer.
3800                          * When a tracer needs a snapshot (one of the
3801                          * latency tracers), it swaps the max buffer
3802                          * with the saved snapshot. We succeeded in
3803                          * updating the size of the main buffer, but failed
3804                          * to update the size of the max buffer. Then, when
3805                          * we tried to reset the main buffer to the original
3806                          * size, we failed there too. This is very unlikely
3807                          * to happen, but if it does, warn and kill all
3808                          * tracing.
3809                          */
3810                         WARN_ON(1);
3811                         tracing_disabled = 1;
3812                 }
3813                 return ret;
3814         }
3815
3816         if (cpu == RING_BUFFER_ALL_CPUS)
3817                 set_buffer_entries(&tr->max_buffer, size);
3818         else
3819                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3820
3821  out:
3822 #endif /* CONFIG_TRACER_MAX_TRACE */
3823
3824         if (cpu == RING_BUFFER_ALL_CPUS)
3825                 set_buffer_entries(&tr->trace_buffer, size);
3826         else
3827                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
3828
3829         return ret;
3830 }
3831
3832 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
3833                                           unsigned long size, int cpu_id)
3834 {
3835         int ret = size;
3836
3837         mutex_lock(&trace_types_lock);
3838
3839         if (cpu_id != RING_BUFFER_ALL_CPUS) {
3840                 /* make sure this cpu is enabled in the mask */
3841                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
3842                         ret = -EINVAL;
3843                         goto out;
3844                 }
3845         }
3846
3847         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
3848         if (ret < 0)
3849                 ret = -ENOMEM;
3850
3851 out:
3852         mutex_unlock(&trace_types_lock);
3853
3854         return ret;
3855 }
3856
3857
3858 /**
3859  * tracing_update_buffers - used by tracing facility to expand ring buffers
3860  *
3861  * To save memory when tracing is configured in but never used, the
3862  * ring buffers are initially set to a minimum size. Once a user starts
3863  * to use the tracing facility, they need to grow to their default
3864  * size.
3865  *
3866  * This function is to be called when a tracer is about to be used.
3867  */
3868 int tracing_update_buffers(void)
3869 {
3870         int ret = 0;
3871
3872         mutex_lock(&trace_types_lock);
3873         if (!ring_buffer_expanded)
3874                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
3875                                                 RING_BUFFER_ALL_CPUS);
3876         mutex_unlock(&trace_types_lock);
3877
3878         return ret;
3879 }
3880
3881 struct trace_option_dentry;
3882
3883 static struct trace_option_dentry *
3884 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
3885
3886 static void
3887 destroy_trace_option_files(struct trace_option_dentry *topts);
3888
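/*
 * Switch the global trace array to the tracer named @buf: reset the old
 * tracer, rebuild the tracer-specific option files, allocate or free
 * the max/snapshot buffer as needed, then init the new tracer.
 */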
3889 static int tracing_set_tracer(const char *buf)
3890 {
3891         static struct trace_option_dentry *topts;
3892         struct trace_array *tr = &global_trace;
3893         struct tracer *t;
3894 #ifdef CONFIG_TRACER_MAX_TRACE
3895         bool had_max_tr;
3896 #endif
3897         int ret = 0;
3898
3899         mutex_lock(&trace_types_lock);
3900
3901         if (!ring_buffer_expanded) {
3902                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
3903                                                 RING_BUFFER_ALL_CPUS);
3904                 if (ret < 0)
3905                         goto out;
3906                 ret = 0;
3907         }
3908
3909         for (t = trace_types; t; t = t->next) {
3910                 if (strcmp(t->name, buf) == 0)
3911                         break;
3912         }
3913         if (!t) {
3914                 ret = -EINVAL;
3915                 goto out;
3916         }
3917         if (t == tr->current_trace)
3918                 goto out;
3919
3920         trace_branch_disable();
3921
3922         tr->current_trace->enabled = false;
3923
3924         if (tr->current_trace->reset)
3925                 tr->current_trace->reset(tr);
3926
3927         /* Current trace needs to be nop_trace before synchronize_sched */
3928         tr->current_trace = &nop_trace;
3929
3930 #ifdef CONFIG_TRACER_MAX_TRACE
3931         had_max_tr = tr->allocated_snapshot;
3932
3933         if (had_max_tr && !t->use_max_tr) {
3934                 /*
3935                  * We need to make sure that the update_max_tr sees that
3936                  * current_trace changed to nop_trace to keep it from
3937                  * swapping the buffers after we resize it.
3938                  * update_max_tr() is called with interrupts disabled,
3939                  * so a synchronize_sched() is sufficient.
3940                  */
3941                 synchronize_sched();
3942                 free_snapshot(tr);
3943         }
3944 #endif
3945         destroy_trace_option_files(topts);
3946
3947         topts = create_trace_option_files(tr, t);
3948
3949 #ifdef CONFIG_TRACER_MAX_TRACE
3950         if (t->use_max_tr && !had_max_tr) {
3951                 ret = alloc_snapshot(tr);
3952                 if (ret < 0)
3953                         goto out;
3954         }
3955 #endif
3956
3957         if (t->init) {
3958                 ret = tracer_init(t, tr);
3959                 if (ret)
3960                         goto out;
3961         }
3962
3963         tr->current_trace = t;
3964         tr->current_trace->enabled = true;
3965         trace_branch_enable(tr);
3966  out:
3967         mutex_unlock(&trace_types_lock);
3968
3969         return ret;
3970 }
3971
3972 static ssize_t
3973 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3974                         size_t cnt, loff_t *ppos)
3975 {
3976         char buf[MAX_TRACER_SIZE+1];
3977         int i;
3978         size_t ret;
3979         int err;
3980
3981         ret = cnt;
3982
3983         if (cnt > MAX_TRACER_SIZE)
3984                 cnt = MAX_TRACER_SIZE;
3985
3986         if (copy_from_user(&buf, ubuf, cnt))
3987                 return -EFAULT;
3988
3989         buf[cnt] = 0;
3990
3991         /* strip trailing whitespace. */
3992         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3993                 buf[i] = 0;
3994
3995         err = tracing_set_tracer(buf);
3996         if (err)
3997                 return err;
3998
3999         *ppos += ret;
4000
4001         return ret;
4002 }
4003
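/*
 * The max-latency value is stored in nanoseconds but exposed to user
 * space in microseconds (converted on read and write below).
 */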
4004 static ssize_t
4005 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4006                      size_t cnt, loff_t *ppos)
4007 {
4008         unsigned long *ptr = filp->private_data;
4009         char buf[64];
4010         int r;
4011
4012         r = snprintf(buf, sizeof(buf), "%ld\n",
4013                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4014         if (r > sizeof(buf))
4015                 r = sizeof(buf);
4016         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4017 }
4018
4019 static ssize_t
4020 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4021                       size_t cnt, loff_t *ppos)
4022 {
4023         unsigned long *ptr = filp->private_data;
4024         unsigned long val;
4025         int ret;
4026
4027         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4028         if (ret)
4029                 return ret;
4030
4031         *ptr = val * 1000;
4032
4033         return cnt;
4034 }
4035
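/*
 * Open trace_pipe: allocate a trace_iterator, copy the current tracer
 * into it, and mark every CPU as "started" so the consuming read does
 * not show the start of the buffer.
 */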
4036 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4037 {
4038         struct trace_array *tr = inode->i_private;
4039         struct trace_iterator *iter;
4040         int ret = 0;
4041
4042         if (tracing_disabled)
4043                 return -ENODEV;
4044
4045         if (trace_array_get(tr) < 0)
4046                 return -ENODEV;
4047
4048         mutex_lock(&trace_types_lock);
4049
4050         /* create a buffer to store the information to pass to userspace */
4051         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4052         if (!iter) {
4053                 ret = -ENOMEM;
4054                 __trace_array_put(tr);
4055                 goto out;
4056         }
4057
4058         /*
4059          * We make a copy of the current tracer to avoid concurrent
4060          * changes on it while we are reading.
4061          */
4062         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4063         if (!iter->trace) {
4064                 ret = -ENOMEM;
4065                 goto fail;
4066         }
4067         *iter->trace = *tr->current_trace;
4068
4069         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4070                 ret = -ENOMEM;
4071                 goto fail;
4072         }
4073
4074         /* trace pipe does not show start of buffer */
4075         cpumask_setall(iter->started);
4076
4077         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4078                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4079
4080         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4081         if (trace_clocks[tr->clock_id].in_ns)
4082                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4083
4084         iter->tr = tr;
4085         iter->trace_buffer = &tr->trace_buffer;
4086         iter->cpu_file = tracing_get_cpu(inode);
4087         mutex_init(&iter->mutex);
4088         filp->private_data = iter;
4089
4090         if (iter->trace->pipe_open)
4091                 iter->trace->pipe_open(iter);
4092
4093         nonseekable_open(inode, filp);
4094 out:
4095         mutex_unlock(&trace_types_lock);
4096         return ret;
4097
4098 fail:
4099         kfree(iter->trace);
4100         kfree(iter);
4101         __trace_array_put(tr);
4102         mutex_unlock(&trace_types_lock);
4103         return ret;
4104 }
4105
4106 static int tracing_release_pipe(struct inode *inode, struct file *file)
4107 {
4108         struct trace_iterator *iter = file->private_data;
4109         struct trace_array *tr = inode->i_private;
4110
4111         mutex_lock(&trace_types_lock);
4112
4113         if (iter->trace->pipe_close)
4114                 iter->trace->pipe_close(iter);
4115
4116         mutex_unlock(&trace_types_lock);
4117
4118         free_cpumask_var(iter->started);
4119         mutex_destroy(&iter->mutex);
4120         kfree(iter->trace);
4121         kfree(iter);
4122
4123         trace_array_put(tr);
4124
4125         return 0;
4126 }
4127
4128 static unsigned int
4129 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4130 {
4131         /* Iterators are static, they should be filled or empty */
4132         if (trace_buffer_iter(iter, iter->cpu_file))
4133                 return POLLIN | POLLRDNORM;
4134
4135         if (trace_flags & TRACE_ITER_BLOCK)
4136                 /*
4137                  * Always select as readable when in blocking mode
4138                  */
4139                 return POLLIN | POLLRDNORM;
4140         else
4141                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4142                                              filp, poll_table);
4143 }
4144
4145 static unsigned int
4146 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4147 {
4148         struct trace_iterator *iter = filp->private_data;
4149
4150         return trace_poll(iter, filp, poll_table);
4151 }
4152
4153 /*
4154  * This is a makeshift waitqueue.
4155  * A tracer might use this callback in some rare cases:
4156  *
4157  *  1) the current tracer might hold the runqueue lock when it wakes up
4158  *     a reader, hence a deadlock (sched, function, and function graph tracers)
4159  *  2) the function tracers trace all functions, and we don't want
4160  *     the overhead of calling wake_up and friends
4161  *     (and tracing them too)
4162  *
4163  *     Anyway, this is a very primitive wakeup.
4164  */
4165 void poll_wait_pipe(struct trace_iterator *iter)
4166 {
4167         set_current_state(TASK_INTERRUPTIBLE);
4168         /* sleep for 100 msecs, and try again. */
4169         schedule_timeout(HZ / 10);
4170 }
4171
4172 /* Must be called with iter->mutex held. */
4173 static int tracing_wait_pipe(struct file *filp)
4174 {
4175         struct trace_iterator *iter = filp->private_data;
4176
4177         while (trace_empty(iter)) {
4178
4179                 if ((filp->f_flags & O_NONBLOCK)) {
4180                         return -EAGAIN;
4181                 }
4182
4183                 mutex_unlock(&iter->mutex);
4184
4185                 iter->trace->wait_pipe(iter);
4186
4187                 mutex_lock(&iter->mutex);
4188
4189                 if (signal_pending(current))
4190                         return -EINTR;
4191
4192                 /*
4193                  * We block until we read something and tracing is disabled.
4194                  * We keep blocking if tracing is disabled but we have not yet
4195                  * read anything. This allows a user to cat this file, and
4196                  * then enable tracing. But after we have read something,
4197                  * we give an EOF when tracing is again disabled.
4198                  *
4199                  * iter->pos will be 0 if we haven't read anything.
4200                  */
4201                 if (!tracing_is_on() && iter->pos)
4202                         break;
4203         }
4204
4205         return 1;
4206 }
4207
4208 /*
4209  * Consumer reader.
4210  */
4211 static ssize_t
4212 tracing_read_pipe(struct file *filp, char __user *ubuf,
4213                   size_t cnt, loff_t *ppos)
4214 {
4215         struct trace_iterator *iter = filp->private_data;
4216         struct trace_array *tr = iter->tr;
4217         ssize_t sret;
4218
4219         /* return any leftover data */
4220         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4221         if (sret != -EBUSY)
4222                 return sret;
4223
4224         trace_seq_init(&iter->seq);
4225
4226         /* copy the tracer to avoid using a global lock all around */
4227         mutex_lock(&trace_types_lock);
4228         if (unlikely(iter->trace->name != tr->current_trace->name))
4229                 *iter->trace = *tr->current_trace;
4230         mutex_unlock(&trace_types_lock);
4231
4232         /*
4233          * Avoid more than one consumer on a single file descriptor.
4234          * This is just a matter of trace coherency; the ring buffer itself
4235          * is protected.
4236          */
4237         mutex_lock(&iter->mutex);
4238         if (iter->trace->read) {
4239                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4240                 if (sret)
4241                         goto out;
4242         }
4243
4244 waitagain:
4245         sret = tracing_wait_pipe(filp);
4246         if (sret <= 0)
4247                 goto out;
4248
4249         /* stop when tracing is finished */
4250         if (trace_empty(iter)) {
4251                 sret = 0;
4252                 goto out;
4253         }
4254
4255         if (cnt >= PAGE_SIZE)
4256                 cnt = PAGE_SIZE - 1;
4257
4258         /* reset all but tr, trace, and overruns */
4259         memset(&iter->seq, 0,
4260                sizeof(struct trace_iterator) -
4261                offsetof(struct trace_iterator, seq));
4262         cpumask_clear(iter->started);
4263         iter->pos = -1;
4264
4265         trace_event_read_lock();
4266         trace_access_lock(iter->cpu_file);
4267         while (trace_find_next_entry_inc(iter) != NULL) {
4268                 enum print_line_t ret;
4269                 int len = iter->seq.len;
4270
4271                 ret = print_trace_line(iter);
4272                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4273                         /* don't print partial lines */
4274                         iter->seq.len = len;
4275                         break;
4276                 }
4277                 if (ret != TRACE_TYPE_NO_CONSUME)
4278                         trace_consume(iter);
4279
4280                 if (iter->seq.len >= cnt)
4281                         break;
4282
4283                 /*
4284                  * Setting the full flag means we reached the trace_seq buffer
4285                  * size, so we should have left via the partial-output condition
4286                  * above. One of the trace_seq_* functions is not used properly.
4287                  */
4288                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4289                           iter->ent->type);
4290         }
4291         trace_access_unlock(iter->cpu_file);
4292         trace_event_read_unlock();
4293
4294         /* Now copy what we have to the user */
4295         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4296         if (iter->seq.readpos >= iter->seq.len)
4297                 trace_seq_init(&iter->seq);
4298
4299         /*
4300          * If there was nothing to send to the user, in spite of consuming
4301          * trace entries, go back and wait for more entries.
4302          */
4303         if (sret == -EBUSY)
4304                 goto waitagain;
4305
4306 out:
4307         mutex_unlock(&iter->mutex);
4308
4309         return sret;
4310 }
4311
4312 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4313                                      unsigned int idx)
4314 {
4315         __free_page(spd->pages[idx]);
4316 }
4317
4318 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4319         .can_merge              = 0,
4320         .map                    = generic_pipe_buf_map,
4321         .unmap                  = generic_pipe_buf_unmap,
4322         .confirm                = generic_pipe_buf_confirm,
4323         .release                = generic_pipe_buf_release,
4324         .steal                  = generic_pipe_buf_steal,
4325         .get                    = generic_pipe_buf_get,
4326 };
4327
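/*
 * Render trace entries into iter->seq for one splice page, consuming
 * entries as they are printed; returns how much of @rem is still left
 * to fill.
 */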
4328 static size_t
4329 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4330 {
4331         size_t count;
4332         int ret;
4333
4334         /* Seq buffer is page-sized, exactly what we need. */
4335         for (;;) {
4336                 count = iter->seq.len;
4337                 ret = print_trace_line(iter);
4338                 count = iter->seq.len - count;
4339                 if (rem < count) {
4340                         rem = 0;
4341                         iter->seq.len -= count;
4342                         break;
4343                 }
4344                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4345                         iter->seq.len -= count;
4346                         break;
4347                 }
4348
4349                 if (ret != TRACE_TYPE_NO_CONSUME)
4350                         trace_consume(iter);
4351                 rem -= count;
4352                 if (!trace_find_next_entry_inc(iter))   {
4353                         rem = 0;
4354                         iter->ent = NULL;
4355                         break;
4356                 }
4357         }
4358
4359         return rem;
4360 }
4361
4362 static ssize_t tracing_splice_read_pipe(struct file *filp,
4363                                         loff_t *ppos,
4364                                         struct pipe_inode_info *pipe,
4365                                         size_t len,
4366                                         unsigned int flags)
4367 {
4368         struct page *pages_def[PIPE_DEF_BUFFERS];
4369         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4370         struct trace_iterator *iter = filp->private_data;
4371         struct splice_pipe_desc spd = {
4372                 .pages          = pages_def,
4373                 .partial        = partial_def,
4374                 .nr_pages       = 0, /* This gets updated below. */
4375                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4376                 .flags          = flags,
4377                 .ops            = &tracing_pipe_buf_ops,
4378                 .spd_release    = tracing_spd_release_pipe,
4379         };
4380         struct trace_array *tr = iter->tr;
4381         ssize_t ret;
4382         size_t rem;
4383         unsigned int i;
4384
4385         if (splice_grow_spd(pipe, &spd))
4386                 return -ENOMEM;
4387
4388         /* copy the tracer to avoid using a global lock all around */
4389         mutex_lock(&trace_types_lock);
4390         if (unlikely(iter->trace->name != tr->current_trace->name))
4391                 *iter->trace = *tr->current_trace;
4392         mutex_unlock(&trace_types_lock);
4393
4394         mutex_lock(&iter->mutex);
4395
4396         if (iter->trace->splice_read) {
4397                 ret = iter->trace->splice_read(iter, filp,
4398                                                ppos, pipe, len, flags);
4399                 if (ret)
4400                         goto out_err;
4401         }
4402
4403         ret = tracing_wait_pipe(filp);
4404         if (ret <= 0)
4405                 goto out_err;
4406
4407         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4408                 ret = -EFAULT;
4409                 goto out_err;
4410         }
4411
4412         trace_event_read_lock();
4413         trace_access_lock(iter->cpu_file);
4414
4415         /* Fill as many pages as possible. */
4416         for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
4417                 spd.pages[i] = alloc_page(GFP_KERNEL);
4418                 if (!spd.pages[i])
4419                         break;
4420
4421                 rem = tracing_fill_pipe_page(rem, iter);
4422
4423                 /* Copy the data into the page, so we can start over. */
4424                 ret = trace_seq_to_buffer(&iter->seq,
4425                                           page_address(spd.pages[i]),
4426                                           iter->seq.len);
4427                 if (ret < 0) {
4428                         __free_page(spd.pages[i]);
4429                         break;
4430                 }
4431                 spd.partial[i].offset = 0;
4432                 spd.partial[i].len = iter->seq.len;
4433
4434                 trace_seq_init(&iter->seq);
4435         }
4436
4437         trace_access_unlock(iter->cpu_file);
4438         trace_event_read_unlock();
4439         mutex_unlock(&iter->mutex);
4440
4441         spd.nr_pages = i;
4442
4443         ret = splice_to_pipe(pipe, &spd);
4444 out:
4445         splice_shrink_spd(&spd);
4446         return ret;
4447
4448 out_err:
4449         mutex_unlock(&iter->mutex);
4450         goto out;
4451 }
4452
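/*
 * Report the ring buffer size in KB (buffer_size_kb). For the all-CPUs
 * file a single value is printed when every CPU has the same size,
 * otherwise "X" is printed.
 */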
4453 static ssize_t
4454 tracing_entries_read(struct file *filp, char __user *ubuf,
4455                      size_t cnt, loff_t *ppos)
4456 {
4457         struct inode *inode = file_inode(filp);
4458         struct trace_array *tr = inode->i_private;
4459         int cpu = tracing_get_cpu(inode);
4460         char buf[64];
4461         int r = 0;
4462         ssize_t ret;
4463
4464         mutex_lock(&trace_types_lock);
4465
4466         if (cpu == RING_BUFFER_ALL_CPUS) {
4467                 int cpu, buf_size_same;
4468                 unsigned long size;
4469
4470                 size = 0;
4471                 buf_size_same = 1;
4472                 /* check if all cpu buffer sizes are the same */
4473                 for_each_tracing_cpu(cpu) {
4474                         /* fill in the size from first enabled cpu */
4475                         if (size == 0)
4476                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4477                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4478                                 buf_size_same = 0;
4479                                 break;
4480                         }
4481                 }
4482
4483                 if (buf_size_same) {
4484                         if (!ring_buffer_expanded)
4485                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4486                                             size >> 10,
4487                                             trace_buf_size >> 10);
4488                         else
4489                                 r = sprintf(buf, "%lu\n", size >> 10);
4490                 } else
4491                         r = sprintf(buf, "X\n");
4492         } else
4493                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4494
4495         mutex_unlock(&trace_types_lock);
4496
4497         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4498         return ret;
4499 }
4500
4501 static ssize_t
4502 tracing_entries_write(struct file *filp, const char __user *ubuf,
4503                       size_t cnt, loff_t *ppos)
4504 {
4505         struct inode *inode = file_inode(filp);
4506         struct trace_array *tr = inode->i_private;
4507         unsigned long val;
4508         int ret;
4509
4510         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4511         if (ret)
4512                 return ret;
4513
4514         /* must have at least 1 entry */
4515         if (!val)
4516                 return -EINVAL;
4517
4518         /* value is in KB */
4519         val <<= 10;
4520         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4521         if (ret < 0)
4522                 return ret;
4523
4524         *ppos += cnt;
4525
4526         return cnt;
4527 }
4528
4529 static ssize_t
4530 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4531                                 size_t cnt, loff_t *ppos)
4532 {
4533         struct trace_array *tr = filp->private_data;
4534         char buf[64];
4535         int r, cpu;
4536         unsigned long size = 0, expanded_size = 0;
4537
4538         mutex_lock(&trace_types_lock);
4539         for_each_tracing_cpu(cpu) {
4540                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4541                 if (!ring_buffer_expanded)
4542                         expanded_size += trace_buf_size >> 10;
4543         }
4544         if (ring_buffer_expanded)
4545                 r = sprintf(buf, "%lu\n", size);
4546         else
4547                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4548         mutex_unlock(&trace_types_lock);
4549
4550         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4551 }
4552
4553 static ssize_t
4554 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4555                           size_t cnt, loff_t *ppos)
4556 {
4557         /*
4558          * There is no need to read what the user has written; this function
4559          * exists just so that using "echo" on this file does not return an error
4560          */
4561
4562         *ppos += cnt;
4563
4564         return cnt;
4565 }
4566
4567 static int
4568 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4569 {
4570         struct trace_array *tr = inode->i_private;
4571
4572         /* disable tracing ? */
4573         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4574                 tracer_tracing_off(tr);
4575         /* resize the ring buffer to 0 */
4576         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4577
4578         trace_array_put(tr);
4579
4580         return 0;
4581 }
4582
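/*
 * Handler for writes to trace_marker. For example (see the mini-HOWTO
 * above), something like:
 *
 *   # echo hello > trace_marker
 *
 * injects a TRACE_PRINT entry with that text into the ring buffer.
 */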
4583 static ssize_t
4584 tracing_mark_write(struct file *filp, const char __user *ubuf,
4585                                         size_t cnt, loff_t *fpos)
4586 {
4587         unsigned long addr = (unsigned long)ubuf;
4588         struct trace_array *tr = filp->private_data;
4589         struct ring_buffer_event *event;
4590         struct ring_buffer *buffer;
4591         struct print_entry *entry;
4592         unsigned long irq_flags;
4593         struct page *pages[2];
4594         void *map_page[2];
4595         int nr_pages = 1;
4596         ssize_t written;
4597         int offset;
4598         int size;
4599         int len;
4600         int ret;
4601         int i;
4602
4603         if (tracing_disabled)
4604                 return -EINVAL;
4605
4606         if (!(trace_flags & TRACE_ITER_MARKERS))
4607                 return -EINVAL;
4608
4609         if (cnt > TRACE_BUF_SIZE)
4610                 cnt = TRACE_BUF_SIZE;
4611
4612         /*
4613          * Userspace is injecting traces into the kernel trace buffer.
4614          * We want to be as non-intrusive as possible.
4615          * To do so, we do not want to allocate any special buffers
4616          * or take any locks, but instead write the userspace data
4617          * straight into the ring buffer.
4618          *
4619          * First we need to pin the userspace buffer into memory. Most
4620          * likely it already is resident, because the task just referenced
4621          * it, but there is no guarantee. By using get_user_pages_fast()
4622          * and kmap_atomic()/kunmap_atomic() we can get access to the
4623          * pages directly. We then write the data directly into the
4624          * ring buffer.
4625          */
4626         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4627
4628         /* check if we cross pages */
4629         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4630                 nr_pages = 2;
4631
4632         offset = addr & (PAGE_SIZE - 1);
4633         addr &= PAGE_MASK;
4634
4635         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4636         if (ret < nr_pages) {
4637                 while (--ret >= 0)
4638                         put_page(pages[ret]);
4639                 written = -EFAULT;
4640                 goto out;
4641         }
4642
4643         for (i = 0; i < nr_pages; i++)
4644                 map_page[i] = kmap_atomic(pages[i]);
4645
4646         local_save_flags(irq_flags);
4647         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4648         buffer = tr->trace_buffer.buffer;
4649         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4650                                           irq_flags, preempt_count());
4651         if (!event) {
4652                 /* Ring buffer disabled, return as if not open for write */
4653                 written = -EBADF;
4654                 goto out_unlock;
4655         }
4656
4657         entry = ring_buffer_event_data(event);
4658         entry->ip = _THIS_IP_;
4659
4660         if (nr_pages == 2) {
4661                 len = PAGE_SIZE - offset;
4662                 memcpy(&entry->buf, map_page[0] + offset, len);
4663                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4664         } else
4665                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4666
4667         if (entry->buf[cnt - 1] != '\n') {
4668                 entry->buf[cnt] = '\n';
4669                 entry->buf[cnt + 1] = '\0';
4670         } else
4671                 entry->buf[cnt] = '\0';
4672
4673         __buffer_unlock_commit(buffer, event);
4674
4675         written = cnt;
4676
4677         *fpos += written;
4678
4679  out_unlock:
4680         for (i = 0; i < nr_pages; i++){
4681                 kunmap_atomic(map_page[i]);
4682                 put_page(pages[i]);
4683         }
4684  out:
4685         return written;
4686 }
4687
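/* List the available trace clocks, marking the active one with []. */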
4688 static int tracing_clock_show(struct seq_file *m, void *v)
4689 {
4690         struct trace_array *tr = m->private;
4691         int i;
4692
4693         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4694                 seq_printf(m,
4695                         "%s%s%s%s", i ? " " : "",
4696                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4697                         i == tr->clock_id ? "]" : "");
4698         seq_putc(m, '\n');
4699
4700         return 0;
4701 }
4702
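/*
 * Select a trace clock by name, e.g. "echo global > trace_clock" as
 * described in the mini-HOWTO above. The buffers are reset afterwards
 * because timestamps from different clocks are not comparable.
 */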
4703 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4704                                    size_t cnt, loff_t *fpos)
4705 {
4706         struct seq_file *m = filp->private_data;
4707         struct trace_array *tr = m->private;
4708         char buf[64];
4709         const char *clockstr;
4710         int i;
4711
4712         if (cnt >= sizeof(buf))
4713                 return -EINVAL;
4714
4715         if (copy_from_user(&buf, ubuf, cnt))
4716                 return -EFAULT;
4717
4718         buf[cnt] = 0;
4719
4720         clockstr = strstrip(buf);
4721
4722         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4723                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4724                         break;
4725         }
4726         if (i == ARRAY_SIZE(trace_clocks))
4727                 return -EINVAL;
4728
4729         mutex_lock(&trace_types_lock);
4730
4731         tr->clock_id = i;
4732
4733         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4734
4735         /*
4736          * New clock may not be consistent with the previous clock.
4737          * Reset the buffer so that it doesn't have incomparable timestamps.
4738          */
4739         tracing_reset_online_cpus(&tr->trace_buffer);
4740
4741 #ifdef CONFIG_TRACER_MAX_TRACE
4742         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4743                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4744         tracing_reset_online_cpus(&tr->max_buffer);
4745 #endif
4746
4747         mutex_unlock(&trace_types_lock);
4748
4749         *fpos += cnt;
4750
4751         return cnt;
4752 }
4753
4754 static int tracing_clock_open(struct inode *inode, struct file *file)
4755 {
4756         struct trace_array *tr = inode->i_private;
4757         int ret;
4758
4759         if (tracing_disabled)
4760                 return -ENODEV;
4761
4762         if (trace_array_get(tr))
4763                 return -ENODEV;
4764
4765         ret = single_open(file, tracing_clock_show, inode->i_private);
4766         if (ret < 0)
4767                 trace_array_put(tr);
4768
4769         return ret;
4770 }
4771
4772 struct ftrace_buffer_info {
4773         struct trace_iterator   iter;
4774         void                    *spare;
4775         unsigned int            read;
4776 };
4777
4778 #ifdef CONFIG_TRACER_SNAPSHOT
4779 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4780 {
4781         struct trace_array *tr = inode->i_private;
4782         struct trace_iterator *iter;
4783         struct seq_file *m;
4784         int ret = 0;
4785
4786         if (trace_array_get(tr) < 0)
4787                 return -ENODEV;
4788
4789         if (file->f_mode & FMODE_READ) {
4790                 iter = __tracing_open(inode, file, true);
4791                 if (IS_ERR(iter))
4792                         ret = PTR_ERR(iter);
4793         } else {
4794                 /* Writes still need the seq_file to hold the private data */
4795                 ret = -ENOMEM;
4796                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4797                 if (!m)
4798                         goto out;
4799                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4800                 if (!iter) {
4801                         kfree(m);
4802                         goto out;
4803                 }
4804                 ret = 0;
4805
4806                 iter->tr = tr;
4807                 iter->trace_buffer = &tr->max_buffer;
4808                 iter->cpu_file = tracing_get_cpu(inode);
4809                 m->private = iter;
4810                 file->private_data = m;
4811         }
4812 out:
4813         if (ret < 0)
4814                 trace_array_put(tr);
4815
4816         return ret;
4817 }
4818
4819 static ssize_t
4820 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
4821                        loff_t *ppos)
4822 {
4823         struct seq_file *m = filp->private_data;
4824         struct trace_iterator *iter = m->private;
4825         struct trace_array *tr = iter->tr;
4826         unsigned long val;
4827         int ret;
4828
4829         ret = tracing_update_buffers();
4830         if (ret < 0)
4831                 return ret;
4832
4833         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4834         if (ret)
4835                 return ret;
4836
4837         mutex_lock(&trace_types_lock);
4838
4839         if (tr->current_trace->use_max_tr) {
4840                 ret = -EBUSY;
4841                 goto out;
4842         }
4843
4844         switch (val) {
4845         case 0:
4846                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4847                         ret = -EINVAL;
4848                         break;
4849                 }
4850                 if (tr->allocated_snapshot)
4851                         free_snapshot(tr);
4852                 break;
4853         case 1:
4854 /* Only allow per-cpu swap if the ring buffer supports it */
4855 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
4856                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4857                         ret = -EINVAL;
4858                         break;
4859                 }
4860 #endif
4861                 if (!tr->allocated_snapshot) {
4862                         ret = alloc_snapshot(tr);
4863                         if (ret < 0)
4864                                 break;
4865                 }
4866                 local_irq_disable();
4867                 /* Now, we're going to swap */
4868                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4869                         update_max_tr(tr, current, smp_processor_id());
4870                 else
4871                         update_max_tr_single(tr, current, iter->cpu_file);
4872                 local_irq_enable();
4873                 break;
4874         default:
4875                 if (tr->allocated_snapshot) {
4876                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4877                                 tracing_reset_online_cpus(&tr->max_buffer);
4878                         else
4879                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
4880                 }
4881                 break;
4882         }
4883
4884         if (ret >= 0) {
4885                 *ppos += cnt;
4886                 ret = cnt;
4887         }
4888 out:
4889         mutex_unlock(&trace_types_lock);
4890         return ret;
4891 }
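
/*
 * Sketch of the user-visible semantics implemented above (path assumes
 * debugfs mounted at /sys/kernel/debug):
 *
 *	# echo 1 > /sys/kernel/debug/tracing/snapshot   (allocate if needed
 *	                                                 and take a snapshot)
 *	# cat /sys/kernel/debug/tracing/snapshot        (read the snapshot)
 *	# echo 0 > /sys/kernel/debug/tracing/snapshot   (free the snapshot
 *	                                                 buffer)
 *
 * Writing any other value clears the snapshot contents without freeing
 * the buffer.
 */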
4892
4893 static int tracing_snapshot_release(struct inode *inode, struct file *file)
4894 {
4895         struct seq_file *m = file->private_data;
4896         int ret;
4897
4898         ret = tracing_release(inode, file);
4899
4900         if (file->f_mode & FMODE_READ)
4901                 return ret;
4902
4903         /* If write only, the seq_file is just a stub */
4904         if (m)
4905                 kfree(m->private);
4906         kfree(m);
4907
4908         return 0;
4909 }
4910
4911 static int tracing_buffers_open(struct inode *inode, struct file *filp);
4912 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
4913                                     size_t count, loff_t *ppos);
4914 static int tracing_buffers_release(struct inode *inode, struct file *file);
4915 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
4916                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
4917
4918 static int snapshot_raw_open(struct inode *inode, struct file *filp)
4919 {
4920         struct ftrace_buffer_info *info;
4921         int ret;
4922
4923         ret = tracing_buffers_open(inode, filp);
4924         if (ret < 0)
4925                 return ret;
4926
4927         info = filp->private_data;
4928
4929         if (info->iter.trace->use_max_tr) {
4930                 tracing_buffers_release(inode, filp);
4931                 return -EBUSY;
4932         }
4933
4934         info->iter.snapshot = true;
4935         info->iter.trace_buffer = &info->iter.tr->max_buffer;
4936
4937         return ret;
4938 }
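
/*
 * snapshot_raw_open() backs the per_cpu/cpuN/snapshot_raw files: the
 * same binary page interface as trace_pipe_raw, but reading from the
 * snapshot (max) buffer instead of the live one.
 */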
4939
4940 #endif /* CONFIG_TRACER_SNAPSHOT */
4941
4942
4943 static const struct file_operations tracing_max_lat_fops = {
4944         .open           = tracing_open_generic,
4945         .read           = tracing_max_lat_read,
4946         .write          = tracing_max_lat_write,
4947         .llseek         = generic_file_llseek,
4948 };
4949
4950 static const struct file_operations set_tracer_fops = {
4951         .open           = tracing_open_generic,
4952         .read           = tracing_set_trace_read,
4953         .write          = tracing_set_trace_write,
4954         .llseek         = generic_file_llseek,
4955 };
4956
4957 static const struct file_operations tracing_pipe_fops = {
4958         .open           = tracing_open_pipe,
4959         .poll           = tracing_poll_pipe,
4960         .read           = tracing_read_pipe,
4961         .splice_read    = tracing_splice_read_pipe,
4962         .release        = tracing_release_pipe,
4963         .llseek         = no_llseek,
4964 };
4965
4966 static const struct file_operations tracing_entries_fops = {
4967         .open           = tracing_open_generic_tr,
4968         .read           = tracing_entries_read,
4969         .write          = tracing_entries_write,
4970         .llseek         = generic_file_llseek,
4971         .release        = tracing_release_generic_tr,
4972 };
4973
4974 static const struct file_operations tracing_total_entries_fops = {
4975         .open           = tracing_open_generic_tr,
4976         .read           = tracing_total_entries_read,
4977         .llseek         = generic_file_llseek,
4978         .release        = tracing_release_generic_tr,
4979 };
4980
4981 static const struct file_operations tracing_free_buffer_fops = {
4982         .open           = tracing_open_generic_tr,
4983         .write          = tracing_free_buffer_write,
4984         .release        = tracing_free_buffer_release,
4985 };
4986
4987 static const struct file_operations tracing_mark_fops = {
4988         .open           = tracing_open_generic_tr,
4989         .write          = tracing_mark_write,
4990         .llseek         = generic_file_llseek,
4991         .release        = tracing_release_generic_tr,
4992 };
4993
4994 static const struct file_operations trace_clock_fops = {
4995         .open           = tracing_clock_open,
4996         .read           = seq_read,
4997         .llseek         = seq_lseek,
4998         .release        = tracing_single_release_tr,
4999         .write          = tracing_clock_write,
5000 };
5001
5002 #ifdef CONFIG_TRACER_SNAPSHOT
5003 static const struct file_operations snapshot_fops = {
5004         .open           = tracing_snapshot_open,
5005         .read           = seq_read,
5006         .write          = tracing_snapshot_write,
5007         .llseek         = tracing_lseek,
5008         .release        = tracing_snapshot_release,
5009 };
5010
5011 static const struct file_operations snapshot_raw_fops = {
5012         .open           = snapshot_raw_open,
5013         .read           = tracing_buffers_read,
5014         .release        = tracing_buffers_release,
5015         .splice_read    = tracing_buffers_splice_read,
5016         .llseek         = no_llseek,
5017 };
5018
5019 #endif /* CONFIG_TRACER_SNAPSHOT */
5020
5021 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5022 {
5023         struct trace_array *tr = inode->i_private;
5024         struct ftrace_buffer_info *info;
5025         int ret;
5026
5027         if (tracing_disabled)
5028                 return -ENODEV;
5029
5030         if (trace_array_get(tr) < 0)
5031                 return -ENODEV;
5032
5033         info = kzalloc(sizeof(*info), GFP_KERNEL);
5034         if (!info) {
5035                 trace_array_put(tr);
5036                 return -ENOMEM;
5037         }
5038
5039         mutex_lock(&trace_types_lock);
5040
5041         info->iter.tr           = tr;
5042         info->iter.cpu_file     = tracing_get_cpu(inode);
5043         info->iter.trace        = tr->current_trace;
5044         info->iter.trace_buffer = &tr->trace_buffer;
5045         info->spare             = NULL;
5046         /* Force reading ring buffer for first read */
5047         info->read              = (unsigned int)-1;
5048
5049         filp->private_data = info;
5050
5051         mutex_unlock(&trace_types_lock);
5052
5053         ret = nonseekable_open(inode, filp);
5054         if (ret < 0)
5055                 trace_array_put(tr);
5056
5057         return ret;
5058 }
5059
5060 static unsigned int
5061 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5062 {
5063         struct ftrace_buffer_info *info = filp->private_data;
5064         struct trace_iterator *iter = &info->iter;
5065
5066         return trace_poll(iter, filp, poll_table);
5067 }
5068
5069 static ssize_t
5070 tracing_buffers_read(struct file *filp, char __user *ubuf,
5071                      size_t count, loff_t *ppos)
5072 {
5073         struct ftrace_buffer_info *info = filp->private_data;
5074         struct trace_iterator *iter = &info->iter;
5075         ssize_t ret;
5076         ssize_t size;
5077
5078         if (!count)
5079                 return 0;
5080
5081         mutex_lock(&trace_types_lock);
5082
5083 #ifdef CONFIG_TRACER_MAX_TRACE
5084         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5085                 size = -EBUSY;
5086                 goto out_unlock;
5087         }
5088 #endif
5089
5090         if (!info->spare)
5091                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5092                                                           iter->cpu_file);
5093         size = -ENOMEM;
5094         if (!info->spare)
5095                 goto out_unlock;
5096
5097         /* Do we still have data left over from a previous read? */
5098         if (info->read < PAGE_SIZE)
5099                 goto read;
5100
5101  again:
5102         trace_access_lock(iter->cpu_file);
5103         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5104                                     &info->spare,
5105                                     count,
5106                                     iter->cpu_file, 0);
5107         trace_access_unlock(iter->cpu_file);
5108
5109         if (ret < 0) {
5110                 if (trace_empty(iter)) {
5111                         if ((filp->f_flags & O_NONBLOCK)) {
5112                                 size = -EAGAIN;
5113                                 goto out_unlock;
5114                         }
5115                         mutex_unlock(&trace_types_lock);
5116                         iter->trace->wait_pipe(iter);
5117                         mutex_lock(&trace_types_lock);
5118                         if (signal_pending(current)) {
5119                                 size = -EINTR;
5120                                 goto out_unlock;
5121                         }
5122                         goto again;
5123                 }
5124                 size = 0;
5125                 goto out_unlock;
5126         }
5127
5128         info->read = 0;
5129  read:
5130         size = PAGE_SIZE - info->read;
5131         if (size > count)
5132                 size = count;
5133
5134         ret = copy_to_user(ubuf, info->spare + info->read, size);
5135         if (ret == size) {
5136                 size = -EFAULT;
5137                 goto out_unlock;
5138         }
5139         size -= ret;
5140
5141         *ppos += size;
5142         info->read += size;
5143
5144  out_unlock:
5145         mutex_unlock(&trace_types_lock);
5146
5147         return size;
5148 }
5149
5150 static int tracing_buffers_release(struct inode *inode, struct file *file)
5151 {
5152         struct ftrace_buffer_info *info = file->private_data;
5153         struct trace_iterator *iter = &info->iter;
5154
5155         mutex_lock(&trace_types_lock);
5156
5157         __trace_array_put(iter->tr);
5158
5159         if (info->spare)
5160                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5161         kfree(info);
5162
5163         mutex_unlock(&trace_types_lock);
5164
5165         return 0;
5166 }
5167
5168 struct buffer_ref {
5169         struct ring_buffer      *buffer;
5170         void                    *page;
5171         int                     ref;
5172 };
5173
5174 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5175                                     struct pipe_buffer *buf)
5176 {
5177         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5178
5179         if (--ref->ref)
5180                 return;
5181
5182         ring_buffer_free_read_page(ref->buffer, ref->page);
5183         kfree(ref);
5184         buf->private = 0;
5185 }
5186
5187 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5188                                 struct pipe_buffer *buf)
5189 {
5190         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5191
5192         ref->ref++;
5193 }
5194
5195 /* Pipe buffer operations for a ring buffer read page. */
5196 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5197         .can_merge              = 0,
5198         .map                    = generic_pipe_buf_map,
5199         .unmap                  = generic_pipe_buf_unmap,
5200         .confirm                = generic_pipe_buf_confirm,
5201         .release                = buffer_pipe_buf_release,
5202         .steal                  = generic_pipe_buf_steal,
5203         .get                    = buffer_pipe_buf_get,
5204 };
5205
5206 /*
5207  * Callback from splice_to_pipe(): release the page references held
5208  * in the spd in case we errored out while filling the pipe.
5209  */
5210 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5211 {
5212         struct buffer_ref *ref =
5213                 (struct buffer_ref *)spd->partial[i].private;
5214
5215         if (--ref->ref)
5216                 return;
5217
5218         ring_buffer_free_read_page(ref->buffer, ref->page);
5219         kfree(ref);
5220         spd->partial[i].private = 0;
5221 }
5222
5223 static ssize_t
5224 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5225                             struct pipe_inode_info *pipe, size_t len,
5226                             unsigned int flags)
5227 {
5228         struct ftrace_buffer_info *info = file->private_data;
5229         struct trace_iterator *iter = &info->iter;
5230         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5231         struct page *pages_def[PIPE_DEF_BUFFERS];
5232         struct splice_pipe_desc spd = {
5233                 .pages          = pages_def,
5234                 .partial        = partial_def,
5235                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5236                 .flags          = flags,
5237                 .ops            = &buffer_pipe_buf_ops,
5238                 .spd_release    = buffer_spd_release,
5239         };
5240         struct buffer_ref *ref;
5241         int entries, size, i;
5242         ssize_t ret;
5243
5244         mutex_lock(&trace_types_lock);
5245
5246 #ifdef CONFIG_TRACER_MAX_TRACE
5247         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5248                 ret = -EBUSY;
5249                 goto out;
5250         }
5251 #endif
5252
5253         if (splice_grow_spd(pipe, &spd)) {
5254                 ret = -ENOMEM;
5255                 goto out;
5256         }
5257
5258         if (*ppos & (PAGE_SIZE - 1)) {
5259                 ret = -EINVAL;
5260                 goto out;
5261         }
5262
5263         if (len & (PAGE_SIZE - 1)) {
5264                 if (len < PAGE_SIZE) {
5265                         ret = -EINVAL;
5266                         goto out;
5267                 }
5268                 len &= PAGE_MASK;
5269         }
5270
5271  again:
5272         trace_access_lock(iter->cpu_file);
5273         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5274
5275         for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
5276                 struct page *page;
5277                 int r;
5278
5279                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5280                 if (!ref)
5281                         break;
5282
5283                 ref->ref = 1;
5284                 ref->buffer = iter->trace_buffer->buffer;
5285                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5286                 if (!ref->page) {
5287                         kfree(ref);
5288                         break;
5289                 }
5290
5291                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5292                                           len, iter->cpu_file, 1);
5293                 if (r < 0) {
5294                         ring_buffer_free_read_page(ref->buffer, ref->page);
5295                         kfree(ref);
5296                         break;
5297                 }
5298
5299                 /*
5300                  * Zero out any leftover data; this page is going
5301                  * to user land.
5302                  */
5303                 size = ring_buffer_page_len(ref->page);
5304                 if (size < PAGE_SIZE)
5305                         memset(ref->page + size, 0, PAGE_SIZE - size);
5306
5307                 page = virt_to_page(ref->page);
5308
5309                 spd.pages[i] = page;
5310                 spd.partial[i].len = PAGE_SIZE;
5311                 spd.partial[i].offset = 0;
5312                 spd.partial[i].private = (unsigned long)ref;
5313                 spd.nr_pages++;
5314                 *ppos += PAGE_SIZE;
5315
5316                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5317         }
5318
5319         trace_access_unlock(iter->cpu_file);
5320         spd.nr_pages = i;
5321
5322         /* did we read anything? */
5323         if (!spd.nr_pages) {
5324                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5325                         ret = -EAGAIN;
5326                         goto out;
5327                 }
5328                 mutex_unlock(&trace_types_lock);
5329                 iter->trace->wait_pipe(iter);
5330                 mutex_lock(&trace_types_lock);
5331                 if (signal_pending(current)) {
5332                         ret = -EINTR;
5333                         goto out;
5334                 }
5335                 goto again;
5336         }
5337
5338         ret = splice_to_pipe(pipe, &spd);
5339         splice_shrink_spd(&spd);
5340 out:
5341         mutex_unlock(&trace_types_lock);
5342
5343         return ret;
5344 }
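
/*
 * This backs the per_cpu/cpuN/trace_pipe_raw files.  A minimal
 * user-space sketch of how they are consumed (hypothetical snippet,
 * error handling omitted; out_fd is some file opened for writing, and
 * tools such as trace-cmd do this for real):
 *
 *	int fd = open(".../per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, SPLICE_F_MOVE);
 *	splice(p[0], NULL, out_fd, NULL, 4096, SPLICE_F_MOVE);
 *
 * Whole ring-buffer pages are handed to the pipe by reference, so the
 * trace data is never copied through a user buffer.
 */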
5345
5346 static const struct file_operations tracing_buffers_fops = {
5347         .open           = tracing_buffers_open,
5348         .read           = tracing_buffers_read,
5349         .poll           = tracing_buffers_poll,
5350         .release        = tracing_buffers_release,
5351         .splice_read    = tracing_buffers_splice_read,
5352         .llseek         = no_llseek,
5353 };
5354
5355 static ssize_t
5356 tracing_stats_read(struct file *filp, char __user *ubuf,
5357                    size_t count, loff_t *ppos)
5358 {
5359         struct inode *inode = file_inode(filp);
5360         struct trace_array *tr = inode->i_private;
5361         struct trace_buffer *trace_buf = &tr->trace_buffer;
5362         int cpu = tracing_get_cpu(inode);
5363         struct trace_seq *s;
5364         unsigned long cnt;
5365         unsigned long long t;
5366         unsigned long usec_rem;
5367
5368         s = kmalloc(sizeof(*s), GFP_KERNEL);
5369         if (!s)
5370                 return -ENOMEM;
5371
5372         trace_seq_init(s);
5373
5374         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5375         trace_seq_printf(s, "entries: %ld\n", cnt);
5376
5377         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5378         trace_seq_printf(s, "overrun: %ld\n", cnt);
5379
5380         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5381         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5382
5383         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5384         trace_seq_printf(s, "bytes: %ld\n", cnt);
5385
5386         if (trace_clocks[tr->clock_id].in_ns) {
5387                 /* local or global for trace_clock */
5388                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5389                 usec_rem = do_div(t, USEC_PER_SEC);
5390                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5391                                                                 t, usec_rem);
5392
5393                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5394                 usec_rem = do_div(t, USEC_PER_SEC);
5395                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5396         } else {
5397                 /* counter or tsc mode for trace_clock */
5398                 trace_seq_printf(s, "oldest event ts: %llu\n",
5399                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5400
5401                 trace_seq_printf(s, "now ts: %llu\n",
5402                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5403         }
5404
5405         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5406         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5407
5408         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5409         trace_seq_printf(s, "read events: %ld\n", cnt);
5410
5411         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5412
5413         kfree(s);
5414
5415         return count;
5416 }
5417
5418 static const struct file_operations tracing_stats_fops = {
5419         .open           = tracing_open_generic_tr,
5420         .read           = tracing_stats_read,
5421         .llseek         = generic_file_llseek,
5422         .release        = tracing_release_generic_tr,
5423 };
5424
5425 #ifdef CONFIG_DYNAMIC_FTRACE
5426
5427 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5428 {
5429         return 0;
5430 }
5431
5432 static ssize_t
5433 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5434                   size_t cnt, loff_t *ppos)
5435 {
5436         static char ftrace_dyn_info_buffer[1024];
5437         static DEFINE_MUTEX(dyn_info_mutex);
5438         unsigned long *p = filp->private_data;
5439         char *buf = ftrace_dyn_info_buffer;
5440         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5441         int r;
5442
5443         mutex_lock(&dyn_info_mutex);
5444         r = sprintf(buf, "%ld ", *p);
5445
5446         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5447         buf[r++] = '\n';
5448
5449         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5450
5451         mutex_unlock(&dyn_info_mutex);
5452
5453         return r;
5454 }
5455
5456 static const struct file_operations tracing_dyn_info_fops = {
5457         .open           = tracing_open_generic,
5458         .read           = tracing_read_dyn_info,
5459         .llseek         = generic_file_llseek,
5460 };
5461 #endif /* CONFIG_DYNAMIC_FTRACE */
5462
5463 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5464 static void
5465 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5466 {
5467         tracing_snapshot();
5468 }
5469
5470 static void
5471 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5472 {
5473         unsigned long *count = (unsigned long *)data;
5474
5475         if (!*count)
5476                 return;
5477
5478         if (*count != -1)
5479                 (*count)--;
5480
5481         tracing_snapshot();
5482 }
5483
5484 static int
5485 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5486                       struct ftrace_probe_ops *ops, void *data)
5487 {
5488         long count = (long)data;
5489
5490         seq_printf(m, "%ps:", (void *)ip);
5491
5492         seq_printf(m, "snapshot");
5493
5494         if (count == -1)
5495                 seq_printf(m, ":unlimited\n");
5496         else
5497                 seq_printf(m, ":count=%ld\n", count);
5498
5499         return 0;
5500 }
5501
5502 static struct ftrace_probe_ops snapshot_probe_ops = {
5503         .func                   = ftrace_snapshot,
5504         .print                  = ftrace_snapshot_print,
5505 };
5506
5507 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5508         .func                   = ftrace_count_snapshot,
5509         .print                  = ftrace_snapshot_print,
5510 };
5511
5512 static int
5513 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5514                                char *glob, char *cmd, char *param, int enable)
5515 {
5516         struct ftrace_probe_ops *ops;
5517         void *count = (void *)-1;
5518         char *number;
5519         int ret;
5520
5521         /* hash funcs only work with set_ftrace_filter */
5522         if (!enable)
5523                 return -EINVAL;
5524
5525         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5526
5527         if (glob[0] == '!') {
5528                 unregister_ftrace_function_probe_func(glob+1, ops);
5529                 return 0;
5530         }
5531
5532         if (!param)
5533                 goto out_reg;
5534
5535         number = strsep(&param, ":");
5536
5537         if (!strlen(number))
5538                 goto out_reg;
5539
5540         /*
5541          * We use the callback data field (which is a pointer)
5542          * as our counter.
5543          */
5544         ret = kstrtoul(number, 0, (unsigned long *)&count);
5545         if (ret)
5546                 return ret;
5547
5548  out_reg:
5549         ret = register_ftrace_function_probe(glob, ops, count);
5550
5551         if (ret >= 0)
5552                 alloc_snapshot(&global_trace);
5553
5554         return ret < 0 ? ret : 0;
5555 }
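
/*
 * Usage sketch for the "snapshot" function command registered below
 * (path assumes debugfs mounted at /sys/kernel/debug):
 *
 *	# echo 'schedule:snapshot:1' > \
 *		/sys/kernel/debug/tracing/set_ftrace_filter
 *
 * takes a single snapshot the first time schedule() is hit.  Omitting
 * the count takes a snapshot on every hit, and prefixing the glob with
 * '!' removes the probe again.
 */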
5556
5557 static struct ftrace_func_command ftrace_snapshot_cmd = {
5558         .name                   = "snapshot",
5559         .func                   = ftrace_trace_snapshot_callback,
5560 };
5561
5562 static __init int register_snapshot_cmd(void)
5563 {
5564         return register_ftrace_command(&ftrace_snapshot_cmd);
5565 }
5566 #else
5567 static inline __init int register_snapshot_cmd(void) { return 0; }
5568 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5569
5570 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5571 {
5572         if (tr->dir)
5573                 return tr->dir;
5574
5575         if (!debugfs_initialized())
5576                 return NULL;
5577
5578         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5579                 tr->dir = debugfs_create_dir("tracing", NULL);
5580
5581         if (!tr->dir)
5582                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5583
5584         return tr->dir;
5585 }
5586
5587 struct dentry *tracing_init_dentry(void)
5588 {
5589         return tracing_init_dentry_tr(&global_trace);
5590 }
5591
5592 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5593 {
5594         struct dentry *d_tracer;
5595
5596         if (tr->percpu_dir)
5597                 return tr->percpu_dir;
5598
5599         d_tracer = tracing_init_dentry_tr(tr);
5600         if (!d_tracer)
5601                 return NULL;
5602
5603         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5604
5605         WARN_ONCE(!tr->percpu_dir,
5606                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5607
5608         return tr->percpu_dir;
5609 }
5610
5611 static struct dentry *
5612 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5613                       void *data, long cpu, const struct file_operations *fops)
5614 {
5615         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5616
5617         if (ret) /* See tracing_get_cpu() */
5618                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5619         return ret;
5620 }
5621
5622 static void
5623 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5624 {
5625         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5626         struct dentry *d_cpu;
5627         char cpu_dir[30]; /* 30 characters should be more than enough */
5628
5629         if (!d_percpu)
5630                 return;
5631
5632         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5633         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5634         if (!d_cpu) {
5635                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5636                 return;
5637         }
5638
5639         /* per cpu trace_pipe */
5640         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5641                                 tr, cpu, &tracing_pipe_fops);
5642
5643         /* per cpu trace */
5644         trace_create_cpu_file("trace", 0644, d_cpu,
5645                                 tr, cpu, &tracing_fops);
5646
5647         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5648                                 tr, cpu, &tracing_buffers_fops);
5649
5650         trace_create_cpu_file("stats", 0444, d_cpu,
5651                                 tr, cpu, &tracing_stats_fops);
5652
5653         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5654                                 tr, cpu, &tracing_entries_fops);
5655
5656 #ifdef CONFIG_TRACER_SNAPSHOT
5657         trace_create_cpu_file("snapshot", 0644, d_cpu,
5658                                 tr, cpu, &snapshot_fops);
5659
5660         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5661                                 tr, cpu, &snapshot_raw_fops);
5662 #endif
5663 }
5664
5665 #ifdef CONFIG_FTRACE_SELFTEST
5666 /* Let selftest have access to static functions in this file */
5667 #include "trace_selftest.c"
5668 #endif
5669
5670 struct trace_option_dentry {
5671         struct tracer_opt               *opt;
5672         struct tracer_flags             *flags;
5673         struct trace_array              *tr;
5674         struct dentry                   *entry;
5675 };
5676
5677 static ssize_t
5678 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5679                         loff_t *ppos)
5680 {
5681         struct trace_option_dentry *topt = filp->private_data;
5682         char *buf;
5683
5684         if (topt->flags->val & topt->opt->bit)
5685                 buf = "1\n";
5686         else
5687                 buf = "0\n";
5688
5689         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5690 }
5691
5692 static ssize_t
5693 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5694                          loff_t *ppos)
5695 {
5696         struct trace_option_dentry *topt = filp->private_data;
5697         unsigned long val;
5698         int ret;
5699
5700         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5701         if (ret)
5702                 return ret;
5703
5704         if (val != 0 && val != 1)
5705                 return -EINVAL;
5706
5707         if (!!(topt->flags->val & topt->opt->bit) != val) {
5708                 mutex_lock(&trace_types_lock);
5709                 ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
5710                                           topt->opt, !val);
5711                 mutex_unlock(&trace_types_lock);
5712                 if (ret)
5713                         return ret;
5714         }
5715
5716         *ppos += cnt;
5717
5718         return cnt;
5719 }
5720
5721
5722 static const struct file_operations trace_options_fops = {
5723         .open = tracing_open_generic,
5724         .read = trace_options_read,
5725         .write = trace_options_write,
5726         .llseek = generic_file_llseek,
5727 };
5728
5729 static ssize_t
5730 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5731                         loff_t *ppos)
5732 {
5733         long index = (long)filp->private_data;
5734         char *buf;
5735
5736         if (trace_flags & (1 << index))
5737                 buf = "1\n";
5738         else
5739                 buf = "0\n";
5740
5741         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5742 }
5743
5744 static ssize_t
5745 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5746                          loff_t *ppos)
5747 {
5748         struct trace_array *tr = &global_trace;
5749         long index = (long)filp->private_data;
5750         unsigned long val;
5751         int ret;
5752
5753         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5754         if (ret)
5755                 return ret;
5756
5757         if (val != 0 && val != 1)
5758                 return -EINVAL;
5759
5760         mutex_lock(&trace_types_lock);
5761         ret = set_tracer_flag(tr, 1 << index, val);
5762         mutex_unlock(&trace_types_lock);
5763
5764         if (ret < 0)
5765                 return ret;
5766
5767         *ppos += cnt;
5768
5769         return cnt;
5770 }
5771
5772 static const struct file_operations trace_options_core_fops = {
5773         .open = tracing_open_generic,
5774         .read = trace_options_core_read,
5775         .write = trace_options_core_write,
5776         .llseek = generic_file_llseek,
5777 };
5778
5779 struct dentry *trace_create_file(const char *name,
5780                                  umode_t mode,
5781                                  struct dentry *parent,
5782                                  void *data,
5783                                  const struct file_operations *fops)
5784 {
5785         struct dentry *ret;
5786
5787         ret = debugfs_create_file(name, mode, parent, data, fops);
5788         if (!ret)
5789                 pr_warning("Could not create debugfs '%s' entry\n", name);
5790
5791         return ret;
5792 }
5793
5794
5795 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5796 {
5797         struct dentry *d_tracer;
5798
5799         if (tr->options)
5800                 return tr->options;
5801
5802         d_tracer = tracing_init_dentry_tr(tr);
5803         if (!d_tracer)
5804                 return NULL;
5805
5806         tr->options = debugfs_create_dir("options", d_tracer);
5807         if (!tr->options) {
5808                 pr_warning("Could not create debugfs directory 'options'\n");
5809                 return NULL;
5810         }
5811
5812         return tr->options;
5813 }
5814
5815 static void
5816 create_trace_option_file(struct trace_array *tr,
5817                          struct trace_option_dentry *topt,
5818                          struct tracer_flags *flags,
5819                          struct tracer_opt *opt)
5820 {
5821         struct dentry *t_options;
5822
5823         t_options = trace_options_init_dentry(tr);
5824         if (!t_options)
5825                 return;
5826
5827         topt->flags = flags;
5828         topt->opt = opt;
5829         topt->tr = tr;
5830
5831         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
5832                                     &trace_options_fops);
5833
5834 }
5835
5836 static struct trace_option_dentry *
5837 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
5838 {
5839         struct trace_option_dentry *topts;
5840         struct tracer_flags *flags;
5841         struct tracer_opt *opts;
5842         int cnt;
5843
5844         if (!tracer)
5845                 return NULL;
5846
5847         flags = tracer->flags;
5848
5849         if (!flags || !flags->opts)
5850                 return NULL;
5851
5852         opts = flags->opts;
5853
5854         for (cnt = 0; opts[cnt].name; cnt++)
5855                 ;
5856
5857         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
5858         if (!topts)
5859                 return NULL;
5860
5861         for (cnt = 0; opts[cnt].name; cnt++)
5862                 create_trace_option_file(tr, &topts[cnt], flags,
5863                                          &opts[cnt]);
5864
5865         return topts;
5866 }
5867
5868 static void
5869 destroy_trace_option_files(struct trace_option_dentry *topts)
5870 {
5871         int cnt;
5872
5873         if (!topts)
5874                 return;
5875
5876         for (cnt = 0; topts[cnt].opt; cnt++) {
5877                 if (topts[cnt].entry)
5878                         debugfs_remove(topts[cnt].entry);
5879         }
5880
5881         kfree(topts);
5882 }
5883
5884 static struct dentry *
5885 create_trace_option_core_file(struct trace_array *tr,
5886                               const char *option, long index)
5887 {
5888         struct dentry *t_options;
5889
5890         t_options = trace_options_init_dentry(tr);
5891         if (!t_options)
5892                 return NULL;
5893
5894         return trace_create_file(option, 0644, t_options, (void *)index,
5895                                     &trace_options_core_fops);
5896 }
5897
5898 static __init void create_trace_options_dir(struct trace_array *tr)
5899 {
5900         struct dentry *t_options;
5901         int i;
5902
5903         t_options = trace_options_init_dentry(tr);
5904         if (!t_options)
5905                 return;
5906
5907         for (i = 0; trace_options[i]; i++)
5908                 create_trace_option_core_file(tr, trace_options[i], i);
5909 }
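
/*
 * The files created above live under tracing/options/ and each holds a
 * single boolean.  For example (a sketch, assuming debugfs is mounted
 * at /sys/kernel/debug):
 *
 *	# echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 *	# echo 0 > /sys/kernel/debug/tracing/options/sym-offset
 *
 * The option names come from the trace_options[] string array, so the
 * exact set depends on the kernel configuration.
 */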
5910
5911 static ssize_t
5912 rb_simple_read(struct file *filp, char __user *ubuf,
5913                size_t cnt, loff_t *ppos)
5914 {
5915         struct trace_array *tr = filp->private_data;
5916         char buf[64];
5917         int r;
5918
5919         r = tracer_tracing_is_on(tr);
5920         r = sprintf(buf, "%d\n", r);
5921
5922         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5923 }
5924
5925 static ssize_t
5926 rb_simple_write(struct file *filp, const char __user *ubuf,
5927                 size_t cnt, loff_t *ppos)
5928 {
5929         struct trace_array *tr = filp->private_data;
5930         struct ring_buffer *buffer = tr->trace_buffer.buffer;
5931         unsigned long val;
5932         int ret;
5933
5934         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5935         if (ret)
5936                 return ret;
5937
5938         if (buffer) {
5939                 mutex_lock(&trace_types_lock);
5940                 if (val) {
5941                         tracer_tracing_on(tr);
5942                         if (tr->current_trace->start)
5943                                 tr->current_trace->start(tr);
5944                 } else {
5945                         tracer_tracing_off(tr);
5946                         if (tr->current_trace->stop)
5947                                 tr->current_trace->stop(tr);
5948                 }
5949                 mutex_unlock(&trace_types_lock);
5950         }
5951
5952         (*ppos)++;
5953
5954         return cnt;
5955 }
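
/*
 * These two handlers back the "tracing_on" file.  A quick sketch,
 * assuming debugfs is mounted at /sys/kernel/debug:
 *
 *	# echo 0 > /sys/kernel/debug/tracing/tracing_on   (pause recording)
 *	# echo 1 > /sys/kernel/debug/tracing/tracing_on   (resume recording)
 *
 * Only writing to the ring buffer is stopped; the tracer itself stays
 * registered, and its start/stop callbacks (if any) are invoked above.
 */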
5956
5957 static const struct file_operations rb_simple_fops = {
5958         .open           = tracing_open_generic_tr,
5959         .read           = rb_simple_read,
5960         .write          = rb_simple_write,
5961         .release        = tracing_release_generic_tr,
5962         .llseek         = default_llseek,
5963 };
5964
5965 struct dentry *trace_instance_dir;
5966
5967 static void
5968 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
5969
5970 static int
5971 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
5972 {
5973         enum ring_buffer_flags rb_flags;
5974
5975         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
5976
5977         buf->tr = tr;
5978
5979         buf->buffer = ring_buffer_alloc(size, rb_flags);
5980         if (!buf->buffer)
5981                 return -ENOMEM;
5982
5983         buf->data = alloc_percpu(struct trace_array_cpu);
5984         if (!buf->data) {
5985                 ring_buffer_free(buf->buffer);
5986                 return -ENOMEM;
5987         }
5988
5989         /* Allocate the first page for all buffers */
5990         set_buffer_entries(&tr->trace_buffer,
5991                            ring_buffer_size(tr->trace_buffer.buffer, 0));
5992
5993         return 0;
5994 }
5995
5996 static int allocate_trace_buffers(struct trace_array *tr, int size)
5997 {
5998         int ret;
5999
6000         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6001         if (ret)
6002                 return ret;
6003
6004 #ifdef CONFIG_TRACER_MAX_TRACE
6005         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6006                                     allocate_snapshot ? size : 1);
6007         if (WARN_ON(ret)) {
6008                 ring_buffer_free(tr->trace_buffer.buffer);
6009                 free_percpu(tr->trace_buffer.data);
6010                 return -ENOMEM;
6011         }
6012         tr->allocated_snapshot = allocate_snapshot;
6013
6014         /*
6015          * Only the top level trace array gets its snapshot allocated
6016          * from the kernel command line.
6017          */
6018         allocate_snapshot = false;
6019 #endif
6020         return 0;
6021 }
6022
6023 static int new_instance_create(const char *name)
6024 {
6025         struct trace_array *tr;
6026         int ret;
6027
6028         mutex_lock(&trace_types_lock);
6029
6030         ret = -EEXIST;
6031         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6032                 if (tr->name && strcmp(tr->name, name) == 0)
6033                         goto out_unlock;
6034         }
6035
6036         ret = -ENOMEM;
6037         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6038         if (!tr)
6039                 goto out_unlock;
6040
6041         tr->name = kstrdup(name, GFP_KERNEL);
6042         if (!tr->name)
6043                 goto out_free_tr;
6044
6045         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6046                 goto out_free_tr;
6047
6048         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6049
6050         raw_spin_lock_init(&tr->start_lock);
6051
6052         tr->current_trace = &nop_trace;
6053
6054         INIT_LIST_HEAD(&tr->systems);
6055         INIT_LIST_HEAD(&tr->events);
6056
6057         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6058                 goto out_free_tr;
6059
6060         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6061         if (!tr->dir)
6062                 goto out_free_tr;
6063
6064         ret = event_trace_add_tracer(tr->dir, tr);
6065         if (ret) {
6066                 debugfs_remove_recursive(tr->dir);
6067                 goto out_free_tr;
6068         }
6069
6070         init_tracer_debugfs(tr, tr->dir);
6071
6072         list_add(&tr->list, &ftrace_trace_arrays);
6073
6074         mutex_unlock(&trace_types_lock);
6075
6076         return 0;
6077
6078  out_free_tr:
6079         if (tr->trace_buffer.buffer)
6080                 ring_buffer_free(tr->trace_buffer.buffer);
6081         free_cpumask_var(tr->tracing_cpumask);
6082         kfree(tr->name);
6083         kfree(tr);
6084
6085  out_unlock:
6086         mutex_unlock(&trace_types_lock);
6087
6088         return ret;
6089
6090 }
6091
6092 static int instance_delete(const char *name)
6093 {
6094         struct trace_array *tr;
6095         int found = 0;
6096         int ret;
6097
6098         mutex_lock(&trace_types_lock);
6099
6100         ret = -ENODEV;
6101         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6102                 if (tr->name && strcmp(tr->name, name) == 0) {
6103                         found = 1;
6104                         break;
6105                 }
6106         }
6107         if (!found)
6108                 goto out_unlock;
6109
6110         ret = -EBUSY;
6111         if (tr->ref)
6112                 goto out_unlock;
6113
6114         list_del(&tr->list);
6115
6116         event_trace_del_tracer(tr);
6117         debugfs_remove_recursive(tr->dir);
6118         free_percpu(tr->trace_buffer.data);
6119         ring_buffer_free(tr->trace_buffer.buffer);
6120
6121         kfree(tr->name);
6122         kfree(tr);
6123
6124         ret = 0;
6125
6126  out_unlock:
6127         mutex_unlock(&trace_types_lock);
6128
6129         return ret;
6130 }
6131
6132 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6133 {
6134         struct dentry *parent;
6135         int ret;
6136
6137         /* Paranoid: Make sure the parent is the "instances" directory */
6138         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6139         if (WARN_ON_ONCE(parent != trace_instance_dir))
6140                 return -ENOENT;
6141
6142         /*
6143          * The inode mutex is locked, but debugfs_create_dir() will also
6144          * take the mutex. As the instances directory cannot be destroyed
6145          * or changed in any other way, it is safe to unlock it, and
6146          * let the dentry try. If two users try to make the same dir at
6147          * the same time, then new_instance_create() will determine the
6148          * winner.
6149          */
6150         mutex_unlock(&inode->i_mutex);
6151
6152         ret = new_instance_create(dentry->d_iname);
6153
6154         mutex_lock(&inode->i_mutex);
6155
6156         return ret;
6157 }
6158
6159 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6160 {
6161         struct dentry *parent;
6162         int ret;
6163
6164         /* Paranoid: Make sure the parent is the "instances" directory */
6165         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6166         if (WARN_ON_ONCE(parent != trace_instance_dir))
6167                 return -ENOENT;
6168
6169         /* The caller did a dget() on dentry */
6170         mutex_unlock(&dentry->d_inode->i_mutex);
6171
6172         /*
6173          * The inode mutex is locked, but the debugfs calls made by
6174          * instance_delete() will also take it. As the instances directory
6175          * cannot be destroyed or changed in any other way, it is safe to
6176          * unlock it and let the dentry try. If two users try to remove the
6177          * same instance at the same time, then instance_delete() will
6178          * determine the winner.
6179          */
6180         mutex_unlock(&inode->i_mutex);
6181
6182         ret = instance_delete(dentry->d_iname);
6183
6184         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6185         mutex_lock(&dentry->d_inode->i_mutex);
6186
6187         return ret;
6188 }
6189
6190 static const struct inode_operations instance_dir_inode_operations = {
6191         .lookup         = simple_lookup,
6192         .mkdir          = instance_mkdir,
6193         .rmdir          = instance_rmdir,
6194 };
6195
6196 static __init void create_trace_instances(struct dentry *d_tracer)
6197 {
6198         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6199         if (WARN_ON(!trace_instance_dir))
6200                 return;
6201
6202         /* Hijack the dir inode operations, to allow mkdir */
6203         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6204 }
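
/*
 * With the mkdir/rmdir hooks in place, new trace instances can be
 * created and removed from user space.  A sketch, assuming debugfs is
 * mounted at /sys/kernel/debug:
 *
 *	# mkdir /sys/kernel/debug/tracing/instances/foo
 *	# rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * Each instance gets its own ring buffer and its own copy of the files
 * set up by init_tracer_debugfs(), independent of the top level buffer.
 */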
6205
6206 static void
6207 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6208 {
6209         int cpu;
6210
6211         trace_create_file("tracing_cpumask", 0644, d_tracer,
6212                           tr, &tracing_cpumask_fops);
6213
6214         trace_create_file("trace_options", 0644, d_tracer,
6215                           tr, &tracing_iter_fops);
6216
6217         trace_create_file("trace", 0644, d_tracer,
6218                           tr, &tracing_fops);
6219
6220         trace_create_file("trace_pipe", 0444, d_tracer,
6221                           tr, &tracing_pipe_fops);
6222
6223         trace_create_file("buffer_size_kb", 0644, d_tracer,
6224                           tr, &tracing_entries_fops);
6225
6226         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6227                           tr, &tracing_total_entries_fops);
6228
6229         trace_create_file("free_buffer", 0200, d_tracer,
6230                           tr, &tracing_free_buffer_fops);
6231
6232         trace_create_file("trace_marker", 0220, d_tracer,
6233                           tr, &tracing_mark_fops);
6234
6235         trace_create_file("trace_clock", 0644, d_tracer, tr,
6236                           &trace_clock_fops);
6237
6238         trace_create_file("tracing_on", 0644, d_tracer,
6239                           tr, &rb_simple_fops);
6240
6241 #ifdef CONFIG_TRACER_SNAPSHOT
6242         trace_create_file("snapshot", 0644, d_tracer,
6243                           tr, &snapshot_fops);
6244 #endif
6245
6246         for_each_tracing_cpu(cpu)
6247                 tracing_init_debugfs_percpu(tr, cpu);
6248
6249 }
6250
6251 static __init int tracer_init_debugfs(void)
6252 {
6253         struct dentry *d_tracer;
6254
6255         trace_access_lock_init();
6256
6257         d_tracer = tracing_init_dentry();
6258         if (!d_tracer)
6259                 return 0;
6260
6261         init_tracer_debugfs(&global_trace, d_tracer);
6262
6263         trace_create_file("available_tracers", 0444, d_tracer,
6264                         &global_trace, &show_traces_fops);
6265
6266         trace_create_file("current_tracer", 0644, d_tracer,
6267                         &global_trace, &set_tracer_fops);
6268
6269 #ifdef CONFIG_TRACER_MAX_TRACE
6270         trace_create_file("tracing_max_latency", 0644, d_tracer,
6271                         &tracing_max_latency, &tracing_max_lat_fops);
6272 #endif
6273
6274         trace_create_file("tracing_thresh", 0644, d_tracer,
6275                         &tracing_thresh, &tracing_max_lat_fops);
6276
6277         trace_create_file("README", 0444, d_tracer,
6278                         NULL, &tracing_readme_fops);
6279
6280         trace_create_file("saved_cmdlines", 0444, d_tracer,
6281                         NULL, &tracing_saved_cmdlines_fops);
6282
6283 #ifdef CONFIG_DYNAMIC_FTRACE
6284         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6285                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6286 #endif
6287
6288         create_trace_instances(d_tracer);
6289
6290         create_trace_options_dir(&global_trace);
6291
6292         return 0;
6293 }
6294
6295 static int trace_panic_handler(struct notifier_block *this,
6296                                unsigned long event, void *unused)
6297 {
6298         if (ftrace_dump_on_oops)
6299                 ftrace_dump(ftrace_dump_on_oops);
6300         return NOTIFY_OK;
6301 }
6302
6303 static struct notifier_block trace_panic_notifier = {
6304         .notifier_call  = trace_panic_handler,
6305         .next           = NULL,
6306         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6307 };
6308
6309 static int trace_die_handler(struct notifier_block *self,
6310                              unsigned long val,
6311                              void *data)
6312 {
6313         switch (val) {
6314         case DIE_OOPS:
6315                 if (ftrace_dump_on_oops)
6316                         ftrace_dump(ftrace_dump_on_oops);
6317                 break;
6318         default:
6319                 break;
6320         }
6321         return NOTIFY_OK;
6322 }
6323
6324 static struct notifier_block trace_die_notifier = {
6325         .notifier_call = trace_die_handler,
6326         .priority = 200
6327 };
6328
6329 /*
6330  * The printk buffer is capped at 1024 bytes; we really don't need
6331  * it that big. Nothing should be printing 1000 characters anyway.
6332  */
6333 #define TRACE_MAX_PRINT         1000
6334
6335 /*
6336  * Define here KERN_TRACE so that we have one place to modify
6337  * it if we decide to change what log level the ftrace dump
6338  * should be at.
6339  */
6340 #define KERN_TRACE              KERN_EMERG
6341
6342 void
6343 trace_printk_seq(struct trace_seq *s)
6344 {
6345         /* Probably should print a warning here. */
6346         if (s->len >= TRACE_MAX_PRINT)
6347                 s->len = TRACE_MAX_PRINT;
6348
6349         /* should already be NUL terminated, but we are paranoid */
6350         s->buffer[s->len] = 0;
6351
6352         printk(KERN_TRACE "%s", s->buffer);
6353
6354         trace_seq_init(s);
6355 }
6356
6357 void trace_init_global_iter(struct trace_iterator *iter)
6358 {
6359         iter->tr = &global_trace;
6360         iter->trace = iter->tr->current_trace;
6361         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6362         iter->trace_buffer = &global_trace.trace_buffer;
6363
6364         if (iter->trace && iter->trace->open)
6365                 iter->trace->open(iter);
6366
6367         /* Annotate start of buffers if we had overruns */
6368         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6369                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6370
6371         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6372         if (trace_clocks[iter->tr->clock_id].in_ns)
6373                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6374 }
6375
6376 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6377 {
6378         /* use static because iter can be a bit big for the stack */
6379         static struct trace_iterator iter;
6380         static atomic_t dump_running;
6381         unsigned int old_userobj;
6382         unsigned long flags;
6383         int cnt = 0, cpu;
6384
6385         /* Only allow one dump user at a time. */
6386         if (atomic_inc_return(&dump_running) != 1) {
6387                 atomic_dec(&dump_running);
6388                 return;
6389         }
6390
6391         /*
6392          * Always turn off tracing when we dump.
6393          * We don't need to show trace output of what happens
6394          * between multiple crashes.
6395          *
6396          * If the user does a sysrq-z, then they can re-enable
6397          * tracing with echo 1 > tracing_on.
6398          */
6399         tracing_off();
6400
6401         local_irq_save(flags);
6402
6403         /* Simulate the iterator */
6404         trace_init_global_iter(&iter);
6405
6406         for_each_tracing_cpu(cpu) {
6407                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6408         }
6409
6410         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6411
6412         /* don't look at user memory in panic mode */
6413         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6414
6415         switch (oops_dump_mode) {
6416         case DUMP_ALL:
6417                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6418                 break;
6419         case DUMP_ORIG:
6420                 iter.cpu_file = raw_smp_processor_id();
6421                 break;
6422         case DUMP_NONE:
6423                 goto out_enable;
6424         default:
6425                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6426                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6427         }
6428
6429         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6430
6431         /* Did function tracer already get disabled? */
6432         if (ftrace_is_dead()) {
6433                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6434                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6435         }
6436
6437         /*
6438          * We need to stop all tracing on all CPUs to read
6439          * the next buffer. This is a bit expensive, but is
6440          * not done often. We read everything we can,
6441          * and then release the locks again.
6442          */
6443
6444         while (!trace_empty(&iter)) {
6445
6446                 if (!cnt)
6447                         printk(KERN_TRACE "---------------------------------\n");
6448
6449                 cnt++;
6450
6451                 /* reset all but tr, trace, and overruns */
6452                 memset(&iter.seq, 0,
6453                        sizeof(struct trace_iterator) -
6454                        offsetof(struct trace_iterator, seq));
6455                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6456                 iter.pos = -1;
6457
6458                 if (trace_find_next_entry_inc(&iter) != NULL) {
6459                         int ret;
6460
6461                         ret = print_trace_line(&iter);
6462                         if (ret != TRACE_TYPE_NO_CONSUME)
6463                                 trace_consume(&iter);
6464                 }
6465                 touch_nmi_watchdog();
6466
6467                 trace_printk_seq(&iter.seq);
6468         }
6469
6470         if (!cnt)
6471                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6472         else
6473                 printk(KERN_TRACE "---------------------------------\n");
6474
6475  out_enable:
6476         trace_flags |= old_userobj;
6477
6478         for_each_tracing_cpu(cpu) {
6479                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6480         }
6481         atomic_dec(&dump_running);
6482         local_irq_restore(flags);
6483 }
6484 EXPORT_SYMBOL_GPL(ftrace_dump);
6485
6486 __init static int tracer_alloc_buffers(void)
6487 {
6488         int ring_buf_size;
6489         int ret = -ENOMEM;
6490
6491
6492         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6493                 goto out;
6494
6495         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6496                 goto out_free_buffer_mask;
6497
6498         /* Only allocate trace_printk buffers if a trace_printk exists */
6499         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6500                 /* Must be called before global_trace.buffer is allocated */
6501                 trace_printk_init_buffers();
6502
6503         /* To save memory, keep the ring buffer size at its minimum */
6504         if (ring_buffer_expanded)
6505                 ring_buf_size = trace_buf_size;
6506         else
6507                 ring_buf_size = 1;
6508
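        /* By default, every possible CPU is included in tracing. */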
6509         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6510         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6511
6512         raw_spin_lock_init(&global_trace.start_lock);
6513
6514         /* Used for event triggers */
6515         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6516         if (!temp_buffer)
6517                 goto out_free_cpumask;
6518
6519         /* TODO: make the number of buffers hot pluggable with CPUs */
6520         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6521                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6522                 WARN_ON(1);
6523                 goto out_free_temp_buffer;
6524         }
6525
6526         if (global_trace.buffer_disabled)
6527                 tracing_off();
6528
6529         trace_init_cmdlines();
6530
6531         /*
6532          * register_tracer() might reference current_trace, so it
6533          * needs to be set before we register anything. This is
6534          * just a bootstrap of current_trace anyway.
6535          */
6536         global_trace.current_trace = &nop_trace;
6537
6538         register_tracer(&nop_trace);
6539
6540         /* All seems OK, enable tracing */
6541         tracing_disabled = 0;
6542
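        /*
         * Register for panic and die notifications so the buffers can
         * be dumped on a crash when ftrace_dump_on_oops is set.
         */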
6543         atomic_notifier_chain_register(&panic_notifier_list,
6544                                        &trace_panic_notifier);
6545
6546         register_die_notifier(&trace_die_notifier);
6547
6548         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6549
6550         INIT_LIST_HEAD(&global_trace.systems);
6551         INIT_LIST_HEAD(&global_trace.events);
6552         list_add(&global_trace.list, &ftrace_trace_arrays);
6553
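        /* Apply boot-supplied trace options, one comma-separated token at a time. */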
6554         while (trace_boot_options) {
6555                 char *option;
6556
6557                 option = strsep(&trace_boot_options, ",");
6558                 trace_set_options(&global_trace, option);
6559         }
6560
6561         register_snapshot_cmd();
6562
6563         return 0;
6564
6565 out_free_temp_buffer:
6566         ring_buffer_free(temp_buffer);
6567 out_free_cpumask:
6568         free_percpu(global_trace.trace_buffer.data);
6569 #ifdef CONFIG_TRACER_MAX_TRACE
6570         free_percpu(global_trace.max_buffer.data);
6571 #endif
6572         free_cpumask_var(global_trace.tracing_cpumask);
6573 out_free_buffer_mask:
6574         free_cpumask_var(tracing_buffer_mask);
6575 out:
6576         return ret;
6577 }
6578
6579 __init static int clear_boot_tracer(void)
6580 {
6581         /*
6582          * The default bootup tracer name points into an init section.
6583          * This function is called from a late initcall. If the boot
6584          * tracer was never registered, clear the pointer here so that
6585          * a later registration does not access memory that is about
6586          * to be freed.
6587          */
6588         if (!default_bootup_tracer)
6589                 return 0;
6590
6591         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6592                default_bootup_tracer);
6593         default_bootup_tracer = NULL;
6594
6595         return 0;
6596 }
6597
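/*
 * The buffers are allocated very early, the debugfs interface comes up
 * at fs_initcall time, and any stale boot-tracer pointer is cleared
 * late, before init memory is released.
 */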
6598 early_initcall(tracer_alloc_buffers);
6599 fs_initcall(tracer_init_debugfs);
6600 late_initcall(clear_boot_tracer);