1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing (tracers/events enabled via the kernel cmdline)
73  * is running, we do not want to run the selftest.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL, as it must be different
154          * from "mod" or "eval_string".
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
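/*
 * Illustrative sketch (not part of the original source): with three saved
 * eval maps owned by a module, the saved array described above would look
 * roughly like
 *
 *   item[0].head   -> { .mod = owning module (or NULL if built in), .length = 3 }
 *   item[1..3].map -> the struct trace_eval_map entries themselves
 *   item[4].tail   -> { .next = next saved array (or NULL), .end = NULL }
 *
 * so walking the maps means skipping the head, iterating head.length
 * entries, then following tail.next to the next chunk.
 */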
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
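/*
 * Illustrative command-line usage of the option parsed above (a sketch,
 * derived from set_ftrace_dump_on_oops()):
 *
 *   ftrace_dump_on_oops            - dump the buffers of all CPUs (DUMP_ALL)
 *   ftrace_dump_on_oops=1          - same as above
 *   ftrace_dump_on_oops=orig_cpu   - dump only the CPU that oopsed (DUMP_ORIG)
 *   ftrace_dump_on_oops=2          - same as "orig_cpu"
 */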
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
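/*
 * Illustrative sketch of the trace_export API above (not built; the write
 * callback's parameters mirror how export->write() is invoked in
 * trace_process_export(), and my_export_write()/my_export are hypothetical
 * names).
 */
#if 0
static void my_export_write(struct trace_export *export, const void *entry,
			    unsigned int size)
{
	/* forward @size bytes of the raw trace entry to an external sink */
}

static struct trace_export my_export = {
	.write	= my_export_write,
	.flags	= TRACE_EXPORT_FUNCTION, /* or TRACE_EXPORT_EVENT / TRACE_EXPORT_MARKER */
};

static int __init my_export_init(void)
{
	return register_ftrace_export(&my_export);
}

static void __exit my_export_exit(void)
{
	unregister_ftrace_export(&my_export);
}
#endif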
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
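/*
 * Illustrative sketch of the reference counting above (not built; assumes
 * trace_array_get_by_name() as the usual way a caller obtains an instance,
 * and "my_instance" is a hypothetical instance name).
 */
#if 0
static void example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("my_instance");	/* takes a reference */
	if (!tr)
		return;
	/* ... use the instance ... */
	trace_array_put(tr);		/* allow it to be destroyed later */
}
#endif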
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (the actual pid + 1, so that zero can be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
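/*
 * Illustrative sketch of how the three helpers above plug into seq_file
 * (not built; my_pid_list, my_pid_start/next/stop and my_pid_seq_ops are
 * hypothetical, and locking around the pid list is omitted).
 */
#if 0
static struct trace_pid_list *my_pid_list;

static void *my_pid_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(my_pid_list, pos);
}

static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(my_pid_list, v, pos);
}

static void my_pid_stop(struct seq_file *m, void *v)
{
	/* typically drops whatever protects my_pid_list */
}

static const struct seq_operations my_pid_seq_ops = {
	.start	= my_pid_start,
	.next	= my_pid_next,
	.stop	= my_pid_stop,
	.show	= trace_pid_show,	/* prints one pid per line */
};
#endif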
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate the array. The write is an all-or-nothing
698          * operation: a new array is always built when the user adds
699          * pids, and if the operation fails, the current list is left
700          * unmodified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
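/*
 * Illustrative note (not kernel code): the user buffer consumed by
 * trace_pid_write() is a whitespace-separated list of pids, e.g. writing
 * "12 345 6789" to a pid filter file sets those three pids in the new
 * list, on top of any pids copied over from @filtered_pids.
 */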
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which is meant for fast paths such as
791  * the irqsoff tracer, but it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to the low value of 16384.
812  * If a dump on oops happens, a small buffer avoids having to
813  * wait for a huge amount of output. In any case, this is
814  * configurable at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow another process to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the events producer.
840  *   B) the page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different CPU
844  * ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
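/*
 * Illustrative sketch of the reader-side protocol described above (not
 * built; ring_buffer_consume() is one of the consuming readers this
 * serializes, and the surrounding function is hypothetical).
 */
#if 0
static void example_consume(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost;
	u64 ts;

	trace_access_lock(cpu);		/* serialize consumers of this CPU's buffer */
	event = ring_buffer_consume(buffer, cpu, &ts, &lost);
	if (event) {
		/* ... process the event while the consumer lock is held ... */
	}
	trace_access_unlock(cpu);
}
#endif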
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff) that just want to
965          * know if the ring buffer has been disabled, but can handle
966          * races where it gets disabled while we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
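/*
 * Illustrative usage sketch (not part of this file): callers normally do
 * not call __trace_puts()/__trace_bputs() directly but go through the
 * trace_puts() macro, which picks one of them depending on whether the
 * string is a true constant.
 */
#if 0
static void example_mark(void)
{
	trace_puts("reached the slow path\n");
}
#endif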
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot either with
1145  * tracing_snapshot_alloc(), or manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, this will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true. That means the trace array's cond_snapshot update()
1168  * operation used the cond_data to determine whether the snapshot
1169  * should be taken, and if it was, presumably saved the cond_data along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already taken.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         local_irq_disable();
1197         arch_spin_lock(&tr->max_lock);
1198
1199         if (tr->cond_snapshot)
1200                 cond_data = tr->cond_snapshot->cond_data;
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210                                         struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215         int ret;
1216
1217         if (!tr->allocated_snapshot) {
1218
1219                 /* allocate spare buffer */
1220                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222                 if (ret < 0)
1223                         return ret;
1224
1225                 tr->allocated_snapshot = true;
1226         }
1227
1228         return 0;
1229 }
1230
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233         /*
1234          * We don't free the ring buffer; instead, we resize it because
1235          * the max_tr ring buffer has some state (e.g. ring->clock) and
1236          * we want to preserve it.
1237          */
1238         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239         set_buffer_entries(&tr->max_buffer, 1);
1240         tracing_reset_online_cpus(&tr->max_buffer);
1241         tr->allocated_snapshot = false;
1242 }
1243
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256         struct trace_array *tr = &global_trace;
1257         int ret;
1258
1259         ret = tracing_alloc_snapshot_instance(tr);
1260         WARN_ON(ret < 0);
1261
1262         return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279         int ret;
1280
1281         ret = tracing_alloc_snapshot();
1282         if (ret < 0)
1283                 return;
1284
1285         tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
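/*
 * Illustrative sketch of the split described above (not built; the example
 * functions and "condition_hit" trigger are hypothetical).
 */
#if 0
static int example_snapshot_setup(void)
{
	/* setup path, where sleeping is allowed */
	return tracing_alloc_snapshot();
}

static void example_snapshot_hit(bool condition_hit)
{
	/* later, from a context that must not sleep */
	if (condition_hit)
		tracing_snapshot();	/* swaps the live buffer with the spare */
}
#endif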
1288
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:         The tracing instance
1292  * @cond_data:  User data to associate with the snapshot
1293  * @update:     Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303                                  cond_update_fn_t update)
1304 {
1305         struct cond_snapshot *cond_snapshot;
1306         int ret = 0;
1307
1308         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309         if (!cond_snapshot)
1310                 return -ENOMEM;
1311
1312         cond_snapshot->cond_data = cond_data;
1313         cond_snapshot->update = update;
1314
1315         mutex_lock(&trace_types_lock);
1316
1317         ret = tracing_alloc_snapshot_instance(tr);
1318         if (ret)
1319                 goto fail_unlock;
1320
1321         if (tr->current_trace->use_max_tr) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         /*
1327          * The cond_snapshot can only change to NULL without the
1328          * trace_types_lock. We don't care if we race with it going
1329          * to NULL, but we want to make sure that it's not set to
1330          * something other than NULL when we get here, which we can
1331          * do safely with only holding the trace_types_lock and not
1332          * having to take the max_lock.
1333          */
1334         if (tr->cond_snapshot) {
1335                 ret = -EBUSY;
1336                 goto fail_unlock;
1337         }
1338
1339         local_irq_disable();
1340         arch_spin_lock(&tr->max_lock);
1341         tr->cond_snapshot = cond_snapshot;
1342         arch_spin_unlock(&tr->max_lock);
1343         local_irq_enable();
1344
1345         mutex_unlock(&trace_types_lock);
1346
1347         return ret;
1348
1349  fail_unlock:
1350         mutex_unlock(&trace_types_lock);
1351         kfree(cond_snapshot);
1352         return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1355
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:         The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368         int ret = 0;
1369
1370         local_irq_disable();
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381         local_irq_enable();
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
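/*
 * Illustrative sketch of the conditional snapshot API above (not built;
 * my_cond_update(), my_data and example_cond_snapshot() are hypothetical,
 * and the callback is assumed to have the bool (*)(struct trace_array *,
 * void *) shape expected by tracing_snapshot_cond_enable()).
 */
#if 0
static bool my_cond_update(struct trace_array *tr, void *cond_data)
{
	/* decide whether this particular hit should actually snapshot */
	return true;
}

static void example_cond_snapshot(struct trace_array *tr, void *my_data)
{
	/* associate my_data and the update callback with @tr */
	if (tracing_snapshot_cond_enable(tr, my_data, my_cond_update))
		return;

	/* at an interesting point: snapshot only if my_cond_update() agrees */
	tracing_snapshot_cond(tr, my_data);

	/* tear down */
	tracing_snapshot_cond_disable(tr);
}
#endif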
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff) that just want to
1433          * know if the ring buffer has been disabled, but can handle
1434          * races where it gets disabled while we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
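/*
 * Illustrative boot-time usage of the two parameters above (a sketch; the
 * values are arbitrary):
 *
 *   trace_buf_size=1M    - ~1MB per-CPU ring buffer (memparse() suffixes work)
 *   tracing_thresh=100   - threshold of 100 microseconds (stored as ns above)
 */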
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * If the last write did not finish the string (parser->cont is set),
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
1676
1677 /* TODO add a seq_buf_to_buffer() */
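     /*
      * Copy up to @cnt unread bytes from @s into @buf and advance the
      * read position. Returns the number of bytes copied, or -EBUSY when
      * everything in the trace_seq has already been read.
      */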
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696
1697 #ifdef LATENCY_FS_NOTIFY
1698
1699 static struct workqueue_struct *fsnotify_wq;
1700
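     /*
      * latency_fsnotify() below may be called from scheduler context, so
      * the fsnotify_inode() call is bounced through irq_work and then a
      * workqueue, where it is safe to run.
      */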
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703         struct trace_array *tr = container_of(work, struct trace_array,
1704                                               fsnotify_work);
1705         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710         struct trace_array *tr = container_of(iwork, struct trace_array,
1711                                               fsnotify_irqwork);
1712         queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716                                      struct dentry *d_tracer)
1717 {
1718         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720         tr->d_max_latency = trace_create_file("tracing_max_latency",
1721                                               TRACE_MODE_WRITE,
1722                                               d_tracer, &tr->max_latency,
1723                                               &tracing_max_lat_fops);
1724 }
1725
1726 __init static int latency_fsnotify_init(void)
1727 {
1728         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1730         if (!fsnotify_wq) {
1731                 pr_err("Unable to allocate tr_max_lat_wq\n");
1732                 return -ENOMEM;
1733         }
1734         return 0;
1735 }
1736
1737 late_initcall_sync(latency_fsnotify_init);
1738
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741         if (!fsnotify_wq)
1742                 return;
1743         /*
1744          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745          * possible that we are called from __schedule() or do_idle(), which
1746          * could cause a deadlock.
1747          */
1748         irq_work_queue(&tr->fsnotify_irqwork);
1749 }
1750
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1752         || defined(CONFIG_OSNOISE_TRACER)
1753
1754 #define trace_create_maxlat_file(tr, d_tracer)                          \
1755         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1756                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1760 #endif
1761
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771         struct array_buffer *trace_buf = &tr->array_buffer;
1772         struct array_buffer *max_buf = &tr->max_buffer;
1773         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775
1776         max_buf->cpu = cpu;
1777         max_buf->time_start = data->preempt_timestamp;
1778
1779         max_data->saved_latency = tr->max_latency;
1780         max_data->critical_start = data->critical_start;
1781         max_data->critical_end = data->critical_end;
1782
1783         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784         max_data->pid = tsk->pid;
1785         /*
1786          * If tsk == current, then use current_uid(), as that does not use
1787          * RCU. The irq tracer can be called out of RCU scope.
1788          */
1789         if (tsk == current)
1790                 max_data->uid = current_uid();
1791         else
1792                 max_data->uid = task_uid(tsk);
1793
1794         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795         max_data->policy = tsk->policy;
1796         max_data->rt_priority = tsk->rt_priority;
1797
1798         /* record this task's comm */
1799         tracing_record_cmdline(tsk);
1800         latency_fsnotify(tr);
1801 }
1802
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815               void *cond_data)
1816 {
1817         if (tr->stop_count)
1818                 return;
1819
1820         WARN_ON_ONCE(!irqs_disabled());
1821
1822         if (!tr->allocated_snapshot) {
1823                 /* Only the nop tracer should hit this when disabling */
1824                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825                 return;
1826         }
1827
1828         arch_spin_lock(&tr->max_lock);
1829
1830         /* Inherit the recordable setting from array_buffer */
1831         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832                 ring_buffer_record_on(tr->max_buffer.buffer);
1833         else
1834                 ring_buffer_record_off(tr->max_buffer.buffer);
1835
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838                 goto out_unlock;
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844  out_unlock:
1845         arch_spin_unlock(&tr->max_lock);
1846 }
1847
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859         int ret;
1860
1861         if (tr->stop_count)
1862                 return;
1863
1864         WARN_ON_ONCE(!irqs_disabled());
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874
1875         if (ret == -EBUSY) {
1876                 /*
1877                  * We failed to swap the buffer due to a commit taking
1878                  * place on this CPU. We fail to record, but we reset
1879                  * the max trace buffer (no one writes directly to it)
1880                  * and flag that it failed.
1881                  */
1882                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883                         "Failed to swap buffers due to commit in progress\n");
1884         }
1885
1886         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888         __update_max_tr(tr, tsk, cpu);
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
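     /*
      * wait_on_pipe - wait for data to show up on the trace pipe
      *
      * Iterators over a static snapshot have per-CPU buffer iterators and
      * never need to wait. Live readers block in ring_buffer_wait() on the
      * requested CPU file; @full is passed straight through to it.
      */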
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static; they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftests yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017                  * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* disable other selftests, since this will break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
2146
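     /*
      * tracing_reset_cpu - clear the ring buffer of a single CPU
      *
      * Recording is disabled around the reset, and synchronize_rcu() makes
      * sure all in-flight commits have finished before the buffer for
      * @cpu is cleared.
      */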
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus(void)
2184 {
2185         struct trace_array *tr;
2186
2187         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2188                 if (!tr->clear_trace)
2189                         continue;
2190                 tr->clear_trace = false;
2191                 tracing_reset_online_cpus(&tr->array_buffer);
2192 #ifdef CONFIG_TRACER_MAX_TRACE
2193                 tracing_reset_online_cpus(&tr->max_buffer);
2194 #endif
2195         }
2196 }
2197
2198 /*
2199  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2200  * is the tgid last observed corresponding to pid=i.
2201  */
2202 static int *tgid_map;
2203
2204 /* The maximum valid index into tgid_map. */
2205 static size_t tgid_map_max;
2206
2207 #define SAVED_CMDLINES_DEFAULT 128
2208 #define NO_CMDLINE_MAP UINT_MAX
2209 /*
2210  * Preemption must be disabled before acquiring trace_cmdline_lock.
2211  * The various trace_arrays' max_lock must be acquired in a context
2212  * where interrupt is disabled.
2213  */
2214 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2215 struct saved_cmdlines_buffer {
2216         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2217         unsigned *map_cmdline_to_pid;
2218         unsigned cmdline_num;
2219         int cmdline_idx;
2220         char *saved_cmdlines;
2221 };
2222 static struct saved_cmdlines_buffer *savedcmd;
2223
2224 static inline char *get_saved_cmdlines(int idx)
2225 {
2226         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2227 }
2228
2229 static inline void set_cmdline(int idx, const char *cmdline)
2230 {
2231         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2232 }
2233
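     /*
      * Allocate the arrays backing @val saved cmdlines and initialize
      * both maps to NO_CMDLINE_MAP. NO_CMDLINE_MAP is UINT_MAX, so a
      * byte-wise memset of 0xff fills every entry correctly.
      */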
2234 static int allocate_cmdlines_buffer(unsigned int val,
2235                                     struct saved_cmdlines_buffer *s)
2236 {
2237         s->map_cmdline_to_pid = kmalloc_array(val,
2238                                               sizeof(*s->map_cmdline_to_pid),
2239                                               GFP_KERNEL);
2240         if (!s->map_cmdline_to_pid)
2241                 return -ENOMEM;
2242
2243         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2244         if (!s->saved_cmdlines) {
2245                 kfree(s->map_cmdline_to_pid);
2246                 return -ENOMEM;
2247         }
2248
2249         s->cmdline_idx = 0;
2250         s->cmdline_num = val;
2251         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2252                sizeof(s->map_pid_to_cmdline));
2253         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2254                val * sizeof(*s->map_cmdline_to_pid));
2255
2256         return 0;
2257 }
2258
2259 static int trace_create_savedcmd(void)
2260 {
2261         int ret;
2262
2263         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2264         if (!savedcmd)
2265                 return -ENOMEM;
2266
2267         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2268         if (ret < 0) {
2269                 kfree(savedcmd);
2270                 savedcmd = NULL;
2271                 return -ENOMEM;
2272         }
2273
2274         return 0;
2275 }
2276
2277 int is_tracing_stopped(void)
2278 {
2279         return global_trace.stop_count;
2280 }
2281
2282 /**
2283  * tracing_start - quick start of the tracer
2284  *
2285  * If tracing is enabled but was stopped by tracing_stop,
2286  * this will start the tracer back up.
2287  */
2288 void tracing_start(void)
2289 {
2290         struct trace_buffer *buffer;
2291         unsigned long flags;
2292
2293         if (tracing_disabled)
2294                 return;
2295
2296         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2297         if (--global_trace.stop_count) {
2298                 if (global_trace.stop_count < 0) {
2299                         /* Someone screwed up their debugging */
2300                         WARN_ON_ONCE(1);
2301                         global_trace.stop_count = 0;
2302                 }
2303                 goto out;
2304         }
2305
2306         /* Prevent the buffers from switching */
2307         arch_spin_lock(&global_trace.max_lock);
2308
2309         buffer = global_trace.array_buffer.buffer;
2310         if (buffer)
2311                 ring_buffer_record_enable(buffer);
2312
2313 #ifdef CONFIG_TRACER_MAX_TRACE
2314         buffer = global_trace.max_buffer.buffer;
2315         if (buffer)
2316                 ring_buffer_record_enable(buffer);
2317 #endif
2318
2319         arch_spin_unlock(&global_trace.max_lock);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2323 }
2324
2325 static void tracing_start_tr(struct trace_array *tr)
2326 {
2327         struct trace_buffer *buffer;
2328         unsigned long flags;
2329
2330         if (tracing_disabled)
2331                 return;
2332
2333         /* If global, we need to also start the max tracer */
2334         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2335                 return tracing_start();
2336
2337         raw_spin_lock_irqsave(&tr->start_lock, flags);
2338
2339         if (--tr->stop_count) {
2340                 if (tr->stop_count < 0) {
2341                         /* Someone screwed up their debugging */
2342                         WARN_ON_ONCE(1);
2343                         tr->stop_count = 0;
2344                 }
2345                 goto out;
2346         }
2347
2348         buffer = tr->array_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_enable(buffer);
2351
2352  out:
2353         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2354 }
2355
2356 /**
2357  * tracing_stop - quick stop of the tracer
2358  *
2359  * Light weight way to stop tracing. Use in conjunction with
2360  * tracing_start.
2361  */
2362 void tracing_stop(void)
2363 {
2364         struct trace_buffer *buffer;
2365         unsigned long flags;
2366
2367         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2368         if (global_trace.stop_count++)
2369                 goto out;
2370
2371         /* Prevent the buffers from switching */
2372         arch_spin_lock(&global_trace.max_lock);
2373
2374         buffer = global_trace.array_buffer.buffer;
2375         if (buffer)
2376                 ring_buffer_record_disable(buffer);
2377
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379         buffer = global_trace.max_buffer.buffer;
2380         if (buffer)
2381                 ring_buffer_record_disable(buffer);
2382 #endif
2383
2384         arch_spin_unlock(&global_trace.max_lock);
2385
2386  out:
2387         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2388 }
2389
2390 static void tracing_stop_tr(struct trace_array *tr)
2391 {
2392         struct trace_buffer *buffer;
2393         unsigned long flags;
2394
2395         /* If global, we need to also stop the max tracer */
2396         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2397                 return tracing_stop();
2398
2399         raw_spin_lock_irqsave(&tr->start_lock, flags);
2400         if (tr->stop_count++)
2401                 goto out;
2402
2403         buffer = tr->array_buffer.buffer;
2404         if (buffer)
2405                 ring_buffer_record_disable(buffer);
2406
2407  out:
2408         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2409 }
2410
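     /*
      * Save tsk->comm in the saved_cmdlines ring, indexed via the low
      * bits of the pid. Returns 1 on success and 0 if trace_cmdline_lock
      * could not be taken without spinning.
      */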
2411 static int trace_save_cmdline(struct task_struct *tsk)
2412 {
2413         unsigned tpid, idx;
2414
2415         /* treat recording of idle task as a success */
2416         if (!tsk->pid)
2417                 return 1;
2418
2419         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2420
2421         /*
2422          * It's not the end of the world if we don't get
2423          * the lock, but we also don't want to spin
2424          * nor do we want to disable interrupts,
2425          * so if we miss here, then better luck next time.
2426          *
2427          * This is called from within the scheduler and wakeup paths, so
2428          * interrupts had better be disabled and the run queue lock held.
2429          */
2430         lockdep_assert_preemption_disabled();
2431         if (!arch_spin_trylock(&trace_cmdline_lock))
2432                 return 0;
2433
2434         idx = savedcmd->map_pid_to_cmdline[tpid];
2435         if (idx == NO_CMDLINE_MAP) {
2436                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2437
2438                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2439                 savedcmd->cmdline_idx = idx;
2440         }
2441
2442         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443         set_cmdline(idx, tsk->comm);
2444
2445         arch_spin_unlock(&trace_cmdline_lock);
2446
2447         return 1;
2448 }
2449
2450 static void __trace_find_cmdline(int pid, char comm[])
2451 {
2452         unsigned map;
2453         int tpid;
2454
2455         if (!pid) {
2456                 strcpy(comm, "<idle>");
2457                 return;
2458         }
2459
2460         if (WARN_ON_ONCE(pid < 0)) {
2461                 strcpy(comm, "<XXX>");
2462                 return;
2463         }
2464
2465         tpid = pid & (PID_MAX_DEFAULT - 1);
2466         map = savedcmd->map_pid_to_cmdline[tpid];
2467         if (map != NO_CMDLINE_MAP) {
2468                 tpid = savedcmd->map_cmdline_to_pid[map];
2469                 if (tpid == pid) {
2470                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2471                         return;
2472                 }
2473         }
2474         strcpy(comm, "<...>");
2475 }
2476
2477 void trace_find_cmdline(int pid, char comm[])
2478 {
2479         preempt_disable();
2480         arch_spin_lock(&trace_cmdline_lock);
2481
2482         __trace_find_cmdline(pid, comm);
2483
2484         arch_spin_unlock(&trace_cmdline_lock);
2485         preempt_enable();
2486 }
2487
2488 static int *trace_find_tgid_ptr(int pid)
2489 {
2490         /*
2491          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492          * if we observe a non-NULL tgid_map then we also observe the correct
2493          * tgid_map_max.
2494          */
2495         int *map = smp_load_acquire(&tgid_map);
2496
2497         if (unlikely(!map || pid > tgid_map_max))
2498                 return NULL;
2499
2500         return &map[pid];
2501 }
2502
2503 int trace_find_tgid(int pid)
2504 {
2505         int *ptr = trace_find_tgid_ptr(pid);
2506
2507         return ptr ? *ptr : 0;
2508 }
2509
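     /*
      * trace_save_tgid - record the tgid of @tsk in the tgid_map
      *
      * Returns 1 on success (recording the idle task is always treated as
      * a success) and 0 when the map is not allocated or the pid falls
      * outside of it.
      */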
2510 static int trace_save_tgid(struct task_struct *tsk)
2511 {
2512         int *ptr;
2513
2514         /* treat recording of idle task as a success */
2515         if (!tsk->pid)
2516                 return 1;
2517
2518         ptr = trace_find_tgid_ptr(tsk->pid);
2519         if (!ptr)
2520                 return 0;
2521
2522         *ptr = tsk->tgid;
2523         return 1;
2524 }
2525
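     /*
      * Return true when there is nothing to record: either no comm/tgid
      * recording was requested via @flags, or no trace event has occurred
      * on this CPU since task info was last saved (trace_taskinfo_save is
      * clear).
      */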
2526 static bool tracing_record_taskinfo_skip(int flags)
2527 {
2528         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2529                 return true;
2530         if (!__this_cpu_read(trace_taskinfo_save))
2531                 return true;
2532         return false;
2533 }
2534
2535 /**
2536  * tracing_record_taskinfo - record the task info of a task
2537  *
2538  * @task:  task to record
2539  * @flags: TRACE_RECORD_CMDLINE for recording comm
2540  *         TRACE_RECORD_TGID for recording tgid
2541  */
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2543 {
2544         bool done;
2545
2546         if (tracing_record_taskinfo_skip(flags))
2547                 return;
2548
2549         /*
2550          * Record as much task information as possible. If some fail, continue
2551          * to try to record the others.
2552          */
2553         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2555
2556         /* If recording any information failed, retry again soon. */
2557         if (!done)
2558                 return;
2559
2560         __this_cpu_write(trace_taskinfo_save, false);
2561 }
2562
2563 /**
2564  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2565  *
2566  * @prev: previous task during sched_switch
2567  * @next: next task during sched_switch
2568  * @flags: TRACE_RECORD_CMDLINE for recording comm
2569  *         TRACE_RECORD_TGID for recording tgid
2570  */
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572                                           struct task_struct *next, int flags)
2573 {
2574         bool done;
2575
2576         if (tracing_record_taskinfo_skip(flags))
2577                 return;
2578
2579         /*
2580          * Record as much task information as possible. If some fail, continue
2581          * to try to record the others.
2582          */
2583         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2587
2588         /* If recording any information failed, retry again soon. */
2589         if (!done)
2590                 return;
2591
2592         __this_cpu_write(trace_taskinfo_save, false);
2593 }
2594
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2597 {
2598         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2599 }
2600
2601 void tracing_record_tgid(struct task_struct *task)
2602 {
2603         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2604 }
2605
2606 /*
2607  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609  * simplifies those functions and keeps them in sync.
2610  */
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2612 {
2613         return trace_seq_has_overflowed(s) ?
2614                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2615 }
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
2617
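     /*
      * Migration-disable depth of the current task; SMP only, always zero
      * on uniprocessor builds where migration is meaningless.
      */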
2618 static unsigned short migration_disable_value(void)
2619 {
2620 #if defined(CONFIG_SMP)
2621         return current->migration_disabled;
2622 #else
2623         return 0;
2624 #endif
2625 }
2626
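     /*
      * Pack the tracing context into a single word: the TRACE_FLAG_* bits
      * (NMI/hardirq/softirq/resched state plus the caller-supplied
      * @irqs_status) go in the upper 16 bits, the preemption count
      * (clamped to 15) in bits 0-3, and the migration-disable depth
      * (also clamped to 15) in bits 4-7.
      */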
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2628 {
2629         unsigned int trace_flags = irqs_status;
2630         unsigned int pc;
2631
2632         pc = preempt_count();
2633
2634         if (pc & NMI_MASK)
2635                 trace_flags |= TRACE_FLAG_NMI;
2636         if (pc & HARDIRQ_MASK)
2637                 trace_flags |= TRACE_FLAG_HARDIRQ;
2638         if (in_serving_softirq())
2639                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2640         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641                 trace_flags |= TRACE_FLAG_BH_OFF;
2642
2643         if (tif_need_resched())
2644                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645         if (test_preempt_need_resched())
2646                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2649 }
2650
2651 struct ring_buffer_event *
2652 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2653                           int type,
2654                           unsigned long len,
2655                           unsigned int trace_ctx)
2656 {
2657         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2658 }
2659
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2662 static int trace_buffered_event_ref;
2663
2664 /**
2665  * trace_buffered_event_enable - enable buffering events
2666  *
2667  * When events are being filtered, it is quicker to use a temporary
2668  * buffer to write the event data into if there's a likely chance
2669  * that it will not be committed. The discard of the ring buffer
2670  * is not as fast as committing, and is much slower than copying
2671  * a commit.
2672  *
2673  * When an event is to be filtered, allocate per cpu buffers to
2674  * write the event data into, and if the event is filtered and discarded
2675  * it is simply dropped, otherwise, the entire data is to be committed
2676  * in one shot.
2677  */
2678 void trace_buffered_event_enable(void)
2679 {
2680         struct ring_buffer_event *event;
2681         struct page *page;
2682         int cpu;
2683
2684         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2685
2686         if (trace_buffered_event_ref++)
2687                 return;
2688
2689         for_each_tracing_cpu(cpu) {
2690                 page = alloc_pages_node(cpu_to_node(cpu),
2691                                         GFP_KERNEL | __GFP_NORETRY, 0);
2692                 if (!page)
2693                         goto failed;
2694
2695                 event = page_address(page);
2696                 memset(event, 0, sizeof(*event));
2697
2698                 per_cpu(trace_buffered_event, cpu) = event;
2699
2700                 preempt_disable();
2701                 if (cpu == smp_processor_id() &&
2702                     __this_cpu_read(trace_buffered_event) !=
2703                     per_cpu(trace_buffered_event, cpu))
2704                         WARN_ON_ONCE(1);
2705                 preempt_enable();
2706         }
2707
2708         return;
2709  failed:
2710         trace_buffered_event_disable();
2711 }
2712
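     /*
      * These two run on each CPU via smp_call_function_many(). Bumping
      * trace_buffered_event_cnt makes the per-CPU buffered event look
      * busy so writers reserve directly from the ring buffer; dropping
      * it back down lets them use the buffered event again.
      */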
2713 static void enable_trace_buffered_event(void *data)
2714 {
2715         /* Probably not needed, but do it anyway */
2716         smp_rmb();
2717         this_cpu_dec(trace_buffered_event_cnt);
2718 }
2719
2720 static void disable_trace_buffered_event(void *data)
2721 {
2722         this_cpu_inc(trace_buffered_event_cnt);
2723 }
2724
2725 /**
2726  * trace_buffered_event_disable - disable buffering events
2727  *
2728  * When a filter is removed, it is faster to not use the buffered
2729  * events, and to commit directly into the ring buffer. Free up
2730  * the temp buffers when there are no more users. This requires
2731  * special synchronization with current events.
2732  */
2733 void trace_buffered_event_disable(void)
2734 {
2735         int cpu;
2736
2737         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738
2739         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2740                 return;
2741
2742         if (--trace_buffered_event_ref)
2743                 return;
2744
2745         preempt_disable();
2746         /* For each CPU, set the buffer as used. */
2747         smp_call_function_many(tracing_buffer_mask,
2748                                disable_trace_buffered_event, NULL, 1);
2749         preempt_enable();
2750
2751         /* Wait for all current users to finish */
2752         synchronize_rcu();
2753
2754         for_each_tracing_cpu(cpu) {
2755                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756                 per_cpu(trace_buffered_event, cpu) = NULL;
2757         }
2758         /*
2759          * Make sure trace_buffered_event is NULL before clearing
2760          * trace_buffered_event_cnt.
2761          */
2762         smp_wmb();
2763
2764         preempt_disable();
2765         /* Do the work on each cpu */
2766         smp_call_function_many(tracing_buffer_mask,
2767                                enable_trace_buffered_event, NULL, 1);
2768         preempt_enable();
2769 }
2770
2771 static struct trace_buffer *temp_buffer;
2772
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775                           struct trace_event_file *trace_file,
2776                           int type, unsigned long len,
2777                           unsigned int trace_ctx)
2778 {
2779         struct ring_buffer_event *entry;
2780         struct trace_array *tr = trace_file->tr;
2781         int val;
2782
2783         *current_rb = tr->array_buffer.buffer;
2784
2785         if (!tr->no_filter_buffering_ref &&
2786             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787                 preempt_disable_notrace();
2788                 /*
2789                  * Filtering is on, so try to use the per cpu buffer first.
2790                  * This buffer will simulate a ring_buffer_event,
2791                  * where the type_len is zero and the array[0] will
2792                  * hold the full length.
2793              * (see include/linux/ring_buffer.h for details on
2794                  *  how the ring_buffer_event is structured).
2795                  *
2796                  * Using a temp buffer during filtering and copying it
2797                  * on a matched filter is quicker than writing directly
2798                  * into the ring buffer and then discarding it when
2799                  * it doesn't match. That is because the discard
2800                  * requires several atomic operations to get right.
2801                  * Copying on a match and doing nothing on a failed match
2802                  * is still quicker than skipping the copy on a match but
2803                  * then having to discard from the ring buffer on a failed match.
2804                  */
2805                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2806                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2807
2808                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2809
2810                         /*
2811                          * Preemption is disabled, but interrupts and NMIs
2812                          * can still come in now. If that happens after
2813                          * the above increment, then it will have to go
2814                          * back to the old method of allocating the event
2815                          * on the ring buffer, and if the filter fails, it
2816                          * will have to call ring_buffer_discard_commit()
2817                          * to remove it.
2818                          *
2819                          * Need to also check the unlikely case that the
2820                          * length is bigger than the temp buffer size.
2821                          * If that happens, then the reserve is pretty much
2822                          * guaranteed to fail, as the ring buffer currently
2823                          * only allows events less than a page. But that may
2824                          * change in the future, so let the ring buffer reserve
2825                          * handle the failure in that case.
2826                          */
2827                         if (val == 1 && likely(len <= max_len)) {
2828                                 trace_event_setup(entry, type, trace_ctx);
2829                                 entry->array[0] = len;
2830                                 /* Return with preemption disabled */
2831                                 return entry;
2832                         }
2833                         this_cpu_dec(trace_buffered_event_cnt);
2834                 }
2835                 /* __trace_buffer_lock_reserve() disables preemption */
2836                 preempt_enable_notrace();
2837         }
2838
2839         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2840                                             trace_ctx);
2841         /*
2842          * If tracing is off, but we have triggers enabled
2843          * we still need to look at the event data. Use the temp_buffer
2844          * to store the trace event for the trigger to use. It's recursive
2845          * safe and will not be recorded anywhere.
2846          */
2847         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848                 *current_rb = temp_buffer;
2849                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2850                                                     trace_ctx);
2851         }
2852         return entry;
2853 }
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2855
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2858
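     /*
      * Print a single trace event to the console using the shared
      * tracepoint_print_iter. Soft-disabled or filtered-out events are
      * skipped, and tracepoint_iter_lock serializes use of the one
      * global iterator.
      */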
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2860 {
2861         struct trace_event_call *event_call;
2862         struct trace_event_file *file;
2863         struct trace_event *event;
2864         unsigned long flags;
2865         struct trace_iterator *iter = tracepoint_print_iter;
2866
2867         /* We should never get here if iter is NULL */
2868         if (WARN_ON_ONCE(!iter))
2869                 return;
2870
2871         event_call = fbuffer->trace_file->event_call;
2872         if (!event_call || !event_call->event.funcs ||
2873             !event_call->event.funcs->trace)
2874                 return;
2875
2876         file = fbuffer->trace_file;
2877         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879              !filter_match_preds(file->filter, fbuffer->entry)))
2880                 return;
2881
2882         event = &fbuffer->trace_file->event_call->event;
2883
2884         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885         trace_seq_init(&iter->seq);
2886         iter->ent = fbuffer->entry;
2887         event_call->event.funcs->trace(iter, 0, event);
2888         trace_seq_putc(&iter->seq, 0);
2889         printk("%s", iter->seq.buffer);
2890
2891         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2892 }
2893
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895                              void *buffer, size_t *lenp,
2896                              loff_t *ppos)
2897 {
2898         int save_tracepoint_printk;
2899         int ret;
2900
2901         mutex_lock(&tracepoint_printk_mutex);
2902         save_tracepoint_printk = tracepoint_printk;
2903
2904         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2905
2906         /*
2907          * This will force exiting early, as tracepoint_printk
2908          * is always zero when tracepoint_print_iter is not allocated.
2909          */
2910         if (!tracepoint_print_iter)
2911                 tracepoint_printk = 0;
2912
2913         if (save_tracepoint_printk == tracepoint_printk)
2914                 goto out;
2915
2916         if (tracepoint_printk)
2917                 static_key_enable(&tracepoint_printk_key.key);
2918         else
2919                 static_key_disable(&tracepoint_printk_key.key);
2920
2921  out:
2922         mutex_unlock(&tracepoint_printk_mutex);
2923
2924         return ret;
2925 }
2926
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2928 {
2929         enum event_trigger_type tt = ETT_NONE;
2930         struct trace_event_file *file = fbuffer->trace_file;
2931
2932         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933                         fbuffer->entry, &tt))
2934                 goto discard;
2935
2936         if (static_key_false(&tracepoint_printk_key.key))
2937                 output_printk(fbuffer);
2938
2939         if (static_branch_unlikely(&trace_event_exports_enabled))
2940                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2941
2942         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2944
2945 discard:
2946         if (tt)
2947                 event_triggers_post_call(file, tt);
2948
2949 }
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2951
2952 /*
2953  * Skip 3:
2954  *
2955  *   trace_buffer_unlock_commit_regs()
2956  *   trace_event_buffer_commit()
2957  *   trace_event_raw_event_xxx()
2958  */
2959 # define STACK_SKIP 3
2960
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962                                      struct trace_buffer *buffer,
2963                                      struct ring_buffer_event *event,
2964                                      unsigned int trace_ctx,
2965                                      struct pt_regs *regs)
2966 {
2967         __buffer_unlock_commit(buffer, event);
2968
2969         /*
2970          * If regs is not set, then skip the necessary functions.
2971          * Note, we can still get here via blktrace, wakeup tracer
2972          * and mmiotrace, but that's ok if they lose a function or
2973          * two. They are not that meaningful.
2974          */
2975         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976         ftrace_trace_userstack(tr, buffer, trace_ctx);
2977 }
2978
2979 /*
2980  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2981  */
2982 void
2983 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2984                                    struct ring_buffer_event *event)
2985 {
2986         __buffer_unlock_commit(buffer, event);
2987 }
2988
2989 void
2990 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2991                parent_ip, unsigned int trace_ctx)
2992 {
2993         struct trace_event_call *call = &event_function;
2994         struct trace_buffer *buffer = tr->array_buffer.buffer;
2995         struct ring_buffer_event *event;
2996         struct ftrace_entry *entry;
2997
2998         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2999                                             trace_ctx);
3000         if (!event)
3001                 return;
3002         entry   = ring_buffer_event_data(event);
3003         entry->ip                       = ip;
3004         entry->parent_ip                = parent_ip;
3005
3006         if (!call_filter_check_discard(call, entry, buffer, event)) {
3007                 if (static_branch_unlikely(&trace_function_exports_enabled))
3008                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009                 __buffer_unlock_commit(buffer, event);
3010         }
3011 }
3012
3013 #ifdef CONFIG_STACKTRACE
3014
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING   4
3017
3018 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3019
3020 struct ftrace_stack {
3021         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3022 };
3023
3024
3025 struct ftrace_stacks {
3026         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3027 };
3028
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3031
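     /*
      * Save a kernel stack trace into the ring buffer. Each CPU has
      * FTRACE_KSTACK_NESTING ftrace_stack slots so that normal, softirq,
      * irq and NMI context can each take a trace without stepping on the
      * one below them.
      */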
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033                                  unsigned int trace_ctx,
3034                                  int skip, struct pt_regs *regs)
3035 {
3036         struct trace_event_call *call = &event_kernel_stack;
3037         struct ring_buffer_event *event;
3038         unsigned int size, nr_entries;
3039         struct ftrace_stack *fstack;
3040         struct stack_entry *entry;
3041         int stackidx;
3042
3043         /*
3044          * Add one, for this function and the call to stack_trace_save().
3045          * If regs is set, then these functions will not be in the way.
3046          */
3047 #ifndef CONFIG_UNWINDER_ORC
3048         if (!regs)
3049                 skip++;
3050 #endif
3051
3052         preempt_disable_notrace();
3053
3054         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3055
3056         /* This should never happen. If it does, yell once and skip */
3057         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3058                 goto out;
3059
3060         /*
3061          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062          * interrupt will either see the value pre increment or post
3063          * increment. If the interrupt happens pre increment it will have
3064          * restored the counter when it returns.  We just need a barrier to
3065          * keep gcc from moving things around.
3066          */
3067         barrier();
3068
3069         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070         size = ARRAY_SIZE(fstack->calls);
3071
3072         if (regs) {
3073                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3074                                                    size, skip);
3075         } else {
3076                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3077         }
3078
3079         size = nr_entries * sizeof(unsigned long);
3080         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3082                                     trace_ctx);
3083         if (!event)
3084                 goto out;
3085         entry = ring_buffer_event_data(event);
3086
3087         memcpy(&entry->caller, fstack->calls, size);
3088         entry->size = nr_entries;
3089
3090         if (!call_filter_check_discard(call, entry, buffer, event))
3091                 __buffer_unlock_commit(buffer, event);
3092
3093  out:
3094         /* Again, don't let gcc optimize things here */
3095         barrier();
3096         __this_cpu_dec(ftrace_stack_reserve);
3097         preempt_enable_notrace();
3098
3099 }
3100
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102                                       struct trace_buffer *buffer,
3103                                       unsigned int trace_ctx,
3104                                       int skip, struct pt_regs *regs)
3105 {
3106         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3107                 return;
3108
3109         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3110 }
3111
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3113                    int skip)
3114 {
3115         struct trace_buffer *buffer = tr->array_buffer.buffer;
3116
3117         if (rcu_is_watching()) {
3118                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3119                 return;
3120         }
3121
3122         /*
3123          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124          * but if the above rcu_is_watching() failed, then the NMI
3125          * triggered someplace critical, and ct_irq_enter() should
3126          * not be called from NMI.
3127          */
3128         if (unlikely(in_nmi()))
3129                 return;
3130
3131         ct_irq_enter_irqson();
3132         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133         ct_irq_exit_irqson();
3134 }
3135
3136 /**
3137  * trace_dump_stack - record a stack back trace in the trace buffer
3138  * @skip: Number of functions to skip (helper handlers)
3139  */
3140 void trace_dump_stack(int skip)
3141 {
3142         if (tracing_disabled || tracing_selftest_running)
3143                 return;
3144
3145 #ifndef CONFIG_UNWINDER_ORC
3146         /* Skip 1 to skip this function. */
3147         skip++;
3148 #endif
3149         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3150                              tracing_gen_ctx(), skip, NULL);
3151 }
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
3153
3154 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3155 static DEFINE_PER_CPU(int, user_stack_count);
3156
3157 static void
3158 ftrace_trace_userstack(struct trace_array *tr,
3159                        struct trace_buffer *buffer, unsigned int trace_ctx)
3160 {
3161         struct trace_event_call *call = &event_user_stack;
3162         struct ring_buffer_event *event;
3163         struct userstack_entry *entry;
3164
3165         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3166                 return;
3167
3168          * NMIs cannot handle page faults, even with fixups.
3169          * Saving the user stack can (and often does) fault.
3170          * The save user stack can (and often does) fault.
3171          */
3172         if (unlikely(in_nmi()))
3173                 return;
3174
3175         /*
3176          * prevent recursion, since the user stack tracing may
3177          * trigger other kernel events.
3178          */
3179         preempt_disable();
3180         if (__this_cpu_read(user_stack_count))
3181                 goto out;
3182
3183         __this_cpu_inc(user_stack_count);
3184
3185         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3186                                             sizeof(*entry), trace_ctx);
3187         if (!event)
3188                 goto out_drop_count;
3189         entry   = ring_buffer_event_data(event);
3190
3191         entry->tgid             = current->tgid;
3192         memset(&entry->caller, 0, sizeof(entry->caller));
3193
3194         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3195         if (!call_filter_check_discard(call, entry, buffer, event))
3196                 __buffer_unlock_commit(buffer, event);
3197
3198  out_drop_count:
3199         __this_cpu_dec(user_stack_count);
3200  out:
3201         preempt_enable();
3202 }
3203 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3204 static void ftrace_trace_userstack(struct trace_array *tr,
3205                                    struct trace_buffer *buffer,
3206                                    unsigned int trace_ctx)
3207 {
3208 }
3209 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3210
3211 #endif /* CONFIG_STACKTRACE */
3212
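     /*
      * The timestamp delta is stored as two 32-bit halves in the
      * func_repeats_entry; split it here.
      */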
3213 static inline void
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215                           unsigned long long delta)
3216 {
3217         entry->bottom_delta_ts = delta & U32_MAX;
3218         entry->top_delta_ts = (delta >> 32);
3219 }
3220
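     /*
      * Emit a TRACE_FUNC_REPEATS entry recording how many times the last
      * ip/parent_ip pair repeated and how long ago the last of those
      * calls happened, relative to this event's timestamp.
      */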
3221 void trace_last_func_repeats(struct trace_array *tr,
3222                              struct trace_func_repeats *last_info,
3223                              unsigned int trace_ctx)
3224 {
3225         struct trace_buffer *buffer = tr->array_buffer.buffer;
3226         struct func_repeats_entry *entry;
3227         struct ring_buffer_event *event;
3228         u64 delta;
3229
3230         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231                                             sizeof(*entry), trace_ctx);
3232         if (!event)
3233                 return;
3234
3235         delta = ring_buffer_event_time_stamp(buffer, event) -
3236                 last_info->ts_last_call;
3237
3238         entry = ring_buffer_event_data(event);
3239         entry->ip = last_info->ip;
3240         entry->parent_ip = last_info->parent_ip;
3241         entry->count = last_info->count;
3242         func_repeats_set_delta_ts(entry, delta);
3243
3244         __buffer_unlock_commit(buffer, event);
3245 }
3246
3247 /* created for use with alloc_percpu */
3248 struct trace_buffer_struct {
3249         int nesting;
3250         char buffer[4][TRACE_BUF_SIZE];
3251 };
3252
3253 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3254
3255 /*
3256  * This allows for lockless recording.  If we're nested too deeply, then
3257  * this returns NULL.
3258  */
3259 static char *get_trace_buf(void)
3260 {
3261         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3262
3263         if (!trace_percpu_buffer || buffer->nesting >= 4)
3264                 return NULL;
3265
3266         buffer->nesting++;
3267
3268         /* Interrupts must see nesting incremented before we use the buffer */
3269         barrier();
3270         return &buffer->buffer[buffer->nesting - 1][0];
3271 }
3272
3273 static void put_trace_buf(void)
3274 {
3275         /* Don't let the decrement of nesting leak before this */
3276         barrier();
3277         this_cpu_dec(trace_percpu_buffer->nesting);
3278 }
3279
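     /* Allocate the per-CPU trace_printk() buffers (only done once). */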
3280 static int alloc_percpu_trace_buffer(void)
3281 {
3282         struct trace_buffer_struct __percpu *buffers;
3283
3284         if (trace_percpu_buffer)
3285                 return 0;
3286
3287         buffers = alloc_percpu(struct trace_buffer_struct);
3288         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3289                 return -ENOMEM;
3290
3291         trace_percpu_buffer = buffers;
3292         return 0;
3293 }
3294
3295 static int buffers_allocated;
3296
3297 void trace_printk_init_buffers(void)
3298 {
3299         if (buffers_allocated)
3300                 return;
3301
3302         if (alloc_percpu_trace_buffer())
3303                 return;
3304
3305         /* trace_printk() is for debug use only. Don't use it in production. */
3306
3307         pr_warn("\n");
3308         pr_warn("**********************************************************\n");
3309         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3310         pr_warn("**                                                      **\n");
3311         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3312         pr_warn("**                                                      **\n");
3313         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3314         pr_warn("** unsafe for production use.                           **\n");
3315         pr_warn("**                                                      **\n");
3316         pr_warn("** If you see this message and you are not debugging    **\n");
3317         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3318         pr_warn("**                                                      **\n");
3319         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3320         pr_warn("**********************************************************\n");
3321
3322         /* Expand the buffers to the set size */
3323         tracing_update_buffers();
3324
3325         buffers_allocated = 1;
3326
3327         /*
3328          * trace_printk_init_buffers() can be called by modules.
3329          * If that happens, then we need to start cmdline recording
3330          * directly here. If global_trace.array_buffer.buffer is already
3331          * allocated here, then this was called by module code.
3332          */
3333         if (global_trace.array_buffer.buffer)
3334                 tracing_start_cmdline_record();
3335 }
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3337
3338 void trace_printk_start_comm(void)
3339 {
3340         /* Start tracing comms if trace printk is set */
3341         if (!buffers_allocated)
3342                 return;
3343         tracing_start_cmdline_record();
3344 }
3345
3346 static void trace_printk_start_stop_comm(int enabled)
3347 {
3348         if (!buffers_allocated)
3349                 return;
3350
3351         if (enabled)
3352                 tracing_start_cmdline_record();
3353         else
3354                 tracing_stop_cmdline_record();
3355 }
3356
3357 /**
3358  * trace_vbprintk - write binary msg to tracing buffer
3359  * @ip:    The address of the caller
3360  * @fmt:   The string format to write to the buffer
3361  * @args:  Arguments for @fmt
3362  */
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3364 {
3365         struct trace_event_call *call = &event_bprint;
3366         struct ring_buffer_event *event;
3367         struct trace_buffer *buffer;
3368         struct trace_array *tr = &global_trace;
3369         struct bprint_entry *entry;
3370         unsigned int trace_ctx;
3371         char *tbuffer;
3372         int len = 0, size;
3373
3374         if (unlikely(tracing_selftest_running || tracing_disabled))
3375                 return 0;
3376
3377         /* Don't pollute graph traces with trace_vprintk internals */
3378         pause_graph_tracing();
3379
3380         trace_ctx = tracing_gen_ctx();
3381         preempt_disable_notrace();
3382
3383         tbuffer = get_trace_buf();
3384         if (!tbuffer) {
3385                 len = 0;
3386                 goto out_nobuffer;
3387         }
3388
3389         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3390
3391         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3392                 goto out_put;
3393
3394         size = sizeof(*entry) + sizeof(u32) * len;
3395         buffer = tr->array_buffer.buffer;
3396         ring_buffer_nest_start(buffer);
3397         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3398                                             trace_ctx);
3399         if (!event)
3400                 goto out;
3401         entry = ring_buffer_event_data(event);
3402         entry->ip                       = ip;
3403         entry->fmt                      = fmt;
3404
3405         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406         if (!call_filter_check_discard(call, entry, buffer, event)) {
3407                 __buffer_unlock_commit(buffer, event);
3408                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3409         }
3410
3411 out:
3412         ring_buffer_nest_end(buffer);
3413 out_put:
3414         put_trace_buf();
3415
3416 out_nobuffer:
3417         preempt_enable_notrace();
3418         unpause_graph_tracing();
3419
3420         return len;
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
3423
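     /*
      * Write a formatted TRACE_PRINT entry directly into @buffer.
      * Shared helper for trace_array_vprintk() and trace_array_printk_buf().
      */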
3424 __printf(3, 0)
3425 static int
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427                       unsigned long ip, const char *fmt, va_list args)
3428 {
3429         struct trace_event_call *call = &event_print;
3430         struct ring_buffer_event *event;
3431         int len = 0, size;
3432         struct print_entry *entry;
3433         unsigned int trace_ctx;
3434         char *tbuffer;
3435
3436         if (tracing_disabled || tracing_selftest_running)
3437                 return 0;
3438
3439         /* Don't pollute graph traces with trace_vprintk internals */
3440         pause_graph_tracing();
3441
3442         trace_ctx = tracing_gen_ctx();
3443         preempt_disable_notrace();
3444
3445
3446         tbuffer = get_trace_buf();
3447         if (!tbuffer) {
3448                 len = 0;
3449                 goto out_nobuffer;
3450         }
3451
3452         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3453
3454         size = sizeof(*entry) + len + 1;
3455         ring_buffer_nest_start(buffer);
3456         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3457                                             trace_ctx);
3458         if (!event)
3459                 goto out;
3460         entry = ring_buffer_event_data(event);
3461         entry->ip = ip;
3462
3463         memcpy(&entry->buf, tbuffer, len + 1);
3464         if (!call_filter_check_discard(call, entry, buffer, event)) {
3465                 __buffer_unlock_commit(buffer, event);
3466                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3467         }
3468
3469 out:
3470         ring_buffer_nest_end(buffer);
3471         put_trace_buf();
3472
3473 out_nobuffer:
3474         preempt_enable_notrace();
3475         unpause_graph_tracing();
3476
3477         return len;
3478 }
3479
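     /* Write a formatted message into @tr's buffer; va_list helper used by trace_array_printk(). */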
3480 __printf(3, 0)
3481 int trace_array_vprintk(struct trace_array *tr,
3482                         unsigned long ip, const char *fmt, va_list args)
3483 {
3484         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3485 }
3486
3487 /**
3488  * trace_array_printk - Print a message to a specific instance
3489  * @tr: The instance trace_array descriptor
3490  * @ip: The instruction pointer that this is called from.
3491  * @fmt: The format to print (printf format)
3492  *
3493  * If a subsystem sets up its own instance, it may printk strings
3494  * into its tracing instance buffer using this function. Note, this
3495  * function will not write into the top level buffer (use
3496  * trace_printk() for that), as the top level buffer should only
3497  * contain events that can be individually disabled.
3498  * trace_printk() is only meant for debugging a kernel, and should
3499  * never be incorporated into normal use.
3500  *
3501  * trace_array_printk() can be used instead, as it will not add noise
3502  * to the top level tracing buffer.
3503  *
3504  * Note, trace_array_init_printk() must be called on @tr before this
3505  * can be used.
3506  */
3507 __printf(3, 0)
3508 int trace_array_printk(struct trace_array *tr,
3509                        unsigned long ip, const char *fmt, ...)
3510 {
3511         int ret;
3512         va_list ap;
3513
3514         if (!tr)
3515                 return -ENOENT;
3516
3517         /* This is only allowed for created instances */
3518         if (tr == &global_trace)
3519                 return 0;
3520
3521         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3522                 return 0;
3523
3524         va_start(ap, fmt);
3525         ret = trace_array_vprintk(tr, ip, fmt, ap);
3526         va_end(ap);
3527         return ret;
3528 }
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3530
3531 /**
3532  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533  * @tr: The trace array to initialize the buffers for
3534  *
3535  * As trace_array_printk() only writes into instances, it is OK to
3536  * have it in the kernel (unlike trace_printk()). This needs to be called
3537  * before trace_array_printk() can be used on a trace_array.
3538  */
3539 int trace_array_init_printk(struct trace_array *tr)
3540 {
3541         if (!tr)
3542                 return -ENOENT;
3543
3544         /* This is only allowed for created instances */
3545         if (tr == &global_trace)
3546                 return -EINVAL;
3547
3548         return alloc_percpu_trace_buffer();
3549 }
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3551
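     /*
      * Like trace_array_printk(), but writes into the given ring buffer
      * and is gated by the top-level instance's PRINTK flag.
      */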
3552 __printf(3, 4)
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554                            unsigned long ip, const char *fmt, ...)
3555 {
3556         int ret;
3557         va_list ap;
3558
3559         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3560                 return 0;
3561
3562         va_start(ap, fmt);
3563         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3564         va_end(ap);
3565         return ret;
3566 }
3567
3568 __printf(2, 0)
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3570 {
3571         return trace_array_vprintk(&global_trace, ip, fmt, args);
3572 }
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3574
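     /* Advance the iterator's index and its per-CPU buffer iterator, if any. */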
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3576 {
3577         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3578
3579         iter->idx++;
3580         if (buf_iter)
3581                 ring_buffer_iter_advance(buf_iter);
3582 }
3583
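     /*
      * Peek at the next entry for @cpu without consuming it, filling in
      * its timestamp and (if requested) the number of lost events.
      */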
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586                 unsigned long *lost_events)
3587 {
3588         struct ring_buffer_event *event;
3589         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3590
3591         if (buf_iter) {
3592                 event = ring_buffer_iter_peek(buf_iter, ts);
3593                 if (lost_events)
3594                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595                                 (unsigned long)-1 : 0;
3596         } else {
3597                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3598                                          lost_events);
3599         }
3600
3601         if (event) {
3602                 iter->ent_size = ring_buffer_event_length(event);
3603                 return ring_buffer_event_data(event);
3604         }
3605         iter->ent_size = 0;
3606         return NULL;
3607 }
3608
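     /*
      * Find the pending entry with the oldest timestamp, either on the
      * single CPU selected by iter->cpu_file or across all traced CPUs.
      */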
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611                   unsigned long *missing_events, u64 *ent_ts)
3612 {
3613         struct trace_buffer *buffer = iter->array_buffer->buffer;
3614         struct trace_entry *ent, *next = NULL;
3615         unsigned long lost_events = 0, next_lost = 0;
3616         int cpu_file = iter->cpu_file;
3617         u64 next_ts = 0, ts;
3618         int next_cpu = -1;
3619         int next_size = 0;
3620         int cpu;
3621
3622         /*
3623          * If we are in a per_cpu trace file, don't bother iterating over
3624          * all CPUs; peek directly at that one.
3625          */
3626         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3628                         return NULL;
3629                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3630                 if (ent_cpu)
3631                         *ent_cpu = cpu_file;
3632
3633                 return ent;
3634         }
3635
3636         for_each_tracing_cpu(cpu) {
3637
3638                 if (ring_buffer_empty_cpu(buffer, cpu))
3639                         continue;
3640
3641                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3642
3643                 /*
3644                  * Pick the entry with the smallest timestamp:
3645                  */
3646                 if (ent && (!next || ts < next_ts)) {
3647                         next = ent;
3648                         next_cpu = cpu;
3649                         next_ts = ts;
3650                         next_lost = lost_events;
3651                         next_size = iter->ent_size;
3652                 }
3653         }
3654
3655         iter->ent_size = next_size;
3656
3657         if (ent_cpu)
3658                 *ent_cpu = next_cpu;
3659
3660         if (ent_ts)
3661                 *ent_ts = next_ts;
3662
3663         if (missing_events)
3664                 *missing_events = next_lost;
3665
3666         return next;
3667 }
3668
3669 #define STATIC_FMT_BUF_SIZE     128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3671
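     /* Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes; returns NULL if it cannot. */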
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3673 {
3674         char *tmp;
3675
3676         /*
3677          * iter->tr is NULL when used with tp_printk, in which case this
3678          * can be called from a context where krealloc() is not safe.
3679          */
3680         if (!iter->tr || iter->fmt == static_fmt_buf)
3681                 return NULL;
3682
3683         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3684                        GFP_KERNEL);
3685         if (tmp) {
3686                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3687                 iter->fmt = tmp;
3688         }
3689
3690         return tmp;
3691 }
3692
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3695                            bool star, int len)
3696 {
3697         unsigned long addr = (unsigned long)str;
3698         struct trace_event *trace_event;
3699         struct trace_event_call *event;
3700
3701         /* Ignore strings with no length */
3702         if (star && !len)
3703                 return true;
3704
3705         /* OK if part of the event data */
3706         if ((addr >= (unsigned long)iter->ent) &&
3707             (addr < (unsigned long)iter->ent + iter->ent_size))
3708                 return true;
3709
3710         /* OK if part of the temp seq buffer */
3711         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3713                 return true;
3714
3715         /* Core rodata can not be freed */
3716         if (is_kernel_rodata(addr))
3717                 return true;
3718
3719         if (trace_is_tracepoint_string(str))
3720                 return true;
3721
3722         /*
3723          * Now this could be a module event, referencing core module
3724          * data, which is OK.
3725          */
3726         if (!iter->ent)
3727                 return false;
3728
3729         trace_event = ftrace_find_event(iter->ent->type);
3730         if (!trace_event)
3731                 return false;
3732
3733         event = container_of(trace_event, struct trace_event_call, event);
3734         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3735                 return false;
3736
3737         /* Would rather have rodata, but this will suffice */
3738         if (within_module_core(addr, event->module))
3739                 return true;
3740
3741         return false;
3742 }
3743
3744 static const char *show_buffer(struct trace_seq *s)
3745 {
3746         struct seq_buf *seq = &s->seq;
3747
3748         seq_buf_terminate(seq);
3749
3750         return seq->buffer;
3751 }
3752
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3754
3755 static int test_can_verify_check(const char *fmt, ...)
3756 {
3757         char buf[16];
3758         va_list ap;
3759         int ret;
3760
3761         /*
3762          * The verifier depends on vsnprintf() modifying the va_list
3763          * passed to it, i.e. on the list being passed by reference. Some
3764          * architectures (like x86_32) pass it by value, which means that
3765          * vsnprintf() does not modify the caller's va_list, and the
3766          * verifier would then need to understand every value that
3767          * vsnprintf() can consume. If it is passed by value, the verifier
3768          * is disabled.
3769          */
3770         va_start(ap, fmt);
3771         vsnprintf(buf, 16, "%d", ap);
3772         ret = va_arg(ap, int);
3773         va_end(ap);
3774
3775         return ret;
3776 }
3777
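     /*
      * Disable the string verifier if this architecture passes va_list by
      * value, as the caller's list then cannot be kept in sync.
      */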
3778 static void test_can_verify(void)
3779 {
3780         if (!test_can_verify_check("%d %d", 0, 1)) {
3781                 pr_info("trace event string verifier disabled\n");
3782                 static_branch_inc(&trace_no_verify);
3783         }
3784 }
3785
3786 /**
3787  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3788  * @iter: The iterator that holds the seq buffer and the event being printed
3789  * @fmt: The format used to print the event
3790  * @ap: The va_list holding the data to print from @fmt.
3791  *
3792  * This writes the data into the @iter->seq buffer using the data from
3793  * @fmt and @ap. If the format has a %s, then the source of the string
3794  * is examined to make sure it is safe to print, otherwise it will
3795  * is examined to make sure it is safe to print; otherwise it will
3796  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3797  */
3798 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3799                          va_list ap)
3800 {
3801         const char *p = fmt;
3802         const char *str;
3803         int i, j;
3804
3805         if (WARN_ON_ONCE(!fmt))
3806                 return;
3807
3808         if (static_branch_unlikely(&trace_no_verify))
3809                 goto print;
3810
3811         /* Don't bother checking when doing an ftrace_dump() */
3812         if (iter->fmt == static_fmt_buf)
3813                 goto print;
3814
3815         while (*p) {
3816                 bool star = false;
3817                 int len = 0;
3818
3819                 j = 0;
3820
3821                 /* We only care about %s and variants */
3822                 for (i = 0; p[i]; i++) {
3823                         if (i + 1 >= iter->fmt_size) {
3824                                 /*
3825                                  * If we can't expand the copy buffer,
3826                                  * just print it.
3827                                  */
3828                                 if (!trace_iter_expand_format(iter))
3829                                         goto print;
3830                         }
3831
3832                         if (p[i] == '\\' && p[i+1]) {
3833                                 i++;
3834                                 continue;
3835                         }
3836                         if (p[i] == '%') {
3837                                 /* Need to test cases like %08.*s */
3838                                 for (j = 1; p[i+j]; j++) {
3839                                         if (isdigit(p[i+j]) ||
3840                                             p[i+j] == '.')
3841                                                 continue;
3842                                         if (p[i+j] == '*') {
3843                                                 star = true;
3844                                                 continue;
3845                                         }
3846                                         break;
3847                                 }
3848                                 if (p[i+j] == 's')
3849                                         break;
3850                                 star = false;
3851                         }
3852                         j = 0;
3853                 }
3854                 /* If no %s found then just print normally */
3855                 if (!p[i])
3856                         break;
3857
3858                 /* Copy up to the %s, and print that */
3859                 strncpy(iter->fmt, p, i);
3860                 iter->fmt[i] = '\0';
3861                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3862
3863                 /*
3864                  * If iter->seq is full, the above call no longer guarantees
3865                  * that ap is in sync with fmt processing, and further calls
3866                  * to va_arg() can return wrong positional arguments.
3867                  *
3868                  * Ensure that ap is no longer used in this case.
3869                  */
3870                 if (iter->seq.full) {
3871                         p = "";
3872                         break;
3873                 }
3874
3875                 if (star)
3876                         len = va_arg(ap, int);
3877
3878                 /* The ap now points to the string data of the %s */
3879                 str = va_arg(ap, const char *);
3880
3881                 /*
3882                  * If you hit this warning, it is likely that the
3883                  * trace event in question used %s on a string that
3884                  * was saved at the time of the event, but may not be
3885                  * around when the trace is read. Use __string(),
3886                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3887                  * instead. See samples/trace_events/trace-events-sample.h
3888                  * for reference.
3889                  */
3890                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3891                               "fmt: '%s' current_buffer: '%s'",
3892                               fmt, show_buffer(&iter->seq))) {
3893                         int ret;
3894
3895                         /* Try to safely read the string */
3896                         if (star) {
3897                                 if (len + 1 > iter->fmt_size)
3898                                         len = iter->fmt_size - 1;
3899                                 if (len < 0)
3900                                         len = 0;
3901                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3902                                 iter->fmt[len] = 0;
3903                                 star = false;
3904                         } else {
3905                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3906                                                                   iter->fmt_size);
3907                         }
3908                         if (ret < 0)
3909                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3910                         else
3911                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3912                                                  str, iter->fmt);
3913                         str = "[UNSAFE-MEMORY]";
3914                         strcpy(iter->fmt, "%s");
3915                 } else {
3916                         strncpy(iter->fmt, p + i, j + 1);
3917                         iter->fmt[j+1] = '\0';
3918                 }
3919                 if (star)
3920                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3921                 else
3922                         trace_seq_printf(&iter->seq, iter->fmt, str);
3923
3924                 p += i + j + 1;
3925         }
3926  print:
3927         if (*p)
3928                 trace_seq_vprintf(&iter->seq, p, ap);
3929 }
3930
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3932 {
3933         const char *p, *new_fmt;
3934         char *q;
3935
3936         if (WARN_ON_ONCE(!fmt))
3937                 return fmt;
3938
3939         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3940                 return fmt;
3941
3942         p = fmt;
3943         new_fmt = q = iter->fmt;
3944         while (*p) {
3945                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946                         if (!trace_iter_expand_format(iter))
3947                                 return fmt;
3948
3949                         q += iter->fmt - new_fmt;
3950                         new_fmt = iter->fmt;
3951                 }
3952
3953                 *q++ = *p++;
3954
3955                 /* Replace %p with %px */
3956                 if (p[-1] == '%') {
3957                         if (p[0] == '%') {
3958                                 *q++ = *p++;
3959                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3960                                 *q++ = *p++;
3961                                 *q++ = 'x';
3962                         }
3963                 }
3964         }
3965         *q = '\0';
3966
3967         return new_fmt;
3968 }
3969
3970 #define STATIC_TEMP_BUF_SIZE    128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3972
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975                                           int *ent_cpu, u64 *ent_ts)
3976 {
3977         /* __find_next_entry will reset ent_size */
3978         int ent_size = iter->ent_size;
3979         struct trace_entry *entry;
3980
3981         /*
3982          * If called from ftrace_dump(), then the iter->temp buffer
3983          * will be the static_temp_buf and not created from kmalloc.
3984          * If the entry size is greater than the buffer, we cannot
3985          * save it; just return NULL in that case. This is only
3986          * used to add markers when two consecutive events' time
3987          * stamps have a large delta. See trace_print_lat_context().
3988          */
3989         if (iter->temp == static_temp_buf &&
3990             STATIC_TEMP_BUF_SIZE < ent_size)
3991                 return NULL;
3992
3993         /*
3994          * The __find_next_entry() may call peek_next_entry(), which may
3995          * call ring_buffer_peek() that may make the contents of iter->ent
3996          * undefined. Need to copy iter->ent now.
3997          */
3998         if (iter->ent && iter->ent != iter->temp) {
3999                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4001                         void *temp;
4002                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4003                         if (!temp)
4004                                 return NULL;
4005                         kfree(iter->temp);
4006                         iter->temp = temp;
4007                         iter->temp_size = iter->ent_size;
4008                 }
4009                 memcpy(iter->temp, iter->ent, iter->ent_size);
4010                 iter->ent = iter->temp;
4011         }
4012         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013         /* Put back the original ent_size */
4014         iter->ent_size = ent_size;
4015
4016         return entry;
4017 }
4018
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4021 {
4022         iter->ent = __find_next_entry(iter, &iter->cpu,
4023                                       &iter->lost_events, &iter->ts);
4024
4025         if (iter->ent)
4026                 trace_iterator_increment(iter);
4027
4028         return iter->ent ? iter : NULL;
4029 }
4030
4031 static void trace_consume(struct trace_iterator *iter)
4032 {
4033         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034                             &iter->lost_events);
4035 }
4036
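     /* seq_file ->next(): advance the trace iterator to position *pos. */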
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4038 {
4039         struct trace_iterator *iter = m->private;
4040         int i = (int)*pos;
4041         void *ent;
4042
4043         WARN_ON_ONCE(iter->leftover);
4044
4045         (*pos)++;
4046
4047         /* can't go backwards */
4048         if (iter->idx > i)
4049                 return NULL;
4050
4051         if (iter->idx < 0)
4052                 ent = trace_find_next_entry_inc(iter);
4053         else
4054                 ent = iter;
4055
4056         while (ent && iter->idx < i)
4057                 ent = trace_find_next_entry_inc(iter);
4058
4059         iter->pos = *pos;
4060
4061         return ent;
4062 }
4063
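     /*
      * Reset the ring buffer iterator for @cpu and skip over any entries
      * recorded before the buffer's time_start, counting them as skipped.
      */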
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4065 {
4066         struct ring_buffer_iter *buf_iter;
4067         unsigned long entries = 0;
4068         u64 ts;
4069
4070         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4071
4072         buf_iter = trace_buffer_iter(iter, cpu);
4073         if (!buf_iter)
4074                 return;
4075
4076         ring_buffer_iter_reset(buf_iter);
4077
4078         /*
4079          * With the max latency tracers, a reset may never have taken
4080          * place on a CPU. This is evident when a timestamp is before
4081          * the start of the buffer.
4082          */
4083         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084                 if (ts >= iter->array_buffer->time_start)
4085                         break;
4086                 entries++;
4087                 ring_buffer_iter_advance(buf_iter);
4088         }
4089
4090         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4091 }
4092
4093 /*
4094  * The current tracer is copied to avoid holding a global lock
4095  * all around.
4096  */
4097 static void *s_start(struct seq_file *m, loff_t *pos)
4098 {
4099         struct trace_iterator *iter = m->private;
4100         struct trace_array *tr = iter->tr;
4101         int cpu_file = iter->cpu_file;
4102         void *p = NULL;
4103         loff_t l = 0;
4104         int cpu;
4105
4106         /*
4107          * Copy the tracer to avoid using a global lock all around.
4108          * iter->trace is a copy of current_trace; the name pointer may
4109          * be compared instead of using strcmp(), as iter->trace->name
4110          * will point to the same string as current_trace->name.
4111          */
4112         mutex_lock(&trace_types_lock);
4113         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4114                 *iter->trace = *tr->current_trace;
4115         mutex_unlock(&trace_types_lock);
4116
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118         if (iter->snapshot && iter->trace->use_max_tr)
4119                 return ERR_PTR(-EBUSY);
4120 #endif
4121
4122         if (*pos != iter->pos) {
4123                 iter->ent = NULL;
4124                 iter->cpu = 0;
4125                 iter->idx = -1;
4126
4127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4128                         for_each_tracing_cpu(cpu)
4129                                 tracing_iter_reset(iter, cpu);
4130                 } else
4131                         tracing_iter_reset(iter, cpu_file);
4132
4133                 iter->leftover = 0;
4134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4135                         ;
4136
4137         } else {
4138                 /*
4139                  * If we overflowed the seq_file before, then we want
4140                  * to just reuse the trace_seq buffer again.
4141                  */
4142                 if (iter->leftover)
4143                         p = iter;
4144                 else {
4145                         l = *pos - 1;
4146                         p = s_next(m, p, &l);
4147                 }
4148         }
4149
4150         trace_event_read_lock();
4151         trace_access_lock(cpu_file);
4152         return p;
4153 }
4154
4155 static void s_stop(struct seq_file *m, void *p)
4156 {
4157         struct trace_iterator *iter = m->private;
4158
4159 #ifdef CONFIG_TRACER_MAX_TRACE
4160         if (iter->snapshot && iter->trace->use_max_tr)
4161                 return;
4162 #endif
4163
4164         trace_access_unlock(iter->cpu_file);
4165         trace_event_read_unlock();
4166 }
4167
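     /*
      * Count the entries in @cpu's buffer, excluding any skipped at reset;
      * *total also accounts for overrun (overwritten) entries.
      */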
4168 static void
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170                       unsigned long *entries, int cpu)
4171 {
4172         unsigned long count;
4173
4174         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4175         /*
4176          * If this buffer has skipped entries, then we hold all
4177          * entries for the trace and we need to ignore the
4178          * ones before the time stamp.
4179          */
4180         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182                 /* total is the same as the entries */
4183                 *total = count;
4184         } else
4185                 *total = count +
4186                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4187         *entries = count;
4188 }
4189
4190 static void
4191 get_total_entries(struct array_buffer *buf,
4192                   unsigned long *total, unsigned long *entries)
4193 {
4194         unsigned long t, e;
4195         int cpu;
4196
4197         *total = 0;
4198         *entries = 0;
4199
4200         for_each_tracing_cpu(cpu) {
4201                 get_total_entries_cpu(buf, &t, &e, cpu);
4202                 *total += t;
4203                 *entries += e;
4204         }
4205 }
4206
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4208 {
4209         unsigned long total, entries;
4210
4211         if (!tr)
4212                 tr = &global_trace;
4213
4214         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4215
4216         return entries;
4217 }
4218
4219 unsigned long trace_total_entries(struct trace_array *tr)
4220 {
4221         unsigned long total, entries;
4222
4223         if (!tr)
4224                 tr = &global_trace;
4225
4226         get_total_entries(&tr->array_buffer, &total, &entries);
4227
4228         return entries;
4229 }
4230
4231 static void print_lat_help_header(struct seq_file *m)
4232 {
4233         seq_puts(m, "#                    _------=> CPU#            \n"
4234                     "#                   / _-----=> irqs-off/BH-disabled\n"
4235                     "#                  | / _----=> need-resched    \n"
4236                     "#                  || / _---=> hardirq/softirq \n"
4237                     "#                  ||| / _--=> preempt-depth   \n"
4238                     "#                  |||| / _-=> migrate-disable \n"
4239                     "#                  ||||| /     delay           \n"
4240                     "#  cmd     pid     |||||| time  |   caller     \n"
4241                     "#     \\   /        ||||||  \\    |    /       \n");
4242 }
4243
4244 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4245 {
4246         unsigned long total;
4247         unsigned long entries;
4248
4249         get_total_entries(buf, &total, &entries);
4250         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4251                    entries, total, num_online_cpus());
4252         seq_puts(m, "#\n");
4253 }
4254
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4256                                    unsigned int flags)
4257 {
4258         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4259
4260         print_event_info(buf, m);
4261
4262         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4263         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4264 }
4265
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4267                                        unsigned int flags)
4268 {
4269         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270         static const char space[] = "            ";
4271         int prec = tgid ? 12 : 2;
4272
4273         print_event_info(buf, m);
4274
4275         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4276         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4277         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4278         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4279         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4280         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4281         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4282         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4283 }
4284
4285 void
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4287 {
4288         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289         struct array_buffer *buf = iter->array_buffer;
4290         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291         struct tracer *type = iter->trace;
4292         unsigned long entries;
4293         unsigned long total;
4294         const char *name = type->name;
4295
4296         get_total_entries(buf, &total, &entries);
4297
4298         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4299                    name, UTS_RELEASE);
4300         seq_puts(m, "# -----------------------------------"
4301                  "---------------------------------\n");
4302         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304                    nsecs_to_usecs(data->saved_latency),
4305                    entries,
4306                    total,
4307                    buf->cpu,
4308                    preempt_model_none()      ? "server" :
4309                    preempt_model_voluntary() ? "desktop" :
4310                    preempt_model_full()      ? "preempt" :
4311                    preempt_model_rt()        ? "preempt_rt" :
4312                    "unknown",
4313                    /* These are reserved for later use */
4314                    0, 0, 0, 0);
4315 #ifdef CONFIG_SMP
4316         seq_printf(m, " #P:%d)\n", num_online_cpus());
4317 #else
4318         seq_puts(m, ")\n");
4319 #endif
4320         seq_puts(m, "#    -----------------\n");
4321         seq_printf(m, "#    | task: %.16s-%d "
4322                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323                    data->comm, data->pid,
4324                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325                    data->policy, data->rt_priority);
4326         seq_puts(m, "#    -----------------\n");
4327
4328         if (data->critical_start) {
4329                 seq_puts(m, "#  => started at: ");
4330                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331                 trace_print_seq(m, &iter->seq);
4332                 seq_puts(m, "\n#  => ended at:   ");
4333                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334                 trace_print_seq(m, &iter->seq);
4335                 seq_puts(m, "\n#\n");
4336         }
4337
4338         seq_puts(m, "#\n");
4339 }
4340
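     /*
      * When the annotate option is set, note in the output the first time
      * entries from a given CPU buffer show up in the trace.
      */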
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4342 {
4343         struct trace_seq *s = &iter->seq;
4344         struct trace_array *tr = iter->tr;
4345
4346         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4347                 return;
4348
4349         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4350                 return;
4351
4352         if (cpumask_available(iter->started) &&
4353             cpumask_test_cpu(iter->cpu, iter->started))
4354                 return;
4355
4356         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4357                 return;
4358
4359         if (cpumask_available(iter->started))
4360                 cpumask_set_cpu(iter->cpu, iter->started);
4361
4362         /* Don't print started cpu buffer for the first entry of the trace */
4363         if (iter->idx > 1)
4364                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4365                                 iter->cpu);
4366 }
4367
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4369 {
4370         struct trace_array *tr = iter->tr;
4371         struct trace_seq *s = &iter->seq;
4372         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373         struct trace_entry *entry;
4374         struct trace_event *event;
4375
4376         entry = iter->ent;
4377
4378         test_cpu_buff_start(iter);
4379
4380         event = ftrace_find_event(entry->type);
4381
4382         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384                         trace_print_lat_context(iter);
4385                 else
4386                         trace_print_context(iter);
4387         }
4388
4389         if (trace_seq_has_overflowed(s))
4390                 return TRACE_TYPE_PARTIAL_LINE;
4391
4392         if (event)
4393                 return event->funcs->trace(iter, sym_flags, event);
4394
4395         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4396
4397         return trace_handle_return(s);
4398 }
4399
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4401 {
4402         struct trace_array *tr = iter->tr;
4403         struct trace_seq *s = &iter->seq;
4404         struct trace_entry *entry;
4405         struct trace_event *event;
4406
4407         entry = iter->ent;
4408
4409         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410                 trace_seq_printf(s, "%d %d %llu ",
4411                                  entry->pid, iter->cpu, iter->ts);
4412
4413         if (trace_seq_has_overflowed(s))
4414                 return TRACE_TYPE_PARTIAL_LINE;
4415
4416         event = ftrace_find_event(entry->type);
4417         if (event)
4418                 return event->funcs->raw(iter, 0, event);
4419
4420         trace_seq_printf(s, "%d ?\n", entry->type);
4421
4422         return trace_handle_return(s);
4423 }
4424
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4426 {
4427         struct trace_array *tr = iter->tr;
4428         struct trace_seq *s = &iter->seq;
4429         unsigned char newline = '\n';
4430         struct trace_entry *entry;
4431         struct trace_event *event;
4432
4433         entry = iter->ent;
4434
4435         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4437                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4439                 if (trace_seq_has_overflowed(s))
4440                         return TRACE_TYPE_PARTIAL_LINE;
4441         }
4442
4443         event = ftrace_find_event(entry->type);
4444         if (event) {
4445                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446                 if (ret != TRACE_TYPE_HANDLED)
4447                         return ret;
4448         }
4449
4450         SEQ_PUT_FIELD(s, newline);
4451
4452         return trace_handle_return(s);
4453 }
4454
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4456 {
4457         struct trace_array *tr = iter->tr;
4458         struct trace_seq *s = &iter->seq;
4459         struct trace_entry *entry;
4460         struct trace_event *event;
4461
4462         entry = iter->ent;
4463
4464         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465                 SEQ_PUT_FIELD(s, entry->pid);
4466                 SEQ_PUT_FIELD(s, iter->cpu);
4467                 SEQ_PUT_FIELD(s, iter->ts);
4468                 if (trace_seq_has_overflowed(s))
4469                         return TRACE_TYPE_PARTIAL_LINE;
4470         }
4471
4472         event = ftrace_find_event(entry->type);
4473         return event ? event->funcs->binary(iter, 0, event) :
4474                 TRACE_TYPE_HANDLED;
4475 }
4476
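     /* Return 1 if there are no pending entries to read, 0 otherwise. */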
4477 int trace_empty(struct trace_iterator *iter)
4478 {
4479         struct ring_buffer_iter *buf_iter;
4480         int cpu;
4481
4482         /* If we are looking at one CPU buffer, only check that one */
4483         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484                 cpu = iter->cpu_file;
4485                 buf_iter = trace_buffer_iter(iter, cpu);
4486                 if (buf_iter) {
4487                         if (!ring_buffer_iter_empty(buf_iter))
4488                                 return 0;
4489                 } else {
4490                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491                                 return 0;
4492                 }
4493                 return 1;
4494         }
4495
4496         for_each_tracing_cpu(cpu) {
4497                 buf_iter = trace_buffer_iter(iter, cpu);
4498                 if (buf_iter) {
4499                         if (!ring_buffer_iter_empty(buf_iter))
4500                                 return 0;
4501                 } else {
4502                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4503                                 return 0;
4504                 }
4505         }
4506
4507         return 1;
4508 }
4509
4510 /*  Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4512 {
4513         struct trace_array *tr = iter->tr;
4514         unsigned long trace_flags = tr->trace_flags;
4515         enum print_line_t ret;
4516
4517         if (iter->lost_events) {
4518                 if (iter->lost_events == (unsigned long)-1)
4519                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4520                                          iter->cpu);
4521                 else
4522                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523                                          iter->cpu, iter->lost_events);
4524                 if (trace_seq_has_overflowed(&iter->seq))
4525                         return TRACE_TYPE_PARTIAL_LINE;
4526         }
4527
4528         if (iter->trace && iter->trace->print_line) {
4529                 ret = iter->trace->print_line(iter);
4530                 if (ret != TRACE_TYPE_UNHANDLED)
4531                         return ret;
4532         }
4533
4534         if (iter->ent->type == TRACE_BPUTS &&
4535                         trace_flags & TRACE_ITER_PRINTK &&
4536                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537                 return trace_print_bputs_msg_only(iter);
4538
4539         if (iter->ent->type == TRACE_BPRINT &&
4540                         trace_flags & TRACE_ITER_PRINTK &&
4541                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542                 return trace_print_bprintk_msg_only(iter);
4543
4544         if (iter->ent->type == TRACE_PRINT &&
4545                         trace_flags & TRACE_ITER_PRINTK &&
4546                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547                 return trace_print_printk_msg_only(iter);
4548
4549         if (trace_flags & TRACE_ITER_BIN)
4550                 return print_bin_fmt(iter);
4551
4552         if (trace_flags & TRACE_ITER_HEX)
4553                 return print_hex_fmt(iter);
4554
4555         if (trace_flags & TRACE_ITER_RAW)
4556                 return print_raw_fmt(iter);
4557
4558         return print_trace_fmt(iter);
4559 }
4560
4561 void trace_latency_header(struct seq_file *m)
4562 {
4563         struct trace_iterator *iter = m->private;
4564         struct trace_array *tr = iter->tr;
4565
4566         /* print nothing if the buffers are empty */
4567         if (trace_empty(iter))
4568                 return;
4569
4570         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571                 print_trace_header(m, iter);
4572
4573         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574                 print_lat_help_header(m);
4575 }
4576
4577 void trace_default_header(struct seq_file *m)
4578 {
4579         struct trace_iterator *iter = m->private;
4580         struct trace_array *tr = iter->tr;
4581         unsigned long trace_flags = tr->trace_flags;
4582
4583         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4584                 return;
4585
4586         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587                 /* print nothing if the buffers are empty */
4588                 if (trace_empty(iter))
4589                         return;
4590                 print_trace_header(m, iter);
4591                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4592                         print_lat_help_header(m);
4593         } else {
4594                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4596                                 print_func_help_header_irq(iter->array_buffer,
4597                                                            m, trace_flags);
4598                         else
4599                                 print_func_help_header(iter->array_buffer, m,
4600                                                        trace_flags);
4601                 }
4602         }
4603 }
4604
4605 static void test_ftrace_alive(struct seq_file *m)
4606 {
4607         if (!ftrace_is_dead())
4608                 return;
4609         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4610                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4611 }
4612
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 static void show_snapshot_main_help(struct seq_file *m)
4615 {
4616         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4617                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4618                     "#                      Takes a snapshot of the main buffer.\n"
4619                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4620                     "#                      (Doesn't have to be '2'; works with any number that\n"
4621                     "#                       is not a '0' or '1')\n");
4622 }
4623
4624 static void show_snapshot_percpu_help(struct seq_file *m)
4625 {
4626         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4627 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4628         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4629                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4630 #else
4631         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4632                     "#                     Must use main snapshot file to allocate.\n");
4633 #endif
4634         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4635                     "#                      (Doesn't have to be '2'; works with any number that\n"
4636                     "#                       is not a '0' or '1')\n");
4637 }
4638
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4640 {
4641         if (iter->tr->allocated_snapshot)
4642                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4643         else
4644                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4645
4646         seq_puts(m, "# Snapshot commands:\n");
4647         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648                 show_snapshot_main_help(m);
4649         else
4650                 show_snapshot_percpu_help(m);
4651 }
4652 #else
4653 /* Should never be called */
4654 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4655 #endif
4656
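     /*
      * seq_file ->show(): print the headers on the first call, then one
      * formatted trace line per entry, carrying over any seq overflow.
      */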
4657 static int s_show(struct seq_file *m, void *v)
4658 {
4659         struct trace_iterator *iter = v;
4660         int ret;
4661
4662         if (iter->ent == NULL) {
4663                 if (iter->tr) {
4664                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4665                         seq_puts(m, "#\n");
4666                         test_ftrace_alive(m);
4667                 }
4668                 if (iter->snapshot && trace_empty(iter))
4669                         print_snapshot_help(m, iter);
4670                 else if (iter->trace && iter->trace->print_header)
4671                         iter->trace->print_header(m);
4672                 else
4673                         trace_default_header(m);
4674
4675         } else if (iter->leftover) {
4676                 /*
4677                  * If we filled the seq_file buffer earlier, we
4678                  * want to just show it now.
4679                  */
4680                 ret = trace_print_seq(m, &iter->seq);
4681
4682                 /* ret should this time be zero, but you never know */
4683                 iter->leftover = ret;
4684
4685         } else {
4686                 print_trace_line(iter);
4687                 ret = trace_print_seq(m, &iter->seq);
4688                 /*
4689                  * If we overflow the seq_file buffer, then it will
4690                  * ask us for this data again at start up.
4691                  * Use that instead.
4692                  *  ret is 0 if seq_file write succeeded.
4693                  *        -1 otherwise.
4694                  */
4695                 iter->leftover = ret;
4696         }
4697
4698         return 0;
4699 }
4700
4701 /*
4702  * Should be used after trace_array_get(); trace_types_lock
4703  * ensures that i_cdev was already initialized.
4704  */
4705 static inline int tracing_get_cpu(struct inode *inode)
4706 {
4707         if (inode->i_cdev) /* See trace_create_cpu_file() */
4708                 return (long)inode->i_cdev - 1;
4709         return RING_BUFFER_ALL_CPUS;
4710 }
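
/*
 * Illustrative note (editor's addition): trace_create_cpu_file() stores
 * "cpu + 1" in inode->i_cdev when the per_cpu/cpuN/ files are created,
 * so 0 can stand for "no CPU recorded".  A file such as
 * per_cpu/cpu2/trace therefore carries the value 3 and is decoded back
 * to CPU 2 here, while the top-level files leave i_cdev NULL and map to
 * RING_BUFFER_ALL_CPUS.
 */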
4711
4712 static const struct seq_operations tracer_seq_ops = {
4713         .start          = s_start,
4714         .next           = s_next,
4715         .stop           = s_stop,
4716         .show           = s_show,
4717 };
4718
4719 static struct trace_iterator *
4720 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4721 {
4722         struct trace_array *tr = inode->i_private;
4723         struct trace_iterator *iter;
4724         int cpu;
4725
4726         if (tracing_disabled)
4727                 return ERR_PTR(-ENODEV);
4728
4729         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4730         if (!iter)
4731                 return ERR_PTR(-ENOMEM);
4732
4733         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4734                                     GFP_KERNEL);
4735         if (!iter->buffer_iter)
4736                 goto release;
4737
4738         /*
4739          * trace_find_next_entry() may need to save off iter->ent.
4740          * It will place it into the iter->temp buffer. As most
4741          * events are smaller than 128 bytes, allocate a buffer of that size.
4742          * If one is greater, then trace_find_next_entry() will
4743          * allocate a new buffer to adjust for the bigger iter->ent.
4744          * It's not critical if it fails to get allocated here.
4745          */
4746         iter->temp = kmalloc(128, GFP_KERNEL);
4747         if (iter->temp)
4748                 iter->temp_size = 128;
4749
4750         /*
4751          * trace_event_printf() may need to modify given format
4752          * string to replace %p with %px so that it shows real address
4753          * instead of a hash value. However, that is only needed for
4754          * event tracing; other tracers may not need it. Defer the
4755          * allocation until it is needed.
4756          */
4757         iter->fmt = NULL;
4758         iter->fmt_size = 0;
4759
4760         /*
4761          * We make a copy of the current tracer to avoid concurrent
4762          * changes on it while we are reading.
4763          */
4764         mutex_lock(&trace_types_lock);
4765         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4766         if (!iter->trace)
4767                 goto fail;
4768
4769         *iter->trace = *tr->current_trace;
4770
4771         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4772                 goto fail;
4773
4774         iter->tr = tr;
4775
4776 #ifdef CONFIG_TRACER_MAX_TRACE
4777         /* Currently only the top directory has a snapshot */
4778         if (tr->current_trace->print_max || snapshot)
4779                 iter->array_buffer = &tr->max_buffer;
4780         else
4781 #endif
4782                 iter->array_buffer = &tr->array_buffer;
4783         iter->snapshot = snapshot;
4784         iter->pos = -1;
4785         iter->cpu_file = tracing_get_cpu(inode);
4786         mutex_init(&iter->mutex);
4787
4788         /* Notify the tracer early; before we stop tracing. */
4789         if (iter->trace->open)
4790                 iter->trace->open(iter);
4791
4792         /* Annotate start of buffers if we had overruns */
4793         if (ring_buffer_overruns(iter->array_buffer->buffer))
4794                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4795
4796         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4797         if (trace_clocks[tr->clock_id].in_ns)
4798                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4799
4800         /*
4801          * If pause-on-trace is enabled, then stop the trace while
4802          * dumping, unless this is the "snapshot" file
4803          */
4804         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4805                 tracing_stop_tr(tr);
4806
4807         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4808                 for_each_tracing_cpu(cpu) {
4809                         iter->buffer_iter[cpu] =
4810                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4811                                                          cpu, GFP_KERNEL);
4812                 }
4813                 ring_buffer_read_prepare_sync();
4814                 for_each_tracing_cpu(cpu) {
4815                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4816                         tracing_iter_reset(iter, cpu);
4817                 }
4818         } else {
4819                 cpu = iter->cpu_file;
4820                 iter->buffer_iter[cpu] =
4821                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4822                                                  cpu, GFP_KERNEL);
4823                 ring_buffer_read_prepare_sync();
4824                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                 tracing_iter_reset(iter, cpu);
4826         }
4827
4828         mutex_unlock(&trace_types_lock);
4829
4830         return iter;
4831
4832  fail:
4833         mutex_unlock(&trace_types_lock);
4834         kfree(iter->trace);
4835         kfree(iter->temp);
4836         kfree(iter->buffer_iter);
4837 release:
4838         seq_release_private(inode, file);
4839         return ERR_PTR(-ENOMEM);
4840 }
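
/*
 * Usage sketch (editor's addition): __tracing_open() is the common setup
 * path behind reading the "trace" and "snapshot" files, e.g.:
 *
 *   # cat /sys/kernel/tracing/trace
 *   # cat /sys/kernel/tracing/per_cpu/cpu0/trace
 *
 * Each open gets its own iterator, a private copy of the current tracer,
 * and one ring-buffer iterator per traced CPU.
 */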
4841
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4843 {
4844         int ret;
4845
4846         ret = tracing_check_open_get_tr(NULL);
4847         if (ret)
4848                 return ret;
4849
4850         filp->private_data = inode->i_private;
4851         return 0;
4852 }
4853
4854 bool tracing_is_disabled(void)
4855 {
4856         return tracing_disabled;
4857 }
4858
4859 /*
4860  * Open and update trace_array ref count.
4861  * Must have the current trace_array passed to it.
4862  */
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866         int ret;
4867
4868         ret = tracing_check_open_get_tr(tr);
4869         if (ret)
4870                 return ret;
4871
4872         filp->private_data = inode->i_private;
4873
4874         return 0;
4875 }
4876
4877 static int tracing_mark_open(struct inode *inode, struct file *filp)
4878 {
4879         stream_open(inode, filp);
4880         return tracing_open_generic_tr(inode, filp);
4881 }
4882
4883 static int tracing_release(struct inode *inode, struct file *file)
4884 {
4885         struct trace_array *tr = inode->i_private;
4886         struct seq_file *m = file->private_data;
4887         struct trace_iterator *iter;
4888         int cpu;
4889
4890         if (!(file->f_mode & FMODE_READ)) {
4891                 trace_array_put(tr);
4892                 return 0;
4893         }
4894
4895         /* Writes do not use seq_file */
4896         iter = m->private;
4897         mutex_lock(&trace_types_lock);
4898
4899         for_each_tracing_cpu(cpu) {
4900                 if (iter->buffer_iter[cpu])
4901                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4902         }
4903
4904         if (iter->trace && iter->trace->close)
4905                 iter->trace->close(iter);
4906
4907         if (!iter->snapshot && tr->stop_count)
4908                 /* reenable tracing if it was previously enabled */
4909                 tracing_start_tr(tr);
4910
4911         __trace_array_put(tr);
4912
4913         mutex_unlock(&trace_types_lock);
4914
4915         mutex_destroy(&iter->mutex);
4916         free_cpumask_var(iter->started);
4917         kfree(iter->fmt);
4918         kfree(iter->temp);
4919         kfree(iter->trace);
4920         kfree(iter->buffer_iter);
4921         seq_release_private(inode, file);
4922
4923         return 0;
4924 }
4925
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4927 {
4928         struct trace_array *tr = inode->i_private;
4929
4930         trace_array_put(tr);
4931         return 0;
4932 }
4933
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4935 {
4936         struct trace_array *tr = inode->i_private;
4937
4938         trace_array_put(tr);
4939
4940         return single_release(inode, file);
4941 }
4942
4943 static int tracing_open(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946         struct trace_iterator *iter;
4947         int ret;
4948
4949         ret = tracing_check_open_get_tr(tr);
4950         if (ret)
4951                 return ret;
4952
4953         /* If this file was open for write, then erase contents */
4954         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955                 int cpu = tracing_get_cpu(inode);
4956                 struct array_buffer *trace_buf = &tr->array_buffer;
4957
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959                 if (tr->current_trace->print_max)
4960                         trace_buf = &tr->max_buffer;
4961 #endif
4962
4963                 if (cpu == RING_BUFFER_ALL_CPUS)
4964                         tracing_reset_online_cpus(trace_buf);
4965                 else
4966                         tracing_reset_cpu(trace_buf, cpu);
4967         }
4968
4969         if (file->f_mode & FMODE_READ) {
4970                 iter = __tracing_open(inode, file, false);
4971                 if (IS_ERR(iter))
4972                         ret = PTR_ERR(iter);
4973                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4975         }
4976
4977         if (ret < 0)
4978                 trace_array_put(tr);
4979
4980         return ret;
4981 }
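
/*
 * Illustrative example (editor's addition) of the O_TRUNC path above:
 * opening the file write-only with truncation clears the buffer, which
 * is what
 *
 *   # echo > /sys/kernel/tracing/trace
 *
 * does from the shell (for all CPUs, or for one CPU via the per_cpu
 * variant of the file).
 */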
4982
4983 /*
4984  * Some tracers are not suitable for instance buffers.
4985  * A tracer is always available for the global array (toplevel)
4986  * or if it explicitly states that it is.
4987  */
4988 static bool
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4990 {
4991         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4992 }
4993
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4997 {
4998         while (t && !trace_ok_for_array(t, tr))
4999                 t = t->next;
5000
5001         return t;
5002 }
5003
5004 static void *
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5006 {
5007         struct trace_array *tr = m->private;
5008         struct tracer *t = v;
5009
5010         (*pos)++;
5011
5012         if (t)
5013                 t = get_tracer_for_array(tr, t->next);
5014
5015         return t;
5016 }
5017
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5019 {
5020         struct trace_array *tr = m->private;
5021         struct tracer *t;
5022         loff_t l = 0;
5023
5024         mutex_lock(&trace_types_lock);
5025
5026         t = get_tracer_for_array(tr, trace_types);
5027         for (; t && l < *pos; t = t_next(m, t, &l))
5028                 ;
5029
5030         return t;
5031 }
5032
5033 static void t_stop(struct seq_file *m, void *p)
5034 {
5035         mutex_unlock(&trace_types_lock);
5036 }
5037
5038 static int t_show(struct seq_file *m, void *v)
5039 {
5040         struct tracer *t = v;
5041
5042         if (!t)
5043                 return 0;
5044
5045         seq_puts(m, t->name);
5046         if (t->next)
5047                 seq_putc(m, ' ');
5048         else
5049                 seq_putc(m, '\n');
5050
5051         return 0;
5052 }
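
/*
 * Illustrative output (editor's addition) of the seq_file built from
 * t_show() above, as exposed through "available_tracers"; the exact list
 * depends on the kernel configuration:
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   function_graph function nop
 */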
5053
5054 static const struct seq_operations show_traces_seq_ops = {
5055         .start          = t_start,
5056         .next           = t_next,
5057         .stop           = t_stop,
5058         .show           = t_show,
5059 };
5060
5061 static int show_traces_open(struct inode *inode, struct file *file)
5062 {
5063         struct trace_array *tr = inode->i_private;
5064         struct seq_file *m;
5065         int ret;
5066
5067         ret = tracing_check_open_get_tr(tr);
5068         if (ret)
5069                 return ret;
5070
5071         ret = seq_open(file, &show_traces_seq_ops);
5072         if (ret) {
5073                 trace_array_put(tr);
5074                 return ret;
5075         }
5076
5077         m = file->private_data;
5078         m->private = tr;
5079
5080         return 0;
5081 }
5082
5083 static int show_traces_release(struct inode *inode, struct file *file)
5084 {
5085         struct trace_array *tr = inode->i_private;
5086
5087         trace_array_put(tr);
5088         return seq_release(inode, file);
5089 }
5090
5091 static ssize_t
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093                    size_t count, loff_t *ppos)
5094 {
5095         return count;
5096 }
5097
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5099 {
5100         int ret;
5101
5102         if (file->f_mode & FMODE_READ)
5103                 ret = seq_lseek(file, offset, whence);
5104         else
5105                 file->f_pos = ret = 0;
5106
5107         return ret;
5108 }
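
/*
 * Illustrative note (editor's addition): only read opens go through
 * seq_lseek() here; write-only opens of the "trace" file exist just to
 * clear the buffer (see tracing_open() and tracing_write_stub() above),
 * so their file position is simply pinned to 0.
 */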
5109
5110 static const struct file_operations tracing_fops = {
5111         .open           = tracing_open,
5112         .read           = seq_read,
5113         .write          = tracing_write_stub,
5114         .llseek         = tracing_lseek,
5115         .release        = tracing_release,
5116 };
5117
5118 static const struct file_operations show_traces_fops = {
5119         .open           = show_traces_open,
5120         .read           = seq_read,
5121         .llseek         = seq_lseek,
5122         .release        = show_traces_release,
5123 };
5124
5125 static ssize_t
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127                      size_t count, loff_t *ppos)
5128 {
5129         struct trace_array *tr = file_inode(filp)->i_private;
5130         char *mask_str;
5131         int len;
5132
5133         len = snprintf(NULL, 0, "%*pb\n",
5134                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135         mask_str = kmalloc(len, GFP_KERNEL);
5136         if (!mask_str)
5137                 return -ENOMEM;
5138
5139         len = snprintf(mask_str, len, "%*pb\n",
5140                        cpumask_pr_args(tr->tracing_cpumask));
5141         if (len >= count) {
5142                 count = -EINVAL;
5143                 goto out_err;
5144         }
5145         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5146
5147 out_err:
5148         kfree(mask_str);
5149
5150         return count;
5151 }
5152
5153 int tracing_set_cpumask(struct trace_array *tr,
5154                         cpumask_var_t tracing_cpumask_new)
5155 {
5156         int cpu;
5157
5158         if (!tr)
5159                 return -EINVAL;
5160
5161         local_irq_disable();
5162         arch_spin_lock(&tr->max_lock);
5163         for_each_tracing_cpu(cpu) {
5164                 /*
5165                  * Increase/decrease the disabled counter if we are
5166                  * about to flip a bit in the cpumask:
5167                  */
5168                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5172                 }
5173                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5177                 }
5178         }
5179         arch_spin_unlock(&tr->max_lock);
5180         local_irq_enable();
5181
5182         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5183
5184         return 0;
5185 }
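
/*
 * Usage sketch (editor's addition): "tracing_cpumask" takes a hex cpumask
 * rather than a CPU list.  For example, limiting tracing to CPUs 0 and 1:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * Reading the file back shows the current mask in the same hex format.
 */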
5186
5187 static ssize_t
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189                       size_t count, loff_t *ppos)
5190 {
5191         struct trace_array *tr = file_inode(filp)->i_private;
5192         cpumask_var_t tracing_cpumask_new;
5193         int err;
5194
5195         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5196                 return -ENOMEM;
5197
5198         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5199         if (err)
5200                 goto err_free;
5201
5202         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5203         if (err)
5204                 goto err_free;
5205
5206         free_cpumask_var(tracing_cpumask_new);
5207
5208         return count;
5209
5210 err_free:
5211         free_cpumask_var(tracing_cpumask_new);
5212
5213         return err;
5214 }
5215
5216 static const struct file_operations tracing_cpumask_fops = {
5217         .open           = tracing_open_generic_tr,
5218         .read           = tracing_cpumask_read,
5219         .write          = tracing_cpumask_write,
5220         .release        = tracing_release_generic_tr,
5221         .llseek         = generic_file_llseek,
5222 };
5223
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5225 {
5226         struct tracer_opt *trace_opts;
5227         struct trace_array *tr = m->private;
5228         u32 tracer_flags;
5229         int i;
5230
5231         mutex_lock(&trace_types_lock);
5232         tracer_flags = tr->current_trace->flags->val;
5233         trace_opts = tr->current_trace->flags->opts;
5234
5235         for (i = 0; trace_options[i]; i++) {
5236                 if (tr->trace_flags & (1 << i))
5237                         seq_printf(m, "%s\n", trace_options[i]);
5238                 else
5239                         seq_printf(m, "no%s\n", trace_options[i]);
5240         }
5241
5242         for (i = 0; trace_opts[i].name; i++) {
5243                 if (tracer_flags & trace_opts[i].bit)
5244                         seq_printf(m, "%s\n", trace_opts[i].name);
5245                 else
5246                         seq_printf(m, "no%s\n", trace_opts[i].name);
5247         }
5248         mutex_unlock(&trace_types_lock);
5249
5250         return 0;
5251 }
5252
5253 static int __set_tracer_option(struct trace_array *tr,
5254                                struct tracer_flags *tracer_flags,
5255                                struct tracer_opt *opts, int neg)
5256 {
5257         struct tracer *trace = tracer_flags->trace;
5258         int ret;
5259
5260         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5261         if (ret)
5262                 return ret;
5263
5264         if (neg)
5265                 tracer_flags->val &= ~opts->bit;
5266         else
5267                 tracer_flags->val |= opts->bit;
5268         return 0;
5269 }
5270
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5273 {
5274         struct tracer *trace = tr->current_trace;
5275         struct tracer_flags *tracer_flags = trace->flags;
5276         struct tracer_opt *opts = NULL;
5277         int i;
5278
5279         for (i = 0; tracer_flags->opts[i].name; i++) {
5280                 opts = &tracer_flags->opts[i];
5281
5282                 if (strcmp(cmp, opts->name) == 0)
5283                         return __set_tracer_option(tr, trace->flags, opts, neg);
5284         }
5285
5286         return -EINVAL;
5287 }
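
/*
 * Illustrative note (editor's addition): tracer-specific options appear
 * in the same "trace_options" file (and under "options/") next to the
 * global ones and are toggled the same way, e.g. for a tracer option
 * named "foo" ("foo" is a placeholder, not a real option name):
 *
 *   # echo nofoo > /sys/kernel/tracing/trace_options
 */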
5288
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5291 {
5292         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5293                 return -1;
5294
5295         return 0;
5296 }
5297
5298 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5299 {
5300         int *map;
5301
5302         if ((mask == TRACE_ITER_RECORD_TGID) ||
5303             (mask == TRACE_ITER_RECORD_CMD))
5304                 lockdep_assert_held(&event_mutex);
5305
5306         /* do nothing if flag is already set */
5307         if (!!(tr->trace_flags & mask) == !!enabled)
5308                 return 0;
5309
5310         /* Give the tracer a chance to approve the change */
5311         if (tr->current_trace->flag_changed)
5312                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5313                         return -EINVAL;
5314
5315         if (enabled)
5316                 tr->trace_flags |= mask;
5317         else
5318                 tr->trace_flags &= ~mask;
5319
5320         if (mask == TRACE_ITER_RECORD_CMD)
5321                 trace_event_enable_cmd_record(enabled);
5322
5323         if (mask == TRACE_ITER_RECORD_TGID) {
5324                 if (!tgid_map) {
5325                         tgid_map_max = pid_max;
5326                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5327                                        GFP_KERNEL);
5328
5329                         /*
5330                          * Pairs with smp_load_acquire() in
5331                          * trace_find_tgid_ptr() to ensure that if it observes
5332                          * the tgid_map we just allocated then it also observes
5333                          * the corresponding tgid_map_max value.
5334                          */
5335                         smp_store_release(&tgid_map, map);
5336                 }
5337                 if (!tgid_map) {
5338                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5339                         return -ENOMEM;
5340                 }
5341
5342                 trace_event_enable_tgid_record(enabled);
5343         }
5344
5345         if (mask == TRACE_ITER_EVENT_FORK)
5346                 trace_event_follow_fork(tr, enabled);
5347
5348         if (mask == TRACE_ITER_FUNC_FORK)
5349                 ftrace_pid_follow_fork(tr, enabled);
5350
5351         if (mask == TRACE_ITER_OVERWRITE) {
5352                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5353 #ifdef CONFIG_TRACER_MAX_TRACE
5354                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5355 #endif
5356         }
5357
5358         if (mask == TRACE_ITER_PRINTK) {
5359                 trace_printk_start_stop_comm(enabled);
5360                 trace_printk_control(enabled);
5361         }
5362
5363         return 0;
5364 }
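
/*
 * Illustrative example (editor's addition) of the TRACE_ITER_RECORD_TGID
 * branch above:
 *
 *   # echo 1 > /sys/kernel/tracing/options/record-tgid
 *
 * allocates the pid -> tgid map (sized by pid_max) on first use; the
 * recorded tgids can then be read back from "saved_tgids".
 */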
5365
5366 int trace_set_options(struct trace_array *tr, char *option)
5367 {
5368         char *cmp;
5369         int neg = 0;
5370         int ret;
5371         size_t orig_len = strlen(option);
5372         int len;
5373
5374         cmp = strstrip(option);
5375
5376         len = str_has_prefix(cmp, "no");
5377         if (len)
5378                 neg = 1;
5379
5380         cmp += len;
5381
5382         mutex_lock(&event_mutex);
5383         mutex_lock(&trace_types_lock);
5384
5385         ret = match_string(trace_options, -1, cmp);
5386         /* If no option could be set, test the specific tracer options */
5387         if (ret < 0)
5388                 ret = set_tracer_option(tr, cmp, neg);
5389         else
5390                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5391
5392         mutex_unlock(&trace_types_lock);
5393         mutex_unlock(&event_mutex);
5394
5395         /*
5396          * If the first trailing whitespace is replaced with '\0' by strstrip,
5397          * turn it back into a space.
5398          */
5399         if (orig_len > strlen(option))
5400                 option[strlen(option)] = ' ';
5401
5402         return ret;
5403 }
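
/*
 * Usage sketch (editor's addition): options are toggled by writing their
 * name to "trace_options", prefixed with "no" to clear them, e.g.:
 *
 *   # echo print-parent > /sys/kernel/tracing/trace_options
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 *
 * The same strings are accepted by the trace_options= boot parameter,
 * which apply_trace_boot_options() below feeds through this function.
 */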
5404
5405 static void __init apply_trace_boot_options(void)
5406 {
5407         char *buf = trace_boot_options_buf;
5408         char *option;
5409
5410         while (true) {
5411                 option = strsep(&buf, ",");
5412
5413                 if (!option)
5414                         break;
5415
5416                 if (*option)
5417                         trace_set_options(&global_trace, option);
5418
5419                 /* Put back the comma to allow this to be called again */
5420                 if (buf)
5421                         *(buf - 1) = ',';
5422         }
5423 }
5424
5425 static ssize_t
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427                         size_t cnt, loff_t *ppos)
5428 {
5429         struct seq_file *m = filp->private_data;
5430         struct trace_array *tr = m->private;
5431         char buf[64];
5432         int ret;
5433
5434         if (cnt >= sizeof(buf))
5435                 return -EINVAL;
5436
5437         if (copy_from_user(buf, ubuf, cnt))
5438                 return -EFAULT;
5439
5440         buf[cnt] = 0;
5441
5442         ret = trace_set_options(tr, buf);
5443         if (ret < 0)
5444                 return ret;
5445
5446         *ppos += cnt;
5447
5448         return cnt;
5449 }
5450
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5452 {
5453         struct trace_array *tr = inode->i_private;
5454         int ret;
5455
5456         ret = tracing_check_open_get_tr(tr);
5457         if (ret)
5458                 return ret;
5459
5460         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5461         if (ret < 0)
5462                 trace_array_put(tr);
5463
5464         return ret;
5465 }
5466
5467 static const struct file_operations tracing_iter_fops = {
5468         .open           = tracing_trace_options_open,
5469         .read           = seq_read,
5470         .llseek         = seq_lseek,
5471         .release        = tracing_single_release_tr,
5472         .write          = tracing_trace_options_write,
5473 };
5474
5475 static const char readme_msg[] =
5476         "tracing mini-HOWTO:\n\n"
5477         "# echo 0 > tracing_on : quick way to disable tracing\n"
5478         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5479         " Important files:\n"
5480         "  trace\t\t\t- The static contents of the buffer\n"
5481         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5482         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5483         "  current_tracer\t- function and latency tracers\n"
5484         "  available_tracers\t- list of configured tracers for current_tracer\n"
5485         "  error_log\t- error log for failed commands (that support it)\n"
5486         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5487         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5488         "  trace_clock\t\t- change the clock used to order events\n"
5489         "       local:   Per cpu clock but may not be synced across CPUs\n"
5490         "      global:   Synced across CPUs but slows tracing down.\n"
5491         "     counter:   Not a clock, but just an increment\n"
5492         "      uptime:   Jiffy counter from time of boot\n"
5493         "        perf:   Same clock that perf events use\n"
5494 #ifdef CONFIG_X86_64
5495         "     x86-tsc:   TSC cycle counter\n"
5496 #endif
5497         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5498         "       delta:   Delta difference against a buffer-wide timestamp\n"
5499         "    absolute:   Absolute (standalone) timestamp\n"
5500         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5501         "\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5502         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5503         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5504         "\t\t\t  Remove sub-buffer with rmdir\n"
5505         "  trace_options\t\t- Set format or modify how tracing happens\n"
5506         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5507         "\t\t\t  option name\n"
5508         "  saved_cmdlines_size\t- echo the number of comm-pid pairs to save in here\n"
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510         "\n  available_filter_functions - list of functions that can be filtered on\n"
5511         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5512         "\t\t\t  functions\n"
5513         "\t     accepts: func_full_name or glob-matching-pattern\n"
5514         "\t     modules: Can select a group via module\n"
5515         "\t      Format: :mod:<module-name>\n"
5516         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5517         "\t    triggers: a command to perform when function is hit\n"
5518         "\t      Format: <function>:<trigger>[:count]\n"
5519         "\t     trigger: traceon, traceoff\n"
5520         "\t\t      enable_event:<system>:<event>\n"
5521         "\t\t      disable_event:<system>:<event>\n"
5522 #ifdef CONFIG_STACKTRACE
5523         "\t\t      stacktrace\n"
5524 #endif
5525 #ifdef CONFIG_TRACER_SNAPSHOT
5526         "\t\t      snapshot\n"
5527 #endif
5528         "\t\t      dump\n"
5529         "\t\t      cpudump\n"
5530         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5531         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5532         "\t     The first one will disable tracing every time do_fault is hit\n"
5533         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5534         "\t       The first time do_trap is hit and it disables tracing, the\n"
5535         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5536         "\t       the counter will not decrement. It only decrements when the\n"
5537         "\t       trigger did work\n"
5538         "\t     To remove trigger without count:\n"
5539         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5540         "\t     To remove trigger with a count:\n"
5541         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5542         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5543         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5544         "\t    modules: Can select a group via module command :mod:\n"
5545         "\t    Does not accept triggers\n"
5546 #endif /* CONFIG_DYNAMIC_FTRACE */
5547 #ifdef CONFIG_FUNCTION_TRACER
5548         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5549         "\t\t    (function)\n"
5550         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5551         "\t\t    (function)\n"
5552 #endif
5553 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5554         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5555         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5556         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5560         "\t\t\t  snapshot buffer. Read the contents for more\n"
5561         "\t\t\t  information\n"
5562 #endif
5563 #ifdef CONFIG_STACK_TRACER
5564         "  stack_trace\t\t- Shows the max stack trace when active\n"
5565         "  stack_max_size\t- Shows current max stack size that was traced\n"
5566         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5567         "\t\t\t  new trace)\n"
5568 #ifdef CONFIG_DYNAMIC_FTRACE
5569         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5570         "\t\t\t  traces\n"
5571 #endif
5572 #endif /* CONFIG_STACK_TRACER */
5573 #ifdef CONFIG_DYNAMIC_EVENTS
5574         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5575         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5576 #endif
5577 #ifdef CONFIG_KPROBE_EVENTS
5578         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5579         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5580 #endif
5581 #ifdef CONFIG_UPROBE_EVENTS
5582         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5583         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5584 #endif
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586         "\t  accepts: event-definitions (one definition per line)\n"
5587         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5588         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5591 #endif
5592         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5593         "\t           -:[<group>/][<event>]\n"
5594 #ifdef CONFIG_KPROBE_EVENTS
5595         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5596   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5597 #endif
5598 #ifdef CONFIG_UPROBE_EVENTS
5599   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5600 #endif
5601         "\t     args: <name>=fetcharg[:type]\n"
5602         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5603 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5604         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5605 #else
5606         "\t           $stack<index>, $stack, $retval, $comm,\n"
5607 #endif
5608         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5609         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5610         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5611         "\t           <type>\\[<array-size>\\]\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613         "\t    field: <stype> <name>;\n"
5614         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5615         "\t           [unsigned] char/int/long\n"
5616 #endif
5617         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5618         "\t            of the <attached-group>/<attached-event>.\n"
5619 #endif
5620         "  events/\t\t- Directory containing all trace event subsystems:\n"
5621         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5623         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5624         "\t\t\t  events\n"
5625         "      filter\t\t- If set, only events passing filter are traced\n"
5626         "  events/<system>/<event>/\t- Directory containing control files for\n"
5627         "\t\t\t  <event>:\n"
5628         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629         "      filter\t\t- If set, only events passing filter are traced\n"
5630         "      trigger\t\t- If set, a command to perform when event is hit\n"
5631         "\t    Format: <trigger>[:count][if <filter>]\n"
5632         "\t   trigger: traceon, traceoff\n"
5633         "\t            enable_event:<system>:<event>\n"
5634         "\t            disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636         "\t            enable_hist:<system>:<event>\n"
5637         "\t            disable_hist:<system>:<event>\n"
5638 #endif
5639 #ifdef CONFIG_STACKTRACE
5640         "\t\t    stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643         "\t\t    snapshot\n"
5644 #endif
5645 #ifdef CONFIG_HIST_TRIGGERS
5646         "\t\t    hist (see below)\n"
5647 #endif
5648         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5649         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5650         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651         "\t                  events/block/block_unplug/trigger\n"
5652         "\t   The first disables tracing every time block_unplug is hit.\n"
5653         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5654         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5655         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5656         "\t   Like function triggers, the counter is only decremented if it\n"
5657         "\t    enabled or disabled tracing.\n"
5658         "\t   To remove a trigger without a count:\n"
5659         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5660         "\t   To remove a trigger with a count:\n"
5661         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5662         "\t   Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5666         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667         "\t            [:values=<field1[,field2,...]>]\n"
5668         "\t            [:sort=<field1[,field2,...]>]\n"
5669         "\t            [:size=#entries]\n"
5670         "\t            [:pause][:continue][:clear]\n"
5671         "\t            [:name=histname1]\n"
5672         "\t            [:<handler>.<action>]\n"
5673         "\t            [if <filter>]\n\n"
5674         "\t    Note, special fields can be used as well:\n"
5675         "\t            common_timestamp - to record current timestamp\n"
5676         "\t            common_cpu - to record the CPU the event happened on\n"
5677         "\n"
5678         "\t    A hist trigger variable can be:\n"
5679         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5680         "\t        - a reference to another variable e.g. y=$x,\n"
5681         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5682         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5683         "\n"
5684         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5685         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5686         "\t    variable reference, field or numeric literal.\n"
5687         "\n"
5688         "\t    When a matching event is hit, an entry is added to a hash\n"
5689         "\t    table using the key(s) and value(s) named, and the value of a\n"
5690         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5691         "\t    correspond to fields in the event's format description.  Keys\n"
5692         "\t    can be any field, or the special string 'stacktrace'.\n"
5693         "\t    Compound keys consisting of up to two fields can be specified\n"
5694         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5695         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5696         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5697         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5698         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5699         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5700         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5701         "\t    its histogram data will be shared with other triggers of the\n"
5702         "\t    same name, and trigger hits will update this common data.\n\n"
5703         "\t    Reading the 'hist' file for the event will dump the hash\n"
5704         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5705         "\t    triggers attached to an event, there will be a table for each\n"
5706         "\t    trigger in the output.  The table displayed for a named\n"
5707         "\t    trigger will be the same as any other instance having the\n"
5708         "\t    same name.  The default format used to display a given field\n"
5709         "\t    can be modified by appending any of the following modifiers\n"
5710         "\t    to the field name, as applicable:\n\n"
5711         "\t            .hex        display a number as a hex value\n"
5712         "\t            .sym        display an address as a symbol\n"
5713         "\t            .sym-offset display an address as a symbol and offset\n"
5714         "\t            .execname   display a common_pid as a program name\n"
5715         "\t            .syscall    display a syscall id as a syscall name\n"
5716         "\t            .log2       display log2 value rather than raw number\n"
5717         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5718         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5719         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5720         "\t    trigger or to start a hist trigger but not log any events\n"
5721         "\t    until told to do so.  'continue' can be used to start or\n"
5722         "\t    restart a paused hist trigger.\n\n"
5723         "\t    The 'clear' parameter will clear the contents of a running\n"
5724         "\t    hist trigger and leave its current paused/active state\n"
5725         "\t    unchanged.\n\n"
5726         "\t    The enable_hist and disable_hist triggers can be used to\n"
5727         "\t    have one event conditionally start and stop another event's\n"
5728         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5729         "\t    the enable_event and disable_event triggers.\n\n"
5730         "\t    Hist trigger handlers and actions are executed whenever a\n"
5731         "\t    histogram entry is added or updated.  They take the form:\n\n"
5732         "\t        <handler>.<action>\n\n"
5733         "\t    The available handlers are:\n\n"
5734         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5735         "\t        onmax(var)               - invoke if var exceeds current max\n"
5736         "\t        onchange(var)            - invoke action if var changes\n\n"
5737         "\t    The available actions are:\n\n"
5738         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5739         "\t        save(field,...)                      - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745         "\t  Write into this file to define/undefine new synthetic events.\n"
5746         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
5750
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753                        size_t cnt, loff_t *ppos)
5754 {
5755         return simple_read_from_buffer(ubuf, cnt, ppos,
5756                                         readme_msg, strlen(readme_msg));
5757 }
5758
5759 static const struct file_operations tracing_readme_fops = {
5760         .open           = tracing_open_generic,
5761         .read           = tracing_readme_read,
5762         .llseek         = generic_file_llseek,
5763 };
5764
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767         int pid = ++(*pos);
5768
5769         return trace_find_tgid_ptr(pid);
5770 }
5771
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5773 {
5774         int pid = *pos;
5775
5776         return trace_find_tgid_ptr(pid);
5777 }
5778
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5780 {
5781 }
5782
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5784 {
5785         int *entry = (int *)v;
5786         int pid = entry - tgid_map;
5787         int tgid = *entry;
5788
5789         if (tgid == 0)
5790                 return SEQ_SKIP;
5791
5792         seq_printf(m, "%d %d\n", pid, tgid);
5793         return 0;
5794 }
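
/*
 * Illustrative output (editor's addition) of the "saved_tgids" file
 * driven by the seq_ops below; each line is "<pid> <tgid>" and the
 * values shown here are made up:
 *
 *   # cat /sys/kernel/tracing/saved_tgids
 *   1 1
 *   1042 1040
 */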
5795
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797         .start          = saved_tgids_start,
5798         .stop           = saved_tgids_stop,
5799         .next           = saved_tgids_next,
5800         .show           = saved_tgids_show,
5801 };
5802
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5804 {
5805         int ret;
5806
5807         ret = tracing_check_open_get_tr(NULL);
5808         if (ret)
5809                 return ret;
5810
5811         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5812 }
5813
5814
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816         .open           = tracing_saved_tgids_open,
5817         .read           = seq_read,
5818         .llseek         = seq_lseek,
5819         .release        = seq_release,
5820 };
5821
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5823 {
5824         unsigned int *ptr = v;
5825
5826         if (*pos || m->count)
5827                 ptr++;
5828
5829         (*pos)++;
5830
5831         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5832              ptr++) {
5833                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5834                         continue;
5835
5836                 return ptr;
5837         }
5838
5839         return NULL;
5840 }
5841
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5843 {
5844         void *v;
5845         loff_t l = 0;
5846
5847         preempt_disable();
5848         arch_spin_lock(&trace_cmdline_lock);
5849
5850         v = &savedcmd->map_cmdline_to_pid[0];
5851         while (l <= *pos) {
5852                 v = saved_cmdlines_next(m, v, &l);
5853                 if (!v)
5854                         return NULL;
5855         }
5856
5857         return v;
5858 }
5859
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5861 {
5862         arch_spin_unlock(&trace_cmdline_lock);
5863         preempt_enable();
5864 }
5865
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5867 {
5868         char buf[TASK_COMM_LEN];
5869         unsigned int *pid = v;
5870
5871         __trace_find_cmdline(*pid, buf);
5872         seq_printf(m, "%d %s\n", *pid, buf);
5873         return 0;
5874 }
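
/*
 * Illustrative output (editor's addition) of the "saved_cmdlines" file,
 * one "<pid> <comm>" pair per line (the entries shown are made up):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   13 ksoftirqd/0
 *   2203 bash
 */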
5875
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877         .start          = saved_cmdlines_start,
5878         .next           = saved_cmdlines_next,
5879         .stop           = saved_cmdlines_stop,
5880         .show           = saved_cmdlines_show,
5881 };
5882
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5884 {
5885         int ret;
5886
5887         ret = tracing_check_open_get_tr(NULL);
5888         if (ret)
5889                 return ret;
5890
5891         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5892 }
5893
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895         .open           = tracing_saved_cmdlines_open,
5896         .read           = seq_read,
5897         .llseek         = seq_lseek,
5898         .release        = seq_release,
5899 };
5900
5901 static ssize_t
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903                                  size_t cnt, loff_t *ppos)
5904 {
5905         char buf[64];
5906         int r;
5907
5908         preempt_disable();
5909         arch_spin_lock(&trace_cmdline_lock);
5910         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911         arch_spin_unlock(&trace_cmdline_lock);
5912         preempt_enable();
5913
5914         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5918 {
5919         kfree(s->saved_cmdlines);
5920         kfree(s->map_cmdline_to_pid);
5921         kfree(s);
5922 }
5923
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5925 {
5926         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5927
5928         s = kmalloc(sizeof(*s), GFP_KERNEL);
5929         if (!s)
5930                 return -ENOMEM;
5931
5932         if (allocate_cmdlines_buffer(val, s) < 0) {
5933                 kfree(s);
5934                 return -ENOMEM;
5935         }
5936
5937         preempt_disable();
5938         arch_spin_lock(&trace_cmdline_lock);
5939         savedcmd_temp = savedcmd;
5940         savedcmd = s;
5941         arch_spin_unlock(&trace_cmdline_lock);
5942         preempt_enable();
5943         free_saved_cmdlines_buffer(savedcmd_temp);
5944
5945         return 0;
5946 }
5947
5948 static ssize_t
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950                                   size_t cnt, loff_t *ppos)
5951 {
5952         unsigned long val;
5953         int ret;
5954
5955         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5956         if (ret)
5957                 return ret;
5958
5959         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5960         if (!val || val > PID_MAX_DEFAULT)
5961                 return -EINVAL;
5962
5963         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5964         if (ret < 0)
5965                 return ret;
5966
5967         *ppos += cnt;
5968
5969         return cnt;
5970 }
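
/*
 * Usage sketch (editor's addition): enlarging the comm cache so more
 * tasks keep a resolvable name in the trace output:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */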
5971
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973         .open           = tracing_open_generic,
5974         .read           = tracing_saved_cmdlines_size_read,
5975         .write          = tracing_saved_cmdlines_size_write,
5976 };
5977
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5981 {
5982         if (!ptr->map.eval_string) {
5983                 if (ptr->tail.next) {
5984                         ptr = ptr->tail.next;
5985                         /* Set ptr to the next real item (skip head) */
5986                         ptr++;
5987                 } else
5988                         return NULL;
5989         }
5990         return ptr;
5991 }
5992
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5994 {
5995         union trace_eval_map_item *ptr = v;
5996
5997         /*
5998          * Paranoid! If ptr points to end, we don't want to increment past it.
5999          * This really should never happen.
6000          */
6001         (*pos)++;
6002         ptr = update_eval_map(ptr);
6003         if (WARN_ON_ONCE(!ptr))
6004                 return NULL;
6005
6006         ptr++;
6007         ptr = update_eval_map(ptr);
6008
6009         return ptr;
6010 }
6011
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6013 {
6014         union trace_eval_map_item *v;
6015         loff_t l = 0;
6016
6017         mutex_lock(&trace_eval_mutex);
6018
6019         v = trace_eval_maps;
6020         if (v)
6021                 v++;
6022
6023         while (v && l < *pos) {
6024                 v = eval_map_next(m, v, &l);
6025         }
6026
6027         return v;
6028 }
6029
6030 static void eval_map_stop(struct seq_file *m, void *v)
6031 {
6032         mutex_unlock(&trace_eval_mutex);
6033 }
6034
6035 static int eval_map_show(struct seq_file *m, void *v)
6036 {
6037         union trace_eval_map_item *ptr = v;
6038
6039         seq_printf(m, "%s %ld (%s)\n",
6040                    ptr->map.eval_string, ptr->map.eval_value,
6041                    ptr->map.system);
6042
6043         return 0;
6044 }
6045
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047         .start          = eval_map_start,
6048         .next           = eval_map_next,
6049         .stop           = eval_map_stop,
6050         .show           = eval_map_show,
6051 };
6052
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6054 {
6055         int ret;
6056
6057         ret = tracing_check_open_get_tr(NULL);
6058         if (ret)
6059                 return ret;
6060
6061         return seq_open(filp, &tracing_eval_map_seq_ops);
6062 }
6063
6064 static const struct file_operations tracing_eval_map_fops = {
6065         .open           = tracing_eval_map_open,
6066         .read           = seq_read,
6067         .llseek         = seq_lseek,
6068         .release        = seq_release,
6069 };
6070
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6073 {
6074         /* Return tail of array given the head */
6075         return ptr + ptr->head.length + 1;
6076 }
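
/*
 * Illustrative layout (editor's addition) of one trace_eval_maps chunk as
 * built by trace_insert_eval_map_file() below: a head item, the maps
 * themselves, then a tail item linking to the next module's chunk:
 *
 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next -> ... ]
 */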
6077
6078 static void
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6080                            int len)
6081 {
6082         struct trace_eval_map **stop;
6083         struct trace_eval_map **map;
6084         union trace_eval_map_item *map_array;
6085         union trace_eval_map_item *ptr;
6086
6087         stop = start + len;
6088
6089         /*
6090          * The trace_eval_maps array contains the maps plus a head and tail
6091          * item, where the head holds the module and the length of the array,
6092          * and the tail holds a pointer to the next list.
6093          */
6094         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6095         if (!map_array) {
6096                 pr_warn("Unable to allocate trace eval mapping\n");
6097                 return;
6098         }
6099
6100         mutex_lock(&trace_eval_mutex);
6101
6102         if (!trace_eval_maps)
6103                 trace_eval_maps = map_array;
6104         else {
6105                 ptr = trace_eval_maps;
6106                 for (;;) {
6107                         ptr = trace_eval_jmp_to_tail(ptr);
6108                         if (!ptr->tail.next)
6109                                 break;
6110                         ptr = ptr->tail.next;
6111
6112                 }
6113                 ptr->tail.next = map_array;
6114         }
6115         map_array->head.mod = mod;
6116         map_array->head.length = len;
6117         map_array++;
6118
6119         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120                 map_array->map = **map;
6121                 map_array++;
6122         }
6123         memset(map_array, 0, sizeof(*map_array));
6124
6125         mutex_unlock(&trace_eval_mutex);
6126 }
6127
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6129 {
6130         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131                           NULL, &tracing_eval_map_fops);
6132 }
6133
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6135 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6136 static inline void trace_insert_eval_map_file(struct module *mod,
6137                               struct trace_eval_map **start, int len) { }
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6139
6140 static void trace_insert_eval_map(struct module *mod,
6141                                   struct trace_eval_map **start, int len)
6142 {
6143         struct trace_eval_map **map;
6144
6145         if (len <= 0)
6146                 return;
6147
6148         map = start;
6149
6150         trace_event_eval_update(map, len);
6151
6152         trace_insert_eval_map_file(mod, start, len);
6153 }
6154
6155 static ssize_t
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157                        size_t cnt, loff_t *ppos)
6158 {
6159         struct trace_array *tr = filp->private_data;
6160         char buf[MAX_TRACER_SIZE+2];
6161         int r;
6162
6163         mutex_lock(&trace_types_lock);
6164         r = sprintf(buf, "%s\n", tr->current_trace->name);
6165         mutex_unlock(&trace_types_lock);
6166
6167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6168 }
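
/*
 * Illustrative read (editor's addition) of the "current_tracer" file
 * served by the helper above ("nop" is the default until another tracer
 * is selected):
 *
 *   # cat /sys/kernel/tracing/current_tracer
 *   nop
 */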
6169
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6171 {
6172         tracing_reset_online_cpus(&tr->array_buffer);
6173         return t->init(tr);
6174 }
6175
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6177 {
6178         int cpu;
6179
6180         for_each_tracing_cpu(cpu)
6181                 per_cpu_ptr(buf->data, cpu)->entries = val;
6182 }
6183
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @trace_buf's per-CPU entry counts to match @size_buf's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187                                         struct array_buffer *size_buf, int cpu_id)
6188 {
6189         int cpu, ret = 0;
6190
6191         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192                 for_each_tracing_cpu(cpu) {
6193                         ret = ring_buffer_resize(trace_buf->buffer,
6194                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6195                         if (ret < 0)
6196                                 break;
6197                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6198                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6199                 }
6200         } else {
6201                 ret = ring_buffer_resize(trace_buf->buffer,
6202                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6203                 if (ret == 0)
6204                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6206         }
6207
6208         return ret;
6209 }
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6211
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213                                         unsigned long size, int cpu)
6214 {
6215         int ret;
6216
6217         /*
6218          * If kernel or user changes the size of the ring buffer
6219          * we use the size that was given, and we can forget about
6220          * expanding it later.
6221          */
6222         ring_buffer_expanded = true;
6223
6224         /* May be called before buffers are initialized */
6225         if (!tr->array_buffer.buffer)
6226                 return 0;
6227
6228         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6229         if (ret < 0)
6230                 return ret;
6231
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234             !tr->current_trace->use_max_tr)
6235                 goto out;
6236
6237         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6238         if (ret < 0) {
6239                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240                                                      &tr->array_buffer, cpu);
6241                 if (r < 0) {
6242                         /*
6243                          * We are left with a max buffer that has a
6244                          * different size from the main buffer!
6245                          * The max buffer is our "snapshot" buffer.
6246                          * When a tracer needs a snapshot (one of the
6247                          * latency tracers), it swaps the max buffer
6248                          * with the saved snapshot. We succeeded in
6249                          * updating the size of the main buffer, but
6250                          * failed to update the size of the max buffer.
6251                          * Then, when we tried to reset the main buffer
6252                          * to its original size, we failed there too.
6253                          * This is very unlikely to happen, but if it
6254                          * does, warn and disable all tracing.
6255                          */
6256                         WARN_ON(1);
6257                         tracing_disabled = 1;
6258                 }
6259                 return ret;
6260         }
6261
6262         if (cpu == RING_BUFFER_ALL_CPUS)
6263                 set_buffer_entries(&tr->max_buffer, size);
6264         else
6265                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6266
6267  out:
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6269
6270         if (cpu == RING_BUFFER_ALL_CPUS)
6271                 set_buffer_entries(&tr->array_buffer, size);
6272         else
6273                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6274
6275         return ret;
6276 }
6277
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279                                   unsigned long size, int cpu_id)
6280 {
6281         int ret;
6282
6283         mutex_lock(&trace_types_lock);
6284
6285         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286                 /* make sure this cpu is enabled in the mask */
6287                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6288                         ret = -EINVAL;
6289                         goto out;
6290                 }
6291         }
6292
6293         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6294         if (ret < 0)
6295                 ret = -ENOMEM;
6296
6297 out:
6298         mutex_unlock(&trace_types_lock);
6299
6300         return ret;
6301 }
6302
6303
6304 /**
6305  * tracing_update_buffers - used by tracing facility to expand ring buffers
6306  *
6307  * To save memory when tracing is never used on a system that has it
6308  * configured in, the ring buffers are initially set to a minimum size.
6309  * Once a user starts to use the tracing facility, the buffers need to
6310  * grow to their default size.
6311  *
6312  * This function is to be called when a tracer is about to be used.
6313  */
6314 int tracing_update_buffers(void)
6315 {
6316         int ret = 0;
6317
6318         mutex_lock(&trace_types_lock);
6319         if (!ring_buffer_expanded)
6320                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321                                                 RING_BUFFER_ALL_CPUS);
6322         mutex_unlock(&trace_types_lock);
6323
6324         return ret;
6325 }
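
/*
 * Editorial sketch of the expected call pattern (an assumption drawn from
 * callers such as tracing_snapshot_write() later in this file, not a new
 * API): any path that is about to start using the tracing facility expands
 * the buffers first and bails out on failure.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... go on to enable the tracer or event ...
 */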
6326
6327 struct trace_option_dentry;
6328
6329 static void
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6331
6332 /*
6333  * Used to clear out the tracer before deletion of an instance.
6334  * Must have trace_types_lock held.
6335  */
6336 static void tracing_set_nop(struct trace_array *tr)
6337 {
6338         if (tr->current_trace == &nop_trace)
6339                 return;
6340
6341         tr->current_trace->enabled--;
6342
6343         if (tr->current_trace->reset)
6344                 tr->current_trace->reset(tr);
6345
6346         tr->current_trace = &nop_trace;
6347 }
6348
6349 static bool tracer_options_updated;
6350
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6352 {
6353         /* Only enable if the directory has been created already. */
6354         if (!tr->dir)
6355                 return;
6356
6357         /* Only create trace option files after update_tracer_options() has finished */
6358         if (!tracer_options_updated)
6359                 return;
6360
6361         create_trace_option_files(tr, t);
6362 }
6363
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6365 {
6366         struct tracer *t;
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368         bool had_max_tr;
6369 #endif
6370         int ret = 0;
6371
6372         mutex_lock(&trace_types_lock);
6373
6374         if (!ring_buffer_expanded) {
6375                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376                                                 RING_BUFFER_ALL_CPUS);
6377                 if (ret < 0)
6378                         goto out;
6379                 ret = 0;
6380         }
6381
6382         for (t = trace_types; t; t = t->next) {
6383                 if (strcmp(t->name, buf) == 0)
6384                         break;
6385         }
6386         if (!t) {
6387                 ret = -EINVAL;
6388                 goto out;
6389         }
6390         if (t == tr->current_trace)
6391                 goto out;
6392
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394         if (t->use_max_tr) {
6395                 local_irq_disable();
6396                 arch_spin_lock(&tr->max_lock);
6397                 if (tr->cond_snapshot)
6398                         ret = -EBUSY;
6399                 arch_spin_unlock(&tr->max_lock);
6400                 local_irq_enable();
6401                 if (ret)
6402                         goto out;
6403         }
6404 #endif
6405         /* Some tracers won't work on kernel command line */
6406         if (system_state < SYSTEM_RUNNING && t->noboot) {
6407                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6408                         t->name);
6409                 goto out;
6410         }
6411
6412         /* Some tracers are only allowed for the top level buffer */
6413         if (!trace_ok_for_array(t, tr)) {
6414                 ret = -EINVAL;
6415                 goto out;
6416         }
6417
6418         /* If trace pipe files are being read, we can't change the tracer */
6419         if (tr->trace_ref) {
6420                 ret = -EBUSY;
6421                 goto out;
6422         }
6423
6424         trace_branch_disable();
6425
6426         tr->current_trace->enabled--;
6427
6428         if (tr->current_trace->reset)
6429                 tr->current_trace->reset(tr);
6430
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432         had_max_tr = tr->current_trace->use_max_tr;
6433
6434         /* Current trace needs to be nop_trace before synchronize_rcu */
6435         tr->current_trace = &nop_trace;
6436
6437         if (had_max_tr && !t->use_max_tr) {
6438                 /*
6439                  * We need to make sure that update_max_tr() sees that
6440                  * current_trace changed to nop_trace, to keep it from
6441                  * swapping the buffers after we resize them.
6442                  * update_max_tr() is called with interrupts disabled,
6443                  * so a synchronize_rcu() is sufficient.
6444                  */
6445                 synchronize_rcu();
6446                 free_snapshot(tr);
6447         }
6448
6449         if (t->use_max_tr && !tr->allocated_snapshot) {
6450                 ret = tracing_alloc_snapshot_instance(tr);
6451                 if (ret < 0)
6452                         goto out;
6453         }
6454 #else
6455         tr->current_trace = &nop_trace;
6456 #endif
6457
6458         if (t->init) {
6459                 ret = tracer_init(t, tr);
6460                 if (ret)
6461                         goto out;
6462         }
6463
6464         tr->current_trace = t;
6465         tr->current_trace->enabled++;
6466         trace_branch_enable(tr);
6467  out:
6468         mutex_unlock(&trace_types_lock);
6469
6470         return ret;
6471 }
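
/*
 * Editorial sketch (user space, hedged): tracing_set_tracer() is normally
 * reached by writing a tracer name to the tracefs "current_tracer" file via
 * tracing_set_trace_write() below. Assuming tracefs is mounted at
 * /sys/kernel/tracing (older setups use /sys/kernel/debug/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *	if (fd >= 0) {
 *		if (write(fd, "nop", 3) < 0)
 *			perror("current_tracer");
 *		close(fd);
 *	}
 *
 * Surrounding whitespace in the written name is stripped by strim() before
 * the lookup in trace_types.
 */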
6472
6473 static ssize_t
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475                         size_t cnt, loff_t *ppos)
6476 {
6477         struct trace_array *tr = filp->private_data;
6478         char buf[MAX_TRACER_SIZE+1];
6479         char *name;
6480         size_t ret;
6481         int err;
6482
6483         ret = cnt;
6484
6485         if (cnt > MAX_TRACER_SIZE)
6486                 cnt = MAX_TRACER_SIZE;
6487
6488         if (copy_from_user(buf, ubuf, cnt))
6489                 return -EFAULT;
6490
6491         buf[cnt] = 0;
6492
6493         name = strim(buf);
6494
6495         err = tracing_set_tracer(tr, name);
6496         if (err)
6497                 return err;
6498
6499         *ppos += ret;
6500
6501         return ret;
6502 }
6503
6504 static ssize_t
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506                    size_t cnt, loff_t *ppos)
6507 {
6508         char buf[64];
6509         int r;
6510
6511         r = snprintf(buf, sizeof(buf), "%ld\n",
6512                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513         if (r > sizeof(buf))
6514                 r = sizeof(buf);
6515         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6516 }
6517
6518 static ssize_t
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520                     size_t cnt, loff_t *ppos)
6521 {
6522         unsigned long val;
6523         int ret;
6524
6525         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6526         if (ret)
6527                 return ret;
6528
6529         *ptr = val * 1000;
6530
6531         return cnt;
6532 }
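
/*
 * Editorial note: the tracing_nsecs_* helpers above expose values in
 * microseconds to user space but store them in nanoseconds. For example,
 * writing "500" to tracing_thresh stores 500 * 1000 = 500000 ns, and
 * reading it back prints "500" again via nsecs_to_usecs(). A stored value
 * of -1 is shown as "-1" rather than being converted.
 */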
6533
6534 static ssize_t
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536                     size_t cnt, loff_t *ppos)
6537 {
6538         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6539 }
6540
6541 static ssize_t
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543                      size_t cnt, loff_t *ppos)
6544 {
6545         struct trace_array *tr = filp->private_data;
6546         int ret;
6547
6548         mutex_lock(&trace_types_lock);
6549         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6550         if (ret < 0)
6551                 goto out;
6552
6553         if (tr->current_trace->update_thresh) {
6554                 ret = tr->current_trace->update_thresh(tr);
6555                 if (ret < 0)
6556                         goto out;
6557         }
6558
6559         ret = cnt;
6560 out:
6561         mutex_unlock(&trace_types_lock);
6562
6563         return ret;
6564 }
6565
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6567
6568 static ssize_t
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570                      size_t cnt, loff_t *ppos)
6571 {
6572         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6573 }
6574
6575 static ssize_t
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577                       size_t cnt, loff_t *ppos)
6578 {
6579         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6580 }
6581
6582 #endif
6583
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6585 {
6586         struct trace_array *tr = inode->i_private;
6587         struct trace_iterator *iter;
6588         int ret;
6589
6590         ret = tracing_check_open_get_tr(tr);
6591         if (ret)
6592                 return ret;
6593
6594         mutex_lock(&trace_types_lock);
6595
6596         /* create a buffer to store the information to pass to userspace */
6597         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6598         if (!iter) {
6599                 ret = -ENOMEM;
6600                 __trace_array_put(tr);
6601                 goto out;
6602         }
6603
6604         trace_seq_init(&iter->seq);
6605         iter->trace = tr->current_trace;
6606
6607         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6608                 ret = -ENOMEM;
6609                 goto fail;
6610         }
6611
6612         /* trace pipe does not show start of buffer */
6613         cpumask_setall(iter->started);
6614
6615         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6617
6618         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619         if (trace_clocks[tr->clock_id].in_ns)
6620                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6621
6622         iter->tr = tr;
6623         iter->array_buffer = &tr->array_buffer;
6624         iter->cpu_file = tracing_get_cpu(inode);
6625         mutex_init(&iter->mutex);
6626         filp->private_data = iter;
6627
6628         if (iter->trace->pipe_open)
6629                 iter->trace->pipe_open(iter);
6630
6631         nonseekable_open(inode, filp);
6632
6633         tr->trace_ref++;
6634 out:
6635         mutex_unlock(&trace_types_lock);
6636         return ret;
6637
6638 fail:
6639         kfree(iter);
6640         __trace_array_put(tr);
6641         mutex_unlock(&trace_types_lock);
6642         return ret;
6643 }
6644
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6646 {
6647         struct trace_iterator *iter = file->private_data;
6648         struct trace_array *tr = inode->i_private;
6649
6650         mutex_lock(&trace_types_lock);
6651
6652         tr->trace_ref--;
6653
6654         if (iter->trace->pipe_close)
6655                 iter->trace->pipe_close(iter);
6656
6657         mutex_unlock(&trace_types_lock);
6658
6659         free_cpumask_var(iter->started);
6660         kfree(iter->fmt);
6661         mutex_destroy(&iter->mutex);
6662         kfree(iter);
6663
6664         trace_array_put(tr);
6665
6666         return 0;
6667 }
6668
6669 static __poll_t
6670 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6671 {
6672         struct trace_array *tr = iter->tr;
6673
6674         /* Iterators are static; they should be either filled or empty */
6675         if (trace_buffer_iter(iter, iter->cpu_file))
6676                 return EPOLLIN | EPOLLRDNORM;
6677
6678         if (tr->trace_flags & TRACE_ITER_BLOCK)
6679                 /*
6680                  * Always select as readable when in blocking mode
6681                  */
6682                 return EPOLLIN | EPOLLRDNORM;
6683         else
6684                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6685                                              filp, poll_table, iter->tr->buffer_percent);
6686 }
6687
6688 static __poll_t
6689 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6690 {
6691         struct trace_iterator *iter = filp->private_data;
6692
6693         return trace_poll(iter, filp, poll_table);
6694 }
6695
6696 /* Must be called with iter->mutex held. */
6697 static int tracing_wait_pipe(struct file *filp)
6698 {
6699         struct trace_iterator *iter = filp->private_data;
6700         int ret;
6701
6702         while (trace_empty(iter)) {
6703
6704                 if ((filp->f_flags & O_NONBLOCK)) {
6705                         return -EAGAIN;
6706                 }
6707
6708                 /*
6709                  * We only return EOF once we have read something and tracing
6710                  * has been disabled. If tracing is disabled but we have never
6711                  * read anything, we keep blocking. This allows a user to cat
6712                  * this file and then enable tracing. But after we have read
6713                  * something, we give an EOF when tracing is disabled again.
6714                  *
6715                  * iter->pos will be 0 if we haven't read anything.
6716                  */
6717                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6718                         break;
6719
6720                 mutex_unlock(&iter->mutex);
6721
6722                 ret = wait_on_pipe(iter, 0);
6723
6724                 mutex_lock(&iter->mutex);
6725
6726                 if (ret)
6727                         return ret;
6728         }
6729
6730         return 1;
6731 }
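
/*
 * Editorial sketch (user space, hedged): a minimal consumer of trace_pipe,
 * illustrating the semantics documented above. A blocking read() sleeps in
 * tracing_wait_pipe() until data arrives; once something has been read and
 * tracing is turned off, read() returns 0 (EOF). With O_NONBLOCK it returns
 * -1 with errno set to EAGAIN instead of sleeping.
 *
 *	char buf[4096];
 *	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *	ssize_t n;
 *
 *	while ((n = read(fd, buf, sizeof(buf))) > 0)
 *		fwrite(buf, 1, n, stdout);
 *	close(fd);
 */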
6732
6733 /*
6734  * Consumer reader.
6735  */
6736 static ssize_t
6737 tracing_read_pipe(struct file *filp, char __user *ubuf,
6738                   size_t cnt, loff_t *ppos)
6739 {
6740         struct trace_iterator *iter = filp->private_data;
6741         ssize_t sret;
6742
6743         /*
6744          * Avoid more than one consumer on a single file descriptor.
6745          * This is just a matter of trace coherency; the ring buffer
6746          * itself is protected.
6747          */
6748         mutex_lock(&iter->mutex);
6749
6750         /* return any leftover data */
6751         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6752         if (sret != -EBUSY)
6753                 goto out;
6754
6755         trace_seq_init(&iter->seq);
6756
6757         if (iter->trace->read) {
6758                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6759                 if (sret)
6760                         goto out;
6761         }
6762
6763 waitagain:
6764         sret = tracing_wait_pipe(filp);
6765         if (sret <= 0)
6766                 goto out;
6767
6768         /* stop when tracing is finished */
6769         if (trace_empty(iter)) {
6770                 sret = 0;
6771                 goto out;
6772         }
6773
6774         if (cnt >= PAGE_SIZE)
6775                 cnt = PAGE_SIZE - 1;
6776
6777         /* reset all but tr, trace, and overruns */
6778         trace_iterator_reset(iter);
6779         cpumask_clear(iter->started);
6780         trace_seq_init(&iter->seq);
6781
6782         trace_event_read_lock();
6783         trace_access_lock(iter->cpu_file);
6784         while (trace_find_next_entry_inc(iter) != NULL) {
6785                 enum print_line_t ret;
6786                 int save_len = iter->seq.seq.len;
6787
6788                 ret = print_trace_line(iter);
6789                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6790                         /* don't print partial lines */
6791                         iter->seq.seq.len = save_len;
6792                         break;
6793                 }
6794                 if (ret != TRACE_TYPE_NO_CONSUME)
6795                         trace_consume(iter);
6796
6797                 if (trace_seq_used(&iter->seq) >= cnt)
6798                         break;
6799
6800                 /*
6801                  * Setting the full flag means we reached the trace_seq buffer size,
6802                  * so we should have left via the partial-output condition above.
6803                  * One of the trace_seq_* functions is not being used properly.
6804                  */
6805                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6806                           iter->ent->type);
6807         }
6808         trace_access_unlock(iter->cpu_file);
6809         trace_event_read_unlock();
6810
6811         /* Now copy what we have to the user */
6812         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6813         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6814                 trace_seq_init(&iter->seq);
6815
6816         /*
6817          * If there was nothing to send to user, in spite of consuming trace
6818          * entries, go back to wait for more entries.
6819          */
6820         if (sret == -EBUSY)
6821                 goto waitagain;
6822
6823 out:
6824         mutex_unlock(&iter->mutex);
6825
6826         return sret;
6827 }
6828
6829 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6830                                      unsigned int idx)
6831 {
6832         __free_page(spd->pages[idx]);
6833 }
6834
6835 static size_t
6836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6837 {
6838         size_t count;
6839         int save_len;
6840         int ret;
6841
6842         /* Seq buffer is page-sized, exactly what we need. */
6843         for (;;) {
6844                 save_len = iter->seq.seq.len;
6845                 ret = print_trace_line(iter);
6846
6847                 if (trace_seq_has_overflowed(&iter->seq)) {
6848                         iter->seq.seq.len = save_len;
6849                         break;
6850                 }
6851
6852                 /*
6853                  * This should not be hit, because it should only
6854                  * be set if the iter->seq overflowed. But check it
6855                  * anyway to be safe.
6856                  */
6857                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6858                         iter->seq.seq.len = save_len;
6859                         break;
6860                 }
6861
6862                 count = trace_seq_used(&iter->seq) - save_len;
6863                 if (rem < count) {
6864                         rem = 0;
6865                         iter->seq.seq.len = save_len;
6866                         break;
6867                 }
6868
6869                 if (ret != TRACE_TYPE_NO_CONSUME)
6870                         trace_consume(iter);
6871                 rem -= count;
6872                 if (!trace_find_next_entry_inc(iter))   {
6873                         rem = 0;
6874                         iter->ent = NULL;
6875                         break;
6876                 }
6877         }
6878
6879         return rem;
6880 }
6881
6882 static ssize_t tracing_splice_read_pipe(struct file *filp,
6883                                         loff_t *ppos,
6884                                         struct pipe_inode_info *pipe,
6885                                         size_t len,
6886                                         unsigned int flags)
6887 {
6888         struct page *pages_def[PIPE_DEF_BUFFERS];
6889         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6890         struct trace_iterator *iter = filp->private_data;
6891         struct splice_pipe_desc spd = {
6892                 .pages          = pages_def,
6893                 .partial        = partial_def,
6894                 .nr_pages       = 0, /* This gets updated below. */
6895                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6896                 .ops            = &default_pipe_buf_ops,
6897                 .spd_release    = tracing_spd_release_pipe,
6898         };
6899         ssize_t ret;
6900         size_t rem;
6901         unsigned int i;
6902
6903         if (splice_grow_spd(pipe, &spd))
6904                 return -ENOMEM;
6905
6906         mutex_lock(&iter->mutex);
6907
6908         if (iter->trace->splice_read) {
6909                 ret = iter->trace->splice_read(iter, filp,
6910                                                ppos, pipe, len, flags);
6911                 if (ret)
6912                         goto out_err;
6913         }
6914
6915         ret = tracing_wait_pipe(filp);
6916         if (ret <= 0)
6917                 goto out_err;
6918
6919         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6920                 ret = -EFAULT;
6921                 goto out_err;
6922         }
6923
6924         trace_event_read_lock();
6925         trace_access_lock(iter->cpu_file);
6926
6927         /* Fill as many pages as possible. */
6928         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6929                 spd.pages[i] = alloc_page(GFP_KERNEL);
6930                 if (!spd.pages[i])
6931                         break;
6932
6933                 rem = tracing_fill_pipe_page(rem, iter);
6934
6935                 /* Copy the data into the page, so we can start over. */
6936                 ret = trace_seq_to_buffer(&iter->seq,
6937                                           page_address(spd.pages[i]),
6938                                           trace_seq_used(&iter->seq));
6939                 if (ret < 0) {
6940                         __free_page(spd.pages[i]);
6941                         break;
6942                 }
6943                 spd.partial[i].offset = 0;
6944                 spd.partial[i].len = trace_seq_used(&iter->seq);
6945
6946                 trace_seq_init(&iter->seq);
6947         }
6948
6949         trace_access_unlock(iter->cpu_file);
6950         trace_event_read_unlock();
6951         mutex_unlock(&iter->mutex);
6952
6953         spd.nr_pages = i;
6954
6955         if (i)
6956                 ret = splice_to_pipe(pipe, &spd);
6957         else
6958                 ret = 0;
6959 out:
6960         splice_shrink_spd(&spd);
6961         return ret;
6962
6963 out_err:
6964         mutex_unlock(&iter->mutex);
6965         goto out;
6966 }
6967
6968 static ssize_t
6969 tracing_entries_read(struct file *filp, char __user *ubuf,
6970                      size_t cnt, loff_t *ppos)
6971 {
6972         struct inode *inode = file_inode(filp);
6973         struct trace_array *tr = inode->i_private;
6974         int cpu = tracing_get_cpu(inode);
6975         char buf[64];
6976         int r = 0;
6977         ssize_t ret;
6978
6979         mutex_lock(&trace_types_lock);
6980
6981         if (cpu == RING_BUFFER_ALL_CPUS) {
6982                 int cpu, buf_size_same;
6983                 unsigned long size;
6984
6985                 size = 0;
6986                 buf_size_same = 1;
6987                 /* check if all cpu sizes are same */
6988                 for_each_tracing_cpu(cpu) {
6989                         /* fill in the size from first enabled cpu */
6990                         if (size == 0)
6991                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6992                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6993                                 buf_size_same = 0;
6994                                 break;
6995                         }
6996                 }
6997
6998                 if (buf_size_same) {
6999                         if (!ring_buffer_expanded)
7000                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7001                                             size >> 10,
7002                                             trace_buf_size >> 10);
7003                         else
7004                                 r = sprintf(buf, "%lu\n", size >> 10);
7005                 } else
7006                         r = sprintf(buf, "X\n");
7007         } else
7008                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7009
7010         mutex_unlock(&trace_types_lock);
7011
7012         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7013         return ret;
7014 }
7015
7016 static ssize_t
7017 tracing_entries_write(struct file *filp, const char __user *ubuf,
7018                       size_t cnt, loff_t *ppos)
7019 {
7020         struct inode *inode = file_inode(filp);
7021         struct trace_array *tr = inode->i_private;
7022         unsigned long val;
7023         int ret;
7024
7025         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7026         if (ret)
7027                 return ret;
7028
7029         /* must have at least 1 entry */
7030         if (!val)
7031                 return -EINVAL;
7032
7033         /* value is in KB */
7034         val <<= 10;
7035         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7036         if (ret < 0)
7037                 return ret;
7038
7039         *ppos += cnt;
7040
7041         return cnt;
7042 }
7043
7044 static ssize_t
7045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7046                                 size_t cnt, loff_t *ppos)
7047 {
7048         struct trace_array *tr = filp->private_data;
7049         char buf[64];
7050         int r, cpu;
7051         unsigned long size = 0, expanded_size = 0;
7052
7053         mutex_lock(&trace_types_lock);
7054         for_each_tracing_cpu(cpu) {
7055                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7056                 if (!ring_buffer_expanded)
7057                         expanded_size += trace_buf_size >> 10;
7058         }
7059         if (ring_buffer_expanded)
7060                 r = sprintf(buf, "%lu\n", size);
7061         else
7062                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7063         mutex_unlock(&trace_types_lock);
7064
7065         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7066 }
7067
7068 static ssize_t
7069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7070                           size_t cnt, loff_t *ppos)
7071 {
7072         /*
7073          * There is no need to read what the user has written; this function
7074          * just makes sure that there is no error when "echo" is used.
7075          */
7076
7077         *ppos += cnt;
7078
7079         return cnt;
7080 }
7081
7082 static int
7083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7084 {
7085         struct trace_array *tr = inode->i_private;
7086
7087         /* disable tracing? */
7088         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7089                 tracer_tracing_off(tr);
7090         /* resize the ring buffer to 0 */
7091         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7092
7093         trace_array_put(tr);
7094
7095         return 0;
7096 }
7097
7098 static ssize_t
7099 tracing_mark_write(struct file *filp, const char __user *ubuf,
7100                                         size_t cnt, loff_t *fpos)
7101 {
7102         struct trace_array *tr = filp->private_data;
7103         struct ring_buffer_event *event;
7104         enum event_trigger_type tt = ETT_NONE;
7105         struct trace_buffer *buffer;
7106         struct print_entry *entry;
7107         ssize_t written;
7108         int size;
7109         int len;
7110
7111 /* Used in tracing_mark_raw_write() as well */
7112 #define FAULTED_STR "<faulted>"
7113 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7114
7115         if (tracing_disabled)
7116                 return -EINVAL;
7117
7118         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7119                 return -EINVAL;
7120
7121         if (cnt > TRACE_BUF_SIZE)
7122                 cnt = TRACE_BUF_SIZE;
7123
7124         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7125
7126         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7127
7128         /* If less than "<faulted>", then make sure we can still add that */
7129         if (cnt < FAULTED_SIZE)
7130                 size += FAULTED_SIZE - cnt;
7131
7132         buffer = tr->array_buffer.buffer;
7133         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7134                                             tracing_gen_ctx());
7135         if (unlikely(!event))
7136                 /* Ring buffer disabled, return as if not open for write */
7137                 return -EBADF;
7138
7139         entry = ring_buffer_event_data(event);
7140         entry->ip = _THIS_IP_;
7141
7142         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7143         if (len) {
7144                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7145                 cnt = FAULTED_SIZE;
7146                 written = -EFAULT;
7147         } else
7148                 written = cnt;
7149
7150         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7151                 /* do not add \n before testing triggers, but add \0 */
7152                 entry->buf[cnt] = '\0';
7153                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7154         }
7155
7156         if (entry->buf[cnt - 1] != '\n') {
7157                 entry->buf[cnt] = '\n';
7158                 entry->buf[cnt + 1] = '\0';
7159         } else
7160                 entry->buf[cnt] = '\0';
7161
7162         if (static_branch_unlikely(&trace_marker_exports_enabled))
7163                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7164         __buffer_unlock_commit(buffer, event);
7165
7166         if (tt)
7167                 event_triggers_post_call(tr->trace_marker_file, tt);
7168
7169         return written;
7170 }
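
/*
 * Editorial sketch (user space, hedged): trace_marker lets user space inject
 * annotations into the trace. Each write becomes one TRACE_PRINT event; a
 * newline is appended if the string does not already end in one, and writes
 * longer than TRACE_BUF_SIZE are truncated. The format string and frame_nr
 * below are purely illustrative.
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0) {
 *		dprintf(fd, "frame %d start", frame_nr);
 *		close(fd);
 *	}
 */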
7171
7172 /* Limit it for now to 3K (including tag) */
7173 #define RAW_DATA_MAX_SIZE (1024*3)
7174
7175 static ssize_t
7176 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7177                                         size_t cnt, loff_t *fpos)
7178 {
7179         struct trace_array *tr = filp->private_data;
7180         struct ring_buffer_event *event;
7181         struct trace_buffer *buffer;
7182         struct raw_data_entry *entry;
7183         ssize_t written;
7184         int size;
7185         int len;
7186
7187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7188
7189         if (tracing_disabled)
7190                 return -EINVAL;
7191
7192         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7193                 return -EINVAL;
7194
7195         /* The marker must at least have a tag id */
7196         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7197                 return -EINVAL;
7198
7199         if (cnt > TRACE_BUF_SIZE)
7200                 cnt = TRACE_BUF_SIZE;
7201
7202         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7203
7204         size = sizeof(*entry) + cnt;
7205         if (cnt < FAULT_SIZE_ID)
7206                 size += FAULT_SIZE_ID - cnt;
7207
7208         buffer = tr->array_buffer.buffer;
7209         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7210                                             tracing_gen_ctx());
7211         if (!event)
7212                 /* Ring buffer disabled, return as if not open for write */
7213                 return -EBADF;
7214
7215         entry = ring_buffer_event_data(event);
7216
7217         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7218         if (len) {
7219                 entry->id = -1;
7220                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7221                 written = -EFAULT;
7222         } else
7223                 written = cnt;
7224
7225         __buffer_unlock_commit(buffer, event);
7226
7227         return written;
7228 }
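
/*
 * Editorial sketch (user space, hedged): trace_marker_raw takes binary data.
 * As enforced above, each write must be at least sizeof(unsigned int) bytes
 * (a tag id) and no larger than RAW_DATA_MAX_SIZE. The record layout after
 * the tag is application-defined; the kernel only copies the bytes into a
 * TRACE_RAW_DATA event. A hypothetical caller:
 *
 *	struct {
 *		unsigned int id;
 *		unsigned long long payload;
 *	} rec = { .id = 42, .payload = value };
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, &rec, sizeof(rec));
 *		close(fd);
 *	}
 */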
7229
7230 static int tracing_clock_show(struct seq_file *m, void *v)
7231 {
7232         struct trace_array *tr = m->private;
7233         int i;
7234
7235         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7236                 seq_printf(m,
7237                         "%s%s%s%s", i ? " " : "",
7238                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7239                         i == tr->clock_id ? "]" : "");
7240         seq_putc(m, '\n');
7241
7242         return 0;
7243 }
7244
7245 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7246 {
7247         int i;
7248
7249         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7250                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7251                         break;
7252         }
7253         if (i == ARRAY_SIZE(trace_clocks))
7254                 return -EINVAL;
7255
7256         mutex_lock(&trace_types_lock);
7257
7258         tr->clock_id = i;
7259
7260         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7261
7262         /*
7263          * New clock may not be consistent with the previous clock.
7264          * Reset the buffer so that it doesn't have incomparable timestamps.
7265          */
7266         tracing_reset_online_cpus(&tr->array_buffer);
7267
7268 #ifdef CONFIG_TRACER_MAX_TRACE
7269         if (tr->max_buffer.buffer)
7270                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7271         tracing_reset_online_cpus(&tr->max_buffer);
7272 #endif
7273
7274         mutex_unlock(&trace_types_lock);
7275
7276         return 0;
7277 }
7278
7279 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7280                                    size_t cnt, loff_t *fpos)
7281 {
7282         struct seq_file *m = filp->private_data;
7283         struct trace_array *tr = m->private;
7284         char buf[64];
7285         const char *clockstr;
7286         int ret;
7287
7288         if (cnt >= sizeof(buf))
7289                 return -EINVAL;
7290
7291         if (copy_from_user(buf, ubuf, cnt))
7292                 return -EFAULT;
7293
7294         buf[cnt] = 0;
7295
7296         clockstr = strstrip(buf);
7297
7298         ret = tracing_set_clock(tr, clockstr);
7299         if (ret)
7300                 return ret;
7301
7302         *fpos += cnt;
7303
7304         return cnt;
7305 }
7306
7307 static int tracing_clock_open(struct inode *inode, struct file *file)
7308 {
7309         struct trace_array *tr = inode->i_private;
7310         int ret;
7311
7312         ret = tracing_check_open_get_tr(tr);
7313         if (ret)
7314                 return ret;
7315
7316         ret = single_open(file, tracing_clock_show, inode->i_private);
7317         if (ret < 0)
7318                 trace_array_put(tr);
7319
7320         return ret;
7321 }
7322
7323 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7324 {
7325         struct trace_array *tr = m->private;
7326
7327         mutex_lock(&trace_types_lock);
7328
7329         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7330                 seq_puts(m, "delta [absolute]\n");
7331         else
7332                 seq_puts(m, "[delta] absolute\n");
7333
7334         mutex_unlock(&trace_types_lock);
7335
7336         return 0;
7337 }
7338
7339 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7340 {
7341         struct trace_array *tr = inode->i_private;
7342         int ret;
7343
7344         ret = tracing_check_open_get_tr(tr);
7345         if (ret)
7346                 return ret;
7347
7348         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7349         if (ret < 0)
7350                 trace_array_put(tr);
7351
7352         return ret;
7353 }
7354
7355 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7356 {
7357         if (rbe == this_cpu_read(trace_buffered_event))
7358                 return ring_buffer_time_stamp(buffer);
7359
7360         return ring_buffer_event_time_stamp(buffer, rbe);
7361 }
7362
7363 /*
7364  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7365  */
7366 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7367 {
7368         int ret = 0;
7369
7370         mutex_lock(&trace_types_lock);
7371
7372         if (set && tr->no_filter_buffering_ref++)
7373                 goto out;
7374
7375         if (!set) {
7376                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7377                         ret = -EINVAL;
7378                         goto out;
7379                 }
7380
7381                 --tr->no_filter_buffering_ref;
7382         }
7383  out:
7384         mutex_unlock(&trace_types_lock);
7385
7386         return ret;
7387 }
7388
7389 struct ftrace_buffer_info {
7390         struct trace_iterator   iter;
7391         void                    *spare;
7392         unsigned int            spare_cpu;
7393         unsigned int            read;
7394 };
7395
7396 #ifdef CONFIG_TRACER_SNAPSHOT
7397 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7398 {
7399         struct trace_array *tr = inode->i_private;
7400         struct trace_iterator *iter;
7401         struct seq_file *m;
7402         int ret;
7403
7404         ret = tracing_check_open_get_tr(tr);
7405         if (ret)
7406                 return ret;
7407
7408         if (file->f_mode & FMODE_READ) {
7409                 iter = __tracing_open(inode, file, true);
7410                 if (IS_ERR(iter))
7411                         ret = PTR_ERR(iter);
7412         } else {
7413                 /* Writes still need the seq_file to hold the private data */
7414                 ret = -ENOMEM;
7415                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7416                 if (!m)
7417                         goto out;
7418                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7419                 if (!iter) {
7420                         kfree(m);
7421                         goto out;
7422                 }
7423                 ret = 0;
7424
7425                 iter->tr = tr;
7426                 iter->array_buffer = &tr->max_buffer;
7427                 iter->cpu_file = tracing_get_cpu(inode);
7428                 m->private = iter;
7429                 file->private_data = m;
7430         }
7431 out:
7432         if (ret < 0)
7433                 trace_array_put(tr);
7434
7435         return ret;
7436 }
7437
7438 static ssize_t
7439 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7440                        loff_t *ppos)
7441 {
7442         struct seq_file *m = filp->private_data;
7443         struct trace_iterator *iter = m->private;
7444         struct trace_array *tr = iter->tr;
7445         unsigned long val;
7446         int ret;
7447
7448         ret = tracing_update_buffers();
7449         if (ret < 0)
7450                 return ret;
7451
7452         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7453         if (ret)
7454                 return ret;
7455
7456         mutex_lock(&trace_types_lock);
7457
7458         if (tr->current_trace->use_max_tr) {
7459                 ret = -EBUSY;
7460                 goto out;
7461         }
7462
7463         local_irq_disable();
7464         arch_spin_lock(&tr->max_lock);
7465         if (tr->cond_snapshot)
7466                 ret = -EBUSY;
7467         arch_spin_unlock(&tr->max_lock);
7468         local_irq_enable();
7469         if (ret)
7470                 goto out;
7471
7472         switch (val) {
7473         case 0:
7474                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7475                         ret = -EINVAL;
7476                         break;
7477                 }
7478                 if (tr->allocated_snapshot)
7479                         free_snapshot(tr);
7480                 break;
7481         case 1:
7482 /* Only allow per-cpu swap if the ring buffer supports it */
7483 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7484                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7485                         ret = -EINVAL;
7486                         break;
7487                 }
7488 #endif
7489                 if (tr->allocated_snapshot)
7490                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7491                                         &tr->array_buffer, iter->cpu_file);
7492                 else
7493                         ret = tracing_alloc_snapshot_instance(tr);
7494                 if (ret < 0)
7495                         break;
7496                 local_irq_disable();
7497                 /* Now, we're going to swap */
7498                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7499                         update_max_tr(tr, current, smp_processor_id(), NULL);
7500                 else
7501                         update_max_tr_single(tr, current, iter->cpu_file);
7502                 local_irq_enable();
7503                 break;
7504         default:
7505                 if (tr->allocated_snapshot) {
7506                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7507                                 tracing_reset_online_cpus(&tr->max_buffer);
7508                         else
7509                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7510                 }
7511                 break;
7512         }
7513
7514         if (ret >= 0) {
7515                 *ppos += cnt;
7516                 ret = cnt;
7517         }
7518 out:
7519         mutex_unlock(&trace_types_lock);
7520         return ret;
7521 }
7522
7523 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7524 {
7525         struct seq_file *m = file->private_data;
7526         int ret;
7527
7528         ret = tracing_release(inode, file);
7529
7530         if (file->f_mode & FMODE_READ)
7531                 return ret;
7532
7533         /* If write only, the seq_file is just a stub */
7534         if (m)
7535                 kfree(m->private);
7536         kfree(m);
7537
7538         return 0;
7539 }
7540
7541 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7542 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7543                                     size_t count, loff_t *ppos);
7544 static int tracing_buffers_release(struct inode *inode, struct file *file);
7545 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7546                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7547
7548 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7549 {
7550         struct ftrace_buffer_info *info;
7551         int ret;
7552
7553         /* The following checks for tracefs lockdown */
7554         ret = tracing_buffers_open(inode, filp);
7555         if (ret < 0)
7556                 return ret;
7557
7558         info = filp->private_data;
7559
7560         if (info->iter.trace->use_max_tr) {
7561                 tracing_buffers_release(inode, filp);
7562                 return -EBUSY;
7563         }
7564
7565         info->iter.snapshot = true;
7566         info->iter.array_buffer = &info->iter.tr->max_buffer;
7567
7568         return ret;
7569 }
7570
7571 #endif /* CONFIG_TRACER_SNAPSHOT */
7572
7573
7574 static const struct file_operations tracing_thresh_fops = {
7575         .open           = tracing_open_generic,
7576         .read           = tracing_thresh_read,
7577         .write          = tracing_thresh_write,
7578         .llseek         = generic_file_llseek,
7579 };
7580
7581 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7582 static const struct file_operations tracing_max_lat_fops = {
7583         .open           = tracing_open_generic,
7584         .read           = tracing_max_lat_read,
7585         .write          = tracing_max_lat_write,
7586         .llseek         = generic_file_llseek,
7587 };
7588 #endif
7589
7590 static const struct file_operations set_tracer_fops = {
7591         .open           = tracing_open_generic,
7592         .read           = tracing_set_trace_read,
7593         .write          = tracing_set_trace_write,
7594         .llseek         = generic_file_llseek,
7595 };
7596
7597 static const struct file_operations tracing_pipe_fops = {
7598         .open           = tracing_open_pipe,
7599         .poll           = tracing_poll_pipe,
7600         .read           = tracing_read_pipe,
7601         .splice_read    = tracing_splice_read_pipe,
7602         .release        = tracing_release_pipe,
7603         .llseek         = no_llseek,
7604 };
7605
7606 static const struct file_operations tracing_entries_fops = {
7607         .open           = tracing_open_generic_tr,
7608         .read           = tracing_entries_read,
7609         .write          = tracing_entries_write,
7610         .llseek         = generic_file_llseek,
7611         .release        = tracing_release_generic_tr,
7612 };
7613
7614 static const struct file_operations tracing_total_entries_fops = {
7615         .open           = tracing_open_generic_tr,
7616         .read           = tracing_total_entries_read,
7617         .llseek         = generic_file_llseek,
7618         .release        = tracing_release_generic_tr,
7619 };
7620
7621 static const struct file_operations tracing_free_buffer_fops = {
7622         .open           = tracing_open_generic_tr,
7623         .write          = tracing_free_buffer_write,
7624         .release        = tracing_free_buffer_release,
7625 };
7626
7627 static const struct file_operations tracing_mark_fops = {
7628         .open           = tracing_mark_open,
7629         .write          = tracing_mark_write,
7630         .release        = tracing_release_generic_tr,
7631 };
7632
7633 static const struct file_operations tracing_mark_raw_fops = {
7634         .open           = tracing_mark_open,
7635         .write          = tracing_mark_raw_write,
7636         .release        = tracing_release_generic_tr,
7637 };
7638
7639 static const struct file_operations trace_clock_fops = {
7640         .open           = tracing_clock_open,
7641         .read           = seq_read,
7642         .llseek         = seq_lseek,
7643         .release        = tracing_single_release_tr,
7644         .write          = tracing_clock_write,
7645 };
7646
7647 static const struct file_operations trace_time_stamp_mode_fops = {
7648         .open           = tracing_time_stamp_mode_open,
7649         .read           = seq_read,
7650         .llseek         = seq_lseek,
7651         .release        = tracing_single_release_tr,
7652 };
7653
7654 #ifdef CONFIG_TRACER_SNAPSHOT
7655 static const struct file_operations snapshot_fops = {
7656         .open           = tracing_snapshot_open,
7657         .read           = seq_read,
7658         .write          = tracing_snapshot_write,
7659         .llseek         = tracing_lseek,
7660         .release        = tracing_snapshot_release,
7661 };
7662
7663 static const struct file_operations snapshot_raw_fops = {
7664         .open           = snapshot_raw_open,
7665         .read           = tracing_buffers_read,
7666         .release        = tracing_buffers_release,
7667         .splice_read    = tracing_buffers_splice_read,
7668         .llseek         = no_llseek,
7669 };
7670
7671 #endif /* CONFIG_TRACER_SNAPSHOT */
7672
7673 /*
7674  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7675  * @filp: The active open file structure
7676  * @ubuf: The userspace provided buffer to read value into
7677  * @cnt: The maximum number of bytes to read
7678  * @ppos: The current "file" position
7679  *
7680  * This function implements the write interface for a struct trace_min_max_param.
7681  * The filp->private_data must point to a trace_min_max_param structure that
7682  * defines where to write the value, the min and the max acceptable values,
7683  * and a lock to protect the write.
7684  */
7685 static ssize_t
7686 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7687 {
7688         struct trace_min_max_param *param = filp->private_data;
7689         u64 val;
7690         int err;
7691
7692         if (!param)
7693                 return -EFAULT;
7694
7695         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7696         if (err)
7697                 return err;
7698
7699         if (param->lock)
7700                 mutex_lock(param->lock);
7701
7702         if (param->min && val < *param->min)
7703                 err = -EINVAL;
7704
7705         if (param->max && val > *param->max)
7706                 err = -EINVAL;
7707
7708         if (!err)
7709                 *param->val = val;
7710
7711         if (param->lock)
7712                 mutex_unlock(param->lock);
7713
7714         if (err)
7715                 return err;
7716
7717         return cnt;
7718 }
7719
7720 /*
7721  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7722  * @filp: The active open file structure
7723  * @ubuf: The userspace provided buffer to read value into
7724  * @cnt: The maximum number of bytes to read
7725  * @ppos: The current "file" position
7726  *
7727  * This function implements the read interface for a struct trace_min_max_param.
7728  * The filp->private_data must point to a trace_min_max_param struct with valid
7729  * data.
7730  */
7731 static ssize_t
7732 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7733 {
7734         struct trace_min_max_param *param = filp->private_data;
7735         char buf[U64_STR_SIZE];
7736         int len;
7737         u64 val;
7738
7739         if (!param)
7740                 return -EFAULT;
7741
7742         val = *param->val;
7743
7744         if (cnt > sizeof(buf))
7745                 cnt = sizeof(buf);
7746
7747         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7748
7749         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7750 }
7751
7752 const struct file_operations trace_min_max_fops = {
7753         .open           = tracing_open_generic,
7754         .read           = trace_min_max_read,
7755         .write          = trace_min_max_write,
7756 };
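
/*
 * Editorial sketch (hedged; fields inferred from the accessors above): a
 * user of trace_min_max_fops fills in a struct trace_min_max_param with
 * pointers to the value and its optional bounds and lock, then passes it to
 * trace_create_file() as the file's private data. All names below are
 * hypothetical.
 *
 *	static u64 my_val = 50, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */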
7757
7758 #define TRACING_LOG_ERRS_MAX    8
7759 #define TRACING_LOG_LOC_MAX     128
7760
7761 #define CMD_PREFIX "  Command: "
7762
7763 struct err_info {
7764         const char      **errs; /* ptr to loc-specific array of err strings */
7765         u8              type;   /* index into errs -> specific err string */
7766         u16             pos;    /* caret position */
7767         u64             ts;
7768 };
7769
7770 struct tracing_log_err {
7771         struct list_head        list;
7772         struct err_info         info;
7773         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7774         char                    *cmd;                     /* what caused err */
7775 };
7776
7777 static DEFINE_MUTEX(tracing_err_log_lock);
7778
7779 static struct tracing_log_err *alloc_tracing_log_err(int len)
7780 {
7781         struct tracing_log_err *err;
7782
7783         err = kzalloc(sizeof(*err), GFP_KERNEL);
7784         if (!err)
7785                 return ERR_PTR(-ENOMEM);
7786
7787         err->cmd = kzalloc(len, GFP_KERNEL);
7788         if (!err->cmd) {
7789                 kfree(err);
7790                 return ERR_PTR(-ENOMEM);
7791         }
7792
7793         return err;
7794 }
7795
7796 static void free_tracing_log_err(struct tracing_log_err *err)
7797 {
7798         kfree(err->cmd);
7799         kfree(err);
7800 }
7801
7802 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7803                                                    int len)
7804 {
7805         struct tracing_log_err *err;
7806         char *cmd;
7807
7808         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7809                 err = alloc_tracing_log_err(len);
7810                 if (PTR_ERR(err) != -ENOMEM)
7811                         tr->n_err_log_entries++;
7812
7813                 return err;
7814         }
7815         cmd = kzalloc(len, GFP_KERNEL);
7816         if (!cmd)
7817                 return ERR_PTR(-ENOMEM);
7818         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7819         kfree(err->cmd);
7820         err->cmd = cmd;
7821         list_del(&err->list);
7822
7823         return err;
7824 }
7825
7826 /**
7827  * err_pos - find the position of a string within a command for error careting
7828  * @cmd: The tracing command that caused the error
7829  * @str: The string to position the caret at within @cmd
7830  *
7831  * Finds the position of the first occurrence of @str within @cmd.  The
7832  * return value can be passed to tracing_log_err() for caret placement
7833  * within @cmd.
7834  *
7835  * Returns the index within @cmd of the first occurrence of @str or 0
7836  * if @str was not found.
7837  */
7838 unsigned int err_pos(char *cmd, const char *str)
7839 {
7840         char *found;
7841
7842         if (WARN_ON(!strlen(cmd)))
7843                 return 0;
7844
7845         found = strstr(cmd, str);
7846         if (found)
7847                 return found - cmd;
7848
7849         return 0;
7850 }
7851
7852 /**
7853  * tracing_log_err - write an error to the tracing error log
7854  * @tr: The associated trace array for the error (NULL for top level array)
7855  * @loc: A string describing where the error occurred
7856  * @cmd: The tracing command that caused the error
7857  * @errs: The array of loc-specific static error strings
7858  * @type: The index into errs[], which produces the specific static err string
7859  * @pos: The position the caret should be placed in the cmd
7860  *
7861  * Writes an error into tracing/error_log of the form:
7862  *
7863  * <loc>: error: <text>
7864  *   Command: <cmd>
7865  *              ^
7866  *
7867  * tracing/error_log is a small log file containing the last
7868  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7869  * unless there has been a tracing error, and the error log can be
7870  * cleared and have its memory freed by writing the empty string in
7871  * truncation mode to it i.e. echo > tracing/error_log.
7872  *
7873  * NOTE: the @errs array along with the @type param are used to
7874  * produce a static error string - this string is not copied and saved
7875  * when the error is logged - only a pointer to it is saved.  See
7876  * existing callers for examples of how static strings are typically
7877  * defined for use with tracing_log_err().
7878  */
7879 void tracing_log_err(struct trace_array *tr,
7880                      const char *loc, const char *cmd,
7881                      const char **errs, u8 type, u16 pos)
7882 {
7883         struct tracing_log_err *err;
7884         int len = 0;
7885
7886         if (!tr)
7887                 tr = &global_trace;
7888
7889         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7890
7891         mutex_lock(&tracing_err_log_lock);
7892         err = get_tracing_log_err(tr, len);
7893         if (PTR_ERR(err) == -ENOMEM) {
7894                 mutex_unlock(&tracing_err_log_lock);
7895                 return;
7896         }
7897
7898         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7899         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7900
7901         err->info.errs = errs;
7902         err->info.type = type;
7903         err->info.pos = pos;
7904         err->info.ts = local_clock();
7905
7906         list_add_tail(&err->list, &tr->err_log);
7907         mutex_unlock(&tracing_err_log_lock);
7908 }
7909
7910 static void clear_tracing_err_log(struct trace_array *tr)
7911 {
7912         struct tracing_log_err *err, *next;
7913
7914         mutex_lock(&tracing_err_log_lock);
7915         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7916                 list_del(&err->list);
7917                 free_tracing_log_err(err);
7918         }
7919
7920         tr->n_err_log_entries = 0;
7921         mutex_unlock(&tracing_err_log_lock);
7922 }
7923
7924 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7925 {
7926         struct trace_array *tr = m->private;
7927
7928         mutex_lock(&tracing_err_log_lock);
7929
7930         return seq_list_start(&tr->err_log, *pos);
7931 }
7932
7933 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7934 {
7935         struct trace_array *tr = m->private;
7936
7937         return seq_list_next(v, &tr->err_log, pos);
7938 }
7939
7940 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7941 {
7942         mutex_unlock(&tracing_err_log_lock);
7943 }
7944
7945 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7946 {
7947         u16 i;
7948
7949         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7950                 seq_putc(m, ' ');
7951         for (i = 0; i < pos; i++)
7952                 seq_putc(m, ' ');
7953         seq_puts(m, "^\n");
7954 }
7955
7956 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7957 {
7958         struct tracing_log_err *err = v;
7959
7960         if (err) {
7961                 const char *err_text = err->info.errs[err->info.type];
7962                 u64 sec = err->info.ts;
7963                 u32 nsec;
7964
7965                 nsec = do_div(sec, NSEC_PER_SEC);
7966                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7967                            err->loc, err_text);
7968                 seq_printf(m, "%s", err->cmd);
7969                 tracing_err_log_show_pos(m, err->info.pos);
7970         }
7971
7972         return 0;
7973 }
7974
7975 static const struct seq_operations tracing_err_log_seq_ops = {
7976         .start  = tracing_err_log_seq_start,
7977         .next   = tracing_err_log_seq_next,
7978         .stop   = tracing_err_log_seq_stop,
7979         .show   = tracing_err_log_seq_show
7980 };
7981
7982 static int tracing_err_log_open(struct inode *inode, struct file *file)
7983 {
7984         struct trace_array *tr = inode->i_private;
7985         int ret = 0;
7986
7987         ret = tracing_check_open_get_tr(tr);
7988         if (ret)
7989                 return ret;
7990
7991         /* If this file was opened for write, then erase contents */
7992         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7993                 clear_tracing_err_log(tr);
7994
7995         if (file->f_mode & FMODE_READ) {
7996                 ret = seq_open(file, &tracing_err_log_seq_ops);
7997                 if (!ret) {
7998                         struct seq_file *m = file->private_data;
7999                         m->private = tr;
8000                 } else {
8001                         trace_array_put(tr);
8002                 }
8003         }
8004         return ret;
8005 }
8006
8007 static ssize_t tracing_err_log_write(struct file *file,
8008                                      const char __user *buffer,
8009                                      size_t count, loff_t *ppos)
8010 {
8011         return count;
8012 }
8013
8014 static int tracing_err_log_release(struct inode *inode, struct file *file)
8015 {
8016         struct trace_array *tr = inode->i_private;
8017
8018         trace_array_put(tr);
8019
8020         if (file->f_mode & FMODE_READ)
8021                 seq_release(inode, file);
8022
8023         return 0;
8024 }
8025
8026 static const struct file_operations tracing_err_log_fops = {
8027         .open           = tracing_err_log_open,
8028         .write          = tracing_err_log_write,
8029         .read           = seq_read,
8030         .llseek         = seq_lseek,
8031         .release        = tracing_err_log_release,
8032 };
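
/*
 * Editorial note: with the fops above, user space can inspect and clear the
 * error log (paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/error_log
 *	echo > /sys/kernel/tracing/error_log
 *
 * The write handler just consumes the data; the actual clearing happens in
 * tracing_err_log_open() when the file is opened with O_TRUNC.
 */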
8033
8034 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8035 {
8036         struct trace_array *tr = inode->i_private;
8037         struct ftrace_buffer_info *info;
8038         int ret;
8039
8040         ret = tracing_check_open_get_tr(tr);
8041         if (ret)
8042                 return ret;
8043
8044         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8045         if (!info) {
8046                 trace_array_put(tr);
8047                 return -ENOMEM;
8048         }
8049
8050         mutex_lock(&trace_types_lock);
8051
8052         info->iter.tr           = tr;
8053         info->iter.cpu_file     = tracing_get_cpu(inode);
8054         info->iter.trace        = tr->current_trace;
8055         info->iter.array_buffer = &tr->array_buffer;
8056         info->spare             = NULL;
8057         /* Force reading ring buffer for first read */
8058         info->read              = (unsigned int)-1;
8059
8060         filp->private_data = info;
8061
8062         tr->trace_ref++;
8063
8064         mutex_unlock(&trace_types_lock);
8065
8066         ret = nonseekable_open(inode, filp);
8067         if (ret < 0)
8068                 trace_array_put(tr);
8069
8070         return ret;
8071 }
8072
8073 static __poll_t
8074 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8075 {
8076         struct ftrace_buffer_info *info = filp->private_data;
8077         struct trace_iterator *iter = &info->iter;
8078
8079         return trace_poll(iter, filp, poll_table);
8080 }
8081
8082 static ssize_t
8083 tracing_buffers_read(struct file *filp, char __user *ubuf,
8084                      size_t count, loff_t *ppos)
8085 {
8086         struct ftrace_buffer_info *info = filp->private_data;
8087         struct trace_iterator *iter = &info->iter;
8088         ssize_t ret = 0;
8089         ssize_t size;
8090
8091         if (!count)
8092                 return 0;
8093
8094 #ifdef CONFIG_TRACER_MAX_TRACE
8095         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8096                 return -EBUSY;
8097 #endif
8098
8099         if (!info->spare) {
8100                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8101                                                           iter->cpu_file);
8102                 if (IS_ERR(info->spare)) {
8103                         ret = PTR_ERR(info->spare);
8104                         info->spare = NULL;
8105                 } else {
8106                         info->spare_cpu = iter->cpu_file;
8107                 }
8108         }
8109         if (!info->spare)
8110                 return ret;
8111
8112         /* Do we have previous read data to read? */
8113         if (info->read < PAGE_SIZE)
8114                 goto read;
8115
8116  again:
8117         trace_access_lock(iter->cpu_file);
8118         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8119                                     &info->spare,
8120                                     count,
8121                                     iter->cpu_file, 0);
8122         trace_access_unlock(iter->cpu_file);
8123
8124         if (ret < 0) {
8125                 if (trace_empty(iter)) {
8126                         if ((filp->f_flags & O_NONBLOCK))
8127                                 return -EAGAIN;
8128
8129                         ret = wait_on_pipe(iter, 0);
8130                         if (ret)
8131                                 return ret;
8132
8133                         goto again;
8134                 }
8135                 return 0;
8136         }
8137
8138         info->read = 0;
8139  read:
8140         size = PAGE_SIZE - info->read;
8141         if (size > count)
8142                 size = count;
8143
8144         ret = copy_to_user(ubuf, info->spare + info->read, size);
8145         if (ret == size)
8146                 return -EFAULT;
8147
8148         size -= ret;
8149
8150         *ppos += size;
8151         info->read += size;
8152
8153         return size;
8154 }
8155
8156 static int tracing_buffers_release(struct inode *inode, struct file *file)
8157 {
8158         struct ftrace_buffer_info *info = file->private_data;
8159         struct trace_iterator *iter = &info->iter;
8160
8161         mutex_lock(&trace_types_lock);
8162
8163         iter->tr->trace_ref--;
8164
8165         __trace_array_put(iter->tr);
8166
8167         iter->wait_index++;
8168         /* Make sure the waiters see the new wait_index */
8169         smp_wmb();
8170
8171         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8172
8173         if (info->spare)
8174                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8175                                            info->spare_cpu, info->spare);
8176         kvfree(info);
8177
8178         mutex_unlock(&trace_types_lock);
8179
8180         return 0;
8181 }
8182
8183 struct buffer_ref {
8184         struct trace_buffer     *buffer;
8185         void                    *page;
8186         int                     cpu;
8187         refcount_t              refcount;
8188 };
8189
8190 static void buffer_ref_release(struct buffer_ref *ref)
8191 {
8192         if (!refcount_dec_and_test(&ref->refcount))
8193                 return;
8194         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8195         kfree(ref);
8196 }
8197
8198 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8199                                     struct pipe_buffer *buf)
8200 {
8201         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8202
8203         buffer_ref_release(ref);
8204         buf->private = 0;
8205 }
8206
8207 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8208                                 struct pipe_buffer *buf)
8209 {
8210         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8211
8212         if (refcount_read(&ref->refcount) > INT_MAX/2)
8213                 return false;
8214
8215         refcount_inc(&ref->refcount);
8216         return true;
8217 }
8218
8219 /* Pipe buffer operations for a buffer. */
8220 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8221         .release                = buffer_pipe_buf_release,
8222         .get                    = buffer_pipe_buf_get,
8223 };
8224
8225 /*
8226  * Callback from splice_to_pipe(): release any pages still held in the spd
8227  * in case we errored out while filling the pipe.
8228  */
8229 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8230 {
8231         struct buffer_ref *ref =
8232                 (struct buffer_ref *)spd->partial[i].private;
8233
8234         buffer_ref_release(ref);
8235         spd->partial[i].private = 0;
8236 }
8237
8238 static ssize_t
8239 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8240                             struct pipe_inode_info *pipe, size_t len,
8241                             unsigned int flags)
8242 {
8243         struct ftrace_buffer_info *info = file->private_data;
8244         struct trace_iterator *iter = &info->iter;
8245         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8246         struct page *pages_def[PIPE_DEF_BUFFERS];
8247         struct splice_pipe_desc spd = {
8248                 .pages          = pages_def,
8249                 .partial        = partial_def,
8250                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8251                 .ops            = &buffer_pipe_buf_ops,
8252                 .spd_release    = buffer_spd_release,
8253         };
8254         struct buffer_ref *ref;
8255         int entries, i;
8256         ssize_t ret = 0;
8257
8258 #ifdef CONFIG_TRACER_MAX_TRACE
8259         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8260                 return -EBUSY;
8261 #endif
8262
8263         if (*ppos & (PAGE_SIZE - 1))
8264                 return -EINVAL;
8265
8266         if (len & (PAGE_SIZE - 1)) {
8267                 if (len < PAGE_SIZE)
8268                         return -EINVAL;
8269                 len &= PAGE_MASK;
8270         }
8271
8272         if (splice_grow_spd(pipe, &spd))
8273                 return -ENOMEM;
8274
8275  again:
8276         trace_access_lock(iter->cpu_file);
8277         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8278
8279         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8280                 struct page *page;
8281                 int r;
8282
8283                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8284                 if (!ref) {
8285                         ret = -ENOMEM;
8286                         break;
8287                 }
8288
8289                 refcount_set(&ref->refcount, 1);
8290                 ref->buffer = iter->array_buffer->buffer;
8291                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8292                 if (IS_ERR(ref->page)) {
8293                         ret = PTR_ERR(ref->page);
8294                         ref->page = NULL;
8295                         kfree(ref);
8296                         break;
8297                 }
8298                 ref->cpu = iter->cpu_file;
8299
8300                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8301                                           len, iter->cpu_file, 1);
8302                 if (r < 0) {
8303                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8304                                                    ref->page);
8305                         kfree(ref);
8306                         break;
8307                 }
8308
8309                 page = virt_to_page(ref->page);
8310
8311                 spd.pages[i] = page;
8312                 spd.partial[i].len = PAGE_SIZE;
8313                 spd.partial[i].offset = 0;
8314                 spd.partial[i].private = (unsigned long)ref;
8315                 spd.nr_pages++;
8316                 *ppos += PAGE_SIZE;
8317
8318                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8319         }
8320
8321         trace_access_unlock(iter->cpu_file);
8322         spd.nr_pages = i;
8323
8324         /* did we read anything? */
8325         if (!spd.nr_pages) {
8326                 long wait_index;
8327
8328                 if (ret)
8329                         goto out;
8330
8331                 ret = -EAGAIN;
8332                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8333                         goto out;
8334
8335                 wait_index = READ_ONCE(iter->wait_index);
8336
8337                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8338                 if (ret)
8339                         goto out;
8340
8341                 /* No need to wait after waking up when tracing is off */
8342                 if (!tracer_tracing_is_on(iter->tr))
8343                         goto out;
8344
8345                 /* Make sure we see the new wait_index */
8346                 smp_rmb();
8347                 if (wait_index != iter->wait_index)
8348                         goto out;
8349
8350                 goto again;
8351         }
8352
8353         ret = splice_to_pipe(pipe, &spd);
8354 out:
8355         splice_shrink_spd(&spd);
8356
8357         return ret;
8358 }
8359
8360 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8361 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8362 {
8363         struct ftrace_buffer_info *info = file->private_data;
8364         struct trace_iterator *iter = &info->iter;
8365
8366         if (cmd)
8367                 return -ENOIOCTLCMD;
8368
8369         mutex_lock(&trace_types_lock);
8370
8371         iter->wait_index++;
8372         /* Make sure the waiters see the new wait_index */
8373         smp_wmb();
8374
8375         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8376
8377         mutex_unlock(&trace_types_lock);
8378         return 0;
8379 }
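
/*
 * Editorial sketch (user-space side, illustrative only): a reader blocked in
 * read() or splice() on a per-cpu trace_pipe_raw file can be kicked out of
 * its wait by another thread calling
 *
 *	ioctl(fd, 0);
 *
 * on the same file descriptor, which bumps wait_index and wakes the ring
 * buffer waiters as done above.
 */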
8380
8381 static const struct file_operations tracing_buffers_fops = {
8382         .open           = tracing_buffers_open,
8383         .read           = tracing_buffers_read,
8384         .poll           = tracing_buffers_poll,
8385         .release        = tracing_buffers_release,
8386         .splice_read    = tracing_buffers_splice_read,
8387         .unlocked_ioctl = tracing_buffers_ioctl,
8388         .llseek         = no_llseek,
8389 };
8390
8391 static ssize_t
8392 tracing_stats_read(struct file *filp, char __user *ubuf,
8393                    size_t count, loff_t *ppos)
8394 {
8395         struct inode *inode = file_inode(filp);
8396         struct trace_array *tr = inode->i_private;
8397         struct array_buffer *trace_buf = &tr->array_buffer;
8398         int cpu = tracing_get_cpu(inode);
8399         struct trace_seq *s;
8400         unsigned long cnt;
8401         unsigned long long t;
8402         unsigned long usec_rem;
8403
8404         s = kmalloc(sizeof(*s), GFP_KERNEL);
8405         if (!s)
8406                 return -ENOMEM;
8407
8408         trace_seq_init(s);
8409
8410         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8411         trace_seq_printf(s, "entries: %ld\n", cnt);
8412
8413         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8414         trace_seq_printf(s, "overrun: %ld\n", cnt);
8415
8416         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8417         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8418
8419         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8420         trace_seq_printf(s, "bytes: %ld\n", cnt);
8421
8422         if (trace_clocks[tr->clock_id].in_ns) {
8423                 /* local or global for trace_clock */
8424                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8425                 usec_rem = do_div(t, USEC_PER_SEC);
8426                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8427                                                                 t, usec_rem);
8428
8429                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8430                 usec_rem = do_div(t, USEC_PER_SEC);
8431                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8432         } else {
8433                 /* counter or tsc mode for trace_clock */
8434                 trace_seq_printf(s, "oldest event ts: %llu\n",
8435                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8436
8437                 trace_seq_printf(s, "now ts: %llu\n",
8438                                 ring_buffer_time_stamp(trace_buf->buffer));
8439         }
8440
8441         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8442         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8443
8444         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8445         trace_seq_printf(s, "read events: %ld\n", cnt);
8446
8447         count = simple_read_from_buffer(ubuf, count, ppos,
8448                                         s->buffer, trace_seq_used(s));
8449
8450         kfree(s);
8451
8452         return count;
8453 }
8454
8455 static const struct file_operations tracing_stats_fops = {
8456         .open           = tracing_open_generic_tr,
8457         .read           = tracing_stats_read,
8458         .llseek         = generic_file_llseek,
8459         .release        = tracing_release_generic_tr,
8460 };
8461
8462 #ifdef CONFIG_DYNAMIC_FTRACE
8463
8464 static ssize_t
8465 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8466                   size_t cnt, loff_t *ppos)
8467 {
8468         ssize_t ret;
8469         char *buf;
8470         int r;
8471
8472         /* 256 should be plenty to hold the amount needed */
8473         buf = kmalloc(256, GFP_KERNEL);
8474         if (!buf)
8475                 return -ENOMEM;
8476
8477         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8478                       ftrace_update_tot_cnt,
8479                       ftrace_number_of_pages,
8480                       ftrace_number_of_groups);
8481
8482         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8483         kfree(buf);
8484         return ret;
8485 }
8486
8487 static const struct file_operations tracing_dyn_info_fops = {
8488         .open           = tracing_open_generic,
8489         .read           = tracing_read_dyn_info,
8490         .llseek         = generic_file_llseek,
8491 };
8492 #endif /* CONFIG_DYNAMIC_FTRACE */
8493
8494 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8495 static void
8496 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8497                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8498                 void *data)
8499 {
8500         tracing_snapshot_instance(tr);
8501 }
8502
8503 static void
8504 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8505                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8506                       void *data)
8507 {
8508         struct ftrace_func_mapper *mapper = data;
8509         long *count = NULL;
8510
8511         if (mapper)
8512                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8513
8514         if (count) {
8515
8516                 if (*count <= 0)
8517                         return;
8518
8519                 (*count)--;
8520         }
8521
8522         tracing_snapshot_instance(tr);
8523 }
8524
8525 static int
8526 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8527                       struct ftrace_probe_ops *ops, void *data)
8528 {
8529         struct ftrace_func_mapper *mapper = data;
8530         long *count = NULL;
8531
8532         seq_printf(m, "%ps:", (void *)ip);
8533
8534         seq_puts(m, "snapshot");
8535
8536         if (mapper)
8537                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8538
8539         if (count)
8540                 seq_printf(m, ":count=%ld\n", *count);
8541         else
8542                 seq_puts(m, ":unlimited\n");
8543
8544         return 0;
8545 }
8546
8547 static int
8548 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8549                      unsigned long ip, void *init_data, void **data)
8550 {
8551         struct ftrace_func_mapper *mapper = *data;
8552
8553         if (!mapper) {
8554                 mapper = allocate_ftrace_func_mapper();
8555                 if (!mapper)
8556                         return -ENOMEM;
8557                 *data = mapper;
8558         }
8559
8560         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8561 }
8562
8563 static void
8564 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8565                      unsigned long ip, void *data)
8566 {
8567         struct ftrace_func_mapper *mapper = data;
8568
8569         if (!ip) {
8570                 if (!mapper)
8571                         return;
8572                 free_ftrace_func_mapper(mapper, NULL);
8573                 return;
8574         }
8575
8576         ftrace_func_mapper_remove_ip(mapper, ip);
8577 }
8578
8579 static struct ftrace_probe_ops snapshot_probe_ops = {
8580         .func                   = ftrace_snapshot,
8581         .print                  = ftrace_snapshot_print,
8582 };
8583
8584 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8585         .func                   = ftrace_count_snapshot,
8586         .print                  = ftrace_snapshot_print,
8587         .init                   = ftrace_snapshot_init,
8588         .free                   = ftrace_snapshot_free,
8589 };
8590
8591 static int
8592 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8593                                char *glob, char *cmd, char *param, int enable)
8594 {
8595         struct ftrace_probe_ops *ops;
8596         void *count = (void *)-1;
8597         char *number;
8598         int ret;
8599
8600         if (!tr)
8601                 return -ENODEV;
8602
8603         /* hash funcs only work with set_ftrace_filter */
8604         if (!enable)
8605                 return -EINVAL;
8606
8607         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8608
8609         if (glob[0] == '!')
8610                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8611
8612         if (!param)
8613                 goto out_reg;
8614
8615         number = strsep(&param, ":");
8616
8617         if (!strlen(number))
8618                 goto out_reg;
8619
8620         /*
8621          * We use the callback data field (which is a pointer)
8622          * as our counter.
8623          */
8624         ret = kstrtoul(number, 0, (unsigned long *)&count);
8625         if (ret)
8626                 return ret;
8627
8628  out_reg:
8629         ret = tracing_alloc_snapshot_instance(tr);
8630         if (ret < 0)
8631                 goto out;
8632
8633         ret = register_ftrace_function_probe(glob, tr, ops, count);
8634
8635  out:
8636         return ret < 0 ? ret : 0;
8637 }
8638
8639 static struct ftrace_func_command ftrace_snapshot_cmd = {
8640         .name                   = "snapshot",
8641         .func                   = ftrace_trace_snapshot_callback,
8642 };
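
/*
 * Editorial usage sketch (paths assume tracefs is mounted at
 * /sys/kernel/tracing): once registered, the command is driven through the
 * function filter, e.g.
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	# only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter	# remove the probe
 *
 * matching the glob/param parsing in ftrace_trace_snapshot_callback() above.
 */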
8643
8644 static __init int register_snapshot_cmd(void)
8645 {
8646         return register_ftrace_command(&ftrace_snapshot_cmd);
8647 }
8648 #else
8649 static inline __init int register_snapshot_cmd(void) { return 0; }
8650 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8651
8652 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8653 {
8654         if (WARN_ON(!tr->dir))
8655                 return ERR_PTR(-ENODEV);
8656
8657         /* Top directory uses NULL as the parent */
8658         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8659                 return NULL;
8660
8661         /* All sub buffers have a descriptor */
8662         return tr->dir;
8663 }
8664
8665 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8666 {
8667         struct dentry *d_tracer;
8668
8669         if (tr->percpu_dir)
8670                 return tr->percpu_dir;
8671
8672         d_tracer = tracing_get_dentry(tr);
8673         if (IS_ERR(d_tracer))
8674                 return NULL;
8675
8676         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8677
8678         MEM_FAIL(!tr->percpu_dir,
8679                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8680
8681         return tr->percpu_dir;
8682 }
8683
8684 static struct dentry *
8685 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8686                       void *data, long cpu, const struct file_operations *fops)
8687 {
8688         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8689
8690         if (ret) /* See tracing_get_cpu() */
8691                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8692         return ret;
8693 }
8694
8695 static void
8696 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8697 {
8698         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8699         struct dentry *d_cpu;
8700         char cpu_dir[30]; /* 30 characters should be more than enough */
8701
8702         if (!d_percpu)
8703                 return;
8704
8705         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8706         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8707         if (!d_cpu) {
8708                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8709                 return;
8710         }
8711
8712         /* per cpu trace_pipe */
8713         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8714                                 tr, cpu, &tracing_pipe_fops);
8715
8716         /* per cpu trace */
8717         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8718                                 tr, cpu, &tracing_fops);
8719
8720         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8721                                 tr, cpu, &tracing_buffers_fops);
8722
8723         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8724                                 tr, cpu, &tracing_stats_fops);
8725
8726         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8727                                 tr, cpu, &tracing_entries_fops);
8728
8729 #ifdef CONFIG_TRACER_SNAPSHOT
8730         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8731                                 tr, cpu, &snapshot_fops);
8732
8733         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8734                                 tr, cpu, &snapshot_raw_fops);
8735 #endif
8736 }
8737
8738 #ifdef CONFIG_FTRACE_SELFTEST
8739 /* Let selftest have access to static functions in this file */
8740 #include "trace_selftest.c"
8741 #endif
8742
8743 static ssize_t
8744 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8745                         loff_t *ppos)
8746 {
8747         struct trace_option_dentry *topt = filp->private_data;
8748         char *buf;
8749
8750         if (topt->flags->val & topt->opt->bit)
8751                 buf = "1\n";
8752         else
8753                 buf = "0\n";
8754
8755         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8756 }
8757
8758 static ssize_t
8759 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8760                          loff_t *ppos)
8761 {
8762         struct trace_option_dentry *topt = filp->private_data;
8763         unsigned long val;
8764         int ret;
8765
8766         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8767         if (ret)
8768                 return ret;
8769
8770         if (val != 0 && val != 1)
8771                 return -EINVAL;
8772
8773         if (!!(topt->flags->val & topt->opt->bit) != val) {
8774                 mutex_lock(&trace_types_lock);
8775                 ret = __set_tracer_option(topt->tr, topt->flags,
8776                                           topt->opt, !val);
8777                 mutex_unlock(&trace_types_lock);
8778                 if (ret)
8779                         return ret;
8780         }
8781
8782         *ppos += cnt;
8783
8784         return cnt;
8785 }
8786
8787
8788 static const struct file_operations trace_options_fops = {
8789         .open = tracing_open_generic,
8790         .read = trace_options_read,
8791         .write = trace_options_write,
8792         .llseek = generic_file_llseek,
8793 };
8794
8795 /*
8796  * In order to pass both the trace_array descriptor and the index of
8797  * the flag that a trace option file represents, the trace_array has a
8798  * character array trace_flags_index[], where each entry holds the index
8799  * of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
8800  * The address of the entry for a given flag is what is passed to that
8801  * flag option file's read/write callbacks.
8802  *
8803  * To extract both the index and the trace_array descriptor,
8804  * get_tr_index() uses the following algorithm.
8805  *
8806  *   idx = *ptr;
8807  *
8808  * Because each entry holds its own position (remember index[1] == 1),
8809  * dereferencing the pointer yields the flag's bit index.
8810  *
8811  * Subtracting that index from the pointer then gets us back to the
8812  * start of the index array:
8813  *
8814  *   ptr - idx == &index[0]
8815  *
8816  * A simple container_of() on that pointer finally gets us to the
8817  * trace_array descriptor.
8818  */
8819 static void get_tr_index(void *data, struct trace_array **ptr,
8820                          unsigned int *pindex)
8821 {
8822         *pindex = *(unsigned char *)data;
8823
8824         *ptr = container_of(data - *pindex, struct trace_array,
8825                             trace_flags_index);
8826 }
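
/*
 * Editorial worked example of the arithmetic above, for a trace_array *tr
 * whose index array was filled by init_trace_flags_index() (index[i] == i):
 *
 *	void *data = &tr->trace_flags_index[3];     // stored in the option file
 *	unsigned int idx = *(unsigned char *)data;  // idx == 3
 *	// data - idx == &tr->trace_flags_index[0]
 *	container_of(data - idx, struct trace_array, trace_flags_index) == tr
 */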
8827
8828 static ssize_t
8829 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8830                         loff_t *ppos)
8831 {
8832         void *tr_index = filp->private_data;
8833         struct trace_array *tr;
8834         unsigned int index;
8835         char *buf;
8836
8837         get_tr_index(tr_index, &tr, &index);
8838
8839         if (tr->trace_flags & (1 << index))
8840                 buf = "1\n";
8841         else
8842                 buf = "0\n";
8843
8844         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8845 }
8846
8847 static ssize_t
8848 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8849                          loff_t *ppos)
8850 {
8851         void *tr_index = filp->private_data;
8852         struct trace_array *tr;
8853         unsigned int index;
8854         unsigned long val;
8855         int ret;
8856
8857         get_tr_index(tr_index, &tr, &index);
8858
8859         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8860         if (ret)
8861                 return ret;
8862
8863         if (val != 0 && val != 1)
8864                 return -EINVAL;
8865
8866         mutex_lock(&event_mutex);
8867         mutex_lock(&trace_types_lock);
8868         ret = set_tracer_flag(tr, 1 << index, val);
8869         mutex_unlock(&trace_types_lock);
8870         mutex_unlock(&event_mutex);
8871
8872         if (ret < 0)
8873                 return ret;
8874
8875         *ppos += cnt;
8876
8877         return cnt;
8878 }
8879
8880 static const struct file_operations trace_options_core_fops = {
8881         .open = tracing_open_generic,
8882         .read = trace_options_core_read,
8883         .write = trace_options_core_write,
8884         .llseek = generic_file_llseek,
8885 };
8886
8887 struct dentry *trace_create_file(const char *name,
8888                                  umode_t mode,
8889                                  struct dentry *parent,
8890                                  void *data,
8891                                  const struct file_operations *fops)
8892 {
8893         struct dentry *ret;
8894
8895         ret = tracefs_create_file(name, mode, parent, data, fops);
8896         if (!ret)
8897                 pr_warn("Could not create tracefs '%s' entry\n", name);
8898
8899         return ret;
8900 }
8901
8902
8903 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8904 {
8905         struct dentry *d_tracer;
8906
8907         if (tr->options)
8908                 return tr->options;
8909
8910         d_tracer = tracing_get_dentry(tr);
8911         if (IS_ERR(d_tracer))
8912                 return NULL;
8913
8914         tr->options = tracefs_create_dir("options", d_tracer);
8915         if (!tr->options) {
8916                 pr_warn("Could not create tracefs directory 'options'\n");
8917                 return NULL;
8918         }
8919
8920         return tr->options;
8921 }
8922
8923 static void
8924 create_trace_option_file(struct trace_array *tr,
8925                          struct trace_option_dentry *topt,
8926                          struct tracer_flags *flags,
8927                          struct tracer_opt *opt)
8928 {
8929         struct dentry *t_options;
8930
8931         t_options = trace_options_init_dentry(tr);
8932         if (!t_options)
8933                 return;
8934
8935         topt->flags = flags;
8936         topt->opt = opt;
8937         topt->tr = tr;
8938
8939         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8940                                         t_options, topt, &trace_options_fops);
8941
8942 }
8943
8944 static void
8945 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8946 {
8947         struct trace_option_dentry *topts;
8948         struct trace_options *tr_topts;
8949         struct tracer_flags *flags;
8950         struct tracer_opt *opts;
8951         int cnt;
8952         int i;
8953
8954         if (!tracer)
8955                 return;
8956
8957         flags = tracer->flags;
8958
8959         if (!flags || !flags->opts)
8960                 return;
8961
8962         /*
8963          * If this is an instance, only create flags for tracers
8964          * the instance may have.
8965          */
8966         if (!trace_ok_for_array(tracer, tr))
8967                 return;
8968
8969         for (i = 0; i < tr->nr_topts; i++) {
8970                 /* Make sure there are no duplicate flags. */
8971                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8972                         return;
8973         }
8974
8975         opts = flags->opts;
8976
8977         for (cnt = 0; opts[cnt].name; cnt++)
8978                 ;
8979
8980         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8981         if (!topts)
8982                 return;
8983
8984         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8985                             GFP_KERNEL);
8986         if (!tr_topts) {
8987                 kfree(topts);
8988                 return;
8989         }
8990
8991         tr->topts = tr_topts;
8992         tr->topts[tr->nr_topts].tracer = tracer;
8993         tr->topts[tr->nr_topts].topts = topts;
8994         tr->nr_topts++;
8995
8996         for (cnt = 0; opts[cnt].name; cnt++) {
8997                 create_trace_option_file(tr, &topts[cnt], flags,
8998                                          &opts[cnt]);
8999                 MEM_FAIL(topts[cnt].entry == NULL,
9000                           "Failed to create trace option: %s",
9001                           opts[cnt].name);
9002         }
9003 }
9004
9005 static struct dentry *
9006 create_trace_option_core_file(struct trace_array *tr,
9007                               const char *option, long index)
9008 {
9009         struct dentry *t_options;
9010
9011         t_options = trace_options_init_dentry(tr);
9012         if (!t_options)
9013                 return NULL;
9014
9015         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9016                                  (void *)&tr->trace_flags_index[index],
9017                                  &trace_options_core_fops);
9018 }
9019
9020 static void create_trace_options_dir(struct trace_array *tr)
9021 {
9022         struct dentry *t_options;
9023         bool top_level = tr == &global_trace;
9024         int i;
9025
9026         t_options = trace_options_init_dentry(tr);
9027         if (!t_options)
9028                 return;
9029
9030         for (i = 0; trace_options[i]; i++) {
9031                 if (top_level ||
9032                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9033                         create_trace_option_core_file(tr, trace_options[i], i);
9034         }
9035 }
9036
9037 static ssize_t
9038 rb_simple_read(struct file *filp, char __user *ubuf,
9039                size_t cnt, loff_t *ppos)
9040 {
9041         struct trace_array *tr = filp->private_data;
9042         char buf[64];
9043         int r;
9044
9045         r = tracer_tracing_is_on(tr);
9046         r = sprintf(buf, "%d\n", r);
9047
9048         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9049 }
9050
9051 static ssize_t
9052 rb_simple_write(struct file *filp, const char __user *ubuf,
9053                 size_t cnt, loff_t *ppos)
9054 {
9055         struct trace_array *tr = filp->private_data;
9056         struct trace_buffer *buffer = tr->array_buffer.buffer;
9057         unsigned long val;
9058         int ret;
9059
9060         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9061         if (ret)
9062                 return ret;
9063
9064         if (buffer) {
9065                 mutex_lock(&trace_types_lock);
9066                 if (!!val == tracer_tracing_is_on(tr)) {
9067                         val = 0; /* do nothing */
9068                 } else if (val) {
9069                         tracer_tracing_on(tr);
9070                         if (tr->current_trace->start)
9071                                 tr->current_trace->start(tr);
9072                 } else {
9073                         tracer_tracing_off(tr);
9074                         if (tr->current_trace->stop)
9075                                 tr->current_trace->stop(tr);
9076                         /* Wake up any waiters */
9077                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9078                 }
9079                 mutex_unlock(&trace_types_lock);
9080         }
9081
9082         (*ppos)++;
9083
9084         return cnt;
9085 }
9086
9087 static const struct file_operations rb_simple_fops = {
9088         .open           = tracing_open_generic_tr,
9089         .read           = rb_simple_read,
9090         .write          = rb_simple_write,
9091         .release        = tracing_release_generic_tr,
9092         .llseek         = default_llseek,
9093 };
9094
9095 static ssize_t
9096 buffer_percent_read(struct file *filp, char __user *ubuf,
9097                     size_t cnt, loff_t *ppos)
9098 {
9099         struct trace_array *tr = filp->private_data;
9100         char buf[64];
9101         int r;
9102
9103         r = tr->buffer_percent;
9104         r = sprintf(buf, "%d\n", r);
9105
9106         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9107 }
9108
9109 static ssize_t
9110 buffer_percent_write(struct file *filp, const char __user *ubuf,
9111                      size_t cnt, loff_t *ppos)
9112 {
9113         struct trace_array *tr = filp->private_data;
9114         unsigned long val;
9115         int ret;
9116
9117         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9118         if (ret)
9119                 return ret;
9120
9121         if (val > 100)
9122                 return -EINVAL;
9123
9124         if (!val)
9125                 val = 1;
9126
9127         tr->buffer_percent = val;
9128
9129         (*ppos)++;
9130
9131         return cnt;
9132 }
9133
9134 static const struct file_operations buffer_percent_fops = {
9135         .open           = tracing_open_generic_tr,
9136         .read           = buffer_percent_read,
9137         .write          = buffer_percent_write,
9138         .release        = tracing_release_generic_tr,
9139         .llseek         = default_llseek,
9140 };
9141
9142 static struct dentry *trace_instance_dir;
9143
9144 static void
9145 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9146
9147 static int
9148 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9149 {
9150         enum ring_buffer_flags rb_flags;
9151
9152         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9153
9154         buf->tr = tr;
9155
9156         buf->buffer = ring_buffer_alloc(size, rb_flags);
9157         if (!buf->buffer)
9158                 return -ENOMEM;
9159
9160         buf->data = alloc_percpu(struct trace_array_cpu);
9161         if (!buf->data) {
9162                 ring_buffer_free(buf->buffer);
9163                 buf->buffer = NULL;
9164                 return -ENOMEM;
9165         }
9166
9167         /* Allocate the first page for all buffers */
9168         set_buffer_entries(&tr->array_buffer,
9169                            ring_buffer_size(tr->array_buffer.buffer, 0));
9170
9171         return 0;
9172 }
9173
9174 static void free_trace_buffer(struct array_buffer *buf)
9175 {
9176         if (buf->buffer) {
9177                 ring_buffer_free(buf->buffer);
9178                 buf->buffer = NULL;
9179                 free_percpu(buf->data);
9180                 buf->data = NULL;
9181         }
9182 }
9183
9184 static int allocate_trace_buffers(struct trace_array *tr, int size)
9185 {
9186         int ret;
9187
9188         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9189         if (ret)
9190                 return ret;
9191
9192 #ifdef CONFIG_TRACER_MAX_TRACE
9193         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9194                                     allocate_snapshot ? size : 1);
9195         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9196                 free_trace_buffer(&tr->array_buffer);
9197                 return -ENOMEM;
9198         }
9199         tr->allocated_snapshot = allocate_snapshot;
9200
9201         /*
9202          * Only the top level trace array gets its snapshot allocated
9203          * from the kernel command line.
9204          */
9205         allocate_snapshot = false;
9206 #endif
9207
9208         return 0;
9209 }
9210
9211 static void free_trace_buffers(struct trace_array *tr)
9212 {
9213         if (!tr)
9214                 return;
9215
9216         free_trace_buffer(&tr->array_buffer);
9217
9218 #ifdef CONFIG_TRACER_MAX_TRACE
9219         free_trace_buffer(&tr->max_buffer);
9220 #endif
9221 }
9222
9223 static void init_trace_flags_index(struct trace_array *tr)
9224 {
9225         int i;
9226
9227         /* Used by the trace options files */
9228         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9229                 tr->trace_flags_index[i] = i;
9230 }
9231
9232 static void __update_tracer_options(struct trace_array *tr)
9233 {
9234         struct tracer *t;
9235
9236         for (t = trace_types; t; t = t->next)
9237                 add_tracer_options(tr, t);
9238 }
9239
9240 static void update_tracer_options(struct trace_array *tr)
9241 {
9242         mutex_lock(&trace_types_lock);
9243         tracer_options_updated = true;
9244         __update_tracer_options(tr);
9245         mutex_unlock(&trace_types_lock);
9246 }
9247
9248 /* Must have trace_types_lock held */
9249 struct trace_array *trace_array_find(const char *instance)
9250 {
9251         struct trace_array *tr, *found = NULL;
9252
9253         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9254                 if (tr->name && strcmp(tr->name, instance) == 0) {
9255                         found = tr;
9256                         break;
9257                 }
9258         }
9259
9260         return found;
9261 }
9262
9263 struct trace_array *trace_array_find_get(const char *instance)
9264 {
9265         struct trace_array *tr;
9266
9267         mutex_lock(&trace_types_lock);
9268         tr = trace_array_find(instance);
9269         if (tr)
9270                 tr->ref++;
9271         mutex_unlock(&trace_types_lock);
9272
9273         return tr;
9274 }
9275
9276 static int trace_array_create_dir(struct trace_array *tr)
9277 {
9278         int ret;
9279
9280         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9281         if (!tr->dir)
9282                 return -EINVAL;
9283
9284         ret = event_trace_add_tracer(tr->dir, tr);
9285         if (ret) {
9286                 tracefs_remove(tr->dir);
9287                 return ret;
9288         }
9289
9290         init_tracer_tracefs(tr, tr->dir);
9291         __update_tracer_options(tr);
9292
9293         return ret;
9294 }
9295
9296 static struct trace_array *trace_array_create(const char *name)
9297 {
9298         struct trace_array *tr;
9299         int ret;
9300
9301         ret = -ENOMEM;
9302         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9303         if (!tr)
9304                 return ERR_PTR(ret);
9305
9306         tr->name = kstrdup(name, GFP_KERNEL);
9307         if (!tr->name)
9308                 goto out_free_tr;
9309
9310         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9311                 goto out_free_tr;
9312
9313         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9314
9315         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9316
9317         raw_spin_lock_init(&tr->start_lock);
9318
9319         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9320
9321         tr->current_trace = &nop_trace;
9322
9323         INIT_LIST_HEAD(&tr->systems);
9324         INIT_LIST_HEAD(&tr->events);
9325         INIT_LIST_HEAD(&tr->hist_vars);
9326         INIT_LIST_HEAD(&tr->err_log);
9327
9328         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9329                 goto out_free_tr;
9330
9331         if (ftrace_allocate_ftrace_ops(tr) < 0)
9332                 goto out_free_tr;
9333
9334         ftrace_init_trace_array(tr);
9335
9336         init_trace_flags_index(tr);
9337
9338         if (trace_instance_dir) {
9339                 ret = trace_array_create_dir(tr);
9340                 if (ret)
9341                         goto out_free_tr;
9342         } else
9343                 __trace_early_add_events(tr);
9344
9345         list_add(&tr->list, &ftrace_trace_arrays);
9346
9347         tr->ref++;
9348
9349         return tr;
9350
9351  out_free_tr:
9352         ftrace_free_ftrace_ops(tr);
9353         free_trace_buffers(tr);
9354         free_cpumask_var(tr->tracing_cpumask);
9355         kfree(tr->name);
9356         kfree(tr);
9357
9358         return ERR_PTR(ret);
9359 }
9360
9361 static int instance_mkdir(const char *name)
9362 {
9363         struct trace_array *tr;
9364         int ret;
9365
9366         mutex_lock(&event_mutex);
9367         mutex_lock(&trace_types_lock);
9368
9369         ret = -EEXIST;
9370         if (trace_array_find(name))
9371                 goto out_unlock;
9372
9373         tr = trace_array_create(name);
9374
9375         ret = PTR_ERR_OR_ZERO(tr);
9376
9377 out_unlock:
9378         mutex_unlock(&trace_types_lock);
9379         mutex_unlock(&event_mutex);
9380         return ret;
9381 }
9382
9383 /**
9384  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9385  * @name: The name of the trace array to be looked up/created.
9386  *
9387  * Returns a pointer to the trace array with the given name, or NULL
9388  * if it cannot be created.
9389  *
9390  * NOTE: This function increments the reference counter associated with the
9391  * trace array returned. This makes sure it cannot be freed while in use.
9392  * Use trace_array_put() once the trace array is no longer needed.
9393  * If the trace_array is to be freed, trace_array_destroy() needs to
9394  * be called after the trace_array_put(), or simply let user space delete
9395  * it from the tracefs instances directory. But until the
9396  * trace_array_put() is called, user space can not delete it.
9397  *
9398  */
9399 struct trace_array *trace_array_get_by_name(const char *name)
9400 {
9401         struct trace_array *tr;
9402
9403         mutex_lock(&event_mutex);
9404         mutex_lock(&trace_types_lock);
9405
9406         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9407                 if (tr->name && strcmp(tr->name, name) == 0)
9408                         goto out_unlock;
9409         }
9410
9411         tr = trace_array_create(name);
9412
9413         if (IS_ERR(tr))
9414                 tr = NULL;
9415 out_unlock:
9416         if (tr)
9417                 tr->ref++;
9418
9419         mutex_unlock(&trace_types_lock);
9420         mutex_unlock(&event_mutex);
9421         return tr;
9422 }
9423 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
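
/*
 * Editorial sketch of the lifetime rules described above; the instance name
 * and the surrounding context are hypothetical:
 */
#if 0
static int example_use_instance(void)
{
	struct trace_array *tr;

	tr = trace_array_get_by_name("example_instance");
	if (!tr)
		return -ENOMEM;

	/* ... use tr while holding the reference ... */

	trace_array_put(tr);
	/* Only if the instance itself should also be removed: */
	trace_array_destroy(tr);

	return 0;
}
#endif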
9424
9425 static int __remove_instance(struct trace_array *tr)
9426 {
9427         int i;
9428
9429         /* Reference counter for a newly created trace array = 1. */
9430         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9431                 return -EBUSY;
9432
9433         list_del(&tr->list);
9434
9435         /* Disable all the flags that were enabled coming in */
9436         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9437                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9438                         set_tracer_flag(tr, 1 << i, 0);
9439         }
9440
9441         tracing_set_nop(tr);
9442         clear_ftrace_function_probes(tr);
9443         event_trace_del_tracer(tr);
9444         ftrace_clear_pids(tr);
9445         ftrace_destroy_function_files(tr);
9446         tracefs_remove(tr->dir);
9447         free_percpu(tr->last_func_repeats);
9448         free_trace_buffers(tr);
9449
9450         for (i = 0; i < tr->nr_topts; i++) {
9451                 kfree(tr->topts[i].topts);
9452         }
9453         kfree(tr->topts);
9454
9455         free_cpumask_var(tr->tracing_cpumask);
9456         kfree(tr->name);
9457         kfree(tr);
9458
9459         return 0;
9460 }
9461
9462 int trace_array_destroy(struct trace_array *this_tr)
9463 {
9464         struct trace_array *tr;
9465         int ret;
9466
9467         if (!this_tr)
9468                 return -EINVAL;
9469
9470         mutex_lock(&event_mutex);
9471         mutex_lock(&trace_types_lock);
9472
9473         ret = -ENODEV;
9474
9475         /* Make sure the trace array exists before destroying it. */
9476         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9477                 if (tr == this_tr) {
9478                         ret = __remove_instance(tr);
9479                         break;
9480                 }
9481         }
9482
9483         mutex_unlock(&trace_types_lock);
9484         mutex_unlock(&event_mutex);
9485
9486         return ret;
9487 }
9488 EXPORT_SYMBOL_GPL(trace_array_destroy);
9489
9490 static int instance_rmdir(const char *name)
9491 {
9492         struct trace_array *tr;
9493         int ret;
9494
9495         mutex_lock(&event_mutex);
9496         mutex_lock(&trace_types_lock);
9497
9498         ret = -ENODEV;
9499         tr = trace_array_find(name);
9500         if (tr)
9501                 ret = __remove_instance(tr);
9502
9503         mutex_unlock(&trace_types_lock);
9504         mutex_unlock(&event_mutex);
9505
9506         return ret;
9507 }
9508
9509 static __init void create_trace_instances(struct dentry *d_tracer)
9510 {
9511         struct trace_array *tr;
9512
9513         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9514                                                          instance_mkdir,
9515                                                          instance_rmdir);
9516         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9517                 return;
9518
9519         mutex_lock(&event_mutex);
9520         mutex_lock(&trace_types_lock);
9521
9522         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9523                 if (!tr->name)
9524                         continue;
9525                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9526                              "Failed to create instance directory\n"))
9527                         break;
9528         }
9529
9530         mutex_unlock(&trace_types_lock);
9531         mutex_unlock(&event_mutex);
9532 }
9533
9534 static void
9535 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9536 {
9537         struct trace_event_file *file;
9538         int cpu;
9539
9540         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9541                         tr, &show_traces_fops);
9542
9543         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9544                         tr, &set_tracer_fops);
9545
9546         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9547                           tr, &tracing_cpumask_fops);
9548
9549         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9550                           tr, &tracing_iter_fops);
9551
9552         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9553                           tr, &tracing_fops);
9554
9555         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9556                           tr, &tracing_pipe_fops);
9557
9558         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9559                           tr, &tracing_entries_fops);
9560
9561         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9562                           tr, &tracing_total_entries_fops);
9563
9564         trace_create_file("free_buffer", 0200, d_tracer,
9565                           tr, &tracing_free_buffer_fops);
9566
9567         trace_create_file("trace_marker", 0220, d_tracer,
9568                           tr, &tracing_mark_fops);
9569
9570         file = __find_event_file(tr, "ftrace", "print");
9571         if (file && file->dir)
9572                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9573                                   file, &event_trigger_fops);
9574         tr->trace_marker_file = file;
9575
9576         trace_create_file("trace_marker_raw", 0220, d_tracer,
9577                           tr, &tracing_mark_raw_fops);
9578
9579         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9580                           &trace_clock_fops);
9581
9582         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9583                           tr, &rb_simple_fops);
9584
9585         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9586                           &trace_time_stamp_mode_fops);
9587
9588         tr->buffer_percent = 50;
9589
9590         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9591                         tr, &buffer_percent_fops);
9592
9593         create_trace_options_dir(tr);
9594
9595         trace_create_maxlat_file(tr, d_tracer);
9596
9597         if (ftrace_create_function_files(tr, d_tracer))
9598                 MEM_FAIL(1, "Could not allocate function filter files");
9599
9600 #ifdef CONFIG_TRACER_SNAPSHOT
9601         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9602                           tr, &snapshot_fops);
9603 #endif
9604
9605         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9606                           tr, &tracing_err_log_fops);
9607
9608         for_each_tracing_cpu(cpu)
9609                 tracing_init_tracefs_percpu(tr, cpu);
9610
9611         ftrace_init_tracefs(tr, d_tracer);
9612 }
9613
9614 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9615 {
9616         struct vfsmount *mnt;
9617         struct file_system_type *type;
9618
9619         /*
9620          * To maintain backward compatibility for tools that mount
9621          * debugfs to get to the tracing facility, tracefs is automatically
9622          * mounted to the debugfs/tracing directory.
9623          */
9624         type = get_fs_type("tracefs");
9625         if (!type)
9626                 return NULL;
9627         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9628         put_filesystem(type);
9629         if (IS_ERR(mnt))
9630                 return NULL;
9631         mntget(mnt);
9632
9633         return mnt;
9634 }
9635
9636 /**
9637  * tracing_init_dentry - initialize top level trace array
9638  *
9639  * This is called when creating files or directories in the tracing
9640  * directory. It is called via fs_initcall() by any of the boot up code
9641  * and returns 0 on success, or a negative error code on failure.
9642  */
9643 int tracing_init_dentry(void)
9644 {
9645         struct trace_array *tr = &global_trace;
9646
9647         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9648                 pr_warn("Tracing disabled due to lockdown\n");
9649                 return -EPERM;
9650         }
9651
9652         /* The top level trace array uses NULL as parent */
9653         if (tr->dir)
9654                 return 0;
9655
9656         if (WARN_ON(!tracefs_initialized()))
9657                 return -ENODEV;
9658
9659         /*
9660          * As there may still be users that expect the tracing
9661          * files to exist in debugfs/tracing, we must automount
9662          * the tracefs file system there, so older tools still
9663          * work with the newer kernel.
9664          */
9665         tr->dir = debugfs_create_automount("tracing", NULL,
9666                                            trace_automount, NULL);
9667
9668         return 0;
9669 }
9670
9671 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9672 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9673
9674 static struct workqueue_struct *eval_map_wq __initdata;
9675 static struct work_struct eval_map_work __initdata;
9676 static struct work_struct tracerfs_init_work __initdata;
9677
9678 static void __init eval_map_work_func(struct work_struct *work)
9679 {
9680         int len;
9681
9682         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9683         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9684 }
9685
9686 static int __init trace_eval_init(void)
9687 {
9688         INIT_WORK(&eval_map_work, eval_map_work_func);
9689
9690         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9691         if (!eval_map_wq) {
9692                 pr_err("Unable to allocate eval_map_wq\n");
9693                 /* Do work here */
9694                 eval_map_work_func(&eval_map_work);
9695                 return -ENOMEM;
9696         }
9697
9698         queue_work(eval_map_wq, &eval_map_work);
9699         return 0;
9700 }
9701
9702 subsys_initcall(trace_eval_init);
9703
9704 static int __init trace_eval_sync(void)
9705 {
9706         /* Make sure the eval map updates are finished */
9707         if (eval_map_wq)
9708                 destroy_workqueue(eval_map_wq);
9709         return 0;
9710 }
9711
9712 late_initcall_sync(trace_eval_sync);
9713
9714
9715 #ifdef CONFIG_MODULES
9716 static void trace_module_add_evals(struct module *mod)
9717 {
9718         if (!mod->num_trace_evals)
9719                 return;
9720
9721         /*
9722          * Modules with bad taint do not have events created, do
9723          * not bother with enums either.
9724          */
9725         if (trace_module_has_bad_taint(mod))
9726                 return;
9727
9728         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9729 }
9730
9731 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9732 static void trace_module_remove_evals(struct module *mod)
9733 {
9734         union trace_eval_map_item *map;
9735         union trace_eval_map_item **last = &trace_eval_maps;
9736
9737         if (!mod->num_trace_evals)
9738                 return;
9739
9740         mutex_lock(&trace_eval_mutex);
9741
9742         map = trace_eval_maps;
9743
9744         while (map) {
9745                 if (map->head.mod == mod)
9746                         break;
9747                 map = trace_eval_jmp_to_tail(map);
9748                 last = &map->tail.next;
9749                 map = map->tail.next;
9750         }
9751         if (!map)
9752                 goto out;
9753
9754         *last = trace_eval_jmp_to_tail(map)->tail.next;
9755         kfree(map);
9756  out:
9757         mutex_unlock(&trace_eval_mutex);
9758 }
9759 #else
9760 static inline void trace_module_remove_evals(struct module *mod) { }
9761 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9762
9763 static int trace_module_notify(struct notifier_block *self,
9764                                unsigned long val, void *data)
9765 {
9766         struct module *mod = data;
9767
9768         switch (val) {
9769         case MODULE_STATE_COMING:
9770                 trace_module_add_evals(mod);
9771                 break;
9772         case MODULE_STATE_GOING:
9773                 trace_module_remove_evals(mod);
9774                 break;
9775         }
9776
9777         return NOTIFY_OK;
9778 }
9779
9780 static struct notifier_block trace_module_nb = {
9781         .notifier_call = trace_module_notify,
9782         .priority = 0,
9783 };
9784 #endif /* CONFIG_MODULES */
9785
9786 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9787 {
9788
9789         event_trace_init();
9790
9791         init_tracer_tracefs(&global_trace, NULL);
9792         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9793
9794         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9795                         &global_trace, &tracing_thresh_fops);
9796
9797         trace_create_file("README", TRACE_MODE_READ, NULL,
9798                         NULL, &tracing_readme_fops);
9799
9800         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9801                         NULL, &tracing_saved_cmdlines_fops);
9802
9803         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9804                           NULL, &tracing_saved_cmdlines_size_fops);
9805
9806         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9807                         NULL, &tracing_saved_tgids_fops);
9808
9809         trace_create_eval_file(NULL);
9810
9811 #ifdef CONFIG_MODULES
9812         register_module_notifier(&trace_module_nb);
9813 #endif
9814
9815 #ifdef CONFIG_DYNAMIC_FTRACE
9816         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9817                         NULL, &tracing_dyn_info_fops);
9818 #endif
9819
9820         create_trace_instances(NULL);
9821
9822         update_tracer_options(&global_trace);
9823 }
9824
9825 static __init int tracer_init_tracefs(void)
9826 {
9827         int ret;
9828
9829         trace_access_lock_init();
9830
9831         ret = tracing_init_dentry();
9832         if (ret)
9833                 return 0;
9834
9835         if (eval_map_wq) {
9836                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9837                 queue_work(eval_map_wq, &tracerfs_init_work);
9838         } else {
9839                 tracer_init_tracefs_work_func(NULL);
9840         }
9841
9842         rv_init_interface();
9843
9844         return 0;
9845 }
9846
9847 fs_initcall(tracer_init_tracefs);
9848
9849 static int trace_panic_handler(struct notifier_block *this,
9850                                unsigned long event, void *unused)
9851 {
9852         if (ftrace_dump_on_oops)
9853                 ftrace_dump(ftrace_dump_on_oops);
9854         return NOTIFY_OK;
9855 }
9856
9857 static struct notifier_block trace_panic_notifier = {
9858         .notifier_call  = trace_panic_handler,
9859         .next           = NULL,
9860         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9861 };
9862
9863 static int trace_die_handler(struct notifier_block *self,
9864                              unsigned long val,
9865                              void *data)
9866 {
9867         switch (val) {
9868         case DIE_OOPS:
9869                 if (ftrace_dump_on_oops)
9870                         ftrace_dump(ftrace_dump_on_oops);
9871                 break;
9872         default:
9873                 break;
9874         }
9875         return NOTIFY_OK;
9876 }
9877
9878 static struct notifier_block trace_die_notifier = {
9879         .notifier_call = trace_die_handler,
9880         .priority = 200
9881 };
9882
9883 /*
9884  * printk is capped at 1024 characters; we really don't need it that big.
9885  * Nothing should be printing 1000 characters anyway.
9886  */
9887 #define TRACE_MAX_PRINT         1000
9888
9889 /*
9890  * Define here KERN_TRACE so that we have one place to modify
9891  * it if we decide to change what log level the ftrace dump
9892  * should be at.
9893  */
9894 #define KERN_TRACE              KERN_EMERG
9895
9896 void
9897 trace_printk_seq(struct trace_seq *s)
9898 {
9899         /* Probably should print a warning here. */
9900         if (s->seq.len >= TRACE_MAX_PRINT)
9901                 s->seq.len = TRACE_MAX_PRINT;
9902
9903         /*
9904          * More paranoid code. Although the buffer size is set to
9905          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9906          * an extra layer of protection.
9907          */
9908         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9909                 s->seq.len = s->seq.size - 1;
9910
9911         /* should be zero terminated, but we are paranoid. */
9912         s->buffer[s->seq.len] = 0;
9913
9914         printk(KERN_TRACE "%s", s->buffer);
9915
9916         trace_seq_init(s);
9917 }
9918
9919 void trace_init_global_iter(struct trace_iterator *iter)
9920 {
9921         iter->tr = &global_trace;
9922         iter->trace = iter->tr->current_trace;
9923         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9924         iter->array_buffer = &global_trace.array_buffer;
9925
9926         if (iter->trace && iter->trace->open)
9927                 iter->trace->open(iter);
9928
9929         /* Annotate start of buffers if we had overruns */
9930         if (ring_buffer_overruns(iter->array_buffer->buffer))
9931                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9932
9933         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9934         if (trace_clocks[iter->tr->clock_id].in_ns)
9935                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9936
9937         /* Cannot use kmalloc for iter.temp and iter.fmt */
9938         iter->temp = static_temp_buf;
9939         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9940         iter->fmt = static_fmt_buf;
9941         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9942 }
9943
9944 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9945 {
9946         /* use static because iter can be a bit big for the stack */
9947         static struct trace_iterator iter;
9948         static atomic_t dump_running;
9949         struct trace_array *tr = &global_trace;
9950         unsigned int old_userobj;
9951         unsigned long flags;
9952         int cnt = 0, cpu;
9953
9954         /* Only allow one dump user at a time. */
9955         if (atomic_inc_return(&dump_running) != 1) {
9956                 atomic_dec(&dump_running);
9957                 return;
9958         }
9959
9960         /*
9961          * Always turn off tracing when we dump.
9962          * We don't need to show trace output of what happens
9963          * between multiple crashes.
9964          *
9965          * If the user does a sysrq-z, then they can re-enable
9966          * tracing with echo 1 > tracing_on.
9967          */
9968         tracing_off();
9969
9970         local_irq_save(flags);
9971
9972         /* Simulate the iterator */
9973         trace_init_global_iter(&iter);
9974
9975         for_each_tracing_cpu(cpu) {
9976                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9977         }
9978
9979         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9980
9981         /* don't look at user memory in panic mode */
9982         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9983
9984         switch (oops_dump_mode) {
9985         case DUMP_ALL:
9986                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9987                 break;
9988         case DUMP_ORIG:
9989                 iter.cpu_file = raw_smp_processor_id();
9990                 break;
9991         case DUMP_NONE:
9992                 goto out_enable;
9993         default:
9994                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9995                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9996         }
9997
9998         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9999
10000         /* Did function tracer already get disabled? */
10001         if (ftrace_is_dead()) {
10002                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10003                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10004         }
10005
10006         /*
10007          * We need to stop all tracing on all CPUs to read
10008          * the next buffer. This is a bit expensive, but is
10009          * not done often. We read everything we can,
10010          * and then release the locks again.
10011          */
10012
10013         while (!trace_empty(&iter)) {
10014
10015                 if (!cnt)
10016                         printk(KERN_TRACE "---------------------------------\n");
10017
10018                 cnt++;
10019
10020                 trace_iterator_reset(&iter);
10021                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10022
10023                 if (trace_find_next_entry_inc(&iter) != NULL) {
10024                         int ret;
10025
10026                         ret = print_trace_line(&iter);
10027                         if (ret != TRACE_TYPE_NO_CONSUME)
10028                                 trace_consume(&iter);
10029                 }
10030                 touch_nmi_watchdog();
10031
10032                 trace_printk_seq(&iter.seq);
10033         }
10034
10035         if (!cnt)
10036                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10037         else
10038                 printk(KERN_TRACE "---------------------------------\n");
10039
10040  out_enable:
10041         tr->trace_flags |= old_userobj;
10042
10043         for_each_tracing_cpu(cpu) {
10044                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10045         }
10046         atomic_dec(&dump_running);
10047         local_irq_restore(flags);
10048 }
10049 EXPORT_SYMBOL_GPL(ftrace_dump);
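
/*
 * Illustrative sketch (not part of the original file): ftrace_dump() is
 * typically invoked from an emergency path, as the panic and die notifiers
 * above do. The helper name below is made up for this example; the dump
 * modes are the real enum ftrace_dump_mode values handled in the switch
 * statement above.
 */
#if 0
static void sample_emergency_dump(bool all_cpus)
{
	/* DUMP_ALL prints every CPU's buffer, DUMP_ORIG only this CPU's. */
	ftrace_dump(all_cpus ? DUMP_ALL : DUMP_ORIG);
}
#endif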
10050
10051 #define WRITE_BUFSIZE  4096
10052
10053 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10054                                 size_t count, loff_t *ppos,
10055                                 int (*createfn)(const char *))
10056 {
10057         char *kbuf, *buf, *tmp;
10058         int ret = 0;
10059         size_t done = 0;
10060         size_t size;
10061
10062         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10063         if (!kbuf)
10064                 return -ENOMEM;
10065
10066         while (done < count) {
10067                 size = count - done;
10068
10069                 if (size >= WRITE_BUFSIZE)
10070                         size = WRITE_BUFSIZE - 1;
10071
10072                 if (copy_from_user(kbuf, buffer + done, size)) {
10073                         ret = -EFAULT;
10074                         goto out;
10075                 }
10076                 kbuf[size] = '\0';
10077                 buf = kbuf;
10078                 do {
10079                         tmp = strchr(buf, '\n');
10080                         if (tmp) {
10081                                 *tmp = '\0';
10082                                 size = tmp - buf + 1;
10083                         } else {
10084                                 size = strlen(buf);
10085                                 if (done + size < count) {
10086                                         if (buf != kbuf)
10087                                                 break;
10088                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10089                                         pr_warn("Line length is too long: Should be less than %d\n",
10090                                                 WRITE_BUFSIZE - 2);
10091                                         ret = -EINVAL;
10092                                         goto out;
10093                                 }
10094                         }
10095                         done += size;
10096
10097                         /* Remove comments */
10098                         tmp = strchr(buf, '#');
10099
10100                         if (tmp)
10101                                 *tmp = '\0';
10102
10103                         ret = createfn(buf);
10104                         if (ret)
10105                                 goto out;
10106                         buf += size;
10107
10108                 } while (done < count);
10109         }
10110         ret = done;
10111
10112 out:
10113         kfree(kbuf);
10114
10115         return ret;
10116 }
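
/*
 * Illustrative sketch (not part of the original file): how a command file's
 * write handler typically feeds user input through trace_parse_run_command().
 * Each newline-separated, '#'-comment-stripped line is handed to the
 * createfn callback. The sample_* names are made up for this example.
 */
#if 0
static int sample_create_cmd(const char *raw_command)
{
	/* Parse one command line here; return 0 or a negative errno. */
	pr_info("got command: %s\n", raw_command);
	return 0;
}

static ssize_t sample_cmd_write(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       sample_create_cmd);
}
#endif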
10117
10118 __init static int tracer_alloc_buffers(void)
10119 {
10120         int ring_buf_size;
10121         int ret = -ENOMEM;
10122
10123
10124         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10125                 pr_warn("Tracing disabled due to lockdown\n");
10126                 return -EPERM;
10127         }
10128
10129         /*
10130          * Make sure we don't accidentally add more trace options
10131          * than we have bits for.
10132          */
10133         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10134
10135         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10136                 goto out;
10137
10138         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10139                 goto out_free_buffer_mask;
10140
10141         /* Only allocate trace_printk buffers if a trace_printk exists */
10142         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10143                 /* Must be called before global_trace.buffer is allocated */
10144                 trace_printk_init_buffers();
10145
10146         /* To save memory, keep the ring buffer size at its minimum */
10147         if (ring_buffer_expanded)
10148                 ring_buf_size = trace_buf_size;
10149         else
10150                 ring_buf_size = 1;
10151
10152         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10153         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10154
10155         raw_spin_lock_init(&global_trace.start_lock);
10156
10157         /*
10158          * The prepare callback allocates some memory for the ring buffer. We
10159          * don't free the buffer if the CPU goes down. If we were to free
10160          * the buffer, then the user would lose any trace that was in the
10161          * buffer. The memory will be removed once the "instance" is removed.
10162          */
10163         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10164                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10165                                       NULL);
10166         if (ret < 0)
10167                 goto out_free_cpumask;
10168         /* Used for event triggers */
10169         ret = -ENOMEM;
10170         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10171         if (!temp_buffer)
10172                 goto out_rm_hp_state;
10173
10174         if (trace_create_savedcmd() < 0)
10175                 goto out_free_temp_buffer;
10176
10177         /* TODO: make the number of buffers hot pluggable with CPUS */
10178         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10179                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10180                 goto out_free_savedcmd;
10181         }
10182
10183         if (global_trace.buffer_disabled)
10184                 tracing_off();
10185
10186         if (trace_boot_clock) {
10187                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10188                 if (ret < 0)
10189                         pr_warn("Trace clock %s not defined, going back to default\n",
10190                                 trace_boot_clock);
10191         }
10192
10193         /*
10194          * register_tracer() might reference current_trace, so it
10195          * needs to be set before we register anything. This is
10196          * just a bootstrap of current_trace anyway.
10197          */
10198         global_trace.current_trace = &nop_trace;
10199
10200         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10201
10202         ftrace_init_global_array_ops(&global_trace);
10203
10204         init_trace_flags_index(&global_trace);
10205
10206         register_tracer(&nop_trace);
10207
10208         /* Function tracing may start here (via kernel command line) */
10209         init_function_trace();
10210
10211         /* All seems OK, enable tracing */
10212         tracing_disabled = 0;
10213
10214         atomic_notifier_chain_register(&panic_notifier_list,
10215                                        &trace_panic_notifier);
10216
10217         register_die_notifier(&trace_die_notifier);
10218
10219         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10220
10221         INIT_LIST_HEAD(&global_trace.systems);
10222         INIT_LIST_HEAD(&global_trace.events);
10223         INIT_LIST_HEAD(&global_trace.hist_vars);
10224         INIT_LIST_HEAD(&global_trace.err_log);
10225         list_add(&global_trace.list, &ftrace_trace_arrays);
10226
10227         apply_trace_boot_options();
10228
10229         register_snapshot_cmd();
10230
10231         test_can_verify();
10232
10233         return 0;
10234
10235 out_free_savedcmd:
10236         free_saved_cmdlines_buffer(savedcmd);
10237 out_free_temp_buffer:
10238         ring_buffer_free(temp_buffer);
10239 out_rm_hp_state:
10240         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10241 out_free_cpumask:
10242         free_cpumask_var(global_trace.tracing_cpumask);
10243 out_free_buffer_mask:
10244         free_cpumask_var(tracing_buffer_mask);
10245 out:
10246         return ret;
10247 }
10248
10249 void __init ftrace_boot_snapshot(void)
10250 {
10251         if (snapshot_at_boot) {
10252                 tracing_snapshot();
10253                 internal_trace_puts("** Boot snapshot taken **\n");
10254         }
10255 }
10256
10257 void __init early_trace_init(void)
10258 {
10259         if (tracepoint_printk) {
10260                 tracepoint_print_iter =
10261                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10262                 if (MEM_FAIL(!tracepoint_print_iter,
10263                              "Failed to allocate trace iterator\n"))
10264                         tracepoint_printk = 0;
10265                 else
10266                         static_key_enable(&tracepoint_printk_key.key);
10267         }
10268         tracer_alloc_buffers();
10269 }
10270
10271 void __init trace_init(void)
10272 {
10273         trace_event_init();
10274 }
10275
10276 __init static void clear_boot_tracer(void)
10277 {
10278         /*
10279          * The default bootup tracer is stored in an init section.
10280          * This function is called from a late initcall. If the boot
10281          * tracer was not found, clear it out to prevent a later
10282          * registration from accessing the buffer that is about
10283          * to be freed.
10284          */
10285         if (!default_bootup_tracer)
10286                 return;
10287
10288         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10289                default_bootup_tracer);
10290         default_bootup_tracer = NULL;
10291 }
10292
10293 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10294 __init static void tracing_set_default_clock(void)
10295 {
10296         /* sched_clock_stable() is determined in late_initcall */
10297         if (!trace_boot_clock && !sched_clock_stable()) {
10298                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10299                         pr_warn("Can not set tracing clock due to lockdown\n");
10300                         return;
10301                 }
10302
10303                 printk(KERN_WARNING
10304                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10305                        "If you want to keep using the local clock, then add:\n"
10306                        "  \"trace_clock=local\"\n"
10307                        "on the kernel command line\n");
10308                 tracing_set_clock(&global_trace, "global");
10309         }
10310 }
10311 #else
10312 static inline void tracing_set_default_clock(void) { }
10313 #endif
10314
10315 __init static int late_trace_init(void)
10316 {
10317         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10318                 static_key_disable(&tracepoint_printk_key.key);
10319                 tracepoint_printk = 0;
10320         }
10321
10322         tracing_set_default_clock();
10323         clear_boot_tracer();
10324         return 0;
10325 }
10326
10327 late_initcall_sync(late_trace_init);