kernel/trace/trace.c  [platform/kernel/linux-starfive.git, commit f00beac3e1f8af5109982814231941b29ba934d5]
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing (including tracers/events set up via the kernel
73  * cmdline) is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
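
/*
 * Illustrative layout sketch (derived from the comment above, not extra
 * state in this file): a saved block of three eval maps would look like
 *
 *	[0] head  { .mod = owning module, or NULL if built in, .length = 3 }
 *	[1] map   { .system, .eval_string, .eval_value }
 *	[2] map   { ... }
 *	[3] map   { ... }
 *	[4] tail  { .next = next saved block, or NULL }
 *
 * trace_eval_maps points at the first such block, and each block's tail
 * chains to the next one.
 */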
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
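
/*
 * For example, ns2usecs(1500) computes (1500 + 500) / 1000 = 2, i.e. the
 * nanosecond value is rounded to the nearest microsecond rather than
 * simply truncated.
 */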
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
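
/*
 * Usage sketch (illustrative only; my_export and my_export_write are
 * hypothetical names, not part of this file): a module that wants to
 * receive trace data can register a trace_export with a write callback
 * and the kinds of data it is interested in:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		... copy the raw trace entry to wherever it should go ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */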
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr: pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
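
/*
 * Usage sketch (illustrative): code that looks up a trace instance by
 * name drops its reference with trace_array_put() when it is done, so
 * the instance can later be removed.  "my_instance" is a made-up name,
 * and the exact signature of trace_array_get_by_name() can differ
 * between kernel versions:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello from my driver\n");
 *		trace_array_put(tr);
 *	}
 */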
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
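
/*
 * Illustrative sketch of how the three helpers above are typically wired
 * into seq_file operations (the names my_pid_list, p_start, p_next,
 * p_stop and my_pid_seq_ops are hypothetical; the real users live in the
 * event and function tracer pid filtering code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * Because the iterators hand back pid + 1, a pid of zero can still be
 * displayed while NULL terminates the iteration.
 */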
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new array. The write is an all or nothing
698          * operation: a new array is built when the user adds new pids,
699          * and if the operation fails, the current list is left
700          * unmodified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low value of 16384 so that,
812  * if a dump on oops happens, there is not an excessive amount of
813  * output to wait through. The size is configurable at both boot
814  * time and run time anyway.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low level protection.
832  * The validity of the events (returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the events producer.
840  *   B) the page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
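
/*
 * Usage sketch (illustrative): trace readers bracket their access to a
 * cpu buffer (or to all of them) with this pair:
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from the cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock exclusively, so it
 * waits for (and then blocks) the per-cpu readers shown above.
 */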
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff), that just want to
965          * races where it gets disabled while we still do a record.
966          * races of where it gets disabled but we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1006                        const char *str, int size)
1007 {
1008         struct ring_buffer_event *event;
1009         struct trace_buffer *buffer;
1010         struct print_entry *entry;
1011         unsigned int trace_ctx;
1012         int alloc;
1013
1014         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1015                 return 0;
1016
1017         if (unlikely(tracing_selftest_running || tracing_disabled))
1018                 return 0;
1019
1020         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1021
1022         trace_ctx = tracing_gen_ctx();
1023         buffer = tr->array_buffer.buffer;
1024         ring_buffer_nest_start(buffer);
1025         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1026                                             trace_ctx);
1027         if (!event) {
1028                 size = 0;
1029                 goto out;
1030         }
1031
1032         entry = ring_buffer_event_data(event);
1033         entry->ip = ip;
1034
1035         memcpy(&entry->buf, str, size);
1036
1037         /* Add a newline if necessary */
1038         if (entry->buf[size - 1] != '\n') {
1039                 entry->buf[size] = '\n';
1040                 entry->buf[size + 1] = '\0';
1041         } else
1042                 entry->buf[size] = '\0';
1043
1044         __buffer_unlock_commit(buffer, event);
1045         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1046  out:
1047         ring_buffer_nest_end(buffer);
1048         return size;
1049 }
1050 EXPORT_SYMBOL_GPL(__trace_array_puts);
1051
1052 /**
1053  * __trace_puts - write a constant string into the trace buffer.
1054  * @ip:    The address of the caller
1055  * @str:   The constant string to write
1056  * @size:  The size of the string.
1057  */
1058 int __trace_puts(unsigned long ip, const char *str, int size)
1059 {
1060         return __trace_array_puts(&global_trace, ip, str, size);
1061 }
1062 EXPORT_SYMBOL_GPL(__trace_puts);
1063
1064 /**
1065  * __trace_bputs - write the pointer to a constant string into trace buffer
1066  * @ip:    The address of the caller
1067  * @str:   The constant string to write to the buffer to
1068  */
1069 int __trace_bputs(unsigned long ip, const char *str)
1070 {
1071         struct ring_buffer_event *event;
1072         struct trace_buffer *buffer;
1073         struct bputs_entry *entry;
1074         unsigned int trace_ctx;
1075         int size = sizeof(struct bputs_entry);
1076         int ret = 0;
1077
1078         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1079                 return 0;
1080
1081         if (unlikely(tracing_selftest_running || tracing_disabled))
1082                 return 0;
1083
1084         trace_ctx = tracing_gen_ctx();
1085         buffer = global_trace.array_buffer.buffer;
1086
1087         ring_buffer_nest_start(buffer);
1088         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1089                                             trace_ctx);
1090         if (!event)
1091                 goto out;
1092
1093         entry = ring_buffer_event_data(event);
1094         entry->ip                       = ip;
1095         entry->str                      = str;
1096
1097         __buffer_unlock_commit(buffer, event);
1098         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1099
1100         ret = 1;
1101  out:
1102         ring_buffer_nest_end(buffer);
1103         return ret;
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_bputs);
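
/*
 * Callers normally do not use __trace_puts() or __trace_bputs() directly;
 * they use the trace_puts() macro (declared next to trace_printk()), which
 * picks __trace_bputs() when the string is a build-time constant and
 * __trace_puts() otherwise.  A minimal usage sketch:
 *
 *	trace_puts("hit the slow path\n");
 *
 * records the string in the global trace buffer together with the
 * caller's instruction pointer.
 */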
1106
1107 #ifdef CONFIG_TRACER_SNAPSHOT
1108 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1109                                            void *cond_data)
1110 {
1111         struct tracer *tracer = tr->current_trace;
1112         unsigned long flags;
1113
1114         if (in_nmi()) {
1115                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1116                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1117                 return;
1118         }
1119
1120         if (!tr->allocated_snapshot) {
1121                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1122                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1123                 tracer_tracing_off(tr);
1124                 return;
1125         }
1126
1127         /* Note, snapshot can not be used when the tracer uses it */
1128         if (tracer->use_max_tr) {
1129                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1130                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1131                 return;
1132         }
1133
1134         local_irq_save(flags);
1135         update_max_tr(tr, current, smp_processor_id(), cond_data);
1136         local_irq_restore(flags);
1137 }
1138
1139 void tracing_snapshot_instance(struct trace_array *tr)
1140 {
1141         tracing_snapshot_instance_cond(tr, NULL);
1142 }
1143
1144 /**
1145  * tracing_snapshot - take a snapshot of the current buffer.
1146  *
1147  * This causes a swap between the snapshot buffer and the current live
1148  * tracing buffer. You can use this to take snapshots of the live
1149  * trace when some condition is triggered, but continue to trace.
1150  *
1151  * Note, make sure to allocate the snapshot with either
1152  * a tracing_snapshot_alloc(), or by doing it manually
1153  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1154  *
1155  * If the snapshot buffer is not allocated, it will stop tracing,
1156  * basically making a permanent snapshot.
1157  */
1158 void tracing_snapshot(void)
1159 {
1160         struct trace_array *tr = &global_trace;
1161
1162         tracing_snapshot_instance(tr);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot);
1165
1166 /**
1167  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1168  * @tr:         The tracing instance to snapshot
1169  * @cond_data:  The data to be tested conditionally, and possibly saved
1170  *
1171  * This is the same as tracing_snapshot() except that the snapshot is
1172  * conditional - the snapshot will only happen if the
1173  * cond_snapshot.update() implementation receiving the cond_data
1174  * returns true, which means that the trace array's cond_snapshot
1175  * update() operation used the cond_data to determine whether the
1176  * snapshot should be taken, and if it was, presumably saved it along
1177  * with the snapshot.
1178  */
1179 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1180 {
1181         tracing_snapshot_instance_cond(tr, cond_data);
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1184
1185 /**
1186  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1187  * @tr:         The tracing instance
1188  *
1189  * When the user enables a conditional snapshot using
1190  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1191  * with the snapshot.  This accessor is used to retrieve it.
1192  *
1193  * Should not be called from cond_snapshot.update(), since it takes
1194  * the tr->max_lock lock, which the code calling
1195  * cond_snapshot.update() has already taken.
1196  *
1197  * Returns the cond_data associated with the trace array's snapshot.
1198  */
1199 void *tracing_cond_snapshot_data(struct trace_array *tr)
1200 {
1201         void *cond_data = NULL;
1202
1203         local_irq_disable();
1204         arch_spin_lock(&tr->max_lock);
1205
1206         if (tr->cond_snapshot)
1207                 cond_data = tr->cond_snapshot->cond_data;
1208
1209         arch_spin_unlock(&tr->max_lock);
1210         local_irq_enable();
1211
1212         return cond_data;
1213 }
1214 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1215
1216 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1217                                         struct array_buffer *size_buf, int cpu_id);
1218 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1219
1220 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1221 {
1222         int ret;
1223
1224         if (!tr->allocated_snapshot) {
1225
1226                 /* allocate spare buffer */
1227                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1228                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1229                 if (ret < 0)
1230                         return ret;
1231
1232                 tr->allocated_snapshot = true;
1233         }
1234
1235         return 0;
1236 }
1237
1238 static void free_snapshot(struct trace_array *tr)
1239 {
1240         /*
1241          * We don't free the ring buffer; instead, we resize it because
1242          * the max_tr ring buffer has some state (e.g. ring->clock) and
1243          * we want to preserve it.
1244          */
1245         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1246         set_buffer_entries(&tr->max_buffer, 1);
1247         tracing_reset_online_cpus(&tr->max_buffer);
1248         tr->allocated_snapshot = false;
1249 }
1250
1251 /**
1252  * tracing_alloc_snapshot - allocate snapshot buffer.
1253  *
1254  * This only allocates the snapshot buffer if it isn't already
1255  * allocated - it doesn't also take a snapshot.
1256  *
1257  * This is meant to be used in cases where the snapshot buffer needs
1258  * to be set up for events that can't sleep but need to be able to
1259  * trigger a snapshot.
1260  */
1261 int tracing_alloc_snapshot(void)
1262 {
1263         struct trace_array *tr = &global_trace;
1264         int ret;
1265
1266         ret = tracing_alloc_snapshot_instance(tr);
1267         WARN_ON(ret < 0);
1268
1269         return ret;
1270 }
1271 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1272
1273 /**
1274  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1275  *
1276  * This is similar to tracing_snapshot(), but it will allocate the
1277  * snapshot buffer if it isn't already allocated. Use this only
1278  * where it is safe to sleep, as the allocation may sleep.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  */
1284 void tracing_snapshot_alloc(void)
1285 {
1286         int ret;
1287
1288         ret = tracing_alloc_snapshot();
1289         if (ret < 0)
1290                 return;
1291
1292         tracing_snapshot();
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
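
/*
 * Usage sketch (illustrative; "rare_condition" is a made-up predicate): a
 * debugging change can capture the live buffer when something interesting
 * happens, without stopping the trace:
 *
 *	if (rare_condition)
 *		tracing_snapshot_alloc();
 *
 * If the trigger point cannot sleep, allocate the spare buffer up front
 * with tracing_alloc_snapshot() and call tracing_snapshot() from the
 * atomic context instead.
 */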
1295
1296 /**
1297  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1298  * @tr:         The tracing instance
1299  * @cond_data:  User data to associate with the snapshot
1300  * @update:     Implementation of the cond_snapshot update function
1301  *
1302  * Check whether the conditional snapshot for the given instance has
1303  * already been enabled, or if the current tracer is already using a
1304  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1305  * save the cond_data and update function inside.
1306  *
1307  * Returns 0 if successful, error otherwise.
1308  */
1309 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1310                                  cond_update_fn_t update)
1311 {
1312         struct cond_snapshot *cond_snapshot;
1313         int ret = 0;
1314
1315         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1316         if (!cond_snapshot)
1317                 return -ENOMEM;
1318
1319         cond_snapshot->cond_data = cond_data;
1320         cond_snapshot->update = update;
1321
1322         mutex_lock(&trace_types_lock);
1323
1324         ret = tracing_alloc_snapshot_instance(tr);
1325         if (ret)
1326                 goto fail_unlock;
1327
1328         if (tr->current_trace->use_max_tr) {
1329                 ret = -EBUSY;
1330                 goto fail_unlock;
1331         }
1332
1333         /*
1334          * The cond_snapshot can only change to NULL without the
1335          * trace_types_lock. We don't care if we race with it going
1336          * to NULL, but we want to make sure that it's not set to
1337          * something other than NULL when we get here, which we can
1338          * do safely with only holding the trace_types_lock and not
1339          * having to take the max_lock.
1340          */
1341         if (tr->cond_snapshot) {
1342                 ret = -EBUSY;
1343                 goto fail_unlock;
1344         }
1345
1346         local_irq_disable();
1347         arch_spin_lock(&tr->max_lock);
1348         tr->cond_snapshot = cond_snapshot;
1349         arch_spin_unlock(&tr->max_lock);
1350         local_irq_enable();
1351
1352         mutex_unlock(&trace_types_lock);
1353
1354         return ret;
1355
1356  fail_unlock:
1357         mutex_unlock(&trace_types_lock);
1358         kfree(cond_snapshot);
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
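
/*
 * Usage sketch for the conditional snapshot API (illustrative; my_update,
 * my_cond_data, my_data and their fields are made up).  The update
 * callback decides, from the cond_data passed to tracing_snapshot_cond(),
 * whether the swap to the snapshot buffer should actually happen:
 *
 *	struct my_cond_data { unsigned long value, threshold; };
 *	static struct my_cond_data my_data;
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond_data *d = cond_data;
 *
 *		return d->value > d->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */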
1362
1363 /**
1364  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1365  * @tr:         The tracing instance
1366  *
1367  * Check whether the conditional snapshot for the given instance is
1368  * enabled; if so, free the cond_snapshot associated with it,
1369  * otherwise return -EINVAL.
1370  *
1371  * Returns 0 if successful, error otherwise.
1372  */
1373 int tracing_snapshot_cond_disable(struct trace_array *tr)
1374 {
1375         int ret = 0;
1376
1377         local_irq_disable();
1378         arch_spin_lock(&tr->max_lock);
1379
1380         if (!tr->cond_snapshot)
1381                 ret = -EINVAL;
1382         else {
1383                 kfree(tr->cond_snapshot);
1384                 tr->cond_snapshot = NULL;
1385         }
1386
1387         arch_spin_unlock(&tr->max_lock);
1388         local_irq_enable();
1389
1390         return ret;
1391 }
1392 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1393 #else
1394 void tracing_snapshot(void)
1395 {
1396         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1397 }
1398 EXPORT_SYMBOL_GPL(tracing_snapshot);
1399 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1400 {
1401         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1404 int tracing_alloc_snapshot(void)
1405 {
1406         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1410 void tracing_snapshot_alloc(void)
1411 {
1412         /* Give warning */
1413         tracing_snapshot();
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1416 void *tracing_cond_snapshot_data(struct trace_array *tr)
1417 {
1418         return NULL;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1421 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1422 {
1423         return -ENODEV;
1424 }
1425 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         return false;
1429 }
1430 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1431 #define free_snapshot(tr)       do { } while (0)
1432 #endif /* CONFIG_TRACER_SNAPSHOT */
1433
1434 void tracer_tracing_off(struct trace_array *tr)
1435 {
1436         if (tr->array_buffer.buffer)
1437                 ring_buffer_record_off(tr->array_buffer.buffer);
1438         /*
1439          * This flag is looked at when buffers haven't been allocated
1440          * yet, or by some tracers (like irqsoff), that just want to
1441          * know if the ring buffer has been disabled, but it can handle
1442          * races where it gets disabled while we still do a record.
1443          * As the check is in the fast path of the tracers, it is more
1444          * important to be fast than accurate.
1445          */
1446         tr->buffer_disabled = 1;
1447         /* Make the flag seen by readers */
1448         smp_wmb();
1449 }
1450
1451 /**
1452  * tracing_off - turn off tracing buffers
1453  *
1454  * This function stops the tracing buffers from recording data.
1455  * It does not disable any overhead the tracers themselves may
1456  * be causing. This function simply causes all recording to
1457  * the ring buffers to fail.
1458  */
1459 void tracing_off(void)
1460 {
1461         tracer_tracing_off(&global_trace);
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_off);
1464
1465 void disable_trace_on_warning(void)
1466 {
1467         if (__disable_trace_on_warning) {
1468                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1469                         "Disabling tracing due to warning\n");
1470                 tracing_off();
1471         }
1472 }
1473
1474 /**
1475  * tracer_tracing_is_on - show real state of ring buffer enabled
1476  * @tr : the trace array to know if ring buffer is enabled
1477  *
1478  * Shows real state of the ring buffer if it is enabled or not.
1479  */
1480 bool tracer_tracing_is_on(struct trace_array *tr)
1481 {
1482         if (tr->array_buffer.buffer)
1483                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1484         return !tr->buffer_disabled;
1485 }
1486
1487 /**
1488  * tracing_is_on - show state of ring buffers enabled
1489  */
1490 int tracing_is_on(void)
1491 {
1492         return tracer_tracing_is_on(&global_trace);
1493 }
1494 EXPORT_SYMBOL_GPL(tracing_is_on);
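
/*
 * Usage sketch (illustrative; "looks_corrupted" is a made-up check): a
 * common debugging pattern is to stop the ring buffers the moment a
 * problem is detected, so that the trace leading up to it is preserved:
 *
 *	if (looks_corrupted(obj)) {
 *		trace_printk("corruption detected\n");
 *		tracing_off();
 *	}
 *
 * Recording can be re-enabled later with tracing_on(), or from user space
 * by writing '1' to the tracing_on file in tracefs.
 */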
1495
1496 static int __init set_buf_size(char *str)
1497 {
1498         unsigned long buf_size;
1499
1500         if (!str)
1501                 return 0;
1502         buf_size = memparse(str, &str);
1503         /*
1504          * nr_entries can not be zero and the startup
1505          * tests require some buffer space. Therefore
1506          * ensure we have at least 4096 bytes of buffer.
1507          */
1508         trace_buf_size = max(4096UL, buf_size);
1509         return 1;
1510 }
1511 __setup("trace_buf_size=", set_buf_size);
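
/*
 * Example (boot command line): the size is parsed with memparse(), so
 * suffixes are accepted.  For instance
 *
 *	trace_buf_size=1M
 *
 * requests roughly one megabyte per buffer (rounded to page size), and
 * anything below the 4096 byte minimum enforced above is raised to it.
 */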
1512
1513 static int __init set_tracing_thresh(char *str)
1514 {
1515         unsigned long threshold;
1516         int ret;
1517
1518         if (!str)
1519                 return 0;
1520         ret = kstrtoul(str, 0, &threshold);
1521         if (ret < 0)
1522                 return 0;
1523         tracing_thresh = threshold * 1000;
1524         return 1;
1525 }
1526 __setup("tracing_thresh=", set_tracing_thresh);
1527
1528 unsigned long nsecs_to_usecs(unsigned long nsecs)
1529 {
1530         return nsecs / 1000;
1531 }
1532
1533 /*
1534  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1535  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1536  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1537  * of strings in the order that the evals (enum) were defined.
1538  */
1539 #undef C
1540 #define C(a, b) b
1541
1542 /* These must match the bit positions in trace_iterator_flags */
1543 static const char *trace_options[] = {
1544         TRACE_FLAGS
1545         NULL
1546 };
1547
1548 static struct {
1549         u64 (*func)(void);
1550         const char *name;
1551         int in_ns;              /* is this clock in nanoseconds? */
1552 } trace_clocks[] = {
1553         { trace_clock_local,            "local",        1 },
1554         { trace_clock_global,           "global",       1 },
1555         { trace_clock_counter,          "counter",      0 },
1556         { trace_clock_jiffies,          "uptime",       0 },
1557         { trace_clock,                  "perf",         1 },
1558         { ktime_get_mono_fast_ns,       "mono",         1 },
1559         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1560         { ktime_get_boot_fast_ns,       "boot",         1 },
1561         { ktime_get_tai_fast_ns,        "tai",          1 },
1562         ARCH_TRACE_CLOCKS
1563 };
1564
1565 bool trace_clock_in_ns(struct trace_array *tr)
1566 {
1567         if (trace_clocks[tr->clock_id].in_ns)
1568                 return true;
1569
1570         return false;
1571 }
1572
1573 /*
1574  * trace_parser_get_init - gets the buffer for trace parser
1575  */
1576 int trace_parser_get_init(struct trace_parser *parser, int size)
1577 {
1578         memset(parser, 0, sizeof(*parser));
1579
1580         parser->buffer = kmalloc(size, GFP_KERNEL);
1581         if (!parser->buffer)
1582                 return 1;
1583
1584         parser->size = size;
1585         return 0;
1586 }
1587
1588 /*
1589  * trace_parser_put - frees the buffer for trace parser
1590  */
1591 void trace_parser_put(struct trace_parser *parser)
1592 {
1593         kfree(parser->buffer);
1594         parser->buffer = NULL;
1595 }
1596
1597 /*
1598  * trace_get_user - reads the user input string separated by space
1599  * (matched by isspace(ch))
1600  *
1601  * For each string found the 'struct trace_parser' is updated,
1602  * and the function returns.
1603  *
1604  * Returns number of bytes read.
1605  *
1606  * See kernel/trace/trace.h for 'struct trace_parser' details.
1607  */
1608 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1609         size_t cnt, loff_t *ppos)
1610 {
1611         char ch;
1612         size_t read = 0;
1613         ssize_t ret;
1614
1615         if (!*ppos)
1616                 trace_parser_clear(parser);
1617
1618         ret = get_user(ch, ubuf++);
1619         if (ret)
1620                 goto out;
1621
1622         read++;
1623         cnt--;
1624
1625         /*
1626          * The parser is not finished with the last write,
1627          * continue reading the user input without skipping spaces.
1628          */
1629         if (!parser->cont) {
1630                 /* skip white space */
1631                 while (cnt && isspace(ch)) {
1632                         ret = get_user(ch, ubuf++);
1633                         if (ret)
1634                                 goto out;
1635                         read++;
1636                         cnt--;
1637                 }
1638
1639                 parser->idx = 0;
1640
1641                 /* only spaces were written */
1642                 if (isspace(ch) || !ch) {
1643                         *ppos += read;
1644                         ret = read;
1645                         goto out;
1646                 }
1647         }
1648
1649         /* read the non-space input */
1650         while (cnt && !isspace(ch) && ch) {
1651                 if (parser->idx < parser->size - 1)
1652                         parser->buffer[parser->idx++] = ch;
1653                 else {
1654                         ret = -EINVAL;
1655                         goto out;
1656                 }
1657                 ret = get_user(ch, ubuf++);
1658                 if (ret)
1659                         goto out;
1660                 read++;
1661                 cnt--;
1662         }
1663
1664         /* We either got finished input or we have to wait for another call. */
1665         if (isspace(ch) || !ch) {
1666                 parser->buffer[parser->idx] = 0;
1667                 parser->cont = false;
1668         } else if (parser->idx < parser->size - 1) {
1669                 parser->cont = true;
1670                 parser->buffer[parser->idx++] = ch;
1671                 /* Make sure the parsed string always terminates with '\0'. */
1672                 parser->buffer[parser->idx] = 0;
1673         } else {
1674                 ret = -EINVAL;
1675                 goto out;
1676         }
1677
1678         *ppos += read;
1679         ret = read;
1680
1681 out:
1682         return ret;
1683 }
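
/*
 * Illustrative sketch only (not taken from a specific caller): a ->write()
 * file handler typically drains user input with trace_get_user() one token
 * at a time, reusing the same parser so that a token split across two
 * writes is stitched together via parser->cont. my_handle_token() below is
 * a hypothetical placeholder, and the buffer size is arbitrary.
 *
 *	struct trace_parser parser;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (ret >= 0 && trace_parser_loaded(&parser))
 *		my_handle_token(parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return ret;
 */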
1684
1685 /* TODO add a seq_buf_to_buffer() */
1686 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1687 {
1688         int len;
1689
1690         if (trace_seq_used(s) <= s->seq.readpos)
1691                 return -EBUSY;
1692
1693         len = trace_seq_used(s) - s->seq.readpos;
1694         if (cnt > len)
1695                 cnt = len;
1696         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1697
1698         s->seq.readpos += cnt;
1699         return cnt;
1700 }
1701
1702 unsigned long __read_mostly     tracing_thresh;
1703
1704 #ifdef CONFIG_TRACER_MAX_TRACE
1705 static const struct file_operations tracing_max_lat_fops;
1706
1707 #ifdef LATENCY_FS_NOTIFY
1708
1709 static struct workqueue_struct *fsnotify_wq;
1710
1711 static void latency_fsnotify_workfn(struct work_struct *work)
1712 {
1713         struct trace_array *tr = container_of(work, struct trace_array,
1714                                               fsnotify_work);
1715         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1716 }
1717
1718 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1719 {
1720         struct trace_array *tr = container_of(iwork, struct trace_array,
1721                                               fsnotify_irqwork);
1722         queue_work(fsnotify_wq, &tr->fsnotify_work);
1723 }
1724
1725 static void trace_create_maxlat_file(struct trace_array *tr,
1726                                      struct dentry *d_tracer)
1727 {
1728         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1729         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1730         tr->d_max_latency = trace_create_file("tracing_max_latency",
1731                                               TRACE_MODE_WRITE,
1732                                               d_tracer, tr,
1733                                               &tracing_max_lat_fops);
1734 }
1735
1736 __init static int latency_fsnotify_init(void)
1737 {
1738         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1739                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1740         if (!fsnotify_wq) {
1741                 pr_err("Unable to allocate tr_max_lat_wq\n");
1742                 return -ENOMEM;
1743         }
1744         return 0;
1745 }
1746
1747 late_initcall_sync(latency_fsnotify_init);
1748
1749 void latency_fsnotify(struct trace_array *tr)
1750 {
1751         if (!fsnotify_wq)
1752                 return;
1753         /*
1754          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1755          * possible that we are called from __schedule() or do_idle(), which
1756          * could cause a deadlock.
1757          */
1758         irq_work_queue(&tr->fsnotify_irqwork);
1759 }
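
/*
 * Reader's note (descriptive only): the notification is deliberately
 * bounced twice before reaching fsnotify, mirroring the code above:
 *
 *	latency_fsnotify(tr)                  may run deep in __schedule()/do_idle()
 *	  -> irq_work_queue()                 defers out of scheduler context
 *	    -> latency_fsnotify_workfn_irq()  runs in hard interrupt context
 *	      -> queue_work(fsnotify_wq)      defers again to process context
 *	        -> latency_fsnotify_workfn()  finally safe to call fsnotify_inode()
 */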
1760
1761 #else /* !LATENCY_FS_NOTIFY */
1762
1763 #define trace_create_maxlat_file(tr, d_tracer)                          \
1764         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1765                           d_tracer, tr, &tracing_max_lat_fops)
1766
1767 #endif
1768
1769 /*
1770  * Copy the new maximum trace into the separate maximum-trace
1771  * structure. (this way the maximum trace is permanently saved,
1772  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1773  */
1774 static void
1775 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1776 {
1777         struct array_buffer *trace_buf = &tr->array_buffer;
1778         struct array_buffer *max_buf = &tr->max_buffer;
1779         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1780         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1781
1782         max_buf->cpu = cpu;
1783         max_buf->time_start = data->preempt_timestamp;
1784
1785         max_data->saved_latency = tr->max_latency;
1786         max_data->critical_start = data->critical_start;
1787         max_data->critical_end = data->critical_end;
1788
1789         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1790         max_data->pid = tsk->pid;
1791         /*
1792          * If tsk == current, then use current_uid(), as that does not use
1793          * RCU. The irq tracer can be called out of RCU scope.
1794          */
1795         if (tsk == current)
1796                 max_data->uid = current_uid();
1797         else
1798                 max_data->uid = task_uid(tsk);
1799
1800         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1801         max_data->policy = tsk->policy;
1802         max_data->rt_priority = tsk->rt_priority;
1803
1804         /* record this task's comm */
1805         tracing_record_cmdline(tsk);
1806         latency_fsnotify(tr);
1807 }
1808
1809 /**
1810  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1811  * @tr: tracer
1812  * @tsk: the task with the latency
1813  * @cpu: The cpu that initiated the trace.
1814  * @cond_data: User data associated with a conditional snapshot
1815  *
1816  * Flip the buffers between the @tr and the max_tr and record information
1817  * about which task was the cause of this latency.
1818  */
1819 void
1820 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1821               void *cond_data)
1822 {
1823         if (tr->stop_count)
1824                 return;
1825
1826         WARN_ON_ONCE(!irqs_disabled());
1827
1828         if (!tr->allocated_snapshot) {
1829                 /* Only the nop tracer should hit this when disabling */
1830                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1831                 return;
1832         }
1833
1834         arch_spin_lock(&tr->max_lock);
1835
1836         /* Inherit the recordable setting from array_buffer */
1837         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1838                 ring_buffer_record_on(tr->max_buffer.buffer);
1839         else
1840                 ring_buffer_record_off(tr->max_buffer.buffer);
1841
1842 #ifdef CONFIG_TRACER_SNAPSHOT
1843         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1844                 arch_spin_unlock(&tr->max_lock);
1845                 return;
1846         }
1847 #endif
1848         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1849
1850         __update_max_tr(tr, tsk, cpu);
1851
1852         arch_spin_unlock(&tr->max_lock);
1853 }
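
/*
 * Hedged usage sketch (not lifted from any particular tracer): a latency
 * tracer that has just measured a new worst-case latency would, with
 * interrupts disabled, do something along these lines. The local variable
 * "latency" is illustrative.
 *
 *	if (latency > tr->max_latency) {
 *		tr->max_latency = latency;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */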
1854
1855 /**
1856  * update_max_tr_single - only copy one trace over, and reset the rest
1857  * @tr: tracer
1858  * @tsk: task with the latency
1859  * @cpu: the cpu of the buffer to copy.
1860  *
1861  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1862  */
1863 void
1864 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1865 {
1866         int ret;
1867
1868         if (tr->stop_count)
1869                 return;
1870
1871         WARN_ON_ONCE(!irqs_disabled());
1872         if (!tr->allocated_snapshot) {
1873                 /* Only the nop tracer should hit this when disabling */
1874                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875                 return;
1876         }
1877
1878         arch_spin_lock(&tr->max_lock);
1879
1880         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1881
1882         if (ret == -EBUSY) {
1883                 /*
1884                  * We failed to swap the buffer due to a commit taking
1885                  * place on this CPU. We fail to record, but we reset
1886                  * the max trace buffer (no one writes directly to it)
1887                  * and flag that it failed.
1888                  * The swap can also fail when a resize is in progress.
1889                  */
1890                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1891                         "Failed to swap buffers due to commit or resize in progress\n");
1892         }
1893
1894         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1895
1896         __update_max_tr(tr, tsk, cpu);
1897         arch_spin_unlock(&tr->max_lock);
1898 }
1899
1900 #endif /* CONFIG_TRACER_MAX_TRACE */
1901
1902 static int wait_on_pipe(struct trace_iterator *iter, int full)
1903 {
1904         /* Iterators are static, they should be filled or empty */
1905         if (trace_buffer_iter(iter, iter->cpu_file))
1906                 return 0;
1907
1908         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1909                                 full);
1910 }
1911
1912 #ifdef CONFIG_FTRACE_STARTUP_TEST
1913 static bool selftests_can_run;
1914
1915 struct trace_selftests {
1916         struct list_head                list;
1917         struct tracer                   *type;
1918 };
1919
1920 static LIST_HEAD(postponed_selftests);
1921
1922 static int save_selftest(struct tracer *type)
1923 {
1924         struct trace_selftests *selftest;
1925
1926         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1927         if (!selftest)
1928                 return -ENOMEM;
1929
1930         selftest->type = type;
1931         list_add(&selftest->list, &postponed_selftests);
1932         return 0;
1933 }
1934
1935 static int run_tracer_selftest(struct tracer *type)
1936 {
1937         struct trace_array *tr = &global_trace;
1938         struct tracer *saved_tracer = tr->current_trace;
1939         int ret;
1940
1941         if (!type->selftest || tracing_selftest_disabled)
1942                 return 0;
1943
1944         /*
1945          * If a tracer registers early in boot up (before scheduling is
1946          * initialized and such), then do not run its selftests yet.
1947          * Instead, run it a little later in the boot process.
1948          */
1949         if (!selftests_can_run)
1950                 return save_selftest(type);
1951
1952         if (!tracing_is_on()) {
1953                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1954                         type->name);
1955                 return 0;
1956         }
1957
1958         /*
1959          * Run a selftest on this tracer.
1960          * Here we reset the trace buffer, and set the current
1961          * tracer to be this tracer. The tracer can then run some
1962          * internal tracing to verify that everything is in order.
1963          * If we fail, we do not register this tracer.
1964          */
1965         tracing_reset_online_cpus(&tr->array_buffer);
1966
1967         tr->current_trace = type;
1968
1969 #ifdef CONFIG_TRACER_MAX_TRACE
1970         if (type->use_max_tr) {
1971                 /* If we expanded the buffers, make sure the max is expanded too */
1972                 if (ring_buffer_expanded)
1973                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1974                                            RING_BUFFER_ALL_CPUS);
1975                 tr->allocated_snapshot = true;
1976         }
1977 #endif
1978
1979         /* the test is responsible for initializing and enabling */
1980         pr_info("Testing tracer %s: ", type->name);
1981         ret = type->selftest(type, tr);
1982         /* the test is responsible for resetting too */
1983         tr->current_trace = saved_tracer;
1984         if (ret) {
1985                 printk(KERN_CONT "FAILED!\n");
1986                 /* Add the warning after printing 'FAILED' */
1987                 WARN_ON(1);
1988                 return -1;
1989         }
1990         /* Only reset on passing, to avoid touching corrupted buffers */
1991         tracing_reset_online_cpus(&tr->array_buffer);
1992
1993 #ifdef CONFIG_TRACER_MAX_TRACE
1994         if (type->use_max_tr) {
1995                 tr->allocated_snapshot = false;
1996
1997                 /* Shrink the max buffer again */
1998                 if (ring_buffer_expanded)
1999                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2000                                            RING_BUFFER_ALL_CPUS);
2001         }
2002 #endif
2003
2004         printk(KERN_CONT "PASSED\n");
2005         return 0;
2006 }
2007
2008 static __init int init_trace_selftests(void)
2009 {
2010         struct trace_selftests *p, *n;
2011         struct tracer *t, **last;
2012         int ret;
2013
2014         selftests_can_run = true;
2015
2016         mutex_lock(&trace_types_lock);
2017
2018         if (list_empty(&postponed_selftests))
2019                 goto out;
2020
2021         pr_info("Running postponed tracer tests:\n");
2022
2023         tracing_selftest_running = true;
2024         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2025                 /* This loop can take minutes when sanitizers are enabled, so
2026          * let's make sure we allow RCU processing.
2027                  */
2028                 cond_resched();
2029                 ret = run_tracer_selftest(p->type);
2030                 /* If the test fails, then warn and remove from available_tracers */
2031                 if (ret < 0) {
2032                         WARN(1, "tracer: %s failed selftest, disabling\n",
2033                              p->type->name);
2034                         last = &trace_types;
2035                         for (t = trace_types; t; t = t->next) {
2036                                 if (t == p->type) {
2037                                         *last = t->next;
2038                                         break;
2039                                 }
2040                                 last = &t->next;
2041                         }
2042                 }
2043                 list_del(&p->list);
2044                 kfree(p);
2045         }
2046         tracing_selftest_running = false;
2047
2048  out:
2049         mutex_unlock(&trace_types_lock);
2050
2051         return 0;
2052 }
2053 core_initcall(init_trace_selftests);
2054 #else
2055 static inline int run_tracer_selftest(struct tracer *type)
2056 {
2057         return 0;
2058 }
2059 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2060
2061 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2062
2063 static void __init apply_trace_boot_options(void);
2064
2065 /**
2066  * register_tracer - register a tracer with the ftrace system.
2067  * @type: the plugin for the tracer
2068  *
2069  * Register a new plugin tracer.
2070  */
2071 int __init register_tracer(struct tracer *type)
2072 {
2073         struct tracer *t;
2074         int ret = 0;
2075
2076         if (!type->name) {
2077                 pr_info("Tracer must have a name\n");
2078                 return -1;
2079         }
2080
2081         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2082                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2083                 return -1;
2084         }
2085
2086         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2087                 pr_warn("Can not register tracer %s due to lockdown\n",
2088                            type->name);
2089                 return -EPERM;
2090         }
2091
2092         mutex_lock(&trace_types_lock);
2093
2094         tracing_selftest_running = true;
2095
2096         for (t = trace_types; t; t = t->next) {
2097                 if (strcmp(type->name, t->name) == 0) {
2098                         /* already found */
2099                         pr_info("Tracer %s already registered\n",
2100                                 type->name);
2101                         ret = -1;
2102                         goto out;
2103                 }
2104         }
2105
2106         if (!type->set_flag)
2107                 type->set_flag = &dummy_set_flag;
2108         if (!type->flags) {
2109                 /* allocate a dummy tracer_flags */
2110                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2111                 if (!type->flags) {
2112                         ret = -ENOMEM;
2113                         goto out;
2114                 }
2115                 type->flags->val = 0;
2116                 type->flags->opts = dummy_tracer_opt;
2117         } else
2118                 if (!type->flags->opts)
2119                         type->flags->opts = dummy_tracer_opt;
2120
2121         /* store the tracer for __set_tracer_option */
2122         type->flags->trace = type;
2123
2124         ret = run_tracer_selftest(type);
2125         if (ret < 0)
2126                 goto out;
2127
2128         type->next = trace_types;
2129         trace_types = type;
2130         add_tracer_options(&global_trace, type);
2131
2132  out:
2133         tracing_selftest_running = false;
2134         mutex_unlock(&trace_types_lock);
2135
2136         if (ret || !default_bootup_tracer)
2137                 goto out_unlock;
2138
2139         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2140                 goto out_unlock;
2141
2142         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2143         /* Do we want this tracer to start on bootup? */
2144         tracing_set_tracer(&global_trace, type->name);
2145         default_bootup_tracer = NULL;
2146
2147         apply_trace_boot_options();
2148
2149         /* Disable other selftests, since running this tracer will break them. */
2150         disable_tracing_selftest("running a tracer");
2151
2152  out_unlock:
2153         return ret;
2154 }
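
/*
 * Minimal registration sketch, for orientation only; a real tracer (see
 * trace_nop.c for the simplest in-tree example) fills in more callbacks.
 * my_tracer_init() and my_tracer_reset() are hypothetical names.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */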
2155
2156 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2157 {
2158         struct trace_buffer *buffer = buf->buffer;
2159
2160         if (!buffer)
2161                 return;
2162
2163         ring_buffer_record_disable(buffer);
2164
2165         /* Make sure all commits have finished */
2166         synchronize_rcu();
2167         ring_buffer_reset_cpu(buffer, cpu);
2168
2169         ring_buffer_record_enable(buffer);
2170 }
2171
2172 void tracing_reset_online_cpus(struct array_buffer *buf)
2173 {
2174         struct trace_buffer *buffer = buf->buffer;
2175
2176         if (!buffer)
2177                 return;
2178
2179         ring_buffer_record_disable(buffer);
2180
2181         /* Make sure all commits have finished */
2182         synchronize_rcu();
2183
2184         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2185
2186         ring_buffer_reset_online_cpus(buffer);
2187
2188         ring_buffer_record_enable(buffer);
2189 }
2190
2191 /* Must have trace_types_lock held */
2192 void tracing_reset_all_online_cpus_unlocked(void)
2193 {
2194         struct trace_array *tr;
2195
2196         lockdep_assert_held(&trace_types_lock);
2197
2198         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2199                 if (!tr->clear_trace)
2200                         continue;
2201                 tr->clear_trace = false;
2202                 tracing_reset_online_cpus(&tr->array_buffer);
2203 #ifdef CONFIG_TRACER_MAX_TRACE
2204                 tracing_reset_online_cpus(&tr->max_buffer);
2205 #endif
2206         }
2207 }
2208
2209 void tracing_reset_all_online_cpus(void)
2210 {
2211         mutex_lock(&trace_types_lock);
2212         tracing_reset_all_online_cpus_unlocked();
2213         mutex_unlock(&trace_types_lock);
2214 }
2215
2216 /*
2217  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2218  * is the tgid last observed corresponding to pid=i.
2219  */
2220 static int *tgid_map;
2221
2222 /* The maximum valid index into tgid_map. */
2223 static size_t tgid_map_max;
2224
2225 #define SAVED_CMDLINES_DEFAULT 128
2226 #define NO_CMDLINE_MAP UINT_MAX
2227 /*
2228  * Preemption must be disabled before acquiring trace_cmdline_lock.
2229  * The various trace_arrays' max_lock must be acquired in a context
2230  * where interrupts are disabled.
2231  */
2232 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2233 struct saved_cmdlines_buffer {
2234         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2235         unsigned *map_cmdline_to_pid;
2236         unsigned cmdline_num;
2237         int cmdline_idx;
2238         char *saved_cmdlines;
2239 };
2240 static struct saved_cmdlines_buffer *savedcmd;
2241
2242 static inline char *get_saved_cmdlines(int idx)
2243 {
2244         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2245 }
2246
2247 static inline void set_cmdline(int idx, const char *cmdline)
2248 {
2249         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2250 }
2251
2252 static int allocate_cmdlines_buffer(unsigned int val,
2253                                     struct saved_cmdlines_buffer *s)
2254 {
2255         s->map_cmdline_to_pid = kmalloc_array(val,
2256                                               sizeof(*s->map_cmdline_to_pid),
2257                                               GFP_KERNEL);
2258         if (!s->map_cmdline_to_pid)
2259                 return -ENOMEM;
2260
2261         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2262         if (!s->saved_cmdlines) {
2263                 kfree(s->map_cmdline_to_pid);
2264                 return -ENOMEM;
2265         }
2266
2267         s->cmdline_idx = 0;
2268         s->cmdline_num = val;
2269         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2270                sizeof(s->map_pid_to_cmdline));
2271         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2272                val * sizeof(*s->map_cmdline_to_pid));
2273
2274         return 0;
2275 }
2276
2277 static int trace_create_savedcmd(void)
2278 {
2279         int ret;
2280
2281         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2282         if (!savedcmd)
2283                 return -ENOMEM;
2284
2285         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2286         if (ret < 0) {
2287                 kfree(savedcmd);
2288                 savedcmd = NULL;
2289                 return -ENOMEM;
2290         }
2291
2292         return 0;
2293 }
2294
2295 int is_tracing_stopped(void)
2296 {
2297         return global_trace.stop_count;
2298 }
2299
2300 static void tracing_start_tr(struct trace_array *tr)
2301 {
2302         struct trace_buffer *buffer;
2303         unsigned long flags;
2304
2305         if (tracing_disabled)
2306                 return;
2307
2308         raw_spin_lock_irqsave(&tr->start_lock, flags);
2309         if (--tr->stop_count) {
2310                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2311                         /* Someone screwed up their debugging */
2312                         tr->stop_count = 0;
2313                 }
2314                 goto out;
2315         }
2316
2317         /* Prevent the buffers from switching */
2318         arch_spin_lock(&tr->max_lock);
2319
2320         buffer = tr->array_buffer.buffer;
2321         if (buffer)
2322                 ring_buffer_record_enable(buffer);
2323
2324 #ifdef CONFIG_TRACER_MAX_TRACE
2325         buffer = tr->max_buffer.buffer;
2326         if (buffer)
2327                 ring_buffer_record_enable(buffer);
2328 #endif
2329
2330         arch_spin_unlock(&tr->max_lock);
2331
2332  out:
2333         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 }
2335
2336 /**
2337  * tracing_start - quick start of the tracer
2338  *
2339  * If tracing is enabled but was stopped by tracing_stop,
2340  * this will start the tracer back up.
2341  */
2342 void tracing_start(void)
2344 {
2345         return tracing_start_tr(&global_trace);
2346 }
2347
2348 static void tracing_stop_tr(struct trace_array *tr)
2349 {
2350         struct trace_buffer *buffer;
2351         unsigned long flags;
2352
2353         raw_spin_lock_irqsave(&tr->start_lock, flags);
2354         if (tr->stop_count++)
2355                 goto out;
2356
2357         /* Prevent the buffers from switching */
2358         arch_spin_lock(&tr->max_lock);
2359
2360         buffer = tr->array_buffer.buffer;
2361         if (buffer)
2362                 ring_buffer_record_disable(buffer);
2363
2364 #ifdef CONFIG_TRACER_MAX_TRACE
2365         buffer = tr->max_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_disable(buffer);
2368 #endif
2369
2370         arch_spin_unlock(&tr->max_lock);
2371
2372  out:
2373         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2374 }
2375
2376 /**
2377  * tracing_stop - quick stop of the tracer
2378  *
2379  * Light weight way to stop tracing. Use in conjunction with
2380  * tracing_start.
2381  */
2382 void tracing_stop(void)
2383 {
2384         return tracing_stop_tr(&global_trace);
2385 }
2386
2387 static int trace_save_cmdline(struct task_struct *tsk)
2388 {
2389         unsigned tpid, idx;
2390
2391         /* treat recording of idle task as a success */
2392         if (!tsk->pid)
2393                 return 1;
2394
2395         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2396
2397         /*
2398          * It's not the end of the world if we don't get
2399          * the lock, but we also don't want to spin
2400          * nor do we want to disable interrupts,
2401          * so if we miss here, then better luck next time.
2402          *
2403          * This is called from within the scheduler and from wake up paths,
2404          * so interrupts should already be disabled and the run queue lock held.
2405          */
2406         lockdep_assert_preemption_disabled();
2407         if (!arch_spin_trylock(&trace_cmdline_lock))
2408                 return 0;
2409
2410         idx = savedcmd->map_pid_to_cmdline[tpid];
2411         if (idx == NO_CMDLINE_MAP) {
2412                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2413
2414                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2415                 savedcmd->cmdline_idx = idx;
2416         }
2417
2418         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2419         set_cmdline(idx, tsk->comm);
2420
2421         arch_spin_unlock(&trace_cmdline_lock);
2422
2423         return 1;
2424 }
2425
2426 static void __trace_find_cmdline(int pid, char comm[])
2427 {
2428         unsigned map;
2429         int tpid;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         tpid = pid & (PID_MAX_DEFAULT - 1);
2442         map = savedcmd->map_pid_to_cmdline[tpid];
2443         if (map != NO_CMDLINE_MAP) {
2444                 tpid = savedcmd->map_cmdline_to_pid[map];
2445                 if (tpid == pid) {
2446                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447                         return;
2448                 }
2449         }
2450         strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
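
/*
 * Reader's note on the lookup above: comms live in a small recycled cache
 * (SAVED_CMDLINES_DEFAULT slots by default), so the mapping is two level.
 * The pid is masked down to a tpid index into map_pid_to_cmdline[], which
 * names a slot in saved_cmdlines[]; map_cmdline_to_pid[] is then checked to
 * confirm the slot still belongs to the pid being queried, because slots
 * are reused round-robin by trace_save_cmdline(). Anything stale or never
 * recorded resolves to "<...>".
 */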
2463
2464 static int *trace_find_tgid_ptr(int pid)
2465 {
2466         /*
2467          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2468          * if we observe a non-NULL tgid_map then we also observe the correct
2469          * tgid_map_max.
2470          */
2471         int *map = smp_load_acquire(&tgid_map);
2472
2473         if (unlikely(!map || pid > tgid_map_max))
2474                 return NULL;
2475
2476         return &map[pid];
2477 }
2478
2479 int trace_find_tgid(int pid)
2480 {
2481         int *ptr = trace_find_tgid_ptr(pid);
2482
2483         return ptr ? *ptr : 0;
2484 }
2485
2486 static int trace_save_tgid(struct task_struct *tsk)
2487 {
2488         int *ptr;
2489
2490         /* treat recording of idle task as a success */
2491         if (!tsk->pid)
2492                 return 1;
2493
2494         ptr = trace_find_tgid_ptr(tsk->pid);
2495         if (!ptr)
2496                 return 0;
2497
2498         *ptr = tsk->tgid;
2499         return 1;
2500 }
2501
2502 static bool tracing_record_taskinfo_skip(int flags)
2503 {
2504         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2505                 return true;
2506         if (!__this_cpu_read(trace_taskinfo_save))
2507                 return true;
2508         return false;
2509 }
2510
2511 /**
2512  * tracing_record_taskinfo - record the task info of a task
2513  *
2514  * @task:  task to record
2515  * @flags: TRACE_RECORD_CMDLINE for recording comm
2516  *         TRACE_RECORD_TGID for recording tgid
2517  */
2518 void tracing_record_taskinfo(struct task_struct *task, int flags)
2519 {
2520         bool done;
2521
2522         if (tracing_record_taskinfo_skip(flags))
2523                 return;
2524
2525         /*
2526          * Record as much task information as possible. If some fail, continue
2527          * to try to record the others.
2528          */
2529         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2530         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2531
2532         /* If recording any of the information failed, retry soon. */
2533         if (!done)
2534                 return;
2535
2536         __this_cpu_write(trace_taskinfo_save, false);
2537 }
2538
2539 /**
2540  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2541  *
2542  * @prev: previous task during sched_switch
2543  * @next: next task during sched_switch
2544  * @flags: TRACE_RECORD_CMDLINE for recording comm
2545  *         TRACE_RECORD_TGID for recording tgid
2546  */
2547 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2548                                           struct task_struct *next, int flags)
2549 {
2550         bool done;
2551
2552         if (tracing_record_taskinfo_skip(flags))
2553                 return;
2554
2555         /*
2556          * Record as much task information as possible. If some fail, continue
2557          * to try to record the others.
2558          */
2559         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2560         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2561         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2562         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2563
2564         /* If recording any of the information failed, retry soon. */
2565         if (!done)
2566                 return;
2567
2568         __this_cpu_write(trace_taskinfo_save, false);
2569 }
2570
2571 /* Helpers to record a specific task information */
2572 void tracing_record_cmdline(struct task_struct *task)
2573 {
2574         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2575 }
2576
2577 void tracing_record_tgid(struct task_struct *task)
2578 {
2579         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2580 }
2581
2582 /*
2583  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2584  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2585  * simplifies those functions and keeps them in sync.
2586  */
2587 enum print_line_t trace_handle_return(struct trace_seq *s)
2588 {
2589         return trace_seq_has_overflowed(s) ?
2590                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2591 }
2592 EXPORT_SYMBOL_GPL(trace_handle_return);
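
/*
 * Hedged sketch of the intended call pattern: an event's output callback
 * writes into the iterator's trace_seq and lets this helper translate the
 * overflow state into the enum the output layer expects. The function name
 * my_event_trace_output() is hypothetical.
 *
 *	static enum print_line_t
 *	my_event_trace_output(struct trace_iterator *iter, int flags,
 *			      struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */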
2593
2594 static unsigned short migration_disable_value(void)
2595 {
2596 #if defined(CONFIG_SMP)
2597         return current->migration_disabled;
2598 #else
2599         return 0;
2600 #endif
2601 }
2602
2603 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2604 {
2605         unsigned int trace_flags = irqs_status;
2606         unsigned int pc;
2607
2608         pc = preempt_count();
2609
2610         if (pc & NMI_MASK)
2611                 trace_flags |= TRACE_FLAG_NMI;
2612         if (pc & HARDIRQ_MASK)
2613                 trace_flags |= TRACE_FLAG_HARDIRQ;
2614         if (in_serving_softirq())
2615                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2616         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2617                 trace_flags |= TRACE_FLAG_BH_OFF;
2618
2619         if (tif_need_resched())
2620                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2621         if (test_preempt_need_resched())
2622                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2623         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2624                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2625 }
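
/*
 * Layout of the packed trace_ctx word returned above (descriptive only):
 * bits 16-31 carry the TRACE_FLAG_* bits, bits 4-7 the migration-disable
 * depth clamped to 15, and bits 0-3 the preemption-disable depth, also
 * clamped to 15.
 */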
2626
2627 struct ring_buffer_event *
2628 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2629                           int type,
2630                           unsigned long len,
2631                           unsigned int trace_ctx)
2632 {
2633         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2634 }
2635
2636 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2637 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2638 static int trace_buffered_event_ref;
2639
2640 /**
2641  * trace_buffered_event_enable - enable buffering events
2642  *
2643  * When events are being filtered, it is quicker to use a temporary
2644  * buffer to write the event data into if there's a likely chance
2645  * that it will not be committed. Discarding an event from the ring
2646  * buffer is not as fast as committing it, and is much slower than
2647  * copying the data and then committing the copy.
2648  *
2649  * When an event is to be filtered, allocate per cpu buffers to
2650  * write the event data into, and if the event is filtered and discarded
2651  * it is simply dropped, otherwise, the entire data is to be committed
2652  * in one shot.
2653  */
2654 void trace_buffered_event_enable(void)
2655 {
2656         struct ring_buffer_event *event;
2657         struct page *page;
2658         int cpu;
2659
2660         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2661
2662         if (trace_buffered_event_ref++)
2663                 return;
2664
2665         for_each_tracing_cpu(cpu) {
2666                 page = alloc_pages_node(cpu_to_node(cpu),
2667                                         GFP_KERNEL | __GFP_NORETRY, 0);
2668                 /* This is just an optimization and can handle failures */
2669                 if (!page) {
2670                         pr_err("Failed to allocate event buffer\n");
2671                         break;
2672                 }
2673
2674                 event = page_address(page);
2675                 memset(event, 0, sizeof(*event));
2676
2677                 per_cpu(trace_buffered_event, cpu) = event;
2678
2679                 preempt_disable();
2680                 if (cpu == smp_processor_id() &&
2681                     __this_cpu_read(trace_buffered_event) !=
2682                     per_cpu(trace_buffered_event, cpu))
2683                         WARN_ON_ONCE(1);
2684                 preempt_enable();
2685         }
2686 }
2687
2688 static void enable_trace_buffered_event(void *data)
2689 {
2690         /* Probably not needed, but do it anyway */
2691         smp_rmb();
2692         this_cpu_dec(trace_buffered_event_cnt);
2693 }
2694
2695 static void disable_trace_buffered_event(void *data)
2696 {
2697         this_cpu_inc(trace_buffered_event_cnt);
2698 }
2699
2700 /**
2701  * trace_buffered_event_disable - disable buffering events
2702  *
2703  * When a filter is removed, it is faster to not use the buffered
2704  * events, and to commit directly into the ring buffer. Free up
2705  * the temp buffers when there are no more users. This requires
2706  * special synchronization with current events.
2707  */
2708 void trace_buffered_event_disable(void)
2709 {
2710         int cpu;
2711
2712         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2713
2714         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2715                 return;
2716
2717         if (--trace_buffered_event_ref)
2718                 return;
2719
2720         preempt_disable();
2721         /* For each CPU, set the buffer as used. */
2722         smp_call_function_many(tracing_buffer_mask,
2723                                disable_trace_buffered_event, NULL, 1);
2724         preempt_enable();
2725
2726         /* Wait for all current users to finish */
2727         synchronize_rcu();
2728
2729         for_each_tracing_cpu(cpu) {
2730                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2731                 per_cpu(trace_buffered_event, cpu) = NULL;
2732         }
2733         /*
2734          * Make sure trace_buffered_event is NULL before clearing
2735          * trace_buffered_event_cnt.
2736          */
2737         smp_wmb();
2738
2739         preempt_disable();
2740         /* Do the work on each cpu */
2741         smp_call_function_many(tracing_buffer_mask,
2742                                enable_trace_buffered_event, NULL, 1);
2743         preempt_enable();
2744 }
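
/*
 * Teardown ordering note (descriptive only): the disable path above first
 * marks the per-CPU buffers busy on every CPU via IPI, waits for in-flight
 * users with synchronize_rcu(), frees the pages and NULLs the pointers,
 * issues the smp_wmb(), and only then decrements the per-CPU counters
 * again. Since trace_buffered_event is NULL by that point, later events
 * reserve directly in the ring buffer.
 */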
2745
2746 static struct trace_buffer *temp_buffer;
2747
2748 struct ring_buffer_event *
2749 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2750                           struct trace_event_file *trace_file,
2751                           int type, unsigned long len,
2752                           unsigned int trace_ctx)
2753 {
2754         struct ring_buffer_event *entry;
2755         struct trace_array *tr = trace_file->tr;
2756         int val;
2757
2758         *current_rb = tr->array_buffer.buffer;
2759
2760         if (!tr->no_filter_buffering_ref &&
2761             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2762                 preempt_disable_notrace();
2763                 /*
2764                  * Filtering is on, so try to use the per cpu buffer first.
2765                  * This buffer will simulate a ring_buffer_event,
2766                  * where the type_len is zero and the array[0] will
2767                  * hold the full length.
2768          * (see include/linux/ring_buffer.h for details on
2769                  *  how the ring_buffer_event is structured).
2770                  *
2771                  * Using a temp buffer during filtering and copying it
2772                  * on a matched filter is quicker than writing directly
2773                  * into the ring buffer and then discarding it when
2774                  * it doesn't match. That is because the discard
2775                  * requires several atomic operations to get right.
2776                  * Copying on match and doing nothing on a failed match
2777                  * is still quicker than no copy on match, but having
2778                  * to discard out of the ring buffer on a failed match.
2779                  */
2780                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2781                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2782
2783                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2784
2785                         /*
2786                          * Preemption is disabled, but interrupts and NMIs
2787                          * can still come in now. If that happens after
2788                          * the above increment, then it will have to go
2789                          * back to the old method of allocating the event
2790                          * on the ring buffer, and if the filter fails, it
2791                          * will have to call ring_buffer_discard_commit()
2792                          * to remove it.
2793                          *
2794                          * Need to also check the unlikely case that the
2795                          * length is bigger than the temp buffer size.
2796                          * If that happens, then the reserve is pretty much
2797                          * guaranteed to fail, as the ring buffer currently
2798                          * only allows events less than a page. But that may
2799                          * change in the future, so let the ring buffer reserve
2800                          * handle the failure in that case.
2801                          */
2802                         if (val == 1 && likely(len <= max_len)) {
2803                                 trace_event_setup(entry, type, trace_ctx);
2804                                 entry->array[0] = len;
2805                                 /* Return with preemption disabled */
2806                                 return entry;
2807                         }
2808                         this_cpu_dec(trace_buffered_event_cnt);
2809                 }
2810                 /* __trace_buffer_lock_reserve() disables preemption */
2811                 preempt_enable_notrace();
2812         }
2813
2814         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2815                                             trace_ctx);
2816         /*
2817          * If tracing is off, but we have triggers enabled,
2818          * we still need to look at the event data. Use the temp_buffer
2819          * to store the trace event for the trigger to use. It's recursion
2820          * safe and will not be recorded anywhere.
2821          */
2822         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2823                 *current_rb = temp_buffer;
2824                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825                                                     trace_ctx);
2826         }
2827         return entry;
2828 }
2829 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
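
/*
 * Hedged sketch of how the reserve/commit pair is driven by the generated
 * trace event code (the real callers come from the TRACE_EVENT() macros;
 * the entry layout below is illustrative):
 *
 *	fbuffer.event = trace_event_buffer_lock_reserve(&fbuffer.buffer,
 *					trace_file, event_type,
 *					sizeof(*entry), trace_ctx);
 *	if (fbuffer.event) {
 *		entry = ring_buffer_event_data(fbuffer.event);
 *		... fill in the entry fields ...
 *		trace_event_buffer_commit(&fbuffer);
 *	}
 */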
2830
2831 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2832 static DEFINE_MUTEX(tracepoint_printk_mutex);
2833
2834 static void output_printk(struct trace_event_buffer *fbuffer)
2835 {
2836         struct trace_event_call *event_call;
2837         struct trace_event_file *file;
2838         struct trace_event *event;
2839         unsigned long flags;
2840         struct trace_iterator *iter = tracepoint_print_iter;
2841
2842         /* We should never get here if iter is NULL */
2843         if (WARN_ON_ONCE(!iter))
2844                 return;
2845
2846         event_call = fbuffer->trace_file->event_call;
2847         if (!event_call || !event_call->event.funcs ||
2848             !event_call->event.funcs->trace)
2849                 return;
2850
2851         file = fbuffer->trace_file;
2852         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2853             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2854              !filter_match_preds(file->filter, fbuffer->entry)))
2855                 return;
2856
2857         event = &fbuffer->trace_file->event_call->event;
2858
2859         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2860         trace_seq_init(&iter->seq);
2861         iter->ent = fbuffer->entry;
2862         event_call->event.funcs->trace(iter, 0, event);
2863         trace_seq_putc(&iter->seq, 0);
2864         printk("%s", iter->seq.buffer);
2865
2866         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2867 }
2868
2869 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2870                              void *buffer, size_t *lenp,
2871                              loff_t *ppos)
2872 {
2873         int save_tracepoint_printk;
2874         int ret;
2875
2876         mutex_lock(&tracepoint_printk_mutex);
2877         save_tracepoint_printk = tracepoint_printk;
2878
2879         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2880
2881         /*
2882          * This will force exiting early, as tracepoint_printk
2883          * is always zero when tracepoint_print_iter is not allocated.
2884          */
2885         if (!tracepoint_print_iter)
2886                 tracepoint_printk = 0;
2887
2888         if (save_tracepoint_printk == tracepoint_printk)
2889                 goto out;
2890
2891         if (tracepoint_printk)
2892                 static_key_enable(&tracepoint_printk_key.key);
2893         else
2894                 static_key_disable(&tracepoint_printk_key.key);
2895
2896  out:
2897         mutex_unlock(&tracepoint_printk_mutex);
2898
2899         return ret;
2900 }
2901
2902 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2903 {
2904         enum event_trigger_type tt = ETT_NONE;
2905         struct trace_event_file *file = fbuffer->trace_file;
2906
2907         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2908                         fbuffer->entry, &tt))
2909                 goto discard;
2910
2911         if (static_key_false(&tracepoint_printk_key.key))
2912                 output_printk(fbuffer);
2913
2914         if (static_branch_unlikely(&trace_event_exports_enabled))
2915                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2916
2917         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2918                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2919
2920 discard:
2921         if (tt)
2922                 event_triggers_post_call(file, tt);
2924 }
2925 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2926
2927 /*
2928  * Skip 3:
2929  *
2930  *   trace_buffer_unlock_commit_regs()
2931  *   trace_event_buffer_commit()
2932  *   trace_event_raw_event_xxx()
2933  */
2934 # define STACK_SKIP 3
2935
2936 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2937                                      struct trace_buffer *buffer,
2938                                      struct ring_buffer_event *event,
2939                                      unsigned int trace_ctx,
2940                                      struct pt_regs *regs)
2941 {
2942         __buffer_unlock_commit(buffer, event);
2943
2944         /*
2945          * If regs is not set, then skip the necessary functions.
2946          * Note, we can still get here via blktrace, wakeup tracer
2947          * and mmiotrace, but that's ok if they lose a function or
2948          * two. They are not that meaningful.
2949          */
2950         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2951         ftrace_trace_userstack(tr, buffer, trace_ctx);
2952 }
2953
2954 /*
2955  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2956  */
2957 void
2958 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2959                                    struct ring_buffer_event *event)
2960 {
2961         __buffer_unlock_commit(buffer, event);
2962 }
2963
2964 void
2965 trace_function(struct trace_array *tr, unsigned long ip,
2966                unsigned long parent_ip, unsigned int trace_ctx)
2967 {
2968         struct trace_event_call *call = &event_function;
2969         struct trace_buffer *buffer = tr->array_buffer.buffer;
2970         struct ring_buffer_event *event;
2971         struct ftrace_entry *entry;
2972
2973         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2974                                             trace_ctx);
2975         if (!event)
2976                 return;
2977         entry   = ring_buffer_event_data(event);
2978         entry->ip                       = ip;
2979         entry->parent_ip                = parent_ip;
2980
2981         if (!call_filter_check_discard(call, entry, buffer, event)) {
2982                 if (static_branch_unlikely(&trace_function_exports_enabled))
2983                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2984                 __buffer_unlock_commit(buffer, event);
2985         }
2986 }
2987
2988 #ifdef CONFIG_STACKTRACE
2989
2990 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2991 #define FTRACE_KSTACK_NESTING   4
2992
2993 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2994
2995 struct ftrace_stack {
2996         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2997 };
2998
2999
3000 struct ftrace_stacks {
3001         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3002 };
3003
3004 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3005 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
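
/*
 * Size arithmetic, for orientation (assuming 4K pages and 64-bit longs):
 * FTRACE_KSTACK_ENTRIES = PAGE_SIZE / FTRACE_KSTACK_NESTING = 1024 entries
 * per nesting level, so one struct ftrace_stack is 1024 * 8 = 8KB and the
 * per-CPU struct ftrace_stacks is 4 * 8KB = 32KB. Each nested context
 * (normal, softirq, irq, NMI) gets its own slot, selected at trace time by
 * the ftrace_stack_reserve counter defined above.
 */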
3006
3007 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3008                                  unsigned int trace_ctx,
3009                                  int skip, struct pt_regs *regs)
3010 {
3011         struct trace_event_call *call = &event_kernel_stack;
3012         struct ring_buffer_event *event;
3013         unsigned int size, nr_entries;
3014         struct ftrace_stack *fstack;
3015         struct stack_entry *entry;
3016         int stackidx;
3017
3018         /*
3019          * Add one, for this function and the call to stack_trace_save().
3020          * If regs is set, then these functions will not be in the way.
3021          */
3022 #ifndef CONFIG_UNWINDER_ORC
3023         if (!regs)
3024                 skip++;
3025 #endif
3026
3027         preempt_disable_notrace();
3028
3029         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3030
3031         /* This should never happen. If it does, yell once and skip */
3032         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3033                 goto out;
3034
3035         /*
3036          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3037          * interrupt will either see the value pre increment or post
3038          * increment. If the interrupt happens pre increment it will have
3039          * restored the counter when it returns.  We just need a barrier to
3040          * keep gcc from moving things around.
3041          */
3042         barrier();
3043
3044         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3045         size = ARRAY_SIZE(fstack->calls);
3046
3047         if (regs) {
3048                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3049                                                    size, skip);
3050         } else {
3051                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3052         }
3053
3054         size = nr_entries * sizeof(unsigned long);
3055         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3056                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3057                                     trace_ctx);
3058         if (!event)
3059                 goto out;
3060         entry = ring_buffer_event_data(event);
3061
3062         memcpy(&entry->caller, fstack->calls, size);
3063         entry->size = nr_entries;
3064
3065         if (!call_filter_check_discard(call, entry, buffer, event))
3066                 __buffer_unlock_commit(buffer, event);
3067
3068  out:
3069         /* Again, don't let gcc optimize things here */
3070         barrier();
3071         __this_cpu_dec(ftrace_stack_reserve);
3072         preempt_enable_notrace();
3074 }
3075
3076 static inline void ftrace_trace_stack(struct trace_array *tr,
3077                                       struct trace_buffer *buffer,
3078                                       unsigned int trace_ctx,
3079                                       int skip, struct pt_regs *regs)
3080 {
3081         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3082                 return;
3083
3084         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3085 }
3086
3087 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3088                    int skip)
3089 {
3090         struct trace_buffer *buffer = tr->array_buffer.buffer;
3091
3092         if (rcu_is_watching()) {
3093                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3094                 return;
3095         }
3096
3097         /*
3098          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3099          * but if the above rcu_is_watching() failed, then the NMI
3100          * triggered someplace critical, and ct_irq_enter() should
3101          * not be called from NMI.
3102          */
3103         if (unlikely(in_nmi()))
3104                 return;
3105
3106         ct_irq_enter_irqson();
3107         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3108         ct_irq_exit_irqson();
3109 }
3110
3111 /**
3112  * trace_dump_stack - record a stack back trace in the trace buffer
3113  * @skip: Number of functions to skip (helper handlers)
3114  */
3115 void trace_dump_stack(int skip)
3116 {
3117         if (tracing_disabled || tracing_selftest_running)
3118                 return;
3119
3120 #ifndef CONFIG_UNWINDER_ORC
3121         /* Skip 1 to skip this function. */
3122         skip++;
3123 #endif
3124         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3125                              tracing_gen_ctx(), skip, NULL);
3126 }
3127 EXPORT_SYMBOL_GPL(trace_dump_stack);
3128
3129 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3130 static DEFINE_PER_CPU(int, user_stack_count);
3131
3132 static void
3133 ftrace_trace_userstack(struct trace_array *tr,
3134                        struct trace_buffer *buffer, unsigned int trace_ctx)
3135 {
3136         struct trace_event_call *call = &event_user_stack;
3137         struct ring_buffer_event *event;
3138         struct userstack_entry *entry;
3139
3140         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3141                 return;
3142
3143         /*
3144          * NMIs cannot handle page faults, even with fixups.
3145          * Saving the user stack can (and often does) fault.
3146          */
3147         if (unlikely(in_nmi()))
3148                 return;
3149
3150         /*
3151          * prevent recursion, since the user stack tracing may
3152          * trigger other kernel events.
3153          */
3154         preempt_disable();
3155         if (__this_cpu_read(user_stack_count))
3156                 goto out;
3157
3158         __this_cpu_inc(user_stack_count);
3159
3160         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3161                                             sizeof(*entry), trace_ctx);
3162         if (!event)
3163                 goto out_drop_count;
3164         entry   = ring_buffer_event_data(event);
3165
3166         entry->tgid             = current->tgid;
3167         memset(&entry->caller, 0, sizeof(entry->caller));
3168
3169         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3170         if (!call_filter_check_discard(call, entry, buffer, event))
3171                 __buffer_unlock_commit(buffer, event);
3172
3173  out_drop_count:
3174         __this_cpu_dec(user_stack_count);
3175  out:
3176         preempt_enable();
3177 }
3178 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3179 static void ftrace_trace_userstack(struct trace_array *tr,
3180                                    struct trace_buffer *buffer,
3181                                    unsigned int trace_ctx)
3182 {
3183 }
3184 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3185
3186 #endif /* CONFIG_STACKTRACE */
3187
3188 static inline void
3189 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3190                           unsigned long long delta)
3191 {
3192         entry->bottom_delta_ts = delta & U32_MAX;
3193         entry->top_delta_ts = (delta >> 32);
3194 }
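
/*
 * The delta is stored as two 32-bit halves purely because of the entry
 * layout; a reader reconstructs it as (illustrative):
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */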
3195
3196 void trace_last_func_repeats(struct trace_array *tr,
3197                              struct trace_func_repeats *last_info,
3198                              unsigned int trace_ctx)
3199 {
3200         struct trace_buffer *buffer = tr->array_buffer.buffer;
3201         struct func_repeats_entry *entry;
3202         struct ring_buffer_event *event;
3203         u64 delta;
3204
3205         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3206                                             sizeof(*entry), trace_ctx);
3207         if (!event)
3208                 return;
3209
3210         delta = ring_buffer_event_time_stamp(buffer, event) -
3211                 last_info->ts_last_call;
3212
3213         entry = ring_buffer_event_data(event);
3214         entry->ip = last_info->ip;
3215         entry->parent_ip = last_info->parent_ip;
3216         entry->count = last_info->count;
3217         func_repeats_set_delta_ts(entry, delta);
3218
3219         __buffer_unlock_commit(buffer, event);
3220 }
3221
3222 /* created for use with alloc_percpu */
3223 struct trace_buffer_struct {
3224         int nesting;
3225         char buffer[4][TRACE_BUF_SIZE];
3226 };
3227
3228 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3229
3230 /*
3231  * The per-CPU nesting counter allows for lockless recording.  If we're
3232  * nested too deeply (more than four levels), get_trace_buf() returns NULL.
3233  */
3234 static char *get_trace_buf(void)
3235 {
3236         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3237
3238         if (!trace_percpu_buffer || buffer->nesting >= 4)
3239                 return NULL;
3240
3241         buffer->nesting++;
3242
3243         /* Interrupts must see nesting incremented before we use the buffer */
3244         barrier();
3245         return &buffer->buffer[buffer->nesting - 1][0];
3246 }
3247
3248 static void put_trace_buf(void)
3249 {
3250         /* Don't let the decrement of nesting leak before this */
3251         barrier();
3252         this_cpu_dec(trace_percpu_buffer->nesting);
3253 }
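
/*
 * Usage sketch (illustrative): callers pair get_trace_buf()/put_trace_buf()
 * with preemption disabled, exactly as trace_vbprintk() below does:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format into buf (up to TRACE_BUF_SIZE bytes) ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The nesting counter is what lets an interrupt (or NMI) that arrives in
 * the middle of this sequence safely grab its own slot of the per-CPU
 * buffer array instead of corrupting the one already in use.
 */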
3254
3255 static int alloc_percpu_trace_buffer(void)
3256 {
3257         struct trace_buffer_struct __percpu *buffers;
3258
3259         if (trace_percpu_buffer)
3260                 return 0;
3261
3262         buffers = alloc_percpu(struct trace_buffer_struct);
3263         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3264                 return -ENOMEM;
3265
3266         trace_percpu_buffer = buffers;
3267         return 0;
3268 }
3269
3270 static int buffers_allocated;
3271
3272 void trace_printk_init_buffers(void)
3273 {
3274         if (buffers_allocated)
3275                 return;
3276
3277         if (alloc_percpu_trace_buffer())
3278                 return;
3279
3280         /* trace_printk() is for debug use only. Don't use it in production. */
3281
3282         pr_warn("\n");
3283         pr_warn("**********************************************************\n");
3284         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3285         pr_warn("**                                                      **\n");
3286         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3287         pr_warn("**                                                      **\n");
3288         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3289         pr_warn("** unsafe for production use.                           **\n");
3290         pr_warn("**                                                      **\n");
3291         pr_warn("** If you see this message and you are not debugging    **\n");
3292         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3293         pr_warn("**                                                      **\n");
3294         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3295         pr_warn("**********************************************************\n");
3296
3297         /* Expand the buffers to set size */
3298         tracing_update_buffers();
3299
3300         buffers_allocated = 1;
3301
3302         /*
3303          * trace_printk_init_buffers() can be called by modules.
3304          * If that happens, then we need to start cmdline recording
3305          * directly here. If global_trace.array_buffer.buffer is already
3306          * allocated at this point, then this was called by module code.
3307          */
3308         if (global_trace.array_buffer.buffer)
3309                 tracing_start_cmdline_record();
3310 }
3311 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3312
3313 void trace_printk_start_comm(void)
3314 {
3315         /* Start tracing comms if trace printk is set */
3316         if (!buffers_allocated)
3317                 return;
3318         tracing_start_cmdline_record();
3319 }
3320
3321 static void trace_printk_start_stop_comm(int enabled)
3322 {
3323         if (!buffers_allocated)
3324                 return;
3325
3326         if (enabled)
3327                 tracing_start_cmdline_record();
3328         else
3329                 tracing_stop_cmdline_record();
3330 }
3331
3332 /**
3333  * trace_vbprintk - write binary msg to tracing buffer
3334  * @ip:    The address of the caller
3335  * @fmt:   The string format to write to the buffer
3336  * @args:  Arguments for @fmt
3337  */
3338 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3339 {
3340         struct trace_event_call *call = &event_bprint;
3341         struct ring_buffer_event *event;
3342         struct trace_buffer *buffer;
3343         struct trace_array *tr = &global_trace;
3344         struct bprint_entry *entry;
3345         unsigned int trace_ctx;
3346         char *tbuffer;
3347         int len = 0, size;
3348
3349         if (unlikely(tracing_selftest_running || tracing_disabled))
3350                 return 0;
3351
3352         /* Don't pollute graph traces with trace_vprintk internals */
3353         pause_graph_tracing();
3354
3355         trace_ctx = tracing_gen_ctx();
3356         preempt_disable_notrace();
3357
3358         tbuffer = get_trace_buf();
3359         if (!tbuffer) {
3360                 len = 0;
3361                 goto out_nobuffer;
3362         }
3363
3364         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3365
3366         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3367                 goto out_put;
3368
3369         size = sizeof(*entry) + sizeof(u32) * len;
3370         buffer = tr->array_buffer.buffer;
3371         ring_buffer_nest_start(buffer);
3372         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3373                                             trace_ctx);
3374         if (!event)
3375                 goto out;
3376         entry = ring_buffer_event_data(event);
3377         entry->ip                       = ip;
3378         entry->fmt                      = fmt;
3379
3380         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3381         if (!call_filter_check_discard(call, entry, buffer, event)) {
3382                 __buffer_unlock_commit(buffer, event);
3383                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3384         }
3385
3386 out:
3387         ring_buffer_nest_end(buffer);
3388 out_put:
3389         put_trace_buf();
3390
3391 out_nobuffer:
3392         preempt_enable_notrace();
3393         unpause_graph_tracing();
3394
3395         return len;
3396 }
3397 EXPORT_SYMBOL_GPL(trace_vbprintk);
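
/*
 * Example (illustrative): kernel or module code normally reaches this path
 * through the trace_printk() macro rather than calling trace_vbprintk()
 * directly, e.g.:
 *
 *	trace_printk("processed %d packets in %llu ns\n", count, delta);
 *
 * With a constant format string and arguments, the macro takes the binary
 * (bprint) path handled here: only the pointer to the format plus the raw
 * argument values are recorded, and the formatting is done later at read
 * time.
 */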
3398
3399 __printf(3, 0)
3400 static int
3401 __trace_array_vprintk(struct trace_buffer *buffer,
3402                       unsigned long ip, const char *fmt, va_list args)
3403 {
3404         struct trace_event_call *call = &event_print;
3405         struct ring_buffer_event *event;
3406         int len = 0, size;
3407         struct print_entry *entry;
3408         unsigned int trace_ctx;
3409         char *tbuffer;
3410
3411         if (tracing_disabled || tracing_selftest_running)
3412                 return 0;
3413
3414         /* Don't pollute graph traces with trace_vprintk internals */
3415         pause_graph_tracing();
3416
3417         trace_ctx = tracing_gen_ctx();
3418         preempt_disable_notrace();
3419
3420
3421         tbuffer = get_trace_buf();
3422         if (!tbuffer) {
3423                 len = 0;
3424                 goto out_nobuffer;
3425         }
3426
3427         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3428
3429         size = sizeof(*entry) + len + 1;
3430         ring_buffer_nest_start(buffer);
3431         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3432                                             trace_ctx);
3433         if (!event)
3434                 goto out;
3435         entry = ring_buffer_event_data(event);
3436         entry->ip = ip;
3437
3438         memcpy(&entry->buf, tbuffer, len + 1);
3439         if (!call_filter_check_discard(call, entry, buffer, event)) {
3440                 __buffer_unlock_commit(buffer, event);
3441                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3442         }
3443
3444 out:
3445         ring_buffer_nest_end(buffer);
3446         put_trace_buf();
3447
3448 out_nobuffer:
3449         preempt_enable_notrace();
3450         unpause_graph_tracing();
3451
3452         return len;
3453 }
3454
3455 __printf(3, 0)
3456 int trace_array_vprintk(struct trace_array *tr,
3457                         unsigned long ip, const char *fmt, va_list args)
3458 {
3459         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3460 }
3461
3462 /**
3463  * trace_array_printk - Print a message to a specific instance
3464  * @tr: The instance trace_array descriptor
3465  * @ip: The instruction pointer that this is called from.
3466  * @fmt: The format to print (printf format)
3467  *
3468  * If a subsystem sets up its own instance, they have the right to
3469  * printk strings into their tracing instance buffer using this
3470  * function. Note, this function will not write into the top level
3471  * function. Note, this function will not write into the top level
3472  * buffer (use trace_printk() for that), as the top level buffer
3473  * should only contain events that can be individually disabled.
3474  * trace_printk() is only for debugging a kernel, and should never
3475  * be incorporated into normal use.
3476  * trace_array_printk() can be used, as it will not add noise to the
3477  * top level tracing buffer.
3478  *
3479  * Note, trace_array_init_printk() must be called on @tr before this
3480  * can be used.
3481  */
3482 __printf(3, 0)
3483 int trace_array_printk(struct trace_array *tr,
3484                        unsigned long ip, const char *fmt, ...)
3485 {
3486         int ret;
3487         va_list ap;
3488
3489         if (!tr)
3490                 return -ENOENT;
3491
3492         /* This is only allowed for created instances */
3493         if (tr == &global_trace)
3494                 return 0;
3495
3496         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3497                 return 0;
3498
3499         va_start(ap, fmt);
3500         ret = trace_array_vprintk(tr, ip, fmt, ap);
3501         va_end(ap);
3502         return ret;
3503 }
3504 EXPORT_SYMBOL_GPL(trace_array_printk);
3505
3506 /**
3507  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3508  * @tr: The trace array to initialize the buffers for
3509  *
3510  * As trace_array_printk() only writes into instances, they are OK to
3511  * have in the kernel (unlike trace_printk()). This needs to be called
3512  * before trace_array_printk() can be used on a trace_array.
3513  */
3514 int trace_array_init_printk(struct trace_array *tr)
3515 {
3516         if (!tr)
3517                 return -ENOENT;
3518
3519         /* This is only allowed for created instances */
3520         if (tr == &global_trace)
3521                 return -EINVAL;
3522
3523         return alloc_percpu_trace_buffer();
3524 }
3525 EXPORT_SYMBOL_GPL(trace_array_init_printk);
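
/*
 * Usage sketch (illustrative, for a subsystem that owns a trace instance;
 * error handling trimmed, and the instance pointer "tr" assumed to have
 * been obtained elsewhere, e.g. via trace_array_get_by_name()):
 *
 *	if (trace_array_init_printk(tr))
 *		return;
 *	trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * This keeps the messages in the subsystem's own instance buffer and out
 * of the top-level trace buffer.
 */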
3526
3527 __printf(3, 4)
3528 int trace_array_printk_buf(struct trace_buffer *buffer,
3529                            unsigned long ip, const char *fmt, ...)
3530 {
3531         int ret;
3532         va_list ap;
3533
3534         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3535                 return 0;
3536
3537         va_start(ap, fmt);
3538         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3539         va_end(ap);
3540         return ret;
3541 }
3542
3543 __printf(2, 0)
3544 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3545 {
3546         return trace_array_vprintk(&global_trace, ip, fmt, args);
3547 }
3548 EXPORT_SYMBOL_GPL(trace_vprintk);
3549
3550 static void trace_iterator_increment(struct trace_iterator *iter)
3551 {
3552         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3553
3554         iter->idx++;
3555         if (buf_iter)
3556                 ring_buffer_iter_advance(buf_iter);
3557 }
3558
3559 static struct trace_entry *
3560 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3561                 unsigned long *lost_events)
3562 {
3563         struct ring_buffer_event *event;
3564         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3565
3566         if (buf_iter) {
3567                 event = ring_buffer_iter_peek(buf_iter, ts);
3568                 if (lost_events)
3569                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3570                                 (unsigned long)-1 : 0;
3571         } else {
3572                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3573                                          lost_events);
3574         }
3575
3576         if (event) {
3577                 iter->ent_size = ring_buffer_event_length(event);
3578                 return ring_buffer_event_data(event);
3579         }
3580         iter->ent_size = 0;
3581         return NULL;
3582 }
3583
3584 static struct trace_entry *
3585 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3586                   unsigned long *missing_events, u64 *ent_ts)
3587 {
3588         struct trace_buffer *buffer = iter->array_buffer->buffer;
3589         struct trace_entry *ent, *next = NULL;
3590         unsigned long lost_events = 0, next_lost = 0;
3591         int cpu_file = iter->cpu_file;
3592         u64 next_ts = 0, ts;
3593         int next_cpu = -1;
3594         int next_size = 0;
3595         int cpu;
3596
3597         /*
3598          * If we are in a per_cpu trace file, don't bother iterating over
3599          * all CPUs; peek that CPU directly.
3600          */
3601         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3602                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3603                         return NULL;
3604                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3605                 if (ent_cpu)
3606                         *ent_cpu = cpu_file;
3607
3608                 return ent;
3609         }
3610
3611         for_each_tracing_cpu(cpu) {
3612
3613                 if (ring_buffer_empty_cpu(buffer, cpu))
3614                         continue;
3615
3616                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3617
3618                 /*
3619                  * Pick the entry with the smallest timestamp:
3620                  */
3621                 if (ent && (!next || ts < next_ts)) {
3622                         next = ent;
3623                         next_cpu = cpu;
3624                         next_ts = ts;
3625                         next_lost = lost_events;
3626                         next_size = iter->ent_size;
3627                 }
3628         }
3629
3630         iter->ent_size = next_size;
3631
3632         if (ent_cpu)
3633                 *ent_cpu = next_cpu;
3634
3635         if (ent_ts)
3636                 *ent_ts = next_ts;
3637
3638         if (missing_events)
3639                 *missing_events = next_lost;
3640
3641         return next;
3642 }
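
/*
 * Worked example (illustrative): with three non-empty per-CPU buffers whose
 * next events have timestamps
 *
 *	CPU0: ts = 1050,  CPU1: ts = 1007,  CPU2: ts = 1032
 *
 * __find_next_entry() returns the CPU1 event (smallest timestamp), sets
 * *ent_cpu = 1 and *ent_ts = 1007, and leaves iter->ent_size set to that
 * event's length. Repeated calls therefore merge the per-CPU buffers into
 * a single stream ordered by timestamp.
 */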
3643
3644 #define STATIC_FMT_BUF_SIZE     128
3645 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3646
3647 static char *trace_iter_expand_format(struct trace_iterator *iter)
3648 {
3649         char *tmp;
3650
3651         /*
3652          * iter->tr is NULL when used with tp_printk, which means this
3653          * can get called where it is not safe to call krealloc().
3654          */
3655         if (!iter->tr || iter->fmt == static_fmt_buf)
3656                 return NULL;
3657
3658         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3659                        GFP_KERNEL);
3660         if (tmp) {
3661                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3662                 iter->fmt = tmp;
3663         }
3664
3665         return tmp;
3666 }
3667
3668 /* Returns true if the string is safe to dereference from an event */
3669 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3670                            bool star, int len)
3671 {
3672         unsigned long addr = (unsigned long)str;
3673         struct trace_event *trace_event;
3674         struct trace_event_call *event;
3675
3676         /* Ignore strings with no length */
3677         if (star && !len)
3678                 return true;
3679
3680         /* OK if part of the event data */
3681         if ((addr >= (unsigned long)iter->ent) &&
3682             (addr < (unsigned long)iter->ent + iter->ent_size))
3683                 return true;
3684
3685         /* OK if part of the temp seq buffer */
3686         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3687             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3688                 return true;
3689
3690         /* Core rodata cannot be freed */
3691         if (is_kernel_rodata(addr))
3692                 return true;
3693
3694         if (trace_is_tracepoint_string(str))
3695                 return true;
3696
3697         /*
3698          * Now this could be a module event, referencing core module
3699          * data, which is OK.
3700          */
3701         if (!iter->ent)
3702                 return false;
3703
3704         trace_event = ftrace_find_event(iter->ent->type);
3705         if (!trace_event)
3706                 return false;
3707
3708         event = container_of(trace_event, struct trace_event_call, event);
3709         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3710                 return false;
3711
3712         /* Would rather have rodata, but this will suffice */
3713         if (within_module_core(addr, event->module))
3714                 return true;
3715
3716         return false;
3717 }
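
/*
 * Sketch of the safe pattern (illustrative, not a real event in this file):
 * to make a %s argument satisfy trace_safe_str(), copy the string into the
 * event itself with the __string()/__assign_str()/__get_str() helpers, e.g.:
 *
 *	TRACE_EVENT(sample_str,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name, name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * (On newer kernels __assign_str() takes a single argument.) The string
 * then lives inside iter->ent and passes the "part of the event data"
 * check above.
 */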
3718
3719 static const char *show_buffer(struct trace_seq *s)
3720 {
3721         struct seq_buf *seq = &s->seq;
3722
3723         seq_buf_terminate(seq);
3724
3725         return seq->buffer;
3726 }
3727
3728 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3729
3730 static int test_can_verify_check(const char *fmt, ...)
3731 {
3732         char buf[16];
3733         va_list ap;
3734         int ret;
3735
3736         /*
3737          * The verifier depends on vsnprintf() modifying the va_list
3738          * passed to it, i.e. on the va_list being passed by reference.
3739          * Some architectures (like x86_32) pass it by value, which means
3740          * that vsnprintf() does not modify the caller's va_list, and the
3741          * verifier would then need to understand all the values that
3742          * vsnprintf() can consume. If it is passed by value, then the
3743          * verifier is disabled.
3744          */
3745         va_start(ap, fmt);
3746         vsnprintf(buf, 16, "%d", ap);
3747         ret = va_arg(ap, int);
3748         va_end(ap);
3749
3750         return ret;
3751 }
3752
3753 static void test_can_verify(void)
3754 {
3755         if (!test_can_verify_check("%d %d", 0, 1)) {
3756                 pr_info("trace event string verifier disabled\n");
3757                 static_branch_inc(&trace_no_verify);
3758         }
3759 }
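
/*
 * Worked example of the check above: test_can_verify_check("%d %d", 0, 1)
 * lets vsnprintf() consume one int (the 0) via the hard-coded "%d" format.
 * On architectures where va_list is effectively passed by reference, the
 * following va_arg() then sees the next argument and returns 1, so the
 * verifier stays enabled. Where va_list is passed by value (e.g. x86_32),
 * the caller's ap is left untouched, va_arg() re-reads the first argument
 * and returns 0, and test_can_verify() disables the verifier.
 */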
3760
3761 /**
3762  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3763  * @iter: The iterator that holds the seq buffer and the event being printed
3764  * @fmt: The format used to print the event
3765  * @ap: The va_list holding the data to print from @fmt.
3766  *
3767  * This writes the data into the @iter->seq buffer using the data from
3768  * @fmt and @ap. If the format has a %s, then the source of the string
3769  * is examined to make sure it is safe to print, otherwise it will
3770  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3771  * pointer.
3772  */
3773 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3774                          va_list ap)
3775 {
3776         const char *p = fmt;
3777         const char *str;
3778         int i, j;
3779
3780         if (WARN_ON_ONCE(!fmt))
3781                 return;
3782
3783         if (static_branch_unlikely(&trace_no_verify))
3784                 goto print;
3785
3786         /* Don't bother checking when doing an ftrace_dump() */
3787         if (iter->fmt == static_fmt_buf)
3788                 goto print;
3789
3790         while (*p) {
3791                 bool star = false;
3792                 int len = 0;
3793
3794                 j = 0;
3795
3796                 /* We only care about %s and variants */
3797                 for (i = 0; p[i]; i++) {
3798                         if (i + 1 >= iter->fmt_size) {
3799                                 /*
3800                                  * If we can't expand the copy buffer,
3801                                  * just print it.
3802                                  */
3803                                 if (!trace_iter_expand_format(iter))
3804                                         goto print;
3805                         }
3806
3807                         if (p[i] == '\\' && p[i+1]) {
3808                                 i++;
3809                                 continue;
3810                         }
3811                         if (p[i] == '%') {
3812                                 /* Need to test cases like %08.*s */
3813                                 for (j = 1; p[i+j]; j++) {
3814                                         if (isdigit(p[i+j]) ||
3815                                             p[i+j] == '.')
3816                                                 continue;
3817                                         if (p[i+j] == '*') {
3818                                                 star = true;
3819                                                 continue;
3820                                         }
3821                                         break;
3822                                 }
3823                                 if (p[i+j] == 's')
3824                                         break;
3825                                 star = false;
3826                         }
3827                         j = 0;
3828                 }
3829                 /* If no %s found then just print normally */
3830                 if (!p[i])
3831                         break;
3832
3833                 /* Copy up to the %s, and print that */
3834                 strncpy(iter->fmt, p, i);
3835                 iter->fmt[i] = '\0';
3836                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3837
3838                 /*
3839                  * If iter->seq is full, the above call no longer guarantees
3840                  * that ap is in sync with fmt processing, and further calls
3841                  * to va_arg() can return wrong positional arguments.
3842                  *
3843                  * Ensure that ap is no longer used in this case.
3844                  */
3845                 if (iter->seq.full) {
3846                         p = "";
3847                         break;
3848                 }
3849
3850                 if (star)
3851                         len = va_arg(ap, int);
3852
3853                 /* The ap now points to the string data of the %s */
3854                 str = va_arg(ap, const char *);
3855
3856                 /*
3857                  * If you hit this warning, it is likely that the
3858                  * trace event in question used %s on a string that
3859                  * was saved at the time of the event, but may not be
3860                  * around when the trace is read. Use __string(),
3861                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3862                  * instead. See samples/trace_events/trace-events-sample.h
3863                  * for reference.
3864                  */
3865                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3866                               "fmt: '%s' current_buffer: '%s'",
3867                               fmt, show_buffer(&iter->seq))) {
3868                         int ret;
3869
3870                         /* Try to safely read the string */
3871                         if (star) {
3872                                 if (len + 1 > iter->fmt_size)
3873                                         len = iter->fmt_size - 1;
3874                                 if (len < 0)
3875                                         len = 0;
3876                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3877                                 iter->fmt[len] = 0;
3878                                 star = false;
3879                         } else {
3880                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3881                                                                   iter->fmt_size);
3882                         }
3883                         if (ret < 0)
3884                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3885                         else
3886                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3887                                                  str, iter->fmt);
3888                         str = "[UNSAFE-MEMORY]";
3889                         strcpy(iter->fmt, "%s");
3890                 } else {
3891                         strncpy(iter->fmt, p + i, j + 1);
3892                         iter->fmt[j+1] = '\0';
3893                 }
3894                 if (star)
3895                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3896                 else
3897                         trace_seq_printf(&iter->seq, iter->fmt, str);
3898
3899                 p += i + j + 1;
3900         }
3901  print:
3902         if (*p)
3903                 trace_seq_vprintf(&iter->seq, p, ap);
3904 }
3905
3906 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3907 {
3908         const char *p, *new_fmt;
3909         char *q;
3910
3911         if (WARN_ON_ONCE(!fmt))
3912                 return fmt;
3913
3914         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3915                 return fmt;
3916
3917         p = fmt;
3918         new_fmt = q = iter->fmt;
3919         while (*p) {
3920                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3921                         if (!trace_iter_expand_format(iter))
3922                                 return fmt;
3923
3924                         q += iter->fmt - new_fmt;
3925                         new_fmt = iter->fmt;
3926                 }
3927
3928                 *q++ = *p++;
3929
3930                 /* Replace %p with %px */
3931                 if (p[-1] == '%') {
3932                         if (p[0] == '%') {
3933                                 *q++ = *p++;
3934                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3935                                 *q++ = *p++;
3936                                 *q++ = 'x';
3937                         }
3938                 }
3939         }
3940         *q = '\0';
3941
3942         return new_fmt;
3943 }
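
/*
 * Example (illustrative) of the rewrite above, with TRACE_ITER_HASH_PTR
 * cleared on the instance:
 *
 *	"comm=%s ptr=%p pct=%%p"   becomes   "comm=%s ptr=%px pct=%%p"
 *
 * Only a bare %p conversion gains the 'x' (so the real address is printed
 * instead of a hashed one); "%%" escapes and extended forms such as %pK
 * or %ps are left untouched.
 */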
3944
3945 #define STATIC_TEMP_BUF_SIZE    128
3946 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3947
3948 /* Find the next real entry, without updating the iterator itself */
3949 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3950                                           int *ent_cpu, u64 *ent_ts)
3951 {
3952         /* __find_next_entry will reset ent_size */
3953         int ent_size = iter->ent_size;
3954         struct trace_entry *entry;
3955
3956         /*
3957          * If called from ftrace_dump(), then the iter->temp buffer
3958          * will be the static_temp_buf and not one created by kmalloc().
3959          * If the entry size is greater than the buffer, we cannot
3960          * save it. Just return NULL in that case. This is only
3961          * used to add markers when two consecutive events' time
3962          * stamps have a large delta. See trace_print_lat_context().
3963          */
3964         if (iter->temp == static_temp_buf &&
3965             STATIC_TEMP_BUF_SIZE < ent_size)
3966                 return NULL;
3967
3968         /*
3969          * __find_next_entry() may call peek_next_entry(), which may
3970          * call ring_buffer_peek(), which can make the contents of
3971          * iter->ent undefined. Need to copy iter->ent now.
3972          */
3973         if (iter->ent && iter->ent != iter->temp) {
3974                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3975                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3976                         void *temp;
3977                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3978                         if (!temp)
3979                                 return NULL;
3980                         kfree(iter->temp);
3981                         iter->temp = temp;
3982                         iter->temp_size = iter->ent_size;
3983                 }
3984                 memcpy(iter->temp, iter->ent, iter->ent_size);
3985                 iter->ent = iter->temp;
3986         }
3987         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3988         /* Put back the original ent_size */
3989         iter->ent_size = ent_size;
3990
3991         return entry;
3992 }
3993
3994 /* Find the next real entry, and increment the iterator to the next entry */
3995 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3996 {
3997         iter->ent = __find_next_entry(iter, &iter->cpu,
3998                                       &iter->lost_events, &iter->ts);
3999
4000         if (iter->ent)
4001                 trace_iterator_increment(iter);
4002
4003         return iter->ent ? iter : NULL;
4004 }
4005
4006 static void trace_consume(struct trace_iterator *iter)
4007 {
4008         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4009                             &iter->lost_events);
4010 }
4011
4012 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4013 {
4014         struct trace_iterator *iter = m->private;
4015         int i = (int)*pos;
4016         void *ent;
4017
4018         WARN_ON_ONCE(iter->leftover);
4019
4020         (*pos)++;
4021
4022         /* can't go backwards */
4023         if (iter->idx > i)
4024                 return NULL;
4025
4026         if (iter->idx < 0)
4027                 ent = trace_find_next_entry_inc(iter);
4028         else
4029                 ent = iter;
4030
4031         while (ent && iter->idx < i)
4032                 ent = trace_find_next_entry_inc(iter);
4033
4034         iter->pos = *pos;
4035
4036         return ent;
4037 }
4038
4039 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4040 {
4041         struct ring_buffer_iter *buf_iter;
4042         unsigned long entries = 0;
4043         u64 ts;
4044
4045         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4046
4047         buf_iter = trace_buffer_iter(iter, cpu);
4048         if (!buf_iter)
4049                 return;
4050
4051         ring_buffer_iter_reset(buf_iter);
4052
4053         /*
4054          * With the max latency tracers, we could have the case that
4055          * a reset never took place on a CPU. This is evident from
4056          * the timestamp being before the start of the buffer.
4057          */
4058         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4059                 if (ts >= iter->array_buffer->time_start)
4060                         break;
4061                 entries++;
4062                 ring_buffer_iter_advance(buf_iter);
4063         }
4064
4065         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4066 }
4067
4068 /*
4069  * The current tracer is copied to avoid taking a global lock
4070  * all around.
4071  */
4072 static void *s_start(struct seq_file *m, loff_t *pos)
4073 {
4074         struct trace_iterator *iter = m->private;
4075         struct trace_array *tr = iter->tr;
4076         int cpu_file = iter->cpu_file;
4077         void *p = NULL;
4078         loff_t l = 0;
4079         int cpu;
4080
4081         /*
4082          * Copy the tracer to avoid using a global lock all around.
4083          * iter->trace is a copy of current_trace; the name pointer
4084          * may be compared instead of using strcmp(), as iter->trace->name
4085          * will point to the same string as current_trace->name.
4086          */
4087         mutex_lock(&trace_types_lock);
4088         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4089                 /* Close iter->trace before switching to the new current tracer */
4090                 if (iter->trace->close)
4091                         iter->trace->close(iter);
4092                 *iter->trace = *tr->current_trace;
4093                 /* Reopen the new current tracer */
4094                 if (iter->trace->open)
4095                         iter->trace->open(iter);
4096         }
4097         mutex_unlock(&trace_types_lock);
4098
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         if (iter->snapshot && iter->trace->use_max_tr)
4101                 return ERR_PTR(-EBUSY);
4102 #endif
4103
4104         if (*pos != iter->pos) {
4105                 iter->ent = NULL;
4106                 iter->cpu = 0;
4107                 iter->idx = -1;
4108
4109                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4110                         for_each_tracing_cpu(cpu)
4111                                 tracing_iter_reset(iter, cpu);
4112                 } else
4113                         tracing_iter_reset(iter, cpu_file);
4114
4115                 iter->leftover = 0;
4116                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4117                         ;
4118
4119         } else {
4120                 /*
4121                  * If we overflowed the seq_file before, then we want
4122                  * to just reuse the trace_seq buffer again.
4123                  */
4124                 if (iter->leftover)
4125                         p = iter;
4126                 else {
4127                         l = *pos - 1;
4128                         p = s_next(m, p, &l);
4129                 }
4130         }
4131
4132         trace_event_read_lock();
4133         trace_access_lock(cpu_file);
4134         return p;
4135 }
4136
4137 static void s_stop(struct seq_file *m, void *p)
4138 {
4139         struct trace_iterator *iter = m->private;
4140
4141 #ifdef CONFIG_TRACER_MAX_TRACE
4142         if (iter->snapshot && iter->trace->use_max_tr)
4143                 return;
4144 #endif
4145
4146         trace_access_unlock(iter->cpu_file);
4147         trace_event_read_unlock();
4148 }
4149
4150 static void
4151 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4152                       unsigned long *entries, int cpu)
4153 {
4154         unsigned long count;
4155
4156         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4157         /*
4158          * If this buffer has skipped entries, then we hold all
4159          * entries for the trace and we need to ignore the
4160          * ones before the time stamp.
4161          */
4162         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4163                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4164                 /* total is the same as the entries */
4165                 *total = count;
4166         } else
4167                 *total = count +
4168                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4169         *entries = count;
4170 }
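
/*
 * Worked example (illustrative): if a CPU buffer currently holds 100
 * readable entries and the ring buffer reports an overrun of 40 (events
 * overwritten because the buffer wrapped), then *entries = 100 and
 * *total = 140, i.e. "total" counts everything written, "entries" only
 * what is still readable. When skipped_entries is set (latency tracers),
 * both are reduced by the skipped count and overruns are ignored.
 */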
4171
4172 static void
4173 get_total_entries(struct array_buffer *buf,
4174                   unsigned long *total, unsigned long *entries)
4175 {
4176         unsigned long t, e;
4177         int cpu;
4178
4179         *total = 0;
4180         *entries = 0;
4181
4182         for_each_tracing_cpu(cpu) {
4183                 get_total_entries_cpu(buf, &t, &e, cpu);
4184                 *total += t;
4185                 *entries += e;
4186         }
4187 }
4188
4189 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4190 {
4191         unsigned long total, entries;
4192
4193         if (!tr)
4194                 tr = &global_trace;
4195
4196         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4197
4198         return entries;
4199 }
4200
4201 unsigned long trace_total_entries(struct trace_array *tr)
4202 {
4203         unsigned long total, entries;
4204
4205         if (!tr)
4206                 tr = &global_trace;
4207
4208         get_total_entries(&tr->array_buffer, &total, &entries);
4209
4210         return entries;
4211 }
4212
4213 static void print_lat_help_header(struct seq_file *m)
4214 {
4215         seq_puts(m, "#                    _------=> CPU#            \n"
4216                     "#                   / _-----=> irqs-off/BH-disabled\n"
4217                     "#                  | / _----=> need-resched    \n"
4218                     "#                  || / _---=> hardirq/softirq \n"
4219                     "#                  ||| / _--=> preempt-depth   \n"
4220                     "#                  |||| / _-=> migrate-disable \n"
4221                     "#                  ||||| /     delay           \n"
4222                     "#  cmd     pid     |||||| time  |   caller     \n"
4223                     "#     \\   /        ||||||  \\    |    /       \n");
4224 }
4225
4226 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4227 {
4228         unsigned long total;
4229         unsigned long entries;
4230
4231         get_total_entries(buf, &total, &entries);
4232         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4233                    entries, total, num_online_cpus());
4234         seq_puts(m, "#\n");
4235 }
4236
4237 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4238                                    unsigned int flags)
4239 {
4240         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4241
4242         print_event_info(buf, m);
4243
4244         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4245         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4246 }
4247
4248 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4249                                        unsigned int flags)
4250 {
4251         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4252         static const char space[] = "            ";
4253         int prec = tgid ? 12 : 2;
4254
4255         print_event_info(buf, m);
4256
4257         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4258         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4259         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4260         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4261         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4262         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4263         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4264         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4265 }
4266
4267 void
4268 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4269 {
4270         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4271         struct array_buffer *buf = iter->array_buffer;
4272         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4273         struct tracer *type = iter->trace;
4274         unsigned long entries;
4275         unsigned long total;
4276         const char *name = type->name;
4277
4278         get_total_entries(buf, &total, &entries);
4279
4280         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4281                    name, UTS_RELEASE);
4282         seq_puts(m, "# -----------------------------------"
4283                  "---------------------------------\n");
4284         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4285                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4286                    nsecs_to_usecs(data->saved_latency),
4287                    entries,
4288                    total,
4289                    buf->cpu,
4290                    preempt_model_none()      ? "server" :
4291                    preempt_model_voluntary() ? "desktop" :
4292                    preempt_model_full()      ? "preempt" :
4293                    preempt_model_rt()        ? "preempt_rt" :
4294                    "unknown",
4295                    /* These are reserved for later use */
4296                    0, 0, 0, 0);
4297 #ifdef CONFIG_SMP
4298         seq_printf(m, " #P:%d)\n", num_online_cpus());
4299 #else
4300         seq_puts(m, ")\n");
4301 #endif
4302         seq_puts(m, "#    -----------------\n");
4303         seq_printf(m, "#    | task: %.16s-%d "
4304                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4305                    data->comm, data->pid,
4306                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4307                    data->policy, data->rt_priority);
4308         seq_puts(m, "#    -----------------\n");
4309
4310         if (data->critical_start) {
4311                 seq_puts(m, "#  => started at: ");
4312                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4313                 trace_print_seq(m, &iter->seq);
4314                 seq_puts(m, "\n#  => ended at:   ");
4315                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4316                 trace_print_seq(m, &iter->seq);
4317                 seq_puts(m, "\n#\n");
4318         }
4319
4320         seq_puts(m, "#\n");
4321 }
4322
4323 static void test_cpu_buff_start(struct trace_iterator *iter)
4324 {
4325         struct trace_seq *s = &iter->seq;
4326         struct trace_array *tr = iter->tr;
4327
4328         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4329                 return;
4330
4331         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4332                 return;
4333
4334         if (cpumask_available(iter->started) &&
4335             cpumask_test_cpu(iter->cpu, iter->started))
4336                 return;
4337
4338         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4339                 return;
4340
4341         if (cpumask_available(iter->started))
4342                 cpumask_set_cpu(iter->cpu, iter->started);
4343
4344         /* Don't print started cpu buffer for the first entry of the trace */
4345         if (iter->idx > 1)
4346                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4347                                 iter->cpu);
4348 }
4349
4350 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4351 {
4352         struct trace_array *tr = iter->tr;
4353         struct trace_seq *s = &iter->seq;
4354         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4355         struct trace_entry *entry;
4356         struct trace_event *event;
4357
4358         entry = iter->ent;
4359
4360         test_cpu_buff_start(iter);
4361
4362         event = ftrace_find_event(entry->type);
4363
4364         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4365                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4366                         trace_print_lat_context(iter);
4367                 else
4368                         trace_print_context(iter);
4369         }
4370
4371         if (trace_seq_has_overflowed(s))
4372                 return TRACE_TYPE_PARTIAL_LINE;
4373
4374         if (event)
4375                 return event->funcs->trace(iter, sym_flags, event);
4376
4377         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4378
4379         return trace_handle_return(s);
4380 }
4381
4382 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4383 {
4384         struct trace_array *tr = iter->tr;
4385         struct trace_seq *s = &iter->seq;
4386         struct trace_entry *entry;
4387         struct trace_event *event;
4388
4389         entry = iter->ent;
4390
4391         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4392                 trace_seq_printf(s, "%d %d %llu ",
4393                                  entry->pid, iter->cpu, iter->ts);
4394
4395         if (trace_seq_has_overflowed(s))
4396                 return TRACE_TYPE_PARTIAL_LINE;
4397
4398         event = ftrace_find_event(entry->type);
4399         if (event)
4400                 return event->funcs->raw(iter, 0, event);
4401
4402         trace_seq_printf(s, "%d ?\n", entry->type);
4403
4404         return trace_handle_return(s);
4405 }
4406
4407 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4408 {
4409         struct trace_array *tr = iter->tr;
4410         struct trace_seq *s = &iter->seq;
4411         unsigned char newline = '\n';
4412         struct trace_entry *entry;
4413         struct trace_event *event;
4414
4415         entry = iter->ent;
4416
4417         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4418                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4419                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4420                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4421                 if (trace_seq_has_overflowed(s))
4422                         return TRACE_TYPE_PARTIAL_LINE;
4423         }
4424
4425         event = ftrace_find_event(entry->type);
4426         if (event) {
4427                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4428                 if (ret != TRACE_TYPE_HANDLED)
4429                         return ret;
4430         }
4431
4432         SEQ_PUT_FIELD(s, newline);
4433
4434         return trace_handle_return(s);
4435 }
4436
4437 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4438 {
4439         struct trace_array *tr = iter->tr;
4440         struct trace_seq *s = &iter->seq;
4441         struct trace_entry *entry;
4442         struct trace_event *event;
4443
4444         entry = iter->ent;
4445
4446         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4447                 SEQ_PUT_FIELD(s, entry->pid);
4448                 SEQ_PUT_FIELD(s, iter->cpu);
4449                 SEQ_PUT_FIELD(s, iter->ts);
4450                 if (trace_seq_has_overflowed(s))
4451                         return TRACE_TYPE_PARTIAL_LINE;
4452         }
4453
4454         event = ftrace_find_event(entry->type);
4455         return event ? event->funcs->binary(iter, 0, event) :
4456                 TRACE_TYPE_HANDLED;
4457 }
4458
4459 int trace_empty(struct trace_iterator *iter)
4460 {
4461         struct ring_buffer_iter *buf_iter;
4462         int cpu;
4463
4464         /* If we are looking at one CPU buffer, only check that one */
4465         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4466                 cpu = iter->cpu_file;
4467                 buf_iter = trace_buffer_iter(iter, cpu);
4468                 if (buf_iter) {
4469                         if (!ring_buffer_iter_empty(buf_iter))
4470                                 return 0;
4471                 } else {
4472                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4473                                 return 0;
4474                 }
4475                 return 1;
4476         }
4477
4478         for_each_tracing_cpu(cpu) {
4479                 buf_iter = trace_buffer_iter(iter, cpu);
4480                 if (buf_iter) {
4481                         if (!ring_buffer_iter_empty(buf_iter))
4482                                 return 0;
4483                 } else {
4484                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4485                                 return 0;
4486                 }
4487         }
4488
4489         return 1;
4490 }
4491
4492 /*  Called with trace_event_read_lock() held. */
4493 enum print_line_t print_trace_line(struct trace_iterator *iter)
4494 {
4495         struct trace_array *tr = iter->tr;
4496         unsigned long trace_flags = tr->trace_flags;
4497         enum print_line_t ret;
4498
4499         if (iter->lost_events) {
4500                 if (iter->lost_events == (unsigned long)-1)
4501                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4502                                          iter->cpu);
4503                 else
4504                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4505                                          iter->cpu, iter->lost_events);
4506                 if (trace_seq_has_overflowed(&iter->seq))
4507                         return TRACE_TYPE_PARTIAL_LINE;
4508         }
4509
4510         if (iter->trace && iter->trace->print_line) {
4511                 ret = iter->trace->print_line(iter);
4512                 if (ret != TRACE_TYPE_UNHANDLED)
4513                         return ret;
4514         }
4515
4516         if (iter->ent->type == TRACE_BPUTS &&
4517                         trace_flags & TRACE_ITER_PRINTK &&
4518                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4519                 return trace_print_bputs_msg_only(iter);
4520
4521         if (iter->ent->type == TRACE_BPRINT &&
4522                         trace_flags & TRACE_ITER_PRINTK &&
4523                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4524                 return trace_print_bprintk_msg_only(iter);
4525
4526         if (iter->ent->type == TRACE_PRINT &&
4527                         trace_flags & TRACE_ITER_PRINTK &&
4528                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4529                 return trace_print_printk_msg_only(iter);
4530
4531         if (trace_flags & TRACE_ITER_BIN)
4532                 return print_bin_fmt(iter);
4533
4534         if (trace_flags & TRACE_ITER_HEX)
4535                 return print_hex_fmt(iter);
4536
4537         if (trace_flags & TRACE_ITER_RAW)
4538                 return print_raw_fmt(iter);
4539
4540         return print_trace_fmt(iter);
4541 }
4542
4543 void trace_latency_header(struct seq_file *m)
4544 {
4545         struct trace_iterator *iter = m->private;
4546         struct trace_array *tr = iter->tr;
4547
4548         /* print nothing if the buffers are empty */
4549         if (trace_empty(iter))
4550                 return;
4551
4552         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4553                 print_trace_header(m, iter);
4554
4555         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4556                 print_lat_help_header(m);
4557 }
4558
4559 void trace_default_header(struct seq_file *m)
4560 {
4561         struct trace_iterator *iter = m->private;
4562         struct trace_array *tr = iter->tr;
4563         unsigned long trace_flags = tr->trace_flags;
4564
4565         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4566                 return;
4567
4568         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4569                 /* print nothing if the buffers are empty */
4570                 if (trace_empty(iter))
4571                         return;
4572                 print_trace_header(m, iter);
4573                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4574                         print_lat_help_header(m);
4575         } else {
4576                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4577                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4578                                 print_func_help_header_irq(iter->array_buffer,
4579                                                            m, trace_flags);
4580                         else
4581                                 print_func_help_header(iter->array_buffer, m,
4582                                                        trace_flags);
4583                 }
4584         }
4585 }
4586
4587 static void test_ftrace_alive(struct seq_file *m)
4588 {
4589         if (!ftrace_is_dead())
4590                 return;
4591         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4592                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4593 }
4594
4595 #ifdef CONFIG_TRACER_MAX_TRACE
4596 static void show_snapshot_main_help(struct seq_file *m)
4597 {
4598         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4599                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4600                     "#                      Takes a snapshot of the main buffer.\n"
4601                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4602                     "#                      (Doesn't have to be '2'; works with any number that\n"
4603                     "#                       is not a '0' or '1')\n");
4604 }
4605
4606 static void show_snapshot_percpu_help(struct seq_file *m)
4607 {
4608         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4609 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4610         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4611                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4612 #else
4613         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4614                     "#                     Must use main snapshot file to allocate.\n");
4615 #endif
4616         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4617                     "#                      (Doesn't have to be '2'; works with any number that\n"
4618                     "#                       is not a '0' or '1')\n");
4619 }
4620
4621 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4622 {
4623         if (iter->tr->allocated_snapshot)
4624                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4625         else
4626                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4627
4628         seq_puts(m, "# Snapshot commands:\n");
4629         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4630                 show_snapshot_main_help(m);
4631         else
4632                 show_snapshot_percpu_help(m);
4633 }
4634 #else
4635 /* Should never be called */
4636 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4637 #endif
4638
4639 static int s_show(struct seq_file *m, void *v)
4640 {
4641         struct trace_iterator *iter = v;
4642         int ret;
4643
4644         if (iter->ent == NULL) {
4645                 if (iter->tr) {
4646                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4647                         seq_puts(m, "#\n");
4648                         test_ftrace_alive(m);
4649                 }
4650                 if (iter->snapshot && trace_empty(iter))
4651                         print_snapshot_help(m, iter);
4652                 else if (iter->trace && iter->trace->print_header)
4653                         iter->trace->print_header(m);
4654                 else
4655                         trace_default_header(m);
4656
4657         } else if (iter->leftover) {
4658                 /*
4659                  * If we filled the seq_file buffer earlier, we
4660                  * want to just show it now.
4661                  */
4662                 ret = trace_print_seq(m, &iter->seq);
4663
4664                 /* ret should this time be zero, but you never know */
4665                 iter->leftover = ret;
4666
4667         } else {
4668                 print_trace_line(iter);
4669                 ret = trace_print_seq(m, &iter->seq);
4670                 /*
4671                  * If we overflow the seq_file buffer, then it will
4672                  * ask us for this data again at start up.
4673                  * Use that instead.
4674                  *  ret is 0 if seq_file write succeeded.
4675                  *        -1 otherwise.
4676                  */
4677                 iter->leftover = ret;
4678         }
4679
4680         return 0;
4681 }
4682
4683 /*
4684  * Should be used after trace_array_get(); trace_types_lock
4685  * ensures that i_cdev was already initialized.
4686  */
4687 static inline int tracing_get_cpu(struct inode *inode)
4688 {
4689         if (inode->i_cdev) /* See trace_create_cpu_file() */
4690                 return (long)inode->i_cdev - 1;
4691         return RING_BUFFER_ALL_CPUS;
4692 }
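
/*
 * Example (illustrative) of the i_cdev encoding used above: the per-CPU
 * files store cpu + 1 in inode->i_cdev when they are created, so
 *
 *	i_cdev == 0  ->  RING_BUFFER_ALL_CPUS  (the top-level file)
 *	i_cdev == 5  ->  CPU 4
 *
 * which is why the function subtracts one before returning.
 */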
4693
4694 static const struct seq_operations tracer_seq_ops = {
4695         .start          = s_start,
4696         .next           = s_next,
4697         .stop           = s_stop,
4698         .show           = s_show,
4699 };
4700
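/*
 * Set up a trace_iterator for reading the trace file: copy the current
 * tracer (so concurrent tracer changes do not affect this reader), pick
 * the main or max/snapshot buffer, and prepare a ring buffer iterator for
 * the requested CPU (or for every CPU). Called with no locks held; it
 * takes trace_types_lock internally.
 */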
4701 static struct trace_iterator *
4702 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4703 {
4704         struct trace_array *tr = inode->i_private;
4705         struct trace_iterator *iter;
4706         int cpu;
4707
4708         if (tracing_disabled)
4709                 return ERR_PTR(-ENODEV);
4710
4711         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4712         if (!iter)
4713                 return ERR_PTR(-ENOMEM);
4714
4715         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4716                                     GFP_KERNEL);
4717         if (!iter->buffer_iter)
4718                 goto release;
4719
4720         /*
4721          * trace_find_next_entry() may need to save off iter->ent.
4722          * It will place it into the iter->temp buffer. As most
4723          * events are less than 128 bytes, allocate a buffer of that size.
4724          * If one is greater, then trace_find_next_entry() will
4725          * allocate a new buffer to adjust for the bigger iter->ent.
4726          * It's not critical if it fails to get allocated here.
4727          */
4728         iter->temp = kmalloc(128, GFP_KERNEL);
4729         if (iter->temp)
4730                 iter->temp_size = 128;
4731
4732         /*
4733          * trace_event_printf() may need to modify the given format
4734          * string to replace %p with %px so that it shows the real address
4735          * instead of a hash value. However, that is only needed for event
4736          * tracing; other tracers may not need it. Defer the allocation
4737          * until it is needed.
4738          */
4739         iter->fmt = NULL;
4740         iter->fmt_size = 0;
4741
4742         /*
4743          * We make a copy of the current tracer to avoid concurrent
4744          * changes on it while we are reading.
4745          */
4746         mutex_lock(&trace_types_lock);
4747         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4748         if (!iter->trace)
4749                 goto fail;
4750
4751         *iter->trace = *tr->current_trace;
4752
4753         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4754                 goto fail;
4755
4756         iter->tr = tr;
4757
4758 #ifdef CONFIG_TRACER_MAX_TRACE
4759         /* Currently only the top directory has a snapshot */
4760         if (tr->current_trace->print_max || snapshot)
4761                 iter->array_buffer = &tr->max_buffer;
4762         else
4763 #endif
4764                 iter->array_buffer = &tr->array_buffer;
4765         iter->snapshot = snapshot;
4766         iter->pos = -1;
4767         iter->cpu_file = tracing_get_cpu(inode);
4768         mutex_init(&iter->mutex);
4769
4770         /* Notify the tracer early; before we stop tracing. */
4771         if (iter->trace->open)
4772                 iter->trace->open(iter);
4773
4774         /* Annotate start of buffers if we had overruns */
4775         if (ring_buffer_overruns(iter->array_buffer->buffer))
4776                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4777
4778         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4779         if (trace_clocks[tr->clock_id].in_ns)
4780                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4781
4782         /*
4783          * If pause-on-trace is enabled, then stop the trace while
4784          * dumping, unless this is the "snapshot" file
4785          */
4786         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4787                 tracing_stop_tr(tr);
4788
4789         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4790                 for_each_tracing_cpu(cpu) {
4791                         iter->buffer_iter[cpu] =
4792                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4793                                                          cpu, GFP_KERNEL);
4794                 }
4795                 ring_buffer_read_prepare_sync();
4796                 for_each_tracing_cpu(cpu) {
4797                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4798                         tracing_iter_reset(iter, cpu);
4799                 }
4800         } else {
4801                 cpu = iter->cpu_file;
4802                 iter->buffer_iter[cpu] =
4803                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4804                                                  cpu, GFP_KERNEL);
4805                 ring_buffer_read_prepare_sync();
4806                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4807                 tracing_iter_reset(iter, cpu);
4808         }
4809
4810         mutex_unlock(&trace_types_lock);
4811
4812         return iter;
4813
4814  fail:
4815         mutex_unlock(&trace_types_lock);
4816         kfree(iter->trace);
4817         kfree(iter->temp);
4818         kfree(iter->buffer_iter);
4819 release:
4820         seq_release_private(inode, file);
4821         return ERR_PTR(-ENOMEM);
4822 }
4823
4824 int tracing_open_generic(struct inode *inode, struct file *filp)
4825 {
4826         int ret;
4827
4828         ret = tracing_check_open_get_tr(NULL);
4829         if (ret)
4830                 return ret;
4831
4832         filp->private_data = inode->i_private;
4833         return 0;
4834 }
4835
4836 bool tracing_is_disabled(void)
4837 {
4838         return tracing_disabled;
4839 }
4840
4841 /*
4842  * Open and update trace_array ref count.
4843  * Must have the current trace_array passed to it.
4844  */
4845 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4846 {
4847         struct trace_array *tr = inode->i_private;
4848         int ret;
4849
4850         ret = tracing_check_open_get_tr(tr);
4851         if (ret)
4852                 return ret;
4853
4854         filp->private_data = inode->i_private;
4855
4856         return 0;
4857 }
4858
4859 /*
4860  * The private pointer of the inode is the trace_event_file.
4861  * Update the tr ref count associated to it.
4862  */
4863 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4864 {
4865         struct trace_event_file *file = inode->i_private;
4866         int ret;
4867
4868         ret = tracing_check_open_get_tr(file->tr);
4869         if (ret)
4870                 return ret;
4871
4872         mutex_lock(&event_mutex);
4873
4874         /* Fail if the file is marked for removal */
4875         if (file->flags & EVENT_FILE_FL_FREED) {
4876                 trace_array_put(file->tr);
4877                 ret = -ENODEV;
4878         } else {
4879                 event_file_get(file);
4880         }
4881
4882         mutex_unlock(&event_mutex);
4883         if (ret)
4884                 return ret;
4885
4886         filp->private_data = inode->i_private;
4887
4888         return 0;
4889 }
4890
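/*
 * Release counterpart of tracing_open_file_tr(): drop the trace_array and
 * event file references taken there.
 */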
4891 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4892 {
4893         struct trace_event_file *file = inode->i_private;
4894
4895         trace_array_put(file->tr);
4896         event_file_put(file);
4897
4898         return 0;
4899 }
4900
4901 static int tracing_mark_open(struct inode *inode, struct file *filp)
4902 {
4903         stream_open(inode, filp);
4904         return tracing_open_generic_tr(inode, filp);
4905 }
4906
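/*
 * Release for the trace file: tear down the per-CPU ring buffer iterators,
 * let the tracer clean up, restart tracing if the open had paused it, and
 * free everything __tracing_open() allocated. Write-only opens never built
 * an iterator, so they only drop the trace_array reference.
 */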
4907 static int tracing_release(struct inode *inode, struct file *file)
4908 {
4909         struct trace_array *tr = inode->i_private;
4910         struct seq_file *m = file->private_data;
4911         struct trace_iterator *iter;
4912         int cpu;
4913
4914         if (!(file->f_mode & FMODE_READ)) {
4915                 trace_array_put(tr);
4916                 return 0;
4917         }
4918
4919         /* Writes do not use seq_file */
4920         iter = m->private;
4921         mutex_lock(&trace_types_lock);
4922
4923         for_each_tracing_cpu(cpu) {
4924                 if (iter->buffer_iter[cpu])
4925                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4926         }
4927
4928         if (iter->trace && iter->trace->close)
4929                 iter->trace->close(iter);
4930
4931         if (!iter->snapshot && tr->stop_count)
4932                 /* reenable tracing if it was previously enabled */
4933                 tracing_start_tr(tr);
4934
4935         __trace_array_put(tr);
4936
4937         mutex_unlock(&trace_types_lock);
4938
4939         mutex_destroy(&iter->mutex);
4940         free_cpumask_var(iter->started);
4941         kfree(iter->fmt);
4942         kfree(iter->temp);
4943         kfree(iter->trace);
4944         kfree(iter->buffer_iter);
4945         seq_release_private(inode, file);
4946
4947         return 0;
4948 }
4949
4950 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4951 {
4952         struct trace_array *tr = inode->i_private;
4953
4954         trace_array_put(tr);
4955         return 0;
4956 }
4957
4958 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4959 {
4960         struct trace_array *tr = inode->i_private;
4961
4962         trace_array_put(tr);
4963
4964         return single_release(inode, file);
4965 }
4966
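/*
 * Open for the trace file. Opening with O_TRUNC for writing erases the
 * contents of the chosen buffer (one CPU or all of them); opening for
 * reading builds an iterator via __tracing_open().
 */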
4967 static int tracing_open(struct inode *inode, struct file *file)
4968 {
4969         struct trace_array *tr = inode->i_private;
4970         struct trace_iterator *iter;
4971         int ret;
4972
4973         ret = tracing_check_open_get_tr(tr);
4974         if (ret)
4975                 return ret;
4976
4977         /* If this file was open for write, then erase contents */
4978         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4979                 int cpu = tracing_get_cpu(inode);
4980                 struct array_buffer *trace_buf = &tr->array_buffer;
4981
4982 #ifdef CONFIG_TRACER_MAX_TRACE
4983                 if (tr->current_trace->print_max)
4984                         trace_buf = &tr->max_buffer;
4985 #endif
4986
4987                 if (cpu == RING_BUFFER_ALL_CPUS)
4988                         tracing_reset_online_cpus(trace_buf);
4989                 else
4990                         tracing_reset_cpu(trace_buf, cpu);
4991         }
4992
4993         if (file->f_mode & FMODE_READ) {
4994                 iter = __tracing_open(inode, file, false);
4995                 if (IS_ERR(iter))
4996                         ret = PTR_ERR(iter);
4997                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4998                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4999         }
5000
5001         if (ret < 0)
5002                 trace_array_put(tr);
5003
5004         return ret;
5005 }
5006
5007 /*
5008  * Some tracers are not suitable for instance buffers.
5009  * A tracer is always available for the global array (toplevel)
5010  * or if it explicitly states that it is.
5011  */
5012 static bool
5013 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5014 {
5015         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5016 }
5017
5018 /* Find the next tracer that this trace array may use */
5019 static struct tracer *
5020 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5021 {
5022         while (t && !trace_ok_for_array(t, tr))
5023                 t = t->next;
5024
5025         return t;
5026 }
5027
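/*
 * seq_file iteration over the registered tracers (trace_types), skipping
 * tracers this trace array may not use. This backs the available_tracers
 * listing.
 */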
5028 static void *
5029 t_next(struct seq_file *m, void *v, loff_t *pos)
5030 {
5031         struct trace_array *tr = m->private;
5032         struct tracer *t = v;
5033
5034         (*pos)++;
5035
5036         if (t)
5037                 t = get_tracer_for_array(tr, t->next);
5038
5039         return t;
5040 }
5041
5042 static void *t_start(struct seq_file *m, loff_t *pos)
5043 {
5044         struct trace_array *tr = m->private;
5045         struct tracer *t;
5046         loff_t l = 0;
5047
5048         mutex_lock(&trace_types_lock);
5049
5050         t = get_tracer_for_array(tr, trace_types);
5051         for (; t && l < *pos; t = t_next(m, t, &l))
5052                         ;
5053
5054         return t;
5055 }
5056
5057 static void t_stop(struct seq_file *m, void *p)
5058 {
5059         mutex_unlock(&trace_types_lock);
5060 }
5061
5062 static int t_show(struct seq_file *m, void *v)
5063 {
5064         struct tracer *t = v;
5065
5066         if (!t)
5067                 return 0;
5068
5069         seq_puts(m, t->name);
5070         if (t->next)
5071                 seq_putc(m, ' ');
5072         else
5073                 seq_putc(m, '\n');
5074
5075         return 0;
5076 }
5077
5078 static const struct seq_operations show_traces_seq_ops = {
5079         .start          = t_start,
5080         .next           = t_next,
5081         .stop           = t_stop,
5082         .show           = t_show,
5083 };
5084
5085 static int show_traces_open(struct inode *inode, struct file *file)
5086 {
5087         struct trace_array *tr = inode->i_private;
5088         struct seq_file *m;
5089         int ret;
5090
5091         ret = tracing_check_open_get_tr(tr);
5092         if (ret)
5093                 return ret;
5094
5095         ret = seq_open(file, &show_traces_seq_ops);
5096         if (ret) {
5097                 trace_array_put(tr);
5098                 return ret;
5099         }
5100
5101         m = file->private_data;
5102         m->private = tr;
5103
5104         return 0;
5105 }
5106
5107 static int show_traces_release(struct inode *inode, struct file *file)
5108 {
5109         struct trace_array *tr = inode->i_private;
5110
5111         trace_array_put(tr);
5112         return seq_release(inode, file);
5113 }
5114
5115 static ssize_t
5116 tracing_write_stub(struct file *filp, const char __user *ubuf,
5117                    size_t count, loff_t *ppos)
5118 {
5119         return count;
5120 }
5121
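/*
 * Common llseek for tracing files: use seq_lseek() when the file was
 * opened for reading, otherwise just reset the position to zero (writes
 * do not seek).
 */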
5122 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5123 {
5124         int ret;
5125
5126         if (file->f_mode & FMODE_READ)
5127                 ret = seq_lseek(file, offset, whence);
5128         else
5129                 file->f_pos = ret = 0;
5130
5131         return ret;
5132 }
5133
5134 static const struct file_operations tracing_fops = {
5135         .open           = tracing_open,
5136         .read           = seq_read,
5137         .read_iter      = seq_read_iter,
5138         .splice_read    = generic_file_splice_read,
5139         .write          = tracing_write_stub,
5140         .llseek         = tracing_lseek,
5141         .release        = tracing_release,
5142 };
5143
5144 static const struct file_operations show_traces_fops = {
5145         .open           = show_traces_open,
5146         .read           = seq_read,
5147         .llseek         = seq_lseek,
5148         .release        = show_traces_release,
5149 };
5150
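/*
 * Read the tracing_cpumask file: print the CPU mask as a hex bitmap via
 * "%*pb" (for example, "f" would be shown on a 4-CPU system with all CPUs
 * enabled).
 */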
5151 static ssize_t
5152 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5153                      size_t count, loff_t *ppos)
5154 {
5155         struct trace_array *tr = file_inode(filp)->i_private;
5156         char *mask_str;
5157         int len;
5158
5159         len = snprintf(NULL, 0, "%*pb\n",
5160                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5161         mask_str = kmalloc(len, GFP_KERNEL);
5162         if (!mask_str)
5163                 return -ENOMEM;
5164
5165         len = snprintf(mask_str, len, "%*pb\n",
5166                        cpumask_pr_args(tr->tracing_cpumask));
5167         if (len >= count) {
5168                 count = -EINVAL;
5169                 goto out_err;
5170         }
5171         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5172
5173 out_err:
5174         kfree(mask_str);
5175
5176         return count;
5177 }
5178
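/*
 * Apply a new tracing CPU mask: for each CPU whose bit changes, enable or
 * disable recording in the ring buffer(s) and adjust the per-CPU disabled
 * counter, all under max_lock with interrupts off.
 */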
5179 int tracing_set_cpumask(struct trace_array *tr,
5180                         cpumask_var_t tracing_cpumask_new)
5181 {
5182         int cpu;
5183
5184         if (!tr)
5185                 return -EINVAL;
5186
5187         local_irq_disable();
5188         arch_spin_lock(&tr->max_lock);
5189         for_each_tracing_cpu(cpu) {
5190                 /*
5191                  * Increase/decrease the disabled counter if we are
5192                  * about to flip a bit in the cpumask:
5193                  */
5194                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5195                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5196                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5197                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5198 #ifdef CONFIG_TRACER_MAX_TRACE
5199                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5200 #endif
5201                 }
5202                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5203                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5204                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5205                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5206 #ifdef CONFIG_TRACER_MAX_TRACE
5207                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5208 #endif
5209                 }
5210         }
5211         arch_spin_unlock(&tr->max_lock);
5212         local_irq_enable();
5213
5214         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5215
5216         return 0;
5217 }
5218
5219 static ssize_t
5220 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5221                       size_t count, loff_t *ppos)
5222 {
5223         struct trace_array *tr = file_inode(filp)->i_private;
5224         cpumask_var_t tracing_cpumask_new;
5225         int err;
5226
5227         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5228                 return -ENOMEM;
5229
5230         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5231         if (err)
5232                 goto err_free;
5233
5234         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5235         if (err)
5236                 goto err_free;
5237
5238         free_cpumask_var(tracing_cpumask_new);
5239
5240         return count;
5241
5242 err_free:
5243         free_cpumask_var(tracing_cpumask_new);
5244
5245         return err;
5246 }
5247
5248 static const struct file_operations tracing_cpumask_fops = {
5249         .open           = tracing_open_generic_tr,
5250         .read           = tracing_cpumask_read,
5251         .write          = tracing_cpumask_write,
5252         .release        = tracing_release_generic_tr,
5253         .llseek         = generic_file_llseek,
5254 };
5255
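/*
 * Show the trace_options file: list every global trace option and every
 * option of the current tracer, prefixing cleared options with "no".
 */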
5256 static int tracing_trace_options_show(struct seq_file *m, void *v)
5257 {
5258         struct tracer_opt *trace_opts;
5259         struct trace_array *tr = m->private;
5260         u32 tracer_flags;
5261         int i;
5262
5263         mutex_lock(&trace_types_lock);
5264         tracer_flags = tr->current_trace->flags->val;
5265         trace_opts = tr->current_trace->flags->opts;
5266
5267         for (i = 0; trace_options[i]; i++) {
5268                 if (tr->trace_flags & (1 << i))
5269                         seq_printf(m, "%s\n", trace_options[i]);
5270                 else
5271                         seq_printf(m, "no%s\n", trace_options[i]);
5272         }
5273
5274         for (i = 0; trace_opts[i].name; i++) {
5275                 if (tracer_flags & trace_opts[i].bit)
5276                         seq_printf(m, "%s\n", trace_opts[i].name);
5277                 else
5278                         seq_printf(m, "no%s\n", trace_opts[i].name);
5279         }
5280         mutex_unlock(&trace_types_lock);
5281
5282         return 0;
5283 }
5284
5285 static int __set_tracer_option(struct trace_array *tr,
5286                                struct tracer_flags *tracer_flags,
5287                                struct tracer_opt *opts, int neg)
5288 {
5289         struct tracer *trace = tracer_flags->trace;
5290         int ret;
5291
5292         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5293         if (ret)
5294                 return ret;
5295
5296         if (neg)
5297                 tracer_flags->val &= ~opts->bit;
5298         else
5299                 tracer_flags->val |= opts->bit;
5300         return 0;
5301 }
5302
5303 /* Try to assign a tracer specific option */
5304 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5305 {
5306         struct tracer *trace = tr->current_trace;
5307         struct tracer_flags *tracer_flags = trace->flags;
5308         struct tracer_opt *opts = NULL;
5309         int i;
5310
5311         for (i = 0; tracer_flags->opts[i].name; i++) {
5312                 opts = &tracer_flags->opts[i];
5313
5314                 if (strcmp(cmp, opts->name) == 0)
5315                         return __set_tracer_option(tr, trace->flags, opts, neg);
5316         }
5317
5318         return -EINVAL;
5319 }
5320
5321 /* Some tracers require overwrite to stay enabled */
5322 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5323 {
5324         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5325                 return -1;
5326
5327         return 0;
5328 }
5329
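/*
 * Set or clear one TRACE_ITER_* flag for a trace array. The current tracer
 * may veto the change, and several flags have side effects handled here:
 * comm/tgid recording, fork following, ring buffer overwrite mode and
 * trace_printk().
 */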
5330 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5331 {
5332         int *map;
5333
5334         if ((mask == TRACE_ITER_RECORD_TGID) ||
5335             (mask == TRACE_ITER_RECORD_CMD))
5336                 lockdep_assert_held(&event_mutex);
5337
5338         /* do nothing if the flag already matches the requested state */
5339         if (!!(tr->trace_flags & mask) == !!enabled)
5340                 return 0;
5341
5342         /* Give the tracer a chance to approve the change */
5343         if (tr->current_trace->flag_changed)
5344                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5345                         return -EINVAL;
5346
5347         if (enabled)
5348                 tr->trace_flags |= mask;
5349         else
5350                 tr->trace_flags &= ~mask;
5351
5352         if (mask == TRACE_ITER_RECORD_CMD)
5353                 trace_event_enable_cmd_record(enabled);
5354
5355         if (mask == TRACE_ITER_RECORD_TGID) {
5356                 if (!tgid_map) {
5357                         tgid_map_max = pid_max;
5358                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5359                                        GFP_KERNEL);
5360
5361                         /*
5362                          * Pairs with smp_load_acquire() in
5363                          * trace_find_tgid_ptr() to ensure that if it observes
5364                          * the tgid_map we just allocated then it also observes
5365                          * the corresponding tgid_map_max value.
5366                          */
5367                         smp_store_release(&tgid_map, map);
5368                 }
5369                 if (!tgid_map) {
5370                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5371                         return -ENOMEM;
5372                 }
5373
5374                 trace_event_enable_tgid_record(enabled);
5375         }
5376
5377         if (mask == TRACE_ITER_EVENT_FORK)
5378                 trace_event_follow_fork(tr, enabled);
5379
5380         if (mask == TRACE_ITER_FUNC_FORK)
5381                 ftrace_pid_follow_fork(tr, enabled);
5382
5383         if (mask == TRACE_ITER_OVERWRITE) {
5384                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5385 #ifdef CONFIG_TRACER_MAX_TRACE
5386                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5387 #endif
5388         }
5389
5390         if (mask == TRACE_ITER_PRINTK) {
5391                 trace_printk_start_stop_comm(enabled);
5392                 trace_printk_control(enabled);
5393         }
5394
5395         return 0;
5396 }
5397
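/*
 * Parse and apply a single option name from the trace_options file or from
 * the options saved at boot. A leading "no" clears the option; the global
 * flags are tried first, then the current tracer's private options.
 */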
5398 int trace_set_options(struct trace_array *tr, char *option)
5399 {
5400         char *cmp;
5401         int neg = 0;
5402         int ret;
5403         size_t orig_len = strlen(option);
5404         int len;
5405
5406         cmp = strstrip(option);
5407
5408         len = str_has_prefix(cmp, "no");
5409         if (len)
5410                 neg = 1;
5411
5412         cmp += len;
5413
5414         mutex_lock(&event_mutex);
5415         mutex_lock(&trace_types_lock);
5416
5417         ret = match_string(trace_options, -1, cmp);
5418         /* If no option could be set, test the specific tracer options */
5419         if (ret < 0)
5420                 ret = set_tracer_option(tr, cmp, neg);
5421         else
5422                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5423
5424         mutex_unlock(&trace_types_lock);
5425         mutex_unlock(&event_mutex);
5426
5427         /*
5428          * If the first trailing whitespace is replaced with '\0' by strstrip,
5429          * turn it back into a space.
5430          */
5431         if (orig_len > strlen(option))
5432                 option[strlen(option)] = ' ';
5433
5434         return ret;
5435 }
5436
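/*
 * Apply the comma-separated trace options that were saved at boot time,
 * one option at a time.
 */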
5437 static void __init apply_trace_boot_options(void)
5438 {
5439         char *buf = trace_boot_options_buf;
5440         char *option;
5441
5442         while (true) {
5443                 option = strsep(&buf, ",");
5444
5445                 if (!option)
5446                         break;
5447
5448                 if (*option)
5449                         trace_set_options(&global_trace, option);
5450
5451                 /* Put back the comma to allow this to be called again */
5452                 if (buf)
5453                         *(buf - 1) = ',';
5454         }
5455 }
5456
5457 static ssize_t
5458 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5459                         size_t cnt, loff_t *ppos)
5460 {
5461         struct seq_file *m = filp->private_data;
5462         struct trace_array *tr = m->private;
5463         char buf[64];
5464         int ret;
5465
5466         if (cnt >= sizeof(buf))
5467                 return -EINVAL;
5468
5469         if (copy_from_user(buf, ubuf, cnt))
5470                 return -EFAULT;
5471
5472         buf[cnt] = 0;
5473
5474         ret = trace_set_options(tr, buf);
5475         if (ret < 0)
5476                 return ret;
5477
5478         *ppos += cnt;
5479
5480         return cnt;
5481 }
5482
5483 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5484 {
5485         struct trace_array *tr = inode->i_private;
5486         int ret;
5487
5488         ret = tracing_check_open_get_tr(tr);
5489         if (ret)
5490                 return ret;
5491
5492         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5493         if (ret < 0)
5494                 trace_array_put(tr);
5495
5496         return ret;
5497 }
5498
5499 static const struct file_operations tracing_iter_fops = {
5500         .open           = tracing_trace_options_open,
5501         .read           = seq_read,
5502         .llseek         = seq_lseek,
5503         .release        = tracing_single_release_tr,
5504         .write          = tracing_trace_options_write,
5505 };
5506
5507 static const char readme_msg[] =
5508         "tracing mini-HOWTO:\n\n"
5509         "# echo 0 > tracing_on : quick way to disable tracing\n"
5510         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5511         " Important files:\n"
5512         "  trace\t\t\t- The static contents of the buffer\n"
5513         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5514         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5515         "  current_tracer\t- function and latency tracers\n"
5516         "  available_tracers\t- list of configured tracers for current_tracer\n"
5517         "  error_log\t- error log for failed commands (that support it)\n"
5518         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5519         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5520         "  trace_clock\t\t- change the clock used to order events\n"
5521         "       local:   Per cpu clock but may not be synced across CPUs\n"
5522         "      global:   Synced across CPUs but slows tracing down.\n"
5523         "     counter:   Not a clock, but just an increment\n"
5524         "      uptime:   Jiffy counter from time of boot\n"
5525         "        perf:   Same clock that perf events use\n"
5526 #ifdef CONFIG_X86_64
5527         "     x86-tsc:   TSC cycle counter\n"
5528 #endif
5529         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5530         "       delta:   Delta difference against a buffer-wide timestamp\n"
5531         "    absolute:   Absolute (standalone) timestamp\n"
5532         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5533         "\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5534         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5535         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5536         "\t\t\t  Remove sub-buffer with rmdir\n"
5537         "  trace_options\t\t- Set format or modify how tracing happens\n"
5538         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5539         "\t\t\t  option name\n"
5540         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
5541 #ifdef CONFIG_DYNAMIC_FTRACE
5542         "\n  available_filter_functions - list of functions that can be filtered on\n"
5543         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5544         "\t\t\t  functions\n"
5545         "\t     accepts: func_full_name or glob-matching-pattern\n"
5546         "\t     modules: Can select a group via module\n"
5547         "\t      Format: :mod:<module-name>\n"
5548         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5549         "\t    triggers: a command to perform when function is hit\n"
5550         "\t      Format: <function>:<trigger>[:count]\n"
5551         "\t     trigger: traceon, traceoff\n"
5552         "\t\t      enable_event:<system>:<event>\n"
5553         "\t\t      disable_event:<system>:<event>\n"
5554 #ifdef CONFIG_STACKTRACE
5555         "\t\t      stacktrace\n"
5556 #endif
5557 #ifdef CONFIG_TRACER_SNAPSHOT
5558         "\t\t      snapshot\n"
5559 #endif
5560         "\t\t      dump\n"
5561         "\t\t      cpudump\n"
5562         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5563         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5564         "\t     The first one will disable tracing every time do_fault is hit\n"
5565         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5566         "\t       The first time do_trap is hit and it disables tracing, the\n"
5567         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5568         "\t       the counter will not decrement. It only decrements when the\n"
5569         "\t       trigger did work\n"
5570         "\t     To remove a trigger without a count:\n"
5571         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5572         "\t     To remove a trigger with a count:\n"
5573         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5574         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5575         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5576         "\t    modules: Can select a group via module command :mod:\n"
5577         "\t    Does not accept triggers\n"
5578 #endif /* CONFIG_DYNAMIC_FTRACE */
5579 #ifdef CONFIG_FUNCTION_TRACER
5580         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5581         "\t\t    (function)\n"
5582         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5583         "\t\t    (function)\n"
5584 #endif
5585 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5586         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5587         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5588         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5589 #endif
5590 #ifdef CONFIG_TRACER_SNAPSHOT
5591         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5592         "\t\t\t  snapshot buffer. Read the contents for more\n"
5593         "\t\t\t  information\n"
5594 #endif
5595 #ifdef CONFIG_STACK_TRACER
5596         "  stack_trace\t\t- Shows the max stack trace when active\n"
5597         "  stack_max_size\t- Shows current max stack size that was traced\n"
5598         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5599         "\t\t\t  new trace)\n"
5600 #ifdef CONFIG_DYNAMIC_FTRACE
5601         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5602         "\t\t\t  traces\n"
5603 #endif
5604 #endif /* CONFIG_STACK_TRACER */
5605 #ifdef CONFIG_DYNAMIC_EVENTS
5606         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5607         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5608 #endif
5609 #ifdef CONFIG_KPROBE_EVENTS
5610         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5611         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5612 #endif
5613 #ifdef CONFIG_UPROBE_EVENTS
5614         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5615         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5616 #endif
5617 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5618         "\t  accepts: event-definitions (one definition per line)\n"
5619         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5620         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5621 #ifdef CONFIG_HIST_TRIGGERS
5622         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5623 #endif
5624         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5625         "\t           -:[<group>/][<event>]\n"
5626 #ifdef CONFIG_KPROBE_EVENTS
5627         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5628   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5629 #endif
5630 #ifdef CONFIG_UPROBE_EVENTS
5631   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5632 #endif
5633         "\t     args: <name>=fetcharg[:type]\n"
5634         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5635 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5636         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5637 #else
5638         "\t           $stack<index>, $stack, $retval, $comm,\n"
5639 #endif
5640         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5641         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5642         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5643         "\t           symstr, <type>\\[<array-size>\\]\n"
5644 #ifdef CONFIG_HIST_TRIGGERS
5645         "\t    field: <stype> <name>;\n"
5646         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5647         "\t           [unsigned] char/int/long\n"
5648 #endif
5649         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5650         "\t            of the <attached-group>/<attached-event>.\n"
5651 #endif
5652         "  events/\t\t- Directory containing all trace event subsystems:\n"
5653         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5654         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5655         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5656         "\t\t\t  events\n"
5657         "      filter\t\t- If set, only events passing filter are traced\n"
5658         "  events/<system>/<event>/\t- Directory containing control files for\n"
5659         "\t\t\t  <event>:\n"
5660         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5661         "      filter\t\t- If set, only events passing filter are traced\n"
5662         "      trigger\t\t- If set, a command to perform when event is hit\n"
5663         "\t    Format: <trigger>[:count][if <filter>]\n"
5664         "\t   trigger: traceon, traceoff\n"
5665         "\t            enable_event:<system>:<event>\n"
5666         "\t            disable_event:<system>:<event>\n"
5667 #ifdef CONFIG_HIST_TRIGGERS
5668         "\t            enable_hist:<system>:<event>\n"
5669         "\t            disable_hist:<system>:<event>\n"
5670 #endif
5671 #ifdef CONFIG_STACKTRACE
5672         "\t\t    stacktrace\n"
5673 #endif
5674 #ifdef CONFIG_TRACER_SNAPSHOT
5675         "\t\t    snapshot\n"
5676 #endif
5677 #ifdef CONFIG_HIST_TRIGGERS
5678         "\t\t    hist (see below)\n"
5679 #endif
5680         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5681         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5682         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5683         "\t                  events/block/block_unplug/trigger\n"
5684         "\t   The first disables tracing every time block_unplug is hit.\n"
5685         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5686         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5687         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5688         "\t   Like function triggers, the counter is only decremented if it\n"
5689         "\t    enabled or disabled tracing.\n"
5690         "\t   To remove a trigger without a count:\n"
5691         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5692         "\t   To remove a trigger with a count:\n"
5693         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5694         "\t   Filters can be ignored when removing a trigger.\n"
5695 #ifdef CONFIG_HIST_TRIGGERS
5696         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5697         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5698         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5699         "\t            [:values=<field1[,field2,...]>]\n"
5700         "\t            [:sort=<field1[,field2,...]>]\n"
5701         "\t            [:size=#entries]\n"
5702         "\t            [:pause][:continue][:clear]\n"
5703         "\t            [:name=histname1]\n"
5704         "\t            [:<handler>.<action>]\n"
5705         "\t            [if <filter>]\n\n"
5706         "\t    Note, special fields can be used as well:\n"
5707         "\t            common_timestamp - to record current timestamp\n"
5708         "\t            common_cpu - to record the CPU the event happened on\n"
5709         "\n"
5710         "\t    A hist trigger variable can be:\n"
5711         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5712         "\t        - a reference to another variable e.g. y=$x,\n"
5713         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5714         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5715         "\n"
5716         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5717         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5718         "\t    variable reference, field or numeric literal.\n"
5719         "\n"
5720         "\t    When a matching event is hit, an entry is added to a hash\n"
5721         "\t    table using the key(s) and value(s) named, and the value of a\n"
5722         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5723         "\t    correspond to fields in the event's format description.  Keys\n"
5724         "\t    can be any field, or the special string 'stacktrace'.\n"
5725         "\t    Compound keys consisting of up to two fields can be specified\n"
5726         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5727         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5728         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5729         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5730         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5731         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5732         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5733         "\t    its histogram data will be shared with other triggers of the\n"
5734         "\t    same name, and trigger hits will update this common data.\n\n"
5735         "\t    Reading the 'hist' file for the event will dump the hash\n"
5736         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5737         "\t    triggers attached to an event, there will be a table for each\n"
5738         "\t    trigger in the output.  The table displayed for a named\n"
5739         "\t    trigger will be the same as any other instance having the\n"
5740         "\t    same name.  The default format used to display a given field\n"
5741         "\t    can be modified by appending any of the following modifiers\n"
5742         "\t    to the field name, as applicable:\n\n"
5743         "\t            .hex        display a number as a hex value\n"
5744         "\t            .sym        display an address as a symbol\n"
5745         "\t            .sym-offset display an address as a symbol and offset\n"
5746         "\t            .execname   display a common_pid as a program name\n"
5747         "\t            .syscall    display a syscall id as a syscall name\n"
5748         "\t            .log2       display log2 value rather than raw number\n"
5749         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5750         "\t            .usecs      display a common_timestamp in microseconds\n"
5751         "\t            .percent    display a number as a percentage value\n"
5752         "\t            .graph      display a bar-graph of a value\n\n"
5753         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5754         "\t    trigger or to start a hist trigger but not log any events\n"
5755         "\t    until told to do so.  'continue' can be used to start or\n"
5756         "\t    restart a paused hist trigger.\n\n"
5757         "\t    The 'clear' parameter will clear the contents of a running\n"
5758         "\t    hist trigger and leave its current paused/active state\n"
5759         "\t    unchanged.\n\n"
5760         "\t    The enable_hist and disable_hist triggers can be used to\n"
5761         "\t    have one event conditionally start and stop another event's\n"
5762         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5763         "\t    the enable_event and disable_event triggers.\n\n"
5764         "\t    Hist trigger handlers and actions are executed whenever a\n"
5765         "\t    histogram entry is added or updated.  They take the form:\n\n"
5766         "\t        <handler>.<action>\n\n"
5767         "\t    The available handlers are:\n\n"
5768         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5769         "\t        onmax(var)               - invoke if var exceeds current max\n"
5770         "\t        onchange(var)            - invoke action if var changes\n\n"
5771         "\t    The available actions are:\n\n"
5772         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5773         "\t        save(field,...)                      - save current event fields\n"
5774 #ifdef CONFIG_TRACER_SNAPSHOT
5775         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5776 #endif
5777 #ifdef CONFIG_SYNTH_EVENTS
5778         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5779         "\t  Write into this file to define/undefine new synthetic events.\n"
5780         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5781 #endif
5782 #endif
5783 ;
5784
5785 static ssize_t
5786 tracing_readme_read(struct file *filp, char __user *ubuf,
5787                        size_t cnt, loff_t *ppos)
5788 {
5789         return simple_read_from_buffer(ubuf, cnt, ppos,
5790                                         readme_msg, strlen(readme_msg));
5791 }
5792
5793 static const struct file_operations tracing_readme_fops = {
5794         .open           = tracing_open_generic,
5795         .read           = tracing_readme_read,
5796         .llseek         = generic_file_llseek,
5797 };
5798
5799 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5800 {
5801         int pid = ++(*pos);
5802
5803         return trace_find_tgid_ptr(pid);
5804 }
5805
5806 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5807 {
5808         int pid = *pos;
5809
5810         return trace_find_tgid_ptr(pid);
5811 }
5812
5813 static void saved_tgids_stop(struct seq_file *m, void *v)
5814 {
5815 }
5816
5817 static int saved_tgids_show(struct seq_file *m, void *v)
5818 {
5819         int *entry = (int *)v;
5820         int pid = entry - tgid_map;
5821         int tgid = *entry;
5822
5823         if (tgid == 0)
5824                 return SEQ_SKIP;
5825
5826         seq_printf(m, "%d %d\n", pid, tgid);
5827         return 0;
5828 }
5829
5830 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5831         .start          = saved_tgids_start,
5832         .stop           = saved_tgids_stop,
5833         .next           = saved_tgids_next,
5834         .show           = saved_tgids_show,
5835 };
5836
5837 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5838 {
5839         int ret;
5840
5841         ret = tracing_check_open_get_tr(NULL);
5842         if (ret)
5843                 return ret;
5844
5845         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5846 }
5847
5848
5849 static const struct file_operations tracing_saved_tgids_fops = {
5850         .open           = tracing_saved_tgids_open,
5851         .read           = seq_read,
5852         .llseek         = seq_lseek,
5853         .release        = seq_release,
5854 };
5855
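/*
 * seq_file iteration over the saved cmdline map (pid -> comm), skipping
 * unused slots. The iterator holds trace_cmdline_lock with preemption
 * disabled from ->start() to ->stop().
 */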
5856 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5857 {
5858         unsigned int *ptr = v;
5859
5860         if (*pos || m->count)
5861                 ptr++;
5862
5863         (*pos)++;
5864
5865         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5866              ptr++) {
5867                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5868                         continue;
5869
5870                 return ptr;
5871         }
5872
5873         return NULL;
5874 }
5875
5876 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5877 {
5878         void *v;
5879         loff_t l = 0;
5880
5881         preempt_disable();
5882         arch_spin_lock(&trace_cmdline_lock);
5883
5884         v = &savedcmd->map_cmdline_to_pid[0];
5885         while (l <= *pos) {
5886                 v = saved_cmdlines_next(m, v, &l);
5887                 if (!v)
5888                         return NULL;
5889         }
5890
5891         return v;
5892 }
5893
5894 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5895 {
5896         arch_spin_unlock(&trace_cmdline_lock);
5897         preempt_enable();
5898 }
5899
5900 static int saved_cmdlines_show(struct seq_file *m, void *v)
5901 {
5902         char buf[TASK_COMM_LEN];
5903         unsigned int *pid = v;
5904
5905         __trace_find_cmdline(*pid, buf);
5906         seq_printf(m, "%d %s\n", *pid, buf);
5907         return 0;
5908 }
5909
5910 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5911         .start          = saved_cmdlines_start,
5912         .next           = saved_cmdlines_next,
5913         .stop           = saved_cmdlines_stop,
5914         .show           = saved_cmdlines_show,
5915 };
5916
5917 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5918 {
5919         int ret;
5920
5921         ret = tracing_check_open_get_tr(NULL);
5922         if (ret)
5923                 return ret;
5924
5925         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5926 }
5927
5928 static const struct file_operations tracing_saved_cmdlines_fops = {
5929         .open           = tracing_saved_cmdlines_open,
5930         .read           = seq_read,
5931         .llseek         = seq_lseek,
5932         .release        = seq_release,
5933 };
5934
5935 static ssize_t
5936 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5937                                  size_t cnt, loff_t *ppos)
5938 {
5939         char buf[64];
5940         int r;
5941
5942         preempt_disable();
5943         arch_spin_lock(&trace_cmdline_lock);
5944         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5945         arch_spin_unlock(&trace_cmdline_lock);
5946         preempt_enable();
5947
5948         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5949 }
5950
5951 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5952 {
5953         kfree(s->saved_cmdlines);
5954         kfree(s->map_cmdline_to_pid);
5955         kfree(s);
5956 }
5957
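/*
 * Resize the saved cmdlines buffer: allocate a new buffer of @val entries,
 * swap it in under trace_cmdline_lock and free the old one.
 */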
5958 static int tracing_resize_saved_cmdlines(unsigned int val)
5959 {
5960         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5961
5962         s = kmalloc(sizeof(*s), GFP_KERNEL);
5963         if (!s)
5964                 return -ENOMEM;
5965
5966         if (allocate_cmdlines_buffer(val, s) < 0) {
5967                 kfree(s);
5968                 return -ENOMEM;
5969         }
5970
5971         preempt_disable();
5972         arch_spin_lock(&trace_cmdline_lock);
5973         savedcmd_temp = savedcmd;
5974         savedcmd = s;
5975         arch_spin_unlock(&trace_cmdline_lock);
5976         preempt_enable();
5977         free_saved_cmdlines_buffer(savedcmd_temp);
5978
5979         return 0;
5980 }
5981
5982 static ssize_t
5983 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5984                                   size_t cnt, loff_t *ppos)
5985 {
5986         unsigned long val;
5987         int ret;
5988
5989         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5990         if (ret)
5991                 return ret;
5992
5993         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5994         if (!val || val > PID_MAX_DEFAULT)
5995                 return -EINVAL;
5996
5997         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5998         if (ret < 0)
5999                 return ret;
6000
6001         *ppos += cnt;
6002
6003         return cnt;
6004 }
6005
6006 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6007         .open           = tracing_open_generic,
6008         .read           = tracing_saved_cmdlines_size_read,
6009         .write          = tracing_saved_cmdlines_size_write,
6010 };
6011
6012 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6013 static union trace_eval_map_item *
6014 update_eval_map(union trace_eval_map_item *ptr)
6015 {
6016         if (!ptr->map.eval_string) {
6017                 if (ptr->tail.next) {
6018                         ptr = ptr->tail.next;
6019                         /* Set ptr to the next real item (skip head) */
6020                         ptr++;
6021                 } else
6022                         return NULL;
6023         }
6024         return ptr;
6025 }
6026
6027 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6028 {
6029         union trace_eval_map_item *ptr = v;
6030
6031         /*
6032          * Paranoid! If ptr points to end, we don't want to increment past it.
6033          * This really should never happen.
6034          */
6035         (*pos)++;
6036         ptr = update_eval_map(ptr);
6037         if (WARN_ON_ONCE(!ptr))
6038                 return NULL;
6039
6040         ptr++;
6041         ptr = update_eval_map(ptr);
6042
6043         return ptr;
6044 }
6045
6046 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6047 {
6048         union trace_eval_map_item *v;
6049         loff_t l = 0;
6050
6051         mutex_lock(&trace_eval_mutex);
6052
6053         v = trace_eval_maps;
6054         if (v)
6055                 v++;
6056
6057         while (v && l < *pos) {
6058                 v = eval_map_next(m, v, &l);
6059         }
6060
6061         return v;
6062 }
6063
6064 static void eval_map_stop(struct seq_file *m, void *v)
6065 {
6066         mutex_unlock(&trace_eval_mutex);
6067 }
6068
6069 static int eval_map_show(struct seq_file *m, void *v)
6070 {
6071         union trace_eval_map_item *ptr = v;
6072
6073         seq_printf(m, "%s %ld (%s)\n",
6074                    ptr->map.eval_string, ptr->map.eval_value,
6075                    ptr->map.system);
6076
6077         return 0;
6078 }
6079
6080 static const struct seq_operations tracing_eval_map_seq_ops = {
6081         .start          = eval_map_start,
6082         .next           = eval_map_next,
6083         .stop           = eval_map_stop,
6084         .show           = eval_map_show,
6085 };
6086
6087 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6088 {
6089         int ret;
6090
6091         ret = tracing_check_open_get_tr(NULL);
6092         if (ret)
6093                 return ret;
6094
6095         return seq_open(filp, &tracing_eval_map_seq_ops);
6096 }
6097
6098 static const struct file_operations tracing_eval_map_fops = {
6099         .open           = tracing_eval_map_open,
6100         .read           = seq_read,
6101         .llseek         = seq_lseek,
6102         .release        = seq_release,
6103 };
6104
6105 static inline union trace_eval_map_item *
6106 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6107 {
6108         /* Return tail of array given the head */
6109         return ptr + ptr->head.length + 1;
6110 }
6111
6112 static void
6113 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6114                            int len)
6115 {
6116         struct trace_eval_map **stop;
6117         struct trace_eval_map **map;
6118         union trace_eval_map_item *map_array;
6119         union trace_eval_map_item *ptr;
6120
6121         stop = start + len;
6122
6123         /*
6124          * The trace_eval_maps list contains the map plus a head and tail item,
6125          * where the head holds the module and the length of the array, and the
6126          * tail holds a pointer to the next list.
6127          */
6128         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6129         if (!map_array) {
6130                 pr_warn("Unable to allocate trace eval mapping\n");
6131                 return;
6132         }
6133
6134         mutex_lock(&trace_eval_mutex);
6135
6136         if (!trace_eval_maps)
6137                 trace_eval_maps = map_array;
6138         else {
6139                 ptr = trace_eval_maps;
6140                 for (;;) {
6141                         ptr = trace_eval_jmp_to_tail(ptr);
6142                         if (!ptr->tail.next)
6143                                 break;
6144                         ptr = ptr->tail.next;
6145
6146                 }
6147                 ptr->tail.next = map_array;
6148         }
6149         map_array->head.mod = mod;
6150         map_array->head.length = len;
6151         map_array++;
6152
6153         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6154                 map_array->map = **map;
6155                 map_array++;
6156         }
6157         memset(map_array, 0, sizeof(*map_array));
6158
6159         mutex_unlock(&trace_eval_mutex);
6160 }
6161
6162 static void trace_create_eval_file(struct dentry *d_tracer)
6163 {
6164         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6165                           NULL, &tracing_eval_map_fops);
6166 }
6167
6168 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6169 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6170 static inline void trace_insert_eval_map_file(struct module *mod,
6171                               struct trace_eval_map **start, int len) { }
6172 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6173
6174 static void trace_insert_eval_map(struct module *mod,
6175                                   struct trace_eval_map **start, int len)
6176 {
6177         struct trace_eval_map **map;
6178
6179         if (len <= 0)
6180                 return;
6181
6182         map = start;
6183
6184         trace_event_eval_update(map, len);
6185
6186         trace_insert_eval_map_file(mod, start, len);
6187 }
6188
6189 static ssize_t
6190 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6191                        size_t cnt, loff_t *ppos)
6192 {
6193         struct trace_array *tr = filp->private_data;
6194         char buf[MAX_TRACER_SIZE+2];
6195         int r;
6196
6197         mutex_lock(&trace_types_lock);
6198         r = sprintf(buf, "%s\n", tr->current_trace->name);
6199         mutex_unlock(&trace_types_lock);
6200
6201         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6202 }
6203
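/* Reset @tr's online-CPU buffers and call @t's init callback. */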
6204 int tracer_init(struct tracer *t, struct trace_array *tr)
6205 {
6206         tracing_reset_online_cpus(&tr->array_buffer);
6207         return t->init(tr);
6208 }
6209
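/* Record @val as the per-CPU "entries" size for every CPU buffer of @buf. */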
6210 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6211 {
6212         int cpu;
6213
6214         for_each_tracing_cpu(cpu)
6215                 per_cpu_ptr(buf->data, cpu)->entries = val;
6216 }
6217
6218 #ifdef CONFIG_TRACER_MAX_TRACE
6219 /* resize @tr's buffer to the size of @size_tr's entries */
6220 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6221                                         struct array_buffer *size_buf, int cpu_id)
6222 {
6223         int cpu, ret = 0;
6224
6225         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6226                 for_each_tracing_cpu(cpu) {
6227                         ret = ring_buffer_resize(trace_buf->buffer,
6228                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6229                         if (ret < 0)
6230                                 break;
6231                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6232                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6233                 }
6234         } else {
6235                 ret = ring_buffer_resize(trace_buf->buffer,
6236                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6237                 if (ret == 0)
6238                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6239                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6240         }
6241
6242         return ret;
6243 }
6244 #endif /* CONFIG_TRACER_MAX_TRACE */
6245
6246 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6247                                         unsigned long size, int cpu)
6248 {
6249         int ret;
6250
6251         /*
6252          * If kernel or user changes the size of the ring buffer
6253          * we use the size that was given, and we can forget about
6254          * expanding it later.
6255          */
6256         ring_buffer_expanded = true;
6257
6258         /* May be called before buffers are initialized */
6259         if (!tr->array_buffer.buffer)
6260                 return 0;
6261
6262         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6263         if (ret < 0)
6264                 return ret;
6265
6266 #ifdef CONFIG_TRACER_MAX_TRACE
6267         if (!tr->current_trace->use_max_tr)
6268                 goto out;
6269
6270         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6271         if (ret < 0) {
6272                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6273                                                      &tr->array_buffer, cpu);
6274                 if (r < 0) {
6275                         /*
6276                          * AARGH! We are left with a max buffer of a
6277                          * different size!
6278                          * The max buffer is our "snapshot" buffer.
6279                          * When a tracer needs a snapshot (one of the
6280                          * latency tracers), it swaps the max buffer
6281                          * with the saved snapshot. We succeeded in
6282                          * updating the size of the main buffer, but failed
6283                          * to update the size of the max buffer. Then, when
6284                          * we tried to reset the main buffer to its original
6285                          * size, we failed there too. This is very unlikely to
6286                          * happen, but if it does, warn and kill all
6287                          * tracing.
6288                          */
6289                         WARN_ON(1);
6290                         tracing_disabled = 1;
6291                 }
6292                 return ret;
6293         }
6294
6295         if (cpu == RING_BUFFER_ALL_CPUS)
6296                 set_buffer_entries(&tr->max_buffer, size);
6297         else
6298                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6299
6300  out:
6301 #endif /* CONFIG_TRACER_MAX_TRACE */
6302
6303         if (cpu == RING_BUFFER_ALL_CPUS)
6304                 set_buffer_entries(&tr->array_buffer, size);
6305         else
6306                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6307
6308         return ret;
6309 }
6310
6311 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6312                                   unsigned long size, int cpu_id)
6313 {
6314         int ret;
6315
6316         mutex_lock(&trace_types_lock);
6317
6318         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6319                 /* make sure this CPU is enabled in the mask */
6320                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6321                         ret = -EINVAL;
6322                         goto out;
6323                 }
6324         }
6325
6326         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6327         if (ret < 0)
6328                 ret = -ENOMEM;
6329
6330 out:
6331         mutex_unlock(&trace_types_lock);
6332
6333         return ret;
6334 }
6335
6336
6337 /**
6338  * tracing_update_buffers - used by tracing facility to expand ring buffers
6339  *
6340  * To save memory on systems where tracing is configured in but never
6341  * used, the ring buffers are initially set to a minimum size. Once a
6342  * user starts to use the tracing facility, they need to grow to their
6343  * default size.
6344  *
6345  * This function is to be called when a tracer is about to be used.
6346  */
6347 int tracing_update_buffers(void)
6348 {
6349         int ret = 0;
6350
6351         mutex_lock(&trace_types_lock);
6352         if (!ring_buffer_expanded)
6353                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6354                                                 RING_BUFFER_ALL_CPUS);
6355         mutex_unlock(&trace_types_lock);
6356
6357         return ret;
6358 }
6359
6360 struct trace_option_dentry;
6361
6362 static void
6363 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6364
6365 /*
6366  * Used to clear out the tracer before deletion of an instance.
6367  * Must have trace_types_lock held.
6368  */
6369 static void tracing_set_nop(struct trace_array *tr)
6370 {
6371         if (tr->current_trace == &nop_trace)
6372                 return;
6373
6374         tr->current_trace->enabled--;
6375
6376         if (tr->current_trace->reset)
6377                 tr->current_trace->reset(tr);
6378
6379         tr->current_trace = &nop_trace;
6380 }
6381
6382 static bool tracer_options_updated;
6383
6384 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6385 {
6386         /* Only enable if the directory has been created already. */
6387         if (!tr->dir)
6388                 return;
6389
6390         /* Only create trace option files after update_tracer_options finishes */
6391         if (!tracer_options_updated)
6392                 return;
6393
6394         create_trace_option_files(tr, t);
6395 }
6396
6397 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6398 {
6399         struct tracer *t;
6400 #ifdef CONFIG_TRACER_MAX_TRACE
6401         bool had_max_tr;
6402 #endif
6403         int ret = 0;
6404
6405         mutex_lock(&trace_types_lock);
6406
6407         if (!ring_buffer_expanded) {
6408                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6409                                                 RING_BUFFER_ALL_CPUS);
6410                 if (ret < 0)
6411                         goto out;
6412                 ret = 0;
6413         }
6414
6415         for (t = trace_types; t; t = t->next) {
6416                 if (strcmp(t->name, buf) == 0)
6417                         break;
6418         }
6419         if (!t) {
6420                 ret = -EINVAL;
6421                 goto out;
6422         }
6423         if (t == tr->current_trace)
6424                 goto out;
6425
6426 #ifdef CONFIG_TRACER_SNAPSHOT
6427         if (t->use_max_tr) {
6428                 local_irq_disable();
6429                 arch_spin_lock(&tr->max_lock);
6430                 if (tr->cond_snapshot)
6431                         ret = -EBUSY;
6432                 arch_spin_unlock(&tr->max_lock);
6433                 local_irq_enable();
6434                 if (ret)
6435                         goto out;
6436         }
6437 #endif
6438         /* Some tracers won't work if enabled from the kernel command line */
6439         if (system_state < SYSTEM_RUNNING && t->noboot) {
6440                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6441                         t->name);
6442                 goto out;
6443         }
6444
6445         /* Some tracers are only allowed for the top level buffer */
6446         if (!trace_ok_for_array(t, tr)) {
6447                 ret = -EINVAL;
6448                 goto out;
6449         }
6450
6451         /* If trace pipe files are being read, we can't change the tracer */
6452         if (tr->trace_ref) {
6453                 ret = -EBUSY;
6454                 goto out;
6455         }
6456
6457         trace_branch_disable();
6458
6459         tr->current_trace->enabled--;
6460
6461         if (tr->current_trace->reset)
6462                 tr->current_trace->reset(tr);
6463
6464 #ifdef CONFIG_TRACER_MAX_TRACE
6465         had_max_tr = tr->current_trace->use_max_tr;
6466
6467         /* Current trace needs to be nop_trace before synchronize_rcu */
6468         tr->current_trace = &nop_trace;
6469
6470         if (had_max_tr && !t->use_max_tr) {
6471                 /*
6472                  * We need to make sure that the update_max_tr sees that
6473                  * current_trace changed to nop_trace to keep it from
6474                  * swapping the buffers after we resize it.
6475                  * update_max_tr() is called with interrupts disabled,
6476                  * so a synchronize_rcu() is sufficient.
6477                  */
6478                 synchronize_rcu();
6479                 free_snapshot(tr);
6480         }
6481
6482         if (t->use_max_tr && !tr->allocated_snapshot) {
6483                 ret = tracing_alloc_snapshot_instance(tr);
6484                 if (ret < 0)
6485                         goto out;
6486         }
6487 #else
6488         tr->current_trace = &nop_trace;
6489 #endif
6490
6491         if (t->init) {
6492                 ret = tracer_init(t, tr);
6493                 if (ret)
6494                         goto out;
6495         }
6496
6497         tr->current_trace = t;
6498         tr->current_trace->enabled++;
6499         trace_branch_enable(tr);
6500  out:
6501         mutex_unlock(&trace_types_lock);
6502
6503         return ret;
6504 }
6505
6506 static ssize_t
6507 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6508                         size_t cnt, loff_t *ppos)
6509 {
6510         struct trace_array *tr = filp->private_data;
6511         char buf[MAX_TRACER_SIZE+1];
6512         char *name;
6513         size_t ret;
6514         int err;
6515
6516         ret = cnt;
6517
6518         if (cnt > MAX_TRACER_SIZE)
6519                 cnt = MAX_TRACER_SIZE;
6520
6521         if (copy_from_user(buf, ubuf, cnt))
6522                 return -EFAULT;
6523
6524         buf[cnt] = 0;
6525
6526         name = strim(buf);
6527
6528         err = tracing_set_tracer(tr, name);
6529         if (err)
6530                 return err;
6531
6532         *ppos += ret;
6533
6534         return ret;
6535 }
6536
6537 static ssize_t
6538 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6539                    size_t cnt, loff_t *ppos)
6540 {
6541         char buf[64];
6542         int r;
6543
6544         r = snprintf(buf, sizeof(buf), "%ld\n",
6545                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6546         if (r > sizeof(buf))
6547                 r = sizeof(buf);
6548         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6549 }
6550
6551 static ssize_t
6552 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6553                     size_t cnt, loff_t *ppos)
6554 {
6555         unsigned long val;
6556         int ret;
6557
6558         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6559         if (ret)
6560                 return ret;
6561
6562         *ptr = val * 1000;
6563
6564         return cnt;
6565 }
6566
6567 static ssize_t
6568 tracing_thresh_read(struct file *filp, char __user *ubuf,
6569                     size_t cnt, loff_t *ppos)
6570 {
6571         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6572 }
6573
6574 static ssize_t
6575 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6576                      size_t cnt, loff_t *ppos)
6577 {
6578         struct trace_array *tr = filp->private_data;
6579         int ret;
6580
6581         mutex_lock(&trace_types_lock);
6582         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6583         if (ret < 0)
6584                 goto out;
6585
6586         if (tr->current_trace->update_thresh) {
6587                 ret = tr->current_trace->update_thresh(tr);
6588                 if (ret < 0)
6589                         goto out;
6590         }
6591
6592         ret = cnt;
6593 out:
6594         mutex_unlock(&trace_types_lock);
6595
6596         return ret;
6597 }
6598
6599 #ifdef CONFIG_TRACER_MAX_TRACE
6600
6601 static ssize_t
6602 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6603                      size_t cnt, loff_t *ppos)
6604 {
6605         struct trace_array *tr = filp->private_data;
6606
6607         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6608 }
6609
6610 static ssize_t
6611 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6612                       size_t cnt, loff_t *ppos)
6613 {
6614         struct trace_array *tr = filp->private_data;
6615
6616         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6617 }
6618
6619 #endif
6620
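/*
 * trace_pipe is a consuming reader, so only one reader is allowed per CPU
 * (or a single reader covering all CPUs). The pipe_cpumask records which
 * CPUs currently have an open pipe; a conflicting open gets -EBUSY.
 */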
6621 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6622 {
6623         if (cpu == RING_BUFFER_ALL_CPUS) {
6624                 if (cpumask_empty(tr->pipe_cpumask)) {
6625                         cpumask_setall(tr->pipe_cpumask);
6626                         return 0;
6627                 }
6628         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6629                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6630                 return 0;
6631         }
6632         return -EBUSY;
6633 }
6634
6635 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6636 {
6637         if (cpu == RING_BUFFER_ALL_CPUS) {
6638                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6639                 cpumask_clear(tr->pipe_cpumask);
6640         } else {
6641                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6642                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6643         }
6644 }
6645
6646 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6647 {
6648         struct trace_array *tr = inode->i_private;
6649         struct trace_iterator *iter;
6650         int cpu;
6651         int ret;
6652
6653         ret = tracing_check_open_get_tr(tr);
6654         if (ret)
6655                 return ret;
6656
6657         mutex_lock(&trace_types_lock);
6658         cpu = tracing_get_cpu(inode);
6659         ret = open_pipe_on_cpu(tr, cpu);
6660         if (ret)
6661                 goto fail_pipe_on_cpu;
6662
6663         /* create a buffer to store the information to pass to userspace */
6664         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6665         if (!iter) {
6666                 ret = -ENOMEM;
6667                 goto fail_alloc_iter;
6668         }
6669
6670         trace_seq_init(&iter->seq);
6671         iter->trace = tr->current_trace;
6672
6673         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6674                 ret = -ENOMEM;
6675                 goto fail;
6676         }
6677
6678         /* trace pipe does not show start of buffer */
6679         cpumask_setall(iter->started);
6680
6681         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6682                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6683
6684         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6685         if (trace_clocks[tr->clock_id].in_ns)
6686                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6687
6688         iter->tr = tr;
6689         iter->array_buffer = &tr->array_buffer;
6690         iter->cpu_file = cpu;
6691         mutex_init(&iter->mutex);
6692         filp->private_data = iter;
6693
6694         if (iter->trace->pipe_open)
6695                 iter->trace->pipe_open(iter);
6696
6697         nonseekable_open(inode, filp);
6698
6699         tr->trace_ref++;
6700
6701         mutex_unlock(&trace_types_lock);
6702         return ret;
6703
6704 fail:
6705         kfree(iter);
6706 fail_alloc_iter:
6707         close_pipe_on_cpu(tr, cpu);
6708 fail_pipe_on_cpu:
6709         __trace_array_put(tr);
6710         mutex_unlock(&trace_types_lock);
6711         return ret;
6712 }
6713
6714 static int tracing_release_pipe(struct inode *inode, struct file *file)
6715 {
6716         struct trace_iterator *iter = file->private_data;
6717         struct trace_array *tr = inode->i_private;
6718
6719         mutex_lock(&trace_types_lock);
6720
6721         tr->trace_ref--;
6722
6723         if (iter->trace->pipe_close)
6724                 iter->trace->pipe_close(iter);
6725         close_pipe_on_cpu(tr, iter->cpu_file);
6726         mutex_unlock(&trace_types_lock);
6727
6728         free_cpumask_var(iter->started);
6729         kfree(iter->fmt);
6730         kfree(iter->temp);
6731         mutex_destroy(&iter->mutex);
6732         kfree(iter);
6733
6734         trace_array_put(tr);
6735
6736         return 0;
6737 }
6738
6739 static __poll_t
6740 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6741 {
6742         struct trace_array *tr = iter->tr;
6743
6744         /* Iterators are static; they should be either filled or empty */
6745         if (trace_buffer_iter(iter, iter->cpu_file))
6746                 return EPOLLIN | EPOLLRDNORM;
6747
6748         if (tr->trace_flags & TRACE_ITER_BLOCK)
6749                 /*
6750                  * Always select as readable when in blocking mode
6751                  */
6752                 return EPOLLIN | EPOLLRDNORM;
6753         else
6754                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6755                                              filp, poll_table, iter->tr->buffer_percent);
6756 }
6757
6758 static __poll_t
6759 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6760 {
6761         struct trace_iterator *iter = filp->private_data;
6762
6763         return trace_poll(iter, filp, poll_table);
6764 }
6765
6766 /* Must be called with iter->mutex held. */
6767 static int tracing_wait_pipe(struct file *filp)
6768 {
6769         struct trace_iterator *iter = filp->private_data;
6770         int ret;
6771
6772         while (trace_empty(iter)) {
6773
6774                 if ((filp->f_flags & O_NONBLOCK)) {
6775                         return -EAGAIN;
6776                 }
6777
6778                 /*
6779                  * We block until we read something. If tracing is disabled
6780                  * but we have not read anything yet, we keep blocking. This
6781                  * allows a user to cat this file, and then enable tracing.
6782                  * But after we have read something, we give an EOF when
6783                  * tracing is disabled again.
6784                  *
6785                  * iter->pos will be 0 if we haven't read anything.
6786                  */
6787                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6788                         break;
6789
6790                 mutex_unlock(&iter->mutex);
6791
6792                 ret = wait_on_pipe(iter, 0);
6793
6794                 mutex_lock(&iter->mutex);
6795
6796                 if (ret)
6797                         return ret;
6798         }
6799
6800         return 1;
6801 }
6802
6803 /*
6804  * Consumer reader.
6805  */
6806 static ssize_t
6807 tracing_read_pipe(struct file *filp, char __user *ubuf,
6808                   size_t cnt, loff_t *ppos)
6809 {
6810         struct trace_iterator *iter = filp->private_data;
6811         ssize_t sret;
6812
6813         /*
6814          * Avoid more than one consumer on a single file descriptor.
6815          * This is just a matter of trace coherency; the ring buffer itself
6816          * is protected.
6817          */
6818         mutex_lock(&iter->mutex);
6819
6820         /* return any leftover data */
6821         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6822         if (sret != -EBUSY)
6823                 goto out;
6824
6825         trace_seq_init(&iter->seq);
6826
6827         if (iter->trace->read) {
6828                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6829                 if (sret)
6830                         goto out;
6831         }
6832
6833 waitagain:
6834         sret = tracing_wait_pipe(filp);
6835         if (sret <= 0)
6836                 goto out;
6837
6838         /* stop when tracing is finished */
6839         if (trace_empty(iter)) {
6840                 sret = 0;
6841                 goto out;
6842         }
6843
6844         if (cnt >= PAGE_SIZE)
6845                 cnt = PAGE_SIZE - 1;
6846
6847         /* reset all but tr, trace, and overruns */
6848         trace_iterator_reset(iter);
6849         cpumask_clear(iter->started);
6850         trace_seq_init(&iter->seq);
6851
6852         trace_event_read_lock();
6853         trace_access_lock(iter->cpu_file);
6854         while (trace_find_next_entry_inc(iter) != NULL) {
6855                 enum print_line_t ret;
6856                 int save_len = iter->seq.seq.len;
6857
6858                 ret = print_trace_line(iter);
6859                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6860                         /*
6861                          * If one print_trace_line() fills the entire trace_seq in one shot,
6862                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6863                          * In this case we need to consume it, otherwise the loop will peek
6864                          * at this event again next time, resulting in an infinite loop.
6865                          */
6866                         if (save_len == 0) {
6867                                 iter->seq.full = 0;
6868                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6869                                 trace_consume(iter);
6870                                 break;
6871                         }
6872
6873                         /* In other cases, don't print partial lines */
6874                         iter->seq.seq.len = save_len;
6875                         break;
6876                 }
6877                 if (ret != TRACE_TYPE_NO_CONSUME)
6878                         trace_consume(iter);
6879
6880                 if (trace_seq_used(&iter->seq) >= cnt)
6881                         break;
6882
6883                 /*
6884                  * Setting the full flag means we reached the trace_seq buffer
6885                  * size and we should have left via the partial output condition above.
6886                  * One of the trace_seq_* functions is not used properly.
6887                  */
6888                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6889                           iter->ent->type);
6890         }
6891         trace_access_unlock(iter->cpu_file);
6892         trace_event_read_unlock();
6893
6894         /* Now copy what we have to the user */
6895         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6896         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6897                 trace_seq_init(&iter->seq);
6898
6899         /*
6900          * If there was nothing to send to the user, despite consuming trace
6901          * entries, go back and wait for more entries.
6902          */
6903         if (sret == -EBUSY)
6904                 goto waitagain;
6905
6906 out:
6907         mutex_unlock(&iter->mutex);
6908
6909         return sret;
6910 }
6911
6912 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6913                                      unsigned int idx)
6914 {
6915         __free_page(spd->pages[idx]);
6916 }
6917
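/*
 * Fill iter->seq (one page) with formatted trace lines, consuming entries,
 * until @rem bytes have been produced or the trace is exhausted.
 * Returns the remaining byte budget.
 */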
6918 static size_t
6919 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6920 {
6921         size_t count;
6922         int save_len;
6923         int ret;
6924
6925         /* Seq buffer is page-sized, exactly what we need. */
6926         for (;;) {
6927                 save_len = iter->seq.seq.len;
6928                 ret = print_trace_line(iter);
6929
6930                 if (trace_seq_has_overflowed(&iter->seq)) {
6931                         iter->seq.seq.len = save_len;
6932                         break;
6933                 }
6934
6935                 /*
6936                  * This should not be hit, because it should only
6937                  * be set if the iter->seq overflowed. But check it
6938                  * anyway to be safe.
6939                  */
6940                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6941                         iter->seq.seq.len = save_len;
6942                         break;
6943                 }
6944
6945                 count = trace_seq_used(&iter->seq) - save_len;
6946                 if (rem < count) {
6947                         rem = 0;
6948                         iter->seq.seq.len = save_len;
6949                         break;
6950                 }
6951
6952                 if (ret != TRACE_TYPE_NO_CONSUME)
6953                         trace_consume(iter);
6954                 rem -= count;
6955                 if (!trace_find_next_entry_inc(iter))   {
6956                         rem = 0;
6957                         iter->ent = NULL;
6958                         break;
6959                 }
6960         }
6961
6962         return rem;
6963 }
6964
6965 static ssize_t tracing_splice_read_pipe(struct file *filp,
6966                                         loff_t *ppos,
6967                                         struct pipe_inode_info *pipe,
6968                                         size_t len,
6969                                         unsigned int flags)
6970 {
6971         struct page *pages_def[PIPE_DEF_BUFFERS];
6972         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6973         struct trace_iterator *iter = filp->private_data;
6974         struct splice_pipe_desc spd = {
6975                 .pages          = pages_def,
6976                 .partial        = partial_def,
6977                 .nr_pages       = 0, /* This gets updated below. */
6978                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6979                 .ops            = &default_pipe_buf_ops,
6980                 .spd_release    = tracing_spd_release_pipe,
6981         };
6982         ssize_t ret;
6983         size_t rem;
6984         unsigned int i;
6985
6986         if (splice_grow_spd(pipe, &spd))
6987                 return -ENOMEM;
6988
6989         mutex_lock(&iter->mutex);
6990
6991         if (iter->trace->splice_read) {
6992                 ret = iter->trace->splice_read(iter, filp,
6993                                                ppos, pipe, len, flags);
6994                 if (ret)
6995                         goto out_err;
6996         }
6997
6998         ret = tracing_wait_pipe(filp);
6999         if (ret <= 0)
7000                 goto out_err;
7001
7002         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7003                 ret = -EFAULT;
7004                 goto out_err;
7005         }
7006
7007         trace_event_read_lock();
7008         trace_access_lock(iter->cpu_file);
7009
7010         /* Fill as many pages as possible. */
7011         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7012                 spd.pages[i] = alloc_page(GFP_KERNEL);
7013                 if (!spd.pages[i])
7014                         break;
7015
7016                 rem = tracing_fill_pipe_page(rem, iter);
7017
7018                 /* Copy the data into the page, so we can start over. */
7019                 ret = trace_seq_to_buffer(&iter->seq,
7020                                           page_address(spd.pages[i]),
7021                                           trace_seq_used(&iter->seq));
7022                 if (ret < 0) {
7023                         __free_page(spd.pages[i]);
7024                         break;
7025                 }
7026                 spd.partial[i].offset = 0;
7027                 spd.partial[i].len = trace_seq_used(&iter->seq);
7028
7029                 trace_seq_init(&iter->seq);
7030         }
7031
7032         trace_access_unlock(iter->cpu_file);
7033         trace_event_read_unlock();
7034         mutex_unlock(&iter->mutex);
7035
7036         spd.nr_pages = i;
7037
7038         if (i)
7039                 ret = splice_to_pipe(pipe, &spd);
7040         else
7041                 ret = 0;
7042 out:
7043         splice_shrink_spd(&spd);
7044         return ret;
7045
7046 out_err:
7047         mutex_unlock(&iter->mutex);
7048         goto out;
7049 }
7050
7051 static ssize_t
7052 tracing_entries_read(struct file *filp, char __user *ubuf,
7053                      size_t cnt, loff_t *ppos)
7054 {
7055         struct inode *inode = file_inode(filp);
7056         struct trace_array *tr = inode->i_private;
7057         int cpu = tracing_get_cpu(inode);
7058         char buf[64];
7059         int r = 0;
7060         ssize_t ret;
7061
7062         mutex_lock(&trace_types_lock);
7063
7064         if (cpu == RING_BUFFER_ALL_CPUS) {
7065                 int cpu, buf_size_same;
7066                 unsigned long size;
7067
7068                 size = 0;
7069                 buf_size_same = 1;
7070                 /* check if all cpu sizes are same */
7071                 for_each_tracing_cpu(cpu) {
7072                         /* fill in the size from first enabled cpu */
7073                         if (size == 0)
7074                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7075                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7076                                 buf_size_same = 0;
7077                                 break;
7078                         }
7079                 }
7080
7081                 if (buf_size_same) {
7082                         if (!ring_buffer_expanded)
7083                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7084                                             size >> 10,
7085                                             trace_buf_size >> 10);
7086                         else
7087                                 r = sprintf(buf, "%lu\n", size >> 10);
7088                 } else
7089                         r = sprintf(buf, "X\n");
7090         } else
7091                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7092
7093         mutex_unlock(&trace_types_lock);
7094
7095         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7096         return ret;
7097 }
7098
7099 static ssize_t
7100 tracing_entries_write(struct file *filp, const char __user *ubuf,
7101                       size_t cnt, loff_t *ppos)
7102 {
7103         struct inode *inode = file_inode(filp);
7104         struct trace_array *tr = inode->i_private;
7105         unsigned long val;
7106         int ret;
7107
7108         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7109         if (ret)
7110                 return ret;
7111
7112         /* must have at least 1 entry */
7113         if (!val)
7114                 return -EINVAL;
7115
7116         /* value is in KB */
7117         val <<= 10;
7118         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7119         if (ret < 0)
7120                 return ret;
7121
7122         *ppos += cnt;
7123
7124         return cnt;
7125 }
7126
7127 static ssize_t
7128 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7129                                 size_t cnt, loff_t *ppos)
7130 {
7131         struct trace_array *tr = filp->private_data;
7132         char buf[64];
7133         int r, cpu;
7134         unsigned long size = 0, expanded_size = 0;
7135
7136         mutex_lock(&trace_types_lock);
7137         for_each_tracing_cpu(cpu) {
7138                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7139                 if (!ring_buffer_expanded)
7140                         expanded_size += trace_buf_size >> 10;
7141         }
7142         if (ring_buffer_expanded)
7143                 r = sprintf(buf, "%lu\n", size);
7144         else
7145                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7146         mutex_unlock(&trace_types_lock);
7147
7148         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7149 }
7150
7151 static ssize_t
7152 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7153                           size_t cnt, loff_t *ppos)
7154 {
7155         /*
7156          * There is no need to read what the user has written; this function
7157          * exists just to make sure that there is no error when "echo" is used.
7158          */
7159
7160         *ppos += cnt;
7161
7162         return cnt;
7163 }
7164
7165 static int
7166 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7167 {
7168         struct trace_array *tr = inode->i_private;
7169
7170         /* disable tracing ? */
7171         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7172                 tracer_tracing_off(tr);
7173         /* resize the ring buffer to 0 */
7174         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7175
7176         trace_array_put(tr);
7177
7178         return 0;
7179 }
7180
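/*
 * Write handler for the trace_marker file: injects the user-supplied
 * string into the ring buffer as a TRACE_PRINT event.
 */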
7181 static ssize_t
7182 tracing_mark_write(struct file *filp, const char __user *ubuf,
7183                                         size_t cnt, loff_t *fpos)
7184 {
7185         struct trace_array *tr = filp->private_data;
7186         struct ring_buffer_event *event;
7187         enum event_trigger_type tt = ETT_NONE;
7188         struct trace_buffer *buffer;
7189         struct print_entry *entry;
7190         ssize_t written;
7191         int size;
7192         int len;
7193
7194 /* Used in tracing_mark_raw_write() as well */
7195 #define FAULTED_STR "<faulted>"
7196 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7197
7198         if (tracing_disabled)
7199                 return -EINVAL;
7200
7201         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7202                 return -EINVAL;
7203
7204         if (cnt > TRACE_BUF_SIZE)
7205                 cnt = TRACE_BUF_SIZE;
7206
7207         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7208
7209         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7210
7211         /* If less than "<faulted>", then make sure we can still add that */
7212         if (cnt < FAULTED_SIZE)
7213                 size += FAULTED_SIZE - cnt;
7214
7215         buffer = tr->array_buffer.buffer;
7216         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7217                                             tracing_gen_ctx());
7218         if (unlikely(!event))
7219                 /* Ring buffer disabled, return as if not open for write */
7220                 return -EBADF;
7221
7222         entry = ring_buffer_event_data(event);
7223         entry->ip = _THIS_IP_;
7224
7225         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7226         if (len) {
7227                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7228                 cnt = FAULTED_SIZE;
7229                 written = -EFAULT;
7230         } else
7231                 written = cnt;
7232
7233         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7234                 /* do not add \n before testing triggers, but add \0 */
7235                 entry->buf[cnt] = '\0';
7236                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7237         }
7238
7239         if (entry->buf[cnt - 1] != '\n') {
7240                 entry->buf[cnt] = '\n';
7241                 entry->buf[cnt + 1] = '\0';
7242         } else
7243                 entry->buf[cnt] = '\0';
7244
7245         if (static_branch_unlikely(&trace_marker_exports_enabled))
7246                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7247         __buffer_unlock_commit(buffer, event);
7248
7249         if (tt)
7250                 event_triggers_post_call(tr->trace_marker_file, tt);
7251
7252         return written;
7253 }
7254
7255 /* Limit it for now to 3K (including tag) */
7256 #define RAW_DATA_MAX_SIZE (1024*3)
7257
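/*
 * Write handler for the trace_marker_raw file: the payload must start with
 * a user-defined tag id (int) and is recorded as a TRACE_RAW_DATA event.
 */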
7258 static ssize_t
7259 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7260                                         size_t cnt, loff_t *fpos)
7261 {
7262         struct trace_array *tr = filp->private_data;
7263         struct ring_buffer_event *event;
7264         struct trace_buffer *buffer;
7265         struct raw_data_entry *entry;
7266         ssize_t written;
7267         int size;
7268         int len;
7269
7270 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7271
7272         if (tracing_disabled)
7273                 return -EINVAL;
7274
7275         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7276                 return -EINVAL;
7277
7278         /* The marker must at least have a tag id */
7279         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7280                 return -EINVAL;
7281
7282         if (cnt > TRACE_BUF_SIZE)
7283                 cnt = TRACE_BUF_SIZE;
7284
7285         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7286
7287         size = sizeof(*entry) + cnt;
7288         if (cnt < FAULT_SIZE_ID)
7289                 size += FAULT_SIZE_ID - cnt;
7290
7291         buffer = tr->array_buffer.buffer;
7292         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7293                                             tracing_gen_ctx());
7294         if (!event)
7295                 /* Ring buffer disabled, return as if not open for write */
7296                 return -EBADF;
7297
7298         entry = ring_buffer_event_data(event);
7299
7300         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7301         if (len) {
7302                 entry->id = -1;
7303                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7304                 written = -EFAULT;
7305         } else
7306                 written = cnt;
7307
7308         __buffer_unlock_commit(buffer, event);
7309
7310         return written;
7311 }
7312
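/* List the available trace clocks, with the current one in brackets. */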
7313 static int tracing_clock_show(struct seq_file *m, void *v)
7314 {
7315         struct trace_array *tr = m->private;
7316         int i;
7317
7318         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7319                 seq_printf(m,
7320                         "%s%s%s%s", i ? " " : "",
7321                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7322                         i == tr->clock_id ? "]" : "");
7323         seq_putc(m, '\n');
7324
7325         return 0;
7326 }
7327
7328 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7329 {
7330         int i;
7331
7332         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7333                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7334                         break;
7335         }
7336         if (i == ARRAY_SIZE(trace_clocks))
7337                 return -EINVAL;
7338
7339         mutex_lock(&trace_types_lock);
7340
7341         tr->clock_id = i;
7342
7343         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7344
7345         /*
7346          * New clock may not be consistent with the previous clock.
7347          * Reset the buffer so that it doesn't have incomparable timestamps.
7348          */
7349         tracing_reset_online_cpus(&tr->array_buffer);
7350
7351 #ifdef CONFIG_TRACER_MAX_TRACE
7352         if (tr->max_buffer.buffer)
7353                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7354         tracing_reset_online_cpus(&tr->max_buffer);
7355 #endif
7356
7357         mutex_unlock(&trace_types_lock);
7358
7359         return 0;
7360 }
7361
7362 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7363                                    size_t cnt, loff_t *fpos)
7364 {
7365         struct seq_file *m = filp->private_data;
7366         struct trace_array *tr = m->private;
7367         char buf[64];
7368         const char *clockstr;
7369         int ret;
7370
7371         if (cnt >= sizeof(buf))
7372                 return -EINVAL;
7373
7374         if (copy_from_user(buf, ubuf, cnt))
7375                 return -EFAULT;
7376
7377         buf[cnt] = 0;
7378
7379         clockstr = strstrip(buf);
7380
7381         ret = tracing_set_clock(tr, clockstr);
7382         if (ret)
7383                 return ret;
7384
7385         *fpos += cnt;
7386
7387         return cnt;
7388 }
7389
7390 static int tracing_clock_open(struct inode *inode, struct file *file)
7391 {
7392         struct trace_array *tr = inode->i_private;
7393         int ret;
7394
7395         ret = tracing_check_open_get_tr(tr);
7396         if (ret)
7397                 return ret;
7398
7399         ret = single_open(file, tracing_clock_show, inode->i_private);
7400         if (ret < 0)
7401                 trace_array_put(tr);
7402
7403         return ret;
7404 }
7405
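/* Show the timestamp mode ("delta" or "absolute"), current one in brackets. */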
7406 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7407 {
7408         struct trace_array *tr = m->private;
7409
7410         mutex_lock(&trace_types_lock);
7411
7412         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7413                 seq_puts(m, "delta [absolute]\n");
7414         else
7415                 seq_puts(m, "[delta] absolute\n");
7416
7417         mutex_unlock(&trace_types_lock);
7418
7419         return 0;
7420 }
7421
7422 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7423 {
7424         struct trace_array *tr = inode->i_private;
7425         int ret;
7426
7427         ret = tracing_check_open_get_tr(tr);
7428         if (ret)
7429                 return ret;
7430
7431         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7432         if (ret < 0)
7433                 trace_array_put(tr);
7434
7435         return ret;
7436 }
7437
7438 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7439 {
7440         if (rbe == this_cpu_read(trace_buffered_event))
7441                 return ring_buffer_time_stamp(buffer);
7442
7443         return ring_buffer_event_time_stamp(buffer, rbe);
7444 }
7445
7446 /*
7447  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7448  */
7449 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7450 {
7451         int ret = 0;
7452
7453         mutex_lock(&trace_types_lock);
7454
7455         if (set && tr->no_filter_buffering_ref++)
7456                 goto out;
7457
7458         if (!set) {
7459                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7460                         ret = -EINVAL;
7461                         goto out;
7462                 }
7463
7464                 --tr->no_filter_buffering_ref;
7465         }
7466  out:
7467         mutex_unlock(&trace_types_lock);
7468
7469         return ret;
7470 }
7471
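/* Per-open state for the raw ring-buffer reading files (see snapshot_raw_open()). */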
7472 struct ftrace_buffer_info {
7473         struct trace_iterator   iter;
7474         void                    *spare;
7475         unsigned int            spare_cpu;
7476         unsigned int            read;
7477 };
7478
7479 #ifdef CONFIG_TRACER_SNAPSHOT
7480 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7481 {
7482         struct trace_array *tr = inode->i_private;
7483         struct trace_iterator *iter;
7484         struct seq_file *m;
7485         int ret;
7486
7487         ret = tracing_check_open_get_tr(tr);
7488         if (ret)
7489                 return ret;
7490
7491         if (file->f_mode & FMODE_READ) {
7492                 iter = __tracing_open(inode, file, true);
7493                 if (IS_ERR(iter))
7494                         ret = PTR_ERR(iter);
7495         } else {
7496                 /* Writes still need the seq_file to hold the private data */
7497                 ret = -ENOMEM;
7498                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7499                 if (!m)
7500                         goto out;
7501                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7502                 if (!iter) {
7503                         kfree(m);
7504                         goto out;
7505                 }
7506                 ret = 0;
7507
7508                 iter->tr = tr;
7509                 iter->array_buffer = &tr->max_buffer;
7510                 iter->cpu_file = tracing_get_cpu(inode);
7511                 m->private = iter;
7512                 file->private_data = m;
7513         }
7514 out:
7515         if (ret < 0)
7516                 trace_array_put(tr);
7517
7518         return ret;
7519 }
7520
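/* Runs on the target CPU via smp_call_function_single() to swap that CPU's buffer. */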
7521 static void tracing_swap_cpu_buffer(void *tr)
7522 {
7523         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7524 }
7525
7526 static ssize_t
7527 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7528                        loff_t *ppos)
7529 {
7530         struct seq_file *m = filp->private_data;
7531         struct trace_iterator *iter = m->private;
7532         struct trace_array *tr = iter->tr;
7533         unsigned long val;
7534         int ret;
7535
7536         ret = tracing_update_buffers();
7537         if (ret < 0)
7538                 return ret;
7539
7540         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7541         if (ret)
7542                 return ret;
7543
7544         mutex_lock(&trace_types_lock);
7545
7546         if (tr->current_trace->use_max_tr) {
7547                 ret = -EBUSY;
7548                 goto out;
7549         }
7550
7551         local_irq_disable();
7552         arch_spin_lock(&tr->max_lock);
7553         if (tr->cond_snapshot)
7554                 ret = -EBUSY;
7555         arch_spin_unlock(&tr->max_lock);
7556         local_irq_enable();
7557         if (ret)
7558                 goto out;
7559
7560         switch (val) {
7561         case 0:
7562                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7563                         ret = -EINVAL;
7564                         break;
7565                 }
7566                 if (tr->allocated_snapshot)
7567                         free_snapshot(tr);
7568                 break;
7569         case 1:
7570 /* Only allow per-cpu swap if the ring buffer supports it */
7571 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7572                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7573                         ret = -EINVAL;
7574                         break;
7575                 }
7576 #endif
7577                 if (tr->allocated_snapshot)
7578                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7579                                         &tr->array_buffer, iter->cpu_file);
7580                 else
7581                         ret = tracing_alloc_snapshot_instance(tr);
7582                 if (ret < 0)
7583                         break;
7584                 /* Now, we're going to swap */
7585                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7586                         local_irq_disable();
7587                         update_max_tr(tr, current, smp_processor_id(), NULL);
7588                         local_irq_enable();
7589                 } else {
7590                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7591                                                  (void *)tr, 1);
7592                 }
7593                 break;
7594         default:
7595                 if (tr->allocated_snapshot) {
7596                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7597                                 tracing_reset_online_cpus(&tr->max_buffer);
7598                         else
7599                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7600                 }
7601                 break;
7602         }
7603
7604         if (ret >= 0) {
7605                 *ppos += cnt;
7606                 ret = cnt;
7607         }
7608 out:
7609         mutex_unlock(&trace_types_lock);
7610         return ret;
7611 }
7612
7613 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7614 {
7615         struct seq_file *m = file->private_data;
7616         int ret;
7617
7618         ret = tracing_release(inode, file);
7619
7620         if (file->f_mode & FMODE_READ)
7621                 return ret;
7622
7623         /* If write only, the seq_file is just a stub */
7624         if (m)
7625                 kfree(m->private);
7626         kfree(m);
7627
7628         return 0;
7629 }
7630
7631 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7632 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7633                                     size_t count, loff_t *ppos);
7634 static int tracing_buffers_release(struct inode *inode, struct file *file);
7635 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7636                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7637
7638 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7639 {
7640         struct ftrace_buffer_info *info;
7641         int ret;
7642
7643         /* The following checks for tracefs lockdown */
7644         ret = tracing_buffers_open(inode, filp);
7645         if (ret < 0)
7646                 return ret;
7647
7648         info = filp->private_data;
7649
7650         if (info->iter.trace->use_max_tr) {
7651                 tracing_buffers_release(inode, filp);
7652                 return -EBUSY;
7653         }
7654
7655         info->iter.snapshot = true;
7656         info->iter.array_buffer = &info->iter.tr->max_buffer;
7657
7658         return ret;
7659 }
7660
7661 #endif /* CONFIG_TRACER_SNAPSHOT */
7662
7663
7664 static const struct file_operations tracing_thresh_fops = {
7665         .open           = tracing_open_generic,
7666         .read           = tracing_thresh_read,
7667         .write          = tracing_thresh_write,
7668         .llseek         = generic_file_llseek,
7669 };
7670
7671 #ifdef CONFIG_TRACER_MAX_TRACE
7672 static const struct file_operations tracing_max_lat_fops = {
7673         .open           = tracing_open_generic_tr,
7674         .read           = tracing_max_lat_read,
7675         .write          = tracing_max_lat_write,
7676         .llseek         = generic_file_llseek,
7677         .release        = tracing_release_generic_tr,
7678 };
7679 #endif
7680
7681 static const struct file_operations set_tracer_fops = {
7682         .open           = tracing_open_generic_tr,
7683         .read           = tracing_set_trace_read,
7684         .write          = tracing_set_trace_write,
7685         .llseek         = generic_file_llseek,
7686         .release        = tracing_release_generic_tr,
7687 };
7688
7689 static const struct file_operations tracing_pipe_fops = {
7690         .open           = tracing_open_pipe,
7691         .poll           = tracing_poll_pipe,
7692         .read           = tracing_read_pipe,
7693         .splice_read    = tracing_splice_read_pipe,
7694         .release        = tracing_release_pipe,
7695         .llseek         = no_llseek,
7696 };
7697
7698 static const struct file_operations tracing_entries_fops = {
7699         .open           = tracing_open_generic_tr,
7700         .read           = tracing_entries_read,
7701         .write          = tracing_entries_write,
7702         .llseek         = generic_file_llseek,
7703         .release        = tracing_release_generic_tr,
7704 };
7705
7706 static const struct file_operations tracing_total_entries_fops = {
7707         .open           = tracing_open_generic_tr,
7708         .read           = tracing_total_entries_read,
7709         .llseek         = generic_file_llseek,
7710         .release        = tracing_release_generic_tr,
7711 };
7712
7713 static const struct file_operations tracing_free_buffer_fops = {
7714         .open           = tracing_open_generic_tr,
7715         .write          = tracing_free_buffer_write,
7716         .release        = tracing_free_buffer_release,
7717 };
7718
7719 static const struct file_operations tracing_mark_fops = {
7720         .open           = tracing_mark_open,
7721         .write          = tracing_mark_write,
7722         .release        = tracing_release_generic_tr,
7723 };
7724
7725 static const struct file_operations tracing_mark_raw_fops = {
7726         .open           = tracing_mark_open,
7727         .write          = tracing_mark_raw_write,
7728         .release        = tracing_release_generic_tr,
7729 };
7730
7731 static const struct file_operations trace_clock_fops = {
7732         .open           = tracing_clock_open,
7733         .read           = seq_read,
7734         .llseek         = seq_lseek,
7735         .release        = tracing_single_release_tr,
7736         .write          = tracing_clock_write,
7737 };
7738
7739 static const struct file_operations trace_time_stamp_mode_fops = {
7740         .open           = tracing_time_stamp_mode_open,
7741         .read           = seq_read,
7742         .llseek         = seq_lseek,
7743         .release        = tracing_single_release_tr,
7744 };
7745
7746 #ifdef CONFIG_TRACER_SNAPSHOT
7747 static const struct file_operations snapshot_fops = {
7748         .open           = tracing_snapshot_open,
7749         .read           = seq_read,
7750         .write          = tracing_snapshot_write,
7751         .llseek         = tracing_lseek,
7752         .release        = tracing_snapshot_release,
7753 };
7754
7755 static const struct file_operations snapshot_raw_fops = {
7756         .open           = snapshot_raw_open,
7757         .read           = tracing_buffers_read,
7758         .release        = tracing_buffers_release,
7759         .splice_read    = tracing_buffers_splice_read,
7760         .llseek         = no_llseek,
7761 };
7762
7763 #endif /* CONFIG_TRACER_SNAPSHOT */
7764
7765 /*
7766  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7767  * @filp: The active open file structure
7768  * @ubuf: The userspace provided buffer holding the value to write
7769  * @cnt: The maximum number of bytes to read
7770  * @ppos: The current "file" position
7771  *
7772  * This function implements the write interface for a struct trace_min_max_param.
7773  * The filp->private_data must point to a trace_min_max_param structure that
7774  * defines where to write the value, the min and the max acceptable values,
7775  * and a lock to protect the write.
7776  */
7777 static ssize_t
7778 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7779 {
7780         struct trace_min_max_param *param = filp->private_data;
7781         u64 val;
7782         int err;
7783
7784         if (!param)
7785                 return -EFAULT;
7786
7787         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7788         if (err)
7789                 return err;
7790
7791         if (param->lock)
7792                 mutex_lock(param->lock);
7793
7794         if (param->min && val < *param->min)
7795                 err = -EINVAL;
7796
7797         if (param->max && val > *param->max)
7798                 err = -EINVAL;
7799
7800         if (!err)
7801                 *param->val = val;
7802
7803         if (param->lock)
7804                 mutex_unlock(param->lock);
7805
7806         if (err)
7807                 return err;
7808
7809         return cnt;
7810 }
7811
7812 /*
7813  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7814  * @filp: The active open file structure
7815  * @ubuf: The userspace provided buffer to read value into
7816  * @cnt: The maximum number of bytes to read
7817  * @ppos: The current "file" position
7818  *
7819  * This function implements the read interface for a struct trace_min_max_param.
7820  * The filp->private_data must point to a trace_min_max_param struct with valid
7821  * data.
7822  */
7823 static ssize_t
7824 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7825 {
7826         struct trace_min_max_param *param = filp->private_data;
7827         char buf[U64_STR_SIZE];
7828         int len;
7829         u64 val;
7830
7831         if (!param)
7832                 return -EFAULT;
7833
7834         val = *param->val;
7835
7836         if (cnt > sizeof(buf))
7837                 cnt = sizeof(buf);
7838
7839         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7840
7841         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7842 }
7843
7844 const struct file_operations trace_min_max_fops = {
7845         .open           = tracing_open_generic,
7846         .read           = trace_min_max_read,
7847         .write          = trace_min_max_write,
7848 };
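
/*
 * A minimal wiring sketch (illustrative only; my_lock, my_val, my_min
 * and my_max are hypothetical caller-owned variables): a user of this
 * interface exposes a bounded u64 by pointing a trace_min_max_param
 * at its own storage and creating a tracefs file with the fops above:
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_value", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */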
7849
7850 #define TRACING_LOG_ERRS_MAX    8
7851 #define TRACING_LOG_LOC_MAX     128
7852
7853 #define CMD_PREFIX "  Command: "
7854
7855 struct err_info {
7856         const char      **errs; /* ptr to loc-specific array of err strings */
7857         u8              type;   /* index into errs -> specific err string */
7858         u16             pos;    /* caret position */
7859         u64             ts;
7860 };
7861
7862 struct tracing_log_err {
7863         struct list_head        list;
7864         struct err_info         info;
7865         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7866         char                    *cmd;                     /* what caused err */
7867 };
7868
7869 static DEFINE_MUTEX(tracing_err_log_lock);
7870
7871 static struct tracing_log_err *alloc_tracing_log_err(int len)
7872 {
7873         struct tracing_log_err *err;
7874
7875         err = kzalloc(sizeof(*err), GFP_KERNEL);
7876         if (!err)
7877                 return ERR_PTR(-ENOMEM);
7878
7879         err->cmd = kzalloc(len, GFP_KERNEL);
7880         if (!err->cmd) {
7881                 kfree(err);
7882                 return ERR_PTR(-ENOMEM);
7883         }
7884
7885         return err;
7886 }
7887
7888 static void free_tracing_log_err(struct tracing_log_err *err)
7889 {
7890         kfree(err->cmd);
7891         kfree(err);
7892 }
7893
7894 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7895                                                    int len)
7896 {
7897         struct tracing_log_err *err;
7898         char *cmd;
7899
7900         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7901                 err = alloc_tracing_log_err(len);
7902                 if (PTR_ERR(err) != -ENOMEM)
7903                         tr->n_err_log_entries++;
7904
7905                 return err;
7906         }
7907         cmd = kzalloc(len, GFP_KERNEL);
7908         if (!cmd)
7909                 return ERR_PTR(-ENOMEM);
7910         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7911         kfree(err->cmd);
7912         err->cmd = cmd;
7913         list_del(&err->list);
7914
7915         return err;
7916 }
7917
7918 /**
7919  * err_pos - find the position of a string within a command for error careting
7920  * @cmd: The tracing command that caused the error
7921  * @str: The string to position the caret at within @cmd
7922  *
7923  * Finds the position of the first occurrence of @str within @cmd.  The
7924  * return value can be passed to tracing_log_err() for caret placement
7925  * within @cmd.
7926  *
7927  * Returns the index within @cmd of the first occurrence of @str or 0
7928  * if @str was not found.
7929  */
7930 unsigned int err_pos(char *cmd, const char *str)
7931 {
7932         char *found;
7933
7934         if (WARN_ON(!strlen(cmd)))
7935                 return 0;
7936
7937         found = strstr(cmd, str);
7938         if (found)
7939                 return found - cmd;
7940
7941         return 0;
7942 }
7943
7944 /**
7945  * tracing_log_err - write an error to the tracing error log
7946  * @tr: The associated trace array for the error (NULL for top level array)
7947  * @loc: A string describing where the error occurred
7948  * @cmd: The tracing command that caused the error
7949  * @errs: The array of loc-specific static error strings
7950  * @type: The index into errs[], which produces the specific static err string
7951  * @pos: The position the caret should be placed in the cmd
7952  *
7953  * Writes an error into tracing/error_log of the form:
7954  *
7955  * <loc>: error: <text>
7956  *   Command: <cmd>
7957  *              ^
7958  *
7959  * tracing/error_log is a small log file containing the last
7960  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7961  * unless there has been a tracing error, and the error log can be
7962  * cleared and have its memory freed by writing the empty string in
7963  * truncation mode to it, i.e. echo > tracing/error_log.
7964  *
7965  * NOTE: the @errs array along with the @type param are used to
7966  * produce a static error string - this string is not copied and saved
7967  * when the error is logged - only a pointer to it is saved.  See
7968  * existing callers for examples of how static strings are typically
7969  * defined for use with tracing_log_err().
7970  */
7971 void tracing_log_err(struct trace_array *tr,
7972                      const char *loc, const char *cmd,
7973                      const char **errs, u8 type, u16 pos)
7974 {
7975         struct tracing_log_err *err;
7976         int len = 0;
7977
7978         if (!tr)
7979                 tr = &global_trace;
7980
7981         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7982
7983         mutex_lock(&tracing_err_log_lock);
7984         err = get_tracing_log_err(tr, len);
7985         if (PTR_ERR(err) == -ENOMEM) {
7986                 mutex_unlock(&tracing_err_log_lock);
7987                 return;
7988         }
7989
7990         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7991         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7992
7993         err->info.errs = errs;
7994         err->info.type = type;
7995         err->info.pos = pos;
7996         err->info.ts = local_clock();
7997
7998         list_add_tail(&err->list, &tr->err_log);
7999         mutex_unlock(&tracing_err_log_lock);
8000 }
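
/*
 * A minimal usage sketch (illustrative; my_cmd_errs, the index 1 and
 * field_name are hypothetical): callers keep a static, loc-specific
 * array of error strings and typically use err_pos() to place the
 * caret under the offending part of the command:
 *
 *	static const char *my_cmd_errs[] = {
 *		"Duplicate field",
 *		"Bad field name",
 *	};
 *
 *	tracing_log_err(tr, "my_cmd: parse", cmd, my_cmd_errs,
 *			1, err_pos(cmd, field_name));
 */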
8001
8002 static void clear_tracing_err_log(struct trace_array *tr)
8003 {
8004         struct tracing_log_err *err, *next;
8005
8006         mutex_lock(&tracing_err_log_lock);
8007         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8008                 list_del(&err->list);
8009                 free_tracing_log_err(err);
8010         }
8011
8012         tr->n_err_log_entries = 0;
8013         mutex_unlock(&tracing_err_log_lock);
8014 }
8015
8016 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8017 {
8018         struct trace_array *tr = m->private;
8019
8020         mutex_lock(&tracing_err_log_lock);
8021
8022         return seq_list_start(&tr->err_log, *pos);
8023 }
8024
8025 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8026 {
8027         struct trace_array *tr = m->private;
8028
8029         return seq_list_next(v, &tr->err_log, pos);
8030 }
8031
8032 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8033 {
8034         mutex_unlock(&tracing_err_log_lock);
8035 }
8036
8037 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8038 {
8039         u16 i;
8040
8041         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8042                 seq_putc(m, ' ');
8043         for (i = 0; i < pos; i++)
8044                 seq_putc(m, ' ');
8045         seq_puts(m, "^\n");
8046 }
8047
8048 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8049 {
8050         struct tracing_log_err *err = v;
8051
8052         if (err) {
8053                 const char *err_text = err->info.errs[err->info.type];
8054                 u64 sec = err->info.ts;
8055                 u32 nsec;
8056
8057                 nsec = do_div(sec, NSEC_PER_SEC);
8058                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8059                            err->loc, err_text);
8060                 seq_printf(m, "%s", err->cmd);
8061                 tracing_err_log_show_pos(m, err->info.pos);
8062         }
8063
8064         return 0;
8065 }
8066
8067 static const struct seq_operations tracing_err_log_seq_ops = {
8068         .start  = tracing_err_log_seq_start,
8069         .next   = tracing_err_log_seq_next,
8070         .stop   = tracing_err_log_seq_stop,
8071         .show   = tracing_err_log_seq_show
8072 };
8073
8074 static int tracing_err_log_open(struct inode *inode, struct file *file)
8075 {
8076         struct trace_array *tr = inode->i_private;
8077         int ret = 0;
8078
8079         ret = tracing_check_open_get_tr(tr);
8080         if (ret)
8081                 return ret;
8082
8083         /* If this file was opened for write, then erase contents */
8084         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8085                 clear_tracing_err_log(tr);
8086
8087         if (file->f_mode & FMODE_READ) {
8088                 ret = seq_open(file, &tracing_err_log_seq_ops);
8089                 if (!ret) {
8090                         struct seq_file *m = file->private_data;
8091                         m->private = tr;
8092                 } else {
8093                         trace_array_put(tr);
8094                 }
8095         }
8096         return ret;
8097 }
8098
8099 static ssize_t tracing_err_log_write(struct file *file,
8100                                      const char __user *buffer,
8101                                      size_t count, loff_t *ppos)
8102 {
8103         return count;
8104 }
8105
8106 static int tracing_err_log_release(struct inode *inode, struct file *file)
8107 {
8108         struct trace_array *tr = inode->i_private;
8109
8110         trace_array_put(tr);
8111
8112         if (file->f_mode & FMODE_READ)
8113                 seq_release(inode, file);
8114
8115         return 0;
8116 }
8117
8118 static const struct file_operations tracing_err_log_fops = {
8119         .open           = tracing_err_log_open,
8120         .write          = tracing_err_log_write,
8121         .read           = seq_read,
8122         .llseek         = tracing_lseek,
8123         .release        = tracing_err_log_release,
8124 };
8125
8126 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8127 {
8128         struct trace_array *tr = inode->i_private;
8129         struct ftrace_buffer_info *info;
8130         int ret;
8131
8132         ret = tracing_check_open_get_tr(tr);
8133         if (ret)
8134                 return ret;
8135
8136         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8137         if (!info) {
8138                 trace_array_put(tr);
8139                 return -ENOMEM;
8140         }
8141
8142         mutex_lock(&trace_types_lock);
8143
8144         info->iter.tr           = tr;
8145         info->iter.cpu_file     = tracing_get_cpu(inode);
8146         info->iter.trace        = tr->current_trace;
8147         info->iter.array_buffer = &tr->array_buffer;
8148         info->spare             = NULL;
8149         /* Force reading ring buffer for first read */
8150         info->read              = (unsigned int)-1;
8151
8152         filp->private_data = info;
8153
8154         tr->trace_ref++;
8155
8156         mutex_unlock(&trace_types_lock);
8157
8158         ret = nonseekable_open(inode, filp);
8159         if (ret < 0)
8160                 trace_array_put(tr);
8161
8162         return ret;
8163 }
8164
8165 static __poll_t
8166 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8167 {
8168         struct ftrace_buffer_info *info = filp->private_data;
8169         struct trace_iterator *iter = &info->iter;
8170
8171         return trace_poll(iter, filp, poll_table);
8172 }
8173
8174 static ssize_t
8175 tracing_buffers_read(struct file *filp, char __user *ubuf,
8176                      size_t count, loff_t *ppos)
8177 {
8178         struct ftrace_buffer_info *info = filp->private_data;
8179         struct trace_iterator *iter = &info->iter;
8180         ssize_t ret = 0;
8181         ssize_t size;
8182
8183         if (!count)
8184                 return 0;
8185
8186 #ifdef CONFIG_TRACER_MAX_TRACE
8187         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8188                 return -EBUSY;
8189 #endif
8190
8191         if (!info->spare) {
8192                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8193                                                           iter->cpu_file);
8194                 if (IS_ERR(info->spare)) {
8195                         ret = PTR_ERR(info->spare);
8196                         info->spare = NULL;
8197                 } else {
8198                         info->spare_cpu = iter->cpu_file;
8199                 }
8200         }
8201         if (!info->spare)
8202                 return ret;
8203
8204         /* Do we have previous read data to read? */
8205         if (info->read < PAGE_SIZE)
8206                 goto read;
8207
8208  again:
8209         trace_access_lock(iter->cpu_file);
8210         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8211                                     &info->spare,
8212                                     count,
8213                                     iter->cpu_file, 0);
8214         trace_access_unlock(iter->cpu_file);
8215
8216         if (ret < 0) {
8217                 if (trace_empty(iter)) {
8218                         if ((filp->f_flags & O_NONBLOCK))
8219                                 return -EAGAIN;
8220
8221                         ret = wait_on_pipe(iter, 0);
8222                         if (ret)
8223                                 return ret;
8224
8225                         goto again;
8226                 }
8227                 return 0;
8228         }
8229
8230         info->read = 0;
8231  read:
8232         size = PAGE_SIZE - info->read;
8233         if (size > count)
8234                 size = count;
8235
8236         ret = copy_to_user(ubuf, info->spare + info->read, size);
8237         if (ret == size)
8238                 return -EFAULT;
8239
8240         size -= ret;
8241
8242         *ppos += size;
8243         info->read += size;
8244
8245         return size;
8246 }
8247
8248 static int tracing_buffers_release(struct inode *inode, struct file *file)
8249 {
8250         struct ftrace_buffer_info *info = file->private_data;
8251         struct trace_iterator *iter = &info->iter;
8252
8253         mutex_lock(&trace_types_lock);
8254
8255         iter->tr->trace_ref--;
8256
8257         __trace_array_put(iter->tr);
8258
8259         iter->wait_index++;
8260         /* Make sure the waiters see the new wait_index */
8261         smp_wmb();
8262
8263         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8264
8265         if (info->spare)
8266                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8267                                            info->spare_cpu, info->spare);
8268         kvfree(info);
8269
8270         mutex_unlock(&trace_types_lock);
8271
8272         return 0;
8273 }
8274
8275 struct buffer_ref {
8276         struct trace_buffer     *buffer;
8277         void                    *page;
8278         int                     cpu;
8279         refcount_t              refcount;
8280 };
8281
8282 static void buffer_ref_release(struct buffer_ref *ref)
8283 {
8284         if (!refcount_dec_and_test(&ref->refcount))
8285                 return;
8286         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8287         kfree(ref);
8288 }
8289
8290 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8291                                     struct pipe_buffer *buf)
8292 {
8293         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8294
8295         buffer_ref_release(ref);
8296         buf->private = 0;
8297 }
8298
8299 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8300                                 struct pipe_buffer *buf)
8301 {
8302         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8303
8304         if (refcount_read(&ref->refcount) > INT_MAX/2)
8305                 return false;
8306
8307         refcount_inc(&ref->refcount);
8308         return true;
8309 }
8310
8311 /* Pipe buffer operations for a buffer. */
8312 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8313         .release                = buffer_pipe_buf_release,
8314         .get                    = buffer_pipe_buf_get,
8315 };
8316
8317 /*
8318  * Callback from splice_to_pipe(), if we need to release some pages
8319  * at the end of the spd in case we errored out while filling the pipe.
8320  */
8321 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8322 {
8323         struct buffer_ref *ref =
8324                 (struct buffer_ref *)spd->partial[i].private;
8325
8326         buffer_ref_release(ref);
8327         spd->partial[i].private = 0;
8328 }
8329
8330 static ssize_t
8331 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8332                             struct pipe_inode_info *pipe, size_t len,
8333                             unsigned int flags)
8334 {
8335         struct ftrace_buffer_info *info = file->private_data;
8336         struct trace_iterator *iter = &info->iter;
8337         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8338         struct page *pages_def[PIPE_DEF_BUFFERS];
8339         struct splice_pipe_desc spd = {
8340                 .pages          = pages_def,
8341                 .partial        = partial_def,
8342                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8343                 .ops            = &buffer_pipe_buf_ops,
8344                 .spd_release    = buffer_spd_release,
8345         };
8346         struct buffer_ref *ref;
8347         int entries, i;
8348         ssize_t ret = 0;
8349
8350 #ifdef CONFIG_TRACER_MAX_TRACE
8351         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8352                 return -EBUSY;
8353 #endif
8354
8355         if (*ppos & (PAGE_SIZE - 1))
8356                 return -EINVAL;
8357
8358         if (len & (PAGE_SIZE - 1)) {
8359                 if (len < PAGE_SIZE)
8360                         return -EINVAL;
8361                 len &= PAGE_MASK;
8362         }
8363
8364         if (splice_grow_spd(pipe, &spd))
8365                 return -ENOMEM;
8366
8367  again:
8368         trace_access_lock(iter->cpu_file);
8369         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8370
8371         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8372                 struct page *page;
8373                 int r;
8374
8375                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8376                 if (!ref) {
8377                         ret = -ENOMEM;
8378                         break;
8379                 }
8380
8381                 refcount_set(&ref->refcount, 1);
8382                 ref->buffer = iter->array_buffer->buffer;
8383                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8384                 if (IS_ERR(ref->page)) {
8385                         ret = PTR_ERR(ref->page);
8386                         ref->page = NULL;
8387                         kfree(ref);
8388                         break;
8389                 }
8390                 ref->cpu = iter->cpu_file;
8391
8392                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8393                                           len, iter->cpu_file, 1);
8394                 if (r < 0) {
8395                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8396                                                    ref->page);
8397                         kfree(ref);
8398                         break;
8399                 }
8400
8401                 page = virt_to_page(ref->page);
8402
8403                 spd.pages[i] = page;
8404                 spd.partial[i].len = PAGE_SIZE;
8405                 spd.partial[i].offset = 0;
8406                 spd.partial[i].private = (unsigned long)ref;
8407                 spd.nr_pages++;
8408                 *ppos += PAGE_SIZE;
8409
8410                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8411         }
8412
8413         trace_access_unlock(iter->cpu_file);
8414         spd.nr_pages = i;
8415
8416         /* did we read anything? */
8417         if (!spd.nr_pages) {
8418                 long wait_index;
8419
8420                 if (ret)
8421                         goto out;
8422
8423                 ret = -EAGAIN;
8424                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8425                         goto out;
8426
8427                 wait_index = READ_ONCE(iter->wait_index);
8428
8429                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8430                 if (ret)
8431                         goto out;
8432
8433                 /* No need to wait after waking up when tracing is off */
8434                 if (!tracer_tracing_is_on(iter->tr))
8435                         goto out;
8436
8437                 /* Make sure we see the new wait_index */
8438                 smp_rmb();
8439                 if (wait_index != iter->wait_index)
8440                         goto out;
8441
8442                 goto again;
8443         }
8444
8445         ret = splice_to_pipe(pipe, &spd);
8446 out:
8447         splice_shrink_spd(&spd);
8448
8449         return ret;
8450 }
8451
8452 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8453 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8454 {
8455         struct ftrace_buffer_info *info = file->private_data;
8456         struct trace_iterator *iter = &info->iter;
8457
8458         if (cmd)
8459                 return -ENOIOCTLCMD;
8460
8461         mutex_lock(&trace_types_lock);
8462
8463         iter->wait_index++;
8464         /* Make sure the waiters see the new wait_index */
8465         smp_wmb();
8466
8467         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8468
8469         mutex_unlock(&trace_types_lock);
8470         return 0;
8471 }
8472
8473 static const struct file_operations tracing_buffers_fops = {
8474         .open           = tracing_buffers_open,
8475         .read           = tracing_buffers_read,
8476         .poll           = tracing_buffers_poll,
8477         .release        = tracing_buffers_release,
8478         .splice_read    = tracing_buffers_splice_read,
8479         .unlocked_ioctl = tracing_buffers_ioctl,
8480         .llseek         = no_llseek,
8481 };
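
/*
 * Illustrative user-space sketch (not part of this file): a thread can
 * kick readers that are blocked in read()/splice() on a buffer file
 * such as per_cpu/cpuN/trace_pipe_raw by issuing the wake-up ioctl on
 * the same file descriptor; the command is 0 and the argument is
 * ignored:
 *
 *	ioctl(fd, 0, 0);
 */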
8482
8483 static ssize_t
8484 tracing_stats_read(struct file *filp, char __user *ubuf,
8485                    size_t count, loff_t *ppos)
8486 {
8487         struct inode *inode = file_inode(filp);
8488         struct trace_array *tr = inode->i_private;
8489         struct array_buffer *trace_buf = &tr->array_buffer;
8490         int cpu = tracing_get_cpu(inode);
8491         struct trace_seq *s;
8492         unsigned long cnt;
8493         unsigned long long t;
8494         unsigned long usec_rem;
8495
8496         s = kmalloc(sizeof(*s), GFP_KERNEL);
8497         if (!s)
8498                 return -ENOMEM;
8499
8500         trace_seq_init(s);
8501
8502         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8503         trace_seq_printf(s, "entries: %ld\n", cnt);
8504
8505         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8506         trace_seq_printf(s, "overrun: %ld\n", cnt);
8507
8508         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8509         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8510
8511         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8512         trace_seq_printf(s, "bytes: %ld\n", cnt);
8513
8514         if (trace_clocks[tr->clock_id].in_ns) {
8515                 /* local or global for trace_clock */
8516                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8517                 usec_rem = do_div(t, USEC_PER_SEC);
8518                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8519                                                                 t, usec_rem);
8520
8521                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8522                 usec_rem = do_div(t, USEC_PER_SEC);
8523                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8524         } else {
8525                 /* counter or tsc mode for trace_clock */
8526                 trace_seq_printf(s, "oldest event ts: %llu\n",
8527                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8528
8529                 trace_seq_printf(s, "now ts: %llu\n",
8530                                 ring_buffer_time_stamp(trace_buf->buffer));
8531         }
8532
8533         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8534         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8535
8536         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8537         trace_seq_printf(s, "read events: %ld\n", cnt);
8538
8539         count = simple_read_from_buffer(ubuf, count, ppos,
8540                                         s->buffer, trace_seq_used(s));
8541
8542         kfree(s);
8543
8544         return count;
8545 }
8546
8547 static const struct file_operations tracing_stats_fops = {
8548         .open           = tracing_open_generic_tr,
8549         .read           = tracing_stats_read,
8550         .llseek         = generic_file_llseek,
8551         .release        = tracing_release_generic_tr,
8552 };
8553
8554 #ifdef CONFIG_DYNAMIC_FTRACE
8555
8556 static ssize_t
8557 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8558                   size_t cnt, loff_t *ppos)
8559 {
8560         ssize_t ret;
8561         char *buf;
8562         int r;
8563
8564         /* 256 should be plenty to hold the amount needed */
8565         buf = kmalloc(256, GFP_KERNEL);
8566         if (!buf)
8567                 return -ENOMEM;
8568
8569         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8570                       ftrace_update_tot_cnt,
8571                       ftrace_number_of_pages,
8572                       ftrace_number_of_groups);
8573
8574         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8575         kfree(buf);
8576         return ret;
8577 }
8578
8579 static const struct file_operations tracing_dyn_info_fops = {
8580         .open           = tracing_open_generic,
8581         .read           = tracing_read_dyn_info,
8582         .llseek         = generic_file_llseek,
8583 };
8584 #endif /* CONFIG_DYNAMIC_FTRACE */
8585
8586 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8587 static void
8588 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8589                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8590                 void *data)
8591 {
8592         tracing_snapshot_instance(tr);
8593 }
8594
8595 static void
8596 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8597                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8598                       void *data)
8599 {
8600         struct ftrace_func_mapper *mapper = data;
8601         long *count = NULL;
8602
8603         if (mapper)
8604                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8605
8606         if (count) {
8607
8608                 if (*count <= 0)
8609                         return;
8610
8611                 (*count)--;
8612         }
8613
8614         tracing_snapshot_instance(tr);
8615 }
8616
8617 static int
8618 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8619                       struct ftrace_probe_ops *ops, void *data)
8620 {
8621         struct ftrace_func_mapper *mapper = data;
8622         long *count = NULL;
8623
8624         seq_printf(m, "%ps:", (void *)ip);
8625
8626         seq_puts(m, "snapshot");
8627
8628         if (mapper)
8629                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8630
8631         if (count)
8632                 seq_printf(m, ":count=%ld\n", *count);
8633         else
8634                 seq_puts(m, ":unlimited\n");
8635
8636         return 0;
8637 }
8638
8639 static int
8640 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8641                      unsigned long ip, void *init_data, void **data)
8642 {
8643         struct ftrace_func_mapper *mapper = *data;
8644
8645         if (!mapper) {
8646                 mapper = allocate_ftrace_func_mapper();
8647                 if (!mapper)
8648                         return -ENOMEM;
8649                 *data = mapper;
8650         }
8651
8652         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8653 }
8654
8655 static void
8656 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8657                      unsigned long ip, void *data)
8658 {
8659         struct ftrace_func_mapper *mapper = data;
8660
8661         if (!ip) {
8662                 if (!mapper)
8663                         return;
8664                 free_ftrace_func_mapper(mapper, NULL);
8665                 return;
8666         }
8667
8668         ftrace_func_mapper_remove_ip(mapper, ip);
8669 }
8670
8671 static struct ftrace_probe_ops snapshot_probe_ops = {
8672         .func                   = ftrace_snapshot,
8673         .print                  = ftrace_snapshot_print,
8674 };
8675
8676 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8677         .func                   = ftrace_count_snapshot,
8678         .print                  = ftrace_snapshot_print,
8679         .init                   = ftrace_snapshot_init,
8680         .free                   = ftrace_snapshot_free,
8681 };
8682
8683 static int
8684 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8685                                char *glob, char *cmd, char *param, int enable)
8686 {
8687         struct ftrace_probe_ops *ops;
8688         void *count = (void *)-1;
8689         char *number;
8690         int ret;
8691
8692         if (!tr)
8693                 return -ENODEV;
8694
8695         /* hash funcs only work with set_ftrace_filter */
8696         if (!enable)
8697                 return -EINVAL;
8698
8699         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8700
8701         if (glob[0] == '!')
8702                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8703
8704         if (!param)
8705                 goto out_reg;
8706
8707         number = strsep(&param, ":");
8708
8709         if (!strlen(number))
8710                 goto out_reg;
8711
8712         /*
8713          * We use the callback data field (which is a pointer)
8714          * as our counter.
8715          */
8716         ret = kstrtoul(number, 0, (unsigned long *)&count);
8717         if (ret)
8718                 return ret;
8719
8720  out_reg:
8721         ret = tracing_alloc_snapshot_instance(tr);
8722         if (ret < 0)
8723                 goto out;
8724
8725         ret = register_ftrace_function_probe(glob, tr, ops, count);
8726
8727  out:
8728         return ret < 0 ? ret : 0;
8729 }
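
/*
 * Usage sketch for the "snapshot" command registered below (the traced
 * function name is just an example): it is written to set_ftrace_filter,
 * optionally with a trigger count, and removed with a '!' prefix:
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:3' > set_ftrace_filter
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter
 */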
8730
8731 static struct ftrace_func_command ftrace_snapshot_cmd = {
8732         .name                   = "snapshot",
8733         .func                   = ftrace_trace_snapshot_callback,
8734 };
8735
8736 static __init int register_snapshot_cmd(void)
8737 {
8738         return register_ftrace_command(&ftrace_snapshot_cmd);
8739 }
8740 #else
8741 static inline __init int register_snapshot_cmd(void) { return 0; }
8742 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8743
8744 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8745 {
8746         if (WARN_ON(!tr->dir))
8747                 return ERR_PTR(-ENODEV);
8748
8749         /* Top directory uses NULL as the parent */
8750         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8751                 return NULL;
8752
8753         /* All sub buffers have a descriptor */
8754         return tr->dir;
8755 }
8756
8757 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8758 {
8759         struct dentry *d_tracer;
8760
8761         if (tr->percpu_dir)
8762                 return tr->percpu_dir;
8763
8764         d_tracer = tracing_get_dentry(tr);
8765         if (IS_ERR(d_tracer))
8766                 return NULL;
8767
8768         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8769
8770         MEM_FAIL(!tr->percpu_dir,
8771                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8772
8773         return tr->percpu_dir;
8774 }
8775
8776 static struct dentry *
8777 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8778                       void *data, long cpu, const struct file_operations *fops)
8779 {
8780         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8781
8782         if (ret) /* See tracing_get_cpu() */
8783                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8784         return ret;
8785 }
8786
8787 static void
8788 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8789 {
8790         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8791         struct dentry *d_cpu;
8792         char cpu_dir[30]; /* 30 characters should be more than enough */
8793
8794         if (!d_percpu)
8795                 return;
8796
8797         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8798         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8799         if (!d_cpu) {
8800                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8801                 return;
8802         }
8803
8804         /* per cpu trace_pipe */
8805         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8806                                 tr, cpu, &tracing_pipe_fops);
8807
8808         /* per cpu trace */
8809         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8810                                 tr, cpu, &tracing_fops);
8811
8812         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8813                                 tr, cpu, &tracing_buffers_fops);
8814
8815         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8816                                 tr, cpu, &tracing_stats_fops);
8817
8818         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8819                                 tr, cpu, &tracing_entries_fops);
8820
8821 #ifdef CONFIG_TRACER_SNAPSHOT
8822         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8823                                 tr, cpu, &snapshot_fops);
8824
8825         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8826                                 tr, cpu, &snapshot_raw_fops);
8827 #endif
8828 }
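
/*
 * The above results in a per-CPU directory tree like (snapshot files
 * only with CONFIG_TRACER_SNAPSHOT):
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot
 *	per_cpu/cpu0/snapshot_raw
 */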
8829
8830 #ifdef CONFIG_FTRACE_SELFTEST
8831 /* Let selftest have access to static functions in this file */
8832 #include "trace_selftest.c"
8833 #endif
8834
8835 static ssize_t
8836 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8837                         loff_t *ppos)
8838 {
8839         struct trace_option_dentry *topt = filp->private_data;
8840         char *buf;
8841
8842         if (topt->flags->val & topt->opt->bit)
8843                 buf = "1\n";
8844         else
8845                 buf = "0\n";
8846
8847         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8848 }
8849
8850 static ssize_t
8851 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8852                          loff_t *ppos)
8853 {
8854         struct trace_option_dentry *topt = filp->private_data;
8855         unsigned long val;
8856         int ret;
8857
8858         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8859         if (ret)
8860                 return ret;
8861
8862         if (val != 0 && val != 1)
8863                 return -EINVAL;
8864
8865         if (!!(topt->flags->val & topt->opt->bit) != val) {
8866                 mutex_lock(&trace_types_lock);
8867                 ret = __set_tracer_option(topt->tr, topt->flags,
8868                                           topt->opt, !val);
8869                 mutex_unlock(&trace_types_lock);
8870                 if (ret)
8871                         return ret;
8872         }
8873
8874         *ppos += cnt;
8875
8876         return cnt;
8877 }
8878
8879 static int tracing_open_options(struct inode *inode, struct file *filp)
8880 {
8881         struct trace_option_dentry *topt = inode->i_private;
8882         int ret;
8883
8884         ret = tracing_check_open_get_tr(topt->tr);
8885         if (ret)
8886                 return ret;
8887
8888         filp->private_data = inode->i_private;
8889         return 0;
8890 }
8891
8892 static int tracing_release_options(struct inode *inode, struct file *file)
8893 {
8894         struct trace_option_dentry *topt = file->private_data;
8895
8896         trace_array_put(topt->tr);
8897         return 0;
8898 }
8899
8900 static const struct file_operations trace_options_fops = {
8901         .open = tracing_open_options,
8902         .read = trace_options_read,
8903         .write = trace_options_write,
8904         .llseek = generic_file_llseek,
8905         .release = tracing_release_options,
8906 };
8907
8908 /*
8909  * In order to pass in both the trace_array descriptor and the index
8910  * to the flag that the trace option file represents, the trace_array
8911  * has a character array of trace_flags_index[], which holds the index
8912  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8913  * The address of this character array is passed to the flag option file
8914  * read/write callbacks.
8915  *
8916  * In order to extract both the index and the trace_array descriptor,
8917  * get_tr_index() uses the following algorithm.
8918  *
8919  *   idx = *ptr;
8920  *
8921  * As each element of the array holds its own position (remember
8922  * index[1] == 1), dereferencing the pointer yields the index.
8923  *
8924  * Then, to get the trace_array descriptor, we subtract that index
8925  * from the pointer, which lands us at the start of the index array:
8926  *
8927  *   ptr - idx == &index[0]
8928  *
8929  * Then a simple container_of() from that pointer gets us to the
8930  * trace_array descriptor.
8931  */
8932 static void get_tr_index(void *data, struct trace_array **ptr,
8933                          unsigned int *pindex)
8934 {
8935         *pindex = *(unsigned char *)data;
8936
8937         *ptr = container_of(data - *pindex, struct trace_array,
8938                             trace_flags_index);
8939 }
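
/*
 * Worked example (illustrative): if data == &tr->trace_flags_index[3],
 * then *pindex == 3 (init_trace_flags_index() stored 3 there), so
 * data - 3 == &tr->trace_flags_index[0] and container_of() recovers
 * the enclosing trace_array.
 */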
8940
8941 static ssize_t
8942 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8943                         loff_t *ppos)
8944 {
8945         void *tr_index = filp->private_data;
8946         struct trace_array *tr;
8947         unsigned int index;
8948         char *buf;
8949
8950         get_tr_index(tr_index, &tr, &index);
8951
8952         if (tr->trace_flags & (1 << index))
8953                 buf = "1\n";
8954         else
8955                 buf = "0\n";
8956
8957         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8958 }
8959
8960 static ssize_t
8961 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8962                          loff_t *ppos)
8963 {
8964         void *tr_index = filp->private_data;
8965         struct trace_array *tr;
8966         unsigned int index;
8967         unsigned long val;
8968         int ret;
8969
8970         get_tr_index(tr_index, &tr, &index);
8971
8972         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8973         if (ret)
8974                 return ret;
8975
8976         if (val != 0 && val != 1)
8977                 return -EINVAL;
8978
8979         mutex_lock(&event_mutex);
8980         mutex_lock(&trace_types_lock);
8981         ret = set_tracer_flag(tr, 1 << index, val);
8982         mutex_unlock(&trace_types_lock);
8983         mutex_unlock(&event_mutex);
8984
8985         if (ret < 0)
8986                 return ret;
8987
8988         *ppos += cnt;
8989
8990         return cnt;
8991 }
8992
8993 static const struct file_operations trace_options_core_fops = {
8994         .open = tracing_open_generic,
8995         .read = trace_options_core_read,
8996         .write = trace_options_core_write,
8997         .llseek = generic_file_llseek,
8998 };
8999
9000 struct dentry *trace_create_file(const char *name,
9001                                  umode_t mode,
9002                                  struct dentry *parent,
9003                                  void *data,
9004                                  const struct file_operations *fops)
9005 {
9006         struct dentry *ret;
9007
9008         ret = tracefs_create_file(name, mode, parent, data, fops);
9009         if (!ret)
9010                 pr_warn("Could not create tracefs '%s' entry\n", name);
9011
9012         return ret;
9013 }
9014
9015
9016 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9017 {
9018         struct dentry *d_tracer;
9019
9020         if (tr->options)
9021                 return tr->options;
9022
9023         d_tracer = tracing_get_dentry(tr);
9024         if (IS_ERR(d_tracer))
9025                 return NULL;
9026
9027         tr->options = tracefs_create_dir("options", d_tracer);
9028         if (!tr->options) {
9029                 pr_warn("Could not create tracefs directory 'options'\n");
9030                 return NULL;
9031         }
9032
9033         return tr->options;
9034 }
9035
9036 static void
9037 create_trace_option_file(struct trace_array *tr,
9038                          struct trace_option_dentry *topt,
9039                          struct tracer_flags *flags,
9040                          struct tracer_opt *opt)
9041 {
9042         struct dentry *t_options;
9043
9044         t_options = trace_options_init_dentry(tr);
9045         if (!t_options)
9046                 return;
9047
9048         topt->flags = flags;
9049         topt->opt = opt;
9050         topt->tr = tr;
9051
9052         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9053                                         t_options, topt, &trace_options_fops);
9054
9055 }
9056
9057 static void
9058 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9059 {
9060         struct trace_option_dentry *topts;
9061         struct trace_options *tr_topts;
9062         struct tracer_flags *flags;
9063         struct tracer_opt *opts;
9064         int cnt;
9065         int i;
9066
9067         if (!tracer)
9068                 return;
9069
9070         flags = tracer->flags;
9071
9072         if (!flags || !flags->opts)
9073                 return;
9074
9075         /*
9076          * If this is an instance, only create flags for tracers
9077          * the instance may have.
9078          */
9079         if (!trace_ok_for_array(tracer, tr))
9080                 return;
9081
9082         for (i = 0; i < tr->nr_topts; i++) {
9083                 /* Make sure there are no duplicate flags. */
9084                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9085                         return;
9086         }
9087
9088         opts = flags->opts;
9089
9090         for (cnt = 0; opts[cnt].name; cnt++)
9091                 ;
9092
9093         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9094         if (!topts)
9095                 return;
9096
9097         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9098                             GFP_KERNEL);
9099         if (!tr_topts) {
9100                 kfree(topts);
9101                 return;
9102         }
9103
9104         tr->topts = tr_topts;
9105         tr->topts[tr->nr_topts].tracer = tracer;
9106         tr->topts[tr->nr_topts].topts = topts;
9107         tr->nr_topts++;
9108
9109         for (cnt = 0; opts[cnt].name; cnt++) {
9110                 create_trace_option_file(tr, &topts[cnt], flags,
9111                                          &opts[cnt]);
9112                 MEM_FAIL(topts[cnt].entry == NULL,
9113                           "Failed to create trace option: %s",
9114                           opts[cnt].name);
9115         }
9116 }
9117
9118 static struct dentry *
9119 create_trace_option_core_file(struct trace_array *tr,
9120                               const char *option, long index)
9121 {
9122         struct dentry *t_options;
9123
9124         t_options = trace_options_init_dentry(tr);
9125         if (!t_options)
9126                 return NULL;
9127
9128         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9129                                  (void *)&tr->trace_flags_index[index],
9130                                  &trace_options_core_fops);
9131 }
9132
9133 static void create_trace_options_dir(struct trace_array *tr)
9134 {
9135         struct dentry *t_options;
9136         bool top_level = tr == &global_trace;
9137         int i;
9138
9139         t_options = trace_options_init_dentry(tr);
9140         if (!t_options)
9141                 return;
9142
9143         for (i = 0; trace_options[i]; i++) {
9144                 if (top_level ||
9145                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9146                         create_trace_option_core_file(tr, trace_options[i], i);
9147         }
9148 }
9149
9150 static ssize_t
9151 rb_simple_read(struct file *filp, char __user *ubuf,
9152                size_t cnt, loff_t *ppos)
9153 {
9154         struct trace_array *tr = filp->private_data;
9155         char buf[64];
9156         int r;
9157
9158         r = tracer_tracing_is_on(tr);
9159         r = sprintf(buf, "%d\n", r);
9160
9161         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9162 }
9163
9164 static ssize_t
9165 rb_simple_write(struct file *filp, const char __user *ubuf,
9166                 size_t cnt, loff_t *ppos)
9167 {
9168         struct trace_array *tr = filp->private_data;
9169         struct trace_buffer *buffer = tr->array_buffer.buffer;
9170         unsigned long val;
9171         int ret;
9172
9173         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9174         if (ret)
9175                 return ret;
9176
9177         if (buffer) {
9178                 mutex_lock(&trace_types_lock);
9179                 if (!!val == tracer_tracing_is_on(tr)) {
9180                         val = 0; /* do nothing */
9181                 } else if (val) {
9182                         tracer_tracing_on(tr);
9183                         if (tr->current_trace->start)
9184                                 tr->current_trace->start(tr);
9185                 } else {
9186                         tracer_tracing_off(tr);
9187                         if (tr->current_trace->stop)
9188                                 tr->current_trace->stop(tr);
9189                         /* Wake up any waiters */
9190                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9191                 }
9192                 mutex_unlock(&trace_types_lock);
9193         }
9194
9195         (*ppos)++;
9196
9197         return cnt;
9198 }
9199
9200 static const struct file_operations rb_simple_fops = {
9201         .open           = tracing_open_generic_tr,
9202         .read           = rb_simple_read,
9203         .write          = rb_simple_write,
9204         .release        = tracing_release_generic_tr,
9205         .llseek         = default_llseek,
9206 };
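
/*
 * Usage sketch, assuming these ops back the per-instance "tracing_on"
 * file (the file creation lives elsewhere in this file):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on    (stop buffer writes, call ->stop())
 *	echo 1 > /sys/kernel/tracing/tracing_on    (resume writes, call ->start())
 *	cat /sys/kernel/tracing/tracing_on         (read back 0 or 1)
 */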
9207
9208 static ssize_t
9209 buffer_percent_read(struct file *filp, char __user *ubuf,
9210                     size_t cnt, loff_t *ppos)
9211 {
9212         struct trace_array *tr = filp->private_data;
9213         char buf[64];
9214         int r;
9215
9216         r = tr->buffer_percent;
9217         r = sprintf(buf, "%d\n", r);
9218
9219         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9220 }
9221
9222 static ssize_t
9223 buffer_percent_write(struct file *filp, const char __user *ubuf,
9224                      size_t cnt, loff_t *ppos)
9225 {
9226         struct trace_array *tr = filp->private_data;
9227         unsigned long val;
9228         int ret;
9229
9230         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9231         if (ret)
9232                 return ret;
9233
9234         if (val > 100)
9235                 return -EINVAL;
9236
9237         tr->buffer_percent = val;
9238
9239         (*ppos)++;
9240
9241         return cnt;
9242 }
9243
9244 static const struct file_operations buffer_percent_fops = {
9245         .open           = tracing_open_generic_tr,
9246         .read           = buffer_percent_read,
9247         .write          = buffer_percent_write,
9248         .release        = tracing_release_generic_tr,
9249         .llseek         = default_llseek,
9250 };
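
/*
 * Usage sketch, assuming these ops back the per-instance
 * "buffer_percent" file (created elsewhere in this file): the value is
 * the fill level a per-CPU buffer must reach before blocked
 * trace_pipe_raw readers are woken (see wait_on_pipe() above):
 *
 *	echo 50 > /sys/kernel/tracing/buffer_percent
 */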
9251
9252 static struct dentry *trace_instance_dir;
9253
9254 static void
9255 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9256
9257 static int
9258 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9259 {
9260         enum ring_buffer_flags rb_flags;
9261
9262         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9263
9264         buf->tr = tr;
9265
9266         buf->buffer = ring_buffer_alloc(size, rb_flags);
9267         if (!buf->buffer)
9268                 return -ENOMEM;
9269
9270         buf->data = alloc_percpu(struct trace_array_cpu);
9271         if (!buf->data) {
9272                 ring_buffer_free(buf->buffer);
9273                 buf->buffer = NULL;
9274                 return -ENOMEM;
9275         }
9276
9277         /* Allocate the first page for all buffers */
9278         set_buffer_entries(buf,
9279                            ring_buffer_size(buf->buffer, 0));
9280
9281         return 0;
9282 }
9283
9284 static void free_trace_buffer(struct array_buffer *buf)
9285 {
9286         if (buf->buffer) {
9287                 ring_buffer_free(buf->buffer);
9288                 buf->buffer = NULL;
9289                 free_percpu(buf->data);
9290                 buf->data = NULL;
9291         }
9292 }
9293
9294 static int allocate_trace_buffers(struct trace_array *tr, int size)
9295 {
9296         int ret;
9297
9298         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9299         if (ret)
9300                 return ret;
9301
9302 #ifdef CONFIG_TRACER_MAX_TRACE
9303         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9304                                     allocate_snapshot ? size : 1);
9305         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9306                 free_trace_buffer(&tr->array_buffer);
9307                 return -ENOMEM;
9308         }
9309         tr->allocated_snapshot = allocate_snapshot;
9310
9311         /*
9312          * Only the top level trace array gets its snapshot allocated
9313          * from the kernel command line.
9314          */
9315         allocate_snapshot = false;
9316 #endif
9317
9318         return 0;
9319 }
9320
9321 static void free_trace_buffers(struct trace_array *tr)
9322 {
9323         if (!tr)
9324                 return;
9325
9326         free_trace_buffer(&tr->array_buffer);
9327
9328 #ifdef CONFIG_TRACER_MAX_TRACE
9329         free_trace_buffer(&tr->max_buffer);
9330 #endif
9331 }
9332
9333 static void init_trace_flags_index(struct trace_array *tr)
9334 {
9335         int i;
9336
9337         /* Used by the trace options files */
9338         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9339                 tr->trace_flags_index[i] = i;
9340 }
9341
9342 static void __update_tracer_options(struct trace_array *tr)
9343 {
9344         struct tracer *t;
9345
9346         for (t = trace_types; t; t = t->next)
9347                 add_tracer_options(tr, t);
9348 }
9349
9350 static void update_tracer_options(struct trace_array *tr)
9351 {
9352         mutex_lock(&trace_types_lock);
9353         tracer_options_updated = true;
9354         __update_tracer_options(tr);
9355         mutex_unlock(&trace_types_lock);
9356 }
9357
9358 /* Must have trace_types_lock held */
9359 struct trace_array *trace_array_find(const char *instance)
9360 {
9361         struct trace_array *tr, *found = NULL;
9362
9363         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9364                 if (tr->name && strcmp(tr->name, instance) == 0) {
9365                         found = tr;
9366                         break;
9367                 }
9368         }
9369
9370         return found;
9371 }
9372
9373 struct trace_array *trace_array_find_get(const char *instance)
9374 {
9375         struct trace_array *tr;
9376
9377         mutex_lock(&trace_types_lock);
9378         tr = trace_array_find(instance);
9379         if (tr)
9380                 tr->ref++;
9381         mutex_unlock(&trace_types_lock);
9382
9383         return tr;
9384 }
9385
9386 static int trace_array_create_dir(struct trace_array *tr)
9387 {
9388         int ret;
9389
9390         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9391         if (!tr->dir)
9392                 return -EINVAL;
9393
9394         ret = event_trace_add_tracer(tr->dir, tr);
9395         if (ret) {
9396                 tracefs_remove(tr->dir);
9397                 return ret;
9398         }
9399
9400         init_tracer_tracefs(tr, tr->dir);
9401         __update_tracer_options(tr);
9402
9403         return ret;
9404 }
9405
9406 static struct trace_array *trace_array_create(const char *name)
9407 {
9408         struct trace_array *tr;
9409         int ret;
9410
9411         ret = -ENOMEM;
9412         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9413         if (!tr)
9414                 return ERR_PTR(ret);
9415
9416         tr->name = kstrdup(name, GFP_KERNEL);
9417         if (!tr->name)
9418                 goto out_free_tr;
9419
9420         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9421                 goto out_free_tr;
9422
9423         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9424                 goto out_free_tr;
9425
9426         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9427
9428         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9429
9430         raw_spin_lock_init(&tr->start_lock);
9431
9432         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9433
9434         tr->current_trace = &nop_trace;
9435
9436         INIT_LIST_HEAD(&tr->systems);
9437         INIT_LIST_HEAD(&tr->events);
9438         INIT_LIST_HEAD(&tr->hist_vars);
9439         INIT_LIST_HEAD(&tr->err_log);
9440
9441         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9442                 goto out_free_tr;
9443
9444         if (ftrace_allocate_ftrace_ops(tr) < 0)
9445                 goto out_free_tr;
9446
9447         ftrace_init_trace_array(tr);
9448
9449         init_trace_flags_index(tr);
9450
9451         if (trace_instance_dir) {
9452                 ret = trace_array_create_dir(tr);
9453                 if (ret)
9454                         goto out_free_tr;
9455         } else
9456                 __trace_early_add_events(tr);
9457
9458         list_add(&tr->list, &ftrace_trace_arrays);
9459
9460         tr->ref++;
9461
9462         return tr;
9463
9464  out_free_tr:
9465         ftrace_free_ftrace_ops(tr);
9466         free_trace_buffers(tr);
9467         free_cpumask_var(tr->pipe_cpumask);
9468         free_cpumask_var(tr->tracing_cpumask);
9469         kfree(tr->name);
9470         kfree(tr);
9471
9472         return ERR_PTR(ret);
9473 }
9474
9475 static int instance_mkdir(const char *name)
9476 {
9477         struct trace_array *tr;
9478         int ret;
9479
9480         mutex_lock(&event_mutex);
9481         mutex_lock(&trace_types_lock);
9482
9483         ret = -EEXIST;
9484         if (trace_array_find(name))
9485                 goto out_unlock;
9486
9487         tr = trace_array_create(name);
9488
9489         ret = PTR_ERR_OR_ZERO(tr);
9490
9491 out_unlock:
9492         mutex_unlock(&trace_types_lock);
9493         mutex_unlock(&event_mutex);
9494         return ret;
9495 }
9496
9497 /**
9498  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9499  * @name: The name of the trace array to be looked up/created.
9500  *
9501  * Returns a pointer to the trace array with the given name, or NULL
9502  * if it cannot be created.
9503  *
9504  * NOTE: This function increments the reference counter associated with
9505  * the trace array returned, which makes sure it cannot be freed while
9506  * in use. Use trace_array_put() once the trace array is no longer needed.
9507  * If the trace array is to be freed, trace_array_destroy() needs to be
9508  * called after trace_array_put(); alternatively, let user space delete it
9509  * from the tracefs instances directory. Until trace_array_put() is
9510  * called, user space cannot delete it.
9511  * (An illustrative usage sketch follows the function below.)
9512  */
9513 struct trace_array *trace_array_get_by_name(const char *name)
9514 {
9515         struct trace_array *tr;
9516
9517         mutex_lock(&event_mutex);
9518         mutex_lock(&trace_types_lock);
9519
9520         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9521                 if (tr->name && strcmp(tr->name, name) == 0)
9522                         goto out_unlock;
9523         }
9524
9525         tr = trace_array_create(name);
9526
9527         if (IS_ERR(tr))
9528                 tr = NULL;
9529 out_unlock:
9530         if (tr)
9531                 tr->ref++;
9532
9533         mutex_unlock(&trace_types_lock);
9534         mutex_unlock(&event_mutex);
9535         return tr;
9536 }
9537 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
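/*
 * Illustrative sketch (editorial, not part of the original file): how a
 * kernel module might use the instance API documented above. The instance
 * name "my_instance" and the error handling are assumptions made only for
 * this example; real callers pick their own name and policy.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	// ... use the instance (enable events on it, write to it, ...) ...
 *
 *	trace_array_put(tr);		// drop the reference taken above
 *	trace_array_destroy(tr);	// optional: remove the instance again
 */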
9538
9539 static int __remove_instance(struct trace_array *tr)
9540 {
9541         int i;
9542
9543         /* A newly created trace array starts with a reference count of 1. */
9544         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9545                 return -EBUSY;
9546
9547         list_del(&tr->list);
9548
9549         /* Disable all the flags that were enabled coming in */
9550         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9551                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9552                         set_tracer_flag(tr, 1 << i, 0);
9553         }
9554
9555         tracing_set_nop(tr);
9556         clear_ftrace_function_probes(tr);
9557         event_trace_del_tracer(tr);
9558         ftrace_clear_pids(tr);
9559         ftrace_destroy_function_files(tr);
9560         tracefs_remove(tr->dir);
9561         free_percpu(tr->last_func_repeats);
9562         free_trace_buffers(tr);
9563         clear_tracing_err_log(tr);
9564
9565         for (i = 0; i < tr->nr_topts; i++) {
9566                 kfree(tr->topts[i].topts);
9567         }
9568         kfree(tr->topts);
9569
9570         free_cpumask_var(tr->pipe_cpumask);
9571         free_cpumask_var(tr->tracing_cpumask);
9572         kfree(tr->name);
9573         kfree(tr);
9574
9575         return 0;
9576 }
9577
9578 int trace_array_destroy(struct trace_array *this_tr)
9579 {
9580         struct trace_array *tr;
9581         int ret;
9582
9583         if (!this_tr)
9584                 return -EINVAL;
9585
9586         mutex_lock(&event_mutex);
9587         mutex_lock(&trace_types_lock);
9588
9589         ret = -ENODEV;
9590
9591         /* Make sure the trace array exists before destroying it. */
9592         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9593                 if (tr == this_tr) {
9594                         ret = __remove_instance(tr);
9595                         break;
9596                 }
9597         }
9598
9599         mutex_unlock(&trace_types_lock);
9600         mutex_unlock(&event_mutex);
9601
9602         return ret;
9603 }
9604 EXPORT_SYMBOL_GPL(trace_array_destroy);
9605
9606 static int instance_rmdir(const char *name)
9607 {
9608         struct trace_array *tr;
9609         int ret;
9610
9611         mutex_lock(&event_mutex);
9612         mutex_lock(&trace_types_lock);
9613
9614         ret = -ENODEV;
9615         tr = trace_array_find(name);
9616         if (tr)
9617                 ret = __remove_instance(tr);
9618
9619         mutex_unlock(&trace_types_lock);
9620         mutex_unlock(&event_mutex);
9621
9622         return ret;
9623 }
9624
9625 static __init void create_trace_instances(struct dentry *d_tracer)
9626 {
9627         struct trace_array *tr;
9628
9629         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9630                                                          instance_mkdir,
9631                                                          instance_rmdir);
9632         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9633                 return;
9634
9635         mutex_lock(&event_mutex);
9636         mutex_lock(&trace_types_lock);
9637
9638         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9639                 if (!tr->name)
9640                         continue;
9641                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9642                              "Failed to create instance directory\n"))
9643                         break;
9644         }
9645
9646         mutex_unlock(&trace_types_lock);
9647         mutex_unlock(&event_mutex);
9648 }
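/*
 * Editorial usage note: once the "instances" directory has been created,
 * instances are normally added and removed from user space, which ends up
 * in instance_mkdir()/instance_rmdir() above, e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * (The path may be /sys/kernel/debug/tracing/instances on systems that
 * still reach tracefs through the debugfs automount.)
 */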
9649
9650 static void
9651 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9652 {
9653         struct trace_event_file *file;
9654         int cpu;
9655
9656         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9657                         tr, &show_traces_fops);
9658
9659         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9660                         tr, &set_tracer_fops);
9661
9662         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9663                           tr, &tracing_cpumask_fops);
9664
9665         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9666                           tr, &tracing_iter_fops);
9667
9668         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9669                           tr, &tracing_fops);
9670
9671         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9672                           tr, &tracing_pipe_fops);
9673
9674         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9675                           tr, &tracing_entries_fops);
9676
9677         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9678                           tr, &tracing_total_entries_fops);
9679
9680         trace_create_file("free_buffer", 0200, d_tracer,
9681                           tr, &tracing_free_buffer_fops);
9682
9683         trace_create_file("trace_marker", 0220, d_tracer,
9684                           tr, &tracing_mark_fops);
9685
9686         file = __find_event_file(tr, "ftrace", "print");
9687         if (file && file->dir)
9688                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9689                                   file, &event_trigger_fops);
9690         tr->trace_marker_file = file;
9691
9692         trace_create_file("trace_marker_raw", 0220, d_tracer,
9693                           tr, &tracing_mark_raw_fops);
9694
9695         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9696                           &trace_clock_fops);
9697
9698         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9699                           tr, &rb_simple_fops);
9700
9701         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9702                           &trace_time_stamp_mode_fops);
9703
9704         tr->buffer_percent = 50;
9705
9706         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9707                         tr, &buffer_percent_fops);
9708
9709         create_trace_options_dir(tr);
9710
9711 #ifdef CONFIG_TRACER_MAX_TRACE
9712         trace_create_maxlat_file(tr, d_tracer);
9713 #endif
9714
9715         if (ftrace_create_function_files(tr, d_tracer))
9716                 MEM_FAIL(1, "Could not allocate function filter files");
9717
9718 #ifdef CONFIG_TRACER_SNAPSHOT
9719         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9720                           tr, &snapshot_fops);
9721 #endif
9722
9723         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9724                           tr, &tracing_err_log_fops);
9725
9726         for_each_tracing_cpu(cpu)
9727                 tracing_init_tracefs_percpu(tr, cpu);
9728
9729         ftrace_init_tracefs(tr, d_tracer);
9730 }
9731
9732 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9733 {
9734         struct vfsmount *mnt;
9735         struct file_system_type *type;
9736
9737         /*
9738          * To maintain backward compatibility for tools that mount
9739          * debugfs to get to the tracing facility, tracefs is automatically
9740          * mounted to the debugfs/tracing directory.
9741          */
9742         type = get_fs_type("tracefs");
9743         if (!type)
9744                 return NULL;
9745         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9746         put_filesystem(type);
9747         if (IS_ERR(mnt))
9748                 return NULL;
9749         mntget(mnt);
9750
9751         return mnt;
9752 }
9753
9754 /**
9755  * tracing_init_dentry - initialize top level trace array
9756  *
9757  * This is called when creating files or directories in the tracing
9758  * directory, and via fs_initcall() from the boot-up code. It returns 0 on
9759  * success, or a negative error if tracefs is unavailable or locked down.
9760  */
9761 int tracing_init_dentry(void)
9762 {
9763         struct trace_array *tr = &global_trace;
9764
9765         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9766                 pr_warn("Tracing disabled due to lockdown\n");
9767                 return -EPERM;
9768         }
9769
9770         /* The top level trace array uses NULL as parent */
9771         if (tr->dir)
9772                 return 0;
9773
9774         if (WARN_ON(!tracefs_initialized()))
9775                 return -ENODEV;
9776
9777         /*
9778          * As there may still be users that expect the tracing
9779          * files to exist in debugfs/tracing, we must automount
9780          * the tracefs file system there, so older tools still
9781          * work with the newer kernel.
9782          */
9783         tr->dir = debugfs_create_automount("tracing", NULL,
9784                                            trace_automount, NULL);
9785
9786         return 0;
9787 }
9788
9789 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9790 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9791
9792 static struct workqueue_struct *eval_map_wq __initdata;
9793 static struct work_struct eval_map_work __initdata;
9794 static struct work_struct tracerfs_init_work __initdata;
9795
9796 static void __init eval_map_work_func(struct work_struct *work)
9797 {
9798         int len;
9799
9800         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9801         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9802 }
9803
9804 static int __init trace_eval_init(void)
9805 {
9806         INIT_WORK(&eval_map_work, eval_map_work_func);
9807
9808         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9809         if (!eval_map_wq) {
9810                 pr_err("Unable to allocate eval_map_wq\n");
9811                 /* Do work here */
9812                 eval_map_work_func(&eval_map_work);
9813                 return -ENOMEM;
9814         }
9815
9816         queue_work(eval_map_wq, &eval_map_work);
9817         return 0;
9818 }
9819
9820 subsys_initcall(trace_eval_init);
9821
9822 static int __init trace_eval_sync(void)
9823 {
9824         /* Make sure the eval map updates are finished */
9825         if (eval_map_wq)
9826                 destroy_workqueue(eval_map_wq);
9827         return 0;
9828 }
9829
9830 late_initcall_sync(trace_eval_sync);
9831
9832
9833 #ifdef CONFIG_MODULES
9834 static void trace_module_add_evals(struct module *mod)
9835 {
9836         if (!mod->num_trace_evals)
9837                 return;
9838
9839         /*
9840          * Modules with a bad taint do not have their events created, so
9841          * do not bother with their eval (enum) maps either.
9842          */
9843         if (trace_module_has_bad_taint(mod))
9844                 return;
9845
9846         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9847 }
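/*
 * Editorial sketch: mod->trace_evals is populated by the trace event macros
 * when a module's trace header uses TRACE_DEFINE_ENUM() (or
 * TRACE_DEFINE_SIZEOF()). A hypothetical header might contain:
 *
 *	TRACE_DEFINE_ENUM(MY_STATE_RUNNING);
 *
 * so that the enum name used in the event's print format, e.g. in
 * __print_symbolic(..., { MY_STATE_RUNNING, "RUNNING" }), is converted to
 * its numeric value in the tracefs "format" file, which user space could
 * not otherwise resolve. MY_STATE_RUNNING is an invented name used only
 * for illustration.
 */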
9848
9849 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9850 static void trace_module_remove_evals(struct module *mod)
9851 {
9852         union trace_eval_map_item *map;
9853         union trace_eval_map_item **last = &trace_eval_maps;
9854
9855         if (!mod->num_trace_evals)
9856                 return;
9857
9858         mutex_lock(&trace_eval_mutex);
9859
9860         map = trace_eval_maps;
9861
9862         while (map) {
9863                 if (map->head.mod == mod)
9864                         break;
9865                 map = trace_eval_jmp_to_tail(map);
9866                 last = &map->tail.next;
9867                 map = map->tail.next;
9868         }
9869         if (!map)
9870                 goto out;
9871
9872         *last = trace_eval_jmp_to_tail(map)->tail.next;
9873         kfree(map);
9874  out:
9875         mutex_unlock(&trace_eval_mutex);
9876 }
9877 #else
9878 static inline void trace_module_remove_evals(struct module *mod) { }
9879 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9880
9881 static int trace_module_notify(struct notifier_block *self,
9882                                unsigned long val, void *data)
9883 {
9884         struct module *mod = data;
9885
9886         switch (val) {
9887         case MODULE_STATE_COMING:
9888                 trace_module_add_evals(mod);
9889                 break;
9890         case MODULE_STATE_GOING:
9891                 trace_module_remove_evals(mod);
9892                 break;
9893         }
9894
9895         return NOTIFY_OK;
9896 }
9897
9898 static struct notifier_block trace_module_nb = {
9899         .notifier_call = trace_module_notify,
9900         .priority = 0,
9901 };
9902 #endif /* CONFIG_MODULES */
9903
9904 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9905 {
9906
9907         event_trace_init();
9908
9909         init_tracer_tracefs(&global_trace, NULL);
9910         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9911
9912         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9913                         &global_trace, &tracing_thresh_fops);
9914
9915         trace_create_file("README", TRACE_MODE_READ, NULL,
9916                         NULL, &tracing_readme_fops);
9917
9918         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9919                         NULL, &tracing_saved_cmdlines_fops);
9920
9921         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9922                           NULL, &tracing_saved_cmdlines_size_fops);
9923
9924         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9925                         NULL, &tracing_saved_tgids_fops);
9926
9927         trace_create_eval_file(NULL);
9928
9929 #ifdef CONFIG_MODULES
9930         register_module_notifier(&trace_module_nb);
9931 #endif
9932
9933 #ifdef CONFIG_DYNAMIC_FTRACE
9934         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9935                         NULL, &tracing_dyn_info_fops);
9936 #endif
9937
9938         create_trace_instances(NULL);
9939
9940         update_tracer_options(&global_trace);
9941 }
9942
9943 static __init int tracer_init_tracefs(void)
9944 {
9945         int ret;
9946
9947         trace_access_lock_init();
9948
9949         ret = tracing_init_dentry();
9950         if (ret)
9951                 return 0;
9952
9953         if (eval_map_wq) {
9954                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9955                 queue_work(eval_map_wq, &tracerfs_init_work);
9956         } else {
9957                 tracer_init_tracefs_work_func(NULL);
9958         }
9959
9960         rv_init_interface();
9961
9962         return 0;
9963 }
9964
9965 fs_initcall(tracer_init_tracefs);
9966
9967 static int trace_panic_handler(struct notifier_block *this,
9968                                unsigned long event, void *unused)
9969 {
9970         if (ftrace_dump_on_oops)
9971                 ftrace_dump(ftrace_dump_on_oops);
9972         return NOTIFY_OK;
9973 }
9974
9975 static struct notifier_block trace_panic_notifier = {
9976         .notifier_call  = trace_panic_handler,
9977         .next           = NULL,
9978         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9979 };
9980
9981 static int trace_die_handler(struct notifier_block *self,
9982                              unsigned long val,
9983                              void *data)
9984 {
9985         switch (val) {
9986         case DIE_OOPS:
9987                 if (ftrace_dump_on_oops)
9988                         ftrace_dump(ftrace_dump_on_oops);
9989                 break;
9990         default:
9991                 break;
9992         }
9993         return NOTIFY_OK;
9994 }
9995
9996 static struct notifier_block trace_die_notifier = {
9997         .notifier_call = trace_die_handler,
9998         .priority = 200
9999 };
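/*
 * Editorial note: ftrace_dump_on_oops, tested by the panic/die notifiers
 * above, is normally enabled with the "ftrace_dump_on_oops" kernel command
 * line option (or "ftrace_dump_on_oops=orig_cpu" to dump only the buffer
 * of the CPU that triggered the oops).
 */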
10000
10001 /*
10002  * printk is limited to a maximum of 1024 characters; we really don't
10003  * need it that big here. Nothing should be printing 1000 characters anyway.
10004  */
10005 #define TRACE_MAX_PRINT         1000
10006
10007 /*
10008  * Define here KERN_TRACE so that we have one place to modify
10009  * it if we decide to change what log level the ftrace dump
10010  * should be at.
10011  */
10012 #define KERN_TRACE              KERN_EMERG
10013
10014 void
10015 trace_printk_seq(struct trace_seq *s)
10016 {
10017         /* Probably should print a warning here. */
10018         if (s->seq.len >= TRACE_MAX_PRINT)
10019                 s->seq.len = TRACE_MAX_PRINT;
10020
10021         /*
10022          * More paranoia: although the buffer size is set to PAGE_SIZE
10023          * and TRACE_MAX_PRINT is 1000, this is just an extra layer of
10024          * protection.
10025          */
10026         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10027                 s->seq.len = s->seq.size - 1;
10028
10029         /* Should already be NUL-terminated, but we are paranoid. */
10030         s->buffer[s->seq.len] = 0;
10031
10032         printk(KERN_TRACE "%s", s->buffer);
10033
10034         trace_seq_init(s);
10035 }
10036
10037 void trace_init_global_iter(struct trace_iterator *iter)
10038 {
10039         iter->tr = &global_trace;
10040         iter->trace = iter->tr->current_trace;
10041         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10042         iter->array_buffer = &global_trace.array_buffer;
10043
10044         if (iter->trace && iter->trace->open)
10045                 iter->trace->open(iter);
10046
10047         /* Annotate start of buffers if we had overruns */
10048         if (ring_buffer_overruns(iter->array_buffer->buffer))
10049                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10050
10051         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10052         if (trace_clocks[iter->tr->clock_id].in_ns)
10053                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10054
10055         /* Cannot use kmalloc for iter.temp and iter.fmt */
10056         iter->temp = static_temp_buf;
10057         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10058         iter->fmt = static_fmt_buf;
10059         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10060 }
10061
10062 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10063 {
10064         /* use static because iter can be a bit big for the stack */
10065         static struct trace_iterator iter;
10066         static atomic_t dump_running;
10067         struct trace_array *tr = &global_trace;
10068         unsigned int old_userobj;
10069         unsigned long flags;
10070         int cnt = 0, cpu;
10071
10072         /* Only allow one dump user at a time. */
10073         if (atomic_inc_return(&dump_running) != 1) {
10074                 atomic_dec(&dump_running);
10075                 return;
10076         }
10077
10078         /*
10079          * Always turn off tracing when we dump.
10080          * We don't need to show trace output of what happens
10081          * between multiple crashes.
10082          *
10083          * If the user does a sysrq-z, then they can re-enable
10084          * tracing with echo 1 > tracing_on.
10085          */
10086         tracing_off();
10087
10088         local_irq_save(flags);
10089
10090         /* Simulate the iterator */
10091         trace_init_global_iter(&iter);
10092
10093         for_each_tracing_cpu(cpu) {
10094                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10095         }
10096
10097         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10098
10099         /* don't look at user memory in panic mode */
10100         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10101
10102         switch (oops_dump_mode) {
10103         case DUMP_ALL:
10104                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10105                 break;
10106         case DUMP_ORIG:
10107                 iter.cpu_file = raw_smp_processor_id();
10108                 break;
10109         case DUMP_NONE:
10110                 goto out_enable;
10111         default:
10112                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10113                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10114         }
10115
10116         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10117
10118         /* Did function tracer already get disabled? */
10119         if (ftrace_is_dead()) {
10120                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10121                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10122         }
10123
10124         /*
10125          * We need to stop all tracing on all CPUs to read
10126          * the next buffer. This is a bit expensive, but it is
10127          * not done often. We read everything we can,
10128          * and then release the locks again.
10129          */
10130
10131         while (!trace_empty(&iter)) {
10132
10133                 if (!cnt)
10134                         printk(KERN_TRACE "---------------------------------\n");
10135
10136                 cnt++;
10137
10138                 trace_iterator_reset(&iter);
10139                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10140
10141                 if (trace_find_next_entry_inc(&iter) != NULL) {
10142                         int ret;
10143
10144                         ret = print_trace_line(&iter);
10145                         if (ret != TRACE_TYPE_NO_CONSUME)
10146                                 trace_consume(&iter);
10147                 }
10148                 touch_nmi_watchdog();
10149
10150                 trace_printk_seq(&iter.seq);
10151         }
10152
10153         if (!cnt)
10154                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10155         else
10156                 printk(KERN_TRACE "---------------------------------\n");
10157
10158  out_enable:
10159         tr->trace_flags |= old_userobj;
10160
10161         for_each_tracing_cpu(cpu) {
10162                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10163         }
10164         atomic_dec(&dump_running);
10165         local_irq_restore(flags);
10166 }
10167 EXPORT_SYMBOL_GPL(ftrace_dump);
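/*
 * Illustrative sketch (editorial): besides the oops/panic notifiers,
 * ftrace_dump() can be called directly from kernel code that hits a fatal
 * condition, e.g.:
 *
 *	if (WARN_ON(broken_state))
 *		ftrace_dump(DUMP_ORIG);	// dump only this CPU's buffer
 *
 * "broken_state" is a placeholder condition; DUMP_ALL would dump the
 * buffers of every CPU instead.
 */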
10168
10169 #define WRITE_BUFSIZE  4096
10170
10171 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10172                                 size_t count, loff_t *ppos,
10173                                 int (*createfn)(const char *))
10174 {
10175         char *kbuf, *buf, *tmp;
10176         int ret = 0;
10177         size_t done = 0;
10178         size_t size;
10179
10180         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10181         if (!kbuf)
10182                 return -ENOMEM;
10183
10184         while (done < count) {
10185                 size = count - done;
10186
10187                 if (size >= WRITE_BUFSIZE)
10188                         size = WRITE_BUFSIZE - 1;
10189
10190                 if (copy_from_user(kbuf, buffer + done, size)) {
10191                         ret = -EFAULT;
10192                         goto out;
10193                 }
10194                 kbuf[size] = '\0';
10195                 buf = kbuf;
10196                 do {
10197                         tmp = strchr(buf, '\n');
10198                         if (tmp) {
10199                                 *tmp = '\0';
10200                                 size = tmp - buf + 1;
10201                         } else {
10202                                 size = strlen(buf);
10203                                 if (done + size < count) {
10204                                         if (buf != kbuf)
10205                                                 break;
10206                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10207                                         pr_warn("Line length is too long: Should be less than %d\n",
10208                                                 WRITE_BUFSIZE - 2);
10209                                         ret = -EINVAL;
10210                                         goto out;
10211                                 }
10212                         }
10213                         done += size;
10214
10215                         /* Remove comments */
10216                         tmp = strchr(buf, '#');
10217
10218                         if (tmp)
10219                                 *tmp = '\0';
10220
10221                         ret = createfn(buf);
10222                         if (ret)
10223                                 goto out;
10224                         buf += size;
10225
10226                 } while (done < count);
10227         }
10228         ret = done;
10229
10230 out:
10231         kfree(kbuf);
10232
10233         return ret;
10234 }
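/*
 * Illustrative sketch (editorial): trace_parse_run_command() is meant to
 * back the write() handler of a command file (the kprobe/uprobe event
 * files use it this way). A hypothetical caller could look like:
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		pr_info("got command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *buffer,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_cmd);
 *	}
 *
 * my_create_cmd() and my_write() are invented names; each newline-terminated
 * chunk of the user buffer, with anything after a '#' stripped, is passed to
 * the callback.
 */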
10235
10236 __init static int tracer_alloc_buffers(void)
10237 {
10238         int ring_buf_size;
10239         int ret = -ENOMEM;
10240
10241
10242         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10243                 pr_warn("Tracing disabled due to lockdown\n");
10244                 return -EPERM;
10245         }
10246
10247         /*
10248          * Make sure we don't accidentally add more trace options
10249          * than we have bits for.
10250          */
10251         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10252
10253         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10254                 goto out;
10255
10256         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10257                 goto out_free_buffer_mask;
10258
10259         /* Only allocate trace_printk buffers if a trace_printk exists */
10260         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10261                 /* Must be called before global_trace.buffer is allocated */
10262                 trace_printk_init_buffers();
10263
10264         /* To save memory, keep the ring buffer size to its minimum */
10265         if (ring_buffer_expanded)
10266                 ring_buf_size = trace_buf_size;
10267         else
10268                 ring_buf_size = 1;
10269
10270         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10271         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10272
10273         raw_spin_lock_init(&global_trace.start_lock);
10274
10275         /*
10276          * The prepare callback allocates some memory for the ring buffer. We
10277          * don't free the buffer if the CPU goes down. If we were to free
10278          * the buffer, then the user would lose any trace that was in the
10279          * buffer. The memory will be removed once the "instance" is removed.
10280          */
10281         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10282                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10283                                       NULL);
10284         if (ret < 0)
10285                 goto out_free_cpumask;
10286         /* Used for event triggers */
10287         ret = -ENOMEM;
10288         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10289         if (!temp_buffer)
10290                 goto out_rm_hp_state;
10291
10292         if (trace_create_savedcmd() < 0)
10293                 goto out_free_temp_buffer;
10294
10295         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10296                 goto out_free_savedcmd;
10297
10298         /* TODO: make the number of buffers hot pluggable with CPUs */
10299         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10300                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10301                 goto out_free_pipe_cpumask;
10302         }
10303         if (global_trace.buffer_disabled)
10304                 tracing_off();
10305
10306         if (trace_boot_clock) {
10307                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10308                 if (ret < 0)
10309                         pr_warn("Trace clock %s not defined, going back to default\n",
10310                                 trace_boot_clock);
10311         }
10312
10313         /*
10314          * register_tracer() might reference current_trace, so it
10315          * needs to be set before we register anything. This is
10316          * just a bootstrap of current_trace anyway.
10317          */
10318         global_trace.current_trace = &nop_trace;
10319
10320         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10321
10322         ftrace_init_global_array_ops(&global_trace);
10323
10324         init_trace_flags_index(&global_trace);
10325
10326         register_tracer(&nop_trace);
10327
10328         /* Function tracing may start here (via kernel command line) */
10329         init_function_trace();
10330
10331         /* All seems OK, enable tracing */
10332         tracing_disabled = 0;
10333
10334         atomic_notifier_chain_register(&panic_notifier_list,
10335                                        &trace_panic_notifier);
10336
10337         register_die_notifier(&trace_die_notifier);
10338
10339         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10340
10341         INIT_LIST_HEAD(&global_trace.systems);
10342         INIT_LIST_HEAD(&global_trace.events);
10343         INIT_LIST_HEAD(&global_trace.hist_vars);
10344         INIT_LIST_HEAD(&global_trace.err_log);
10345         list_add(&global_trace.list, &ftrace_trace_arrays);
10346
10347         apply_trace_boot_options();
10348
10349         register_snapshot_cmd();
10350
10351         test_can_verify();
10352
10353         return 0;
10354
10355 out_free_pipe_cpumask:
10356         free_cpumask_var(global_trace.pipe_cpumask);
10357 out_free_savedcmd:
10358         free_saved_cmdlines_buffer(savedcmd);
10359 out_free_temp_buffer:
10360         ring_buffer_free(temp_buffer);
10361 out_rm_hp_state:
10362         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10363 out_free_cpumask:
10364         free_cpumask_var(global_trace.tracing_cpumask);
10365 out_free_buffer_mask:
10366         free_cpumask_var(tracing_buffer_mask);
10367 out:
10368         return ret;
10369 }
10370
10371 void __init ftrace_boot_snapshot(void)
10372 {
10373         if (snapshot_at_boot) {
10374                 tracing_snapshot();
10375                 internal_trace_puts("** Boot snapshot taken **\n");
10376         }
10377 }
10378
10379 void __init early_trace_init(void)
10380 {
10381         if (tracepoint_printk) {
10382                 tracepoint_print_iter =
10383                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10384                 if (MEM_FAIL(!tracepoint_print_iter,
10385                              "Failed to allocate trace iterator\n"))
10386                         tracepoint_printk = 0;
10387                 else
10388                         static_key_enable(&tracepoint_printk_key.key);
10389         }
10390         tracer_alloc_buffers();
10391
10392         init_events();
10393 }
10394
10395 void __init trace_init(void)
10396 {
10397         trace_event_init();
10398 }
10399
10400 __init static void clear_boot_tracer(void)
10401 {
10402         /*
10403          * The default bootup tracer name points into an init section.
10404          * This function is called at late init. If the boot tracer was
10405          * never registered by now, clear it out, to prevent a later
10406          * registration from accessing the buffer that is
10407          * about to be freed.
10408          */
10409         if (!default_bootup_tracer)
10410                 return;
10411
10412         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10413                default_bootup_tracer);
10414         default_bootup_tracer = NULL;
10415 }
10416
10417 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10418 __init static void tracing_set_default_clock(void)
10419 {
10420         /* sched_clock_stable() is determined in late_initcall */
10421         if (!trace_boot_clock && !sched_clock_stable()) {
10422                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10423                         pr_warn("Can not set tracing clock due to lockdown\n");
10424                         return;
10425                 }
10426
10427                 printk(KERN_WARNING
10428                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10429                        "If you want to keep using the local clock, then add:\n"
10430                        "  \"trace_clock=local\"\n"
10431                        "on the kernel command line\n");
10432                 tracing_set_clock(&global_trace, "global");
10433         }
10434 }
10435 #else
10436 static inline void tracing_set_default_clock(void) { }
10437 #endif
10438
10439 __init static int late_trace_init(void)
10440 {
10441         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10442                 static_key_disable(&tracepoint_printk_key.key);
10443                 tracepoint_printk = 0;
10444         }
10445
10446         tracing_set_default_clock();
10447         clear_boot_tracer();
10448         return 0;
10449 }
10450
10451 late_initcall_sync(late_trace_init);