kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
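/*
 * Illustrative sketch (not part of the original file): for a module that
 * saves N eval maps, the array conceptually looks like
 *
 *   item[0].head        = { .mod = mod, .length = N }
 *   item[1 .. N].map    = the N saved trace_eval_map entries
 *   item[N + 1].tail    = { .next = <next saved array, or NULL> }
 *
 * so a walker steps over the head, visits the N maps, and then follows
 * tail.next to the next saved array.
 */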
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are adding the export to the list, but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer added to the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
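/*
 * Illustrative sketch (not part of the original file): a client registers
 * a trace_export roughly as below; my_export_write() and my_export are
 * hypothetical names.
 *
 *   static void my_export_write(struct trace_export *export,
 *                               const void *entry, unsigned int size)
 *   {
 *           ...forward the raw trace entry to an out-of-band channel...
 *   }
 *
 *   static struct trace_export my_export = {
 *           .write = my_export_write,
 *           .flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *   };
 *
 *   register_ftrace_export(&my_export);
 *   ...
 *   unregister_ftrace_export(&my_export);
 */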
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
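/*
 * Illustrative example (not part of the original file): with @filtered_pids
 * containing only pid 100 and @filtered_no_pids containing only pid 200,
 *
 *   a task with pid 100 is traced      (listed, and not in the no_pids list)
 *   a task with pid 200 is not traced  (listed in filtered_no_pids)
 *   a task with pid 300 is not traced  (filtered_pids exists but lacks it)
 *
 * and when both lists are NULL every task is traced.
 */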
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
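/*
 * Illustrative sketch (not part of the original file) of how a seq_file
 * user typically wires up the three helpers above; the my_* names are
 * hypothetical and locking details are omitted.
 *
 *   static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *   {
 *           return trace_pid_start(my_pid_list, pos);
 *   }
 *
 *   static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *   {
 *           return trace_pid_next(my_pid_list, v, pos);
 *   }
 *
 *   static void my_pids_stop(struct seq_file *m, void *v)
 *   {
 *   }
 *
 *   static const struct seq_operations my_pids_seq_ops = {
 *           .start  = my_pids_start,
 *           .next   = my_pids_next,
 *           .stop   = my_pids_stop,
 *           .show   = trace_pid_show,
 *   };
 */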
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new list. The write is an all or nothing
698          * operation: a new list is built whenever the user adds pids,
699          * and if the operation fails, then the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
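/*
 * Illustrative example (not part of the original file): writing
 * "100 200 300\n" through this function produces a new pid_list holding
 * 100, 200 and 300 (plus anything copied from @filtered_pids), which is
 * what e.g.
 *
 *   echo 100 200 300 > set_event_pid
 *
 * ends up doing. A write that adds no pids, with nothing to copy from
 * @filtered_pids, leaves *new_pid_list set to NULL, i.e. no filtering.
 */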
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low value of 16384.
812  * If a dump on oops happens, it will be much appreciated
813  * not to have to wait for all that output. In any case, this is
814  * configurable at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low level protection.
832  * The validity of the events (returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow another process to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not reader page) in the ring buffer, and this page will be rewritten
839  *      by the events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
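/*
 * Illustrative usage sketch (not part of the original file): readers
 * bracket their buffer accesses with these helpers,
 *
 *   trace_access_lock(cpu);                  (or RING_BUFFER_ALL_CPUS)
 *   ...peek at or consume events of that cpu buffer...
 *   trace_access_unlock(cpu);
 *
 * On SMP, a per-cpu reader only excludes other readers of the same cpu
 * buffer, while a RING_BUFFER_ALL_CPUS reader takes all_cpu_access_lock
 * for write and excludes everyone.
 */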
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff), that just want to
965          * know if the ring buffer has been disabled, but it can handle
966          * races of where it gets disabled but we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
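/*
 * Illustrative note (not part of the original file): callers normally use
 * the trace_puts() macro rather than these two functions directly; roughly,
 * it expands to
 *
 *   __builtin_constant_p(str) ?
 *           __trace_bputs(_THIS_IP_, str) :
 *           __trace_puts(_THIS_IP_, str, strlen(str));
 *
 * so a constant string only has its pointer recorded (bputs) while a
 * run-time string is copied into the ring buffer (puts).
 */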
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing.
1149  * Basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         local_irq_disable();
1197         arch_spin_lock(&tr->max_lock);
1198
1199         if (tr->cond_snapshot)
1200                 cond_data = tr->cond_snapshot->cond_data;
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210                                         struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215         int ret;
1216
1217         if (!tr->allocated_snapshot) {
1218
1219                 /* allocate spare buffer */
1220                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222                 if (ret < 0)
1223                         return ret;
1224
1225                 tr->allocated_snapshot = true;
1226         }
1227
1228         return 0;
1229 }
1230
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233         /*
1234          * We don't free the ring buffer. Instead, we resize it because
1235          * the max_tr ring buffer has some state (e.g. ring->clock) and
1236          * we want to preserve it.
1237          */
1238         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239         set_buffer_entries(&tr->max_buffer, 1);
1240         tracing_reset_online_cpus(&tr->max_buffer);
1241         tr->allocated_snapshot = false;
1242 }
1243
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256         struct trace_array *tr = &global_trace;
1257         int ret;
1258
1259         ret = tracing_alloc_snapshot_instance(tr);
1260         WARN_ON(ret < 0);
1261
1262         return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279         int ret;
1280
1281         ret = tracing_alloc_snapshot();
1282         if (ret < 0)
1283                 return;
1284
1285         tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
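/*
 * Illustrative usage sketch (not part of the original file): a debugging
 * hook that wants to freeze the trace around an interesting event might do
 *
 *   tracing_snapshot_alloc();        (early, from a context that may sleep)
 *   ...
 *   if (something_went_wrong())      (hypothetical condition)
 *           tracing_snapshot();      (safe from most atomic contexts, not NMI)
 *
 * and read the result back from the "snapshot" file in tracefs.
 */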
1288
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:         The tracing instance
1292  * @cond_data:  User data to associate with the snapshot
1293  * @update:     Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303                                  cond_update_fn_t update)
1304 {
1305         struct cond_snapshot *cond_snapshot;
1306         int ret = 0;
1307
1308         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309         if (!cond_snapshot)
1310                 return -ENOMEM;
1311
1312         cond_snapshot->cond_data = cond_data;
1313         cond_snapshot->update = update;
1314
1315         mutex_lock(&trace_types_lock);
1316
1317         ret = tracing_alloc_snapshot_instance(tr);
1318         if (ret)
1319                 goto fail_unlock;
1320
1321         if (tr->current_trace->use_max_tr) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         /*
1327          * The cond_snapshot can only change to NULL without the
1328          * trace_types_lock. We don't care if we race with it going
1329          * to NULL, but we want to make sure that it's not set to
1330          * something other than NULL when we get here, which we can
1331          * do safely with only holding the trace_types_lock and not
1332          * having to take the max_lock.
1333          */
1334         if (tr->cond_snapshot) {
1335                 ret = -EBUSY;
1336                 goto fail_unlock;
1337         }
1338
1339         local_irq_disable();
1340         arch_spin_lock(&tr->max_lock);
1341         tr->cond_snapshot = cond_snapshot;
1342         arch_spin_unlock(&tr->max_lock);
1343         local_irq_enable();
1344
1345         mutex_unlock(&trace_types_lock);
1346
1347         return ret;
1348
1349  fail_unlock:
1350         mutex_unlock(&trace_types_lock);
1351         kfree(cond_snapshot);
1352         return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
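/*
 * Illustrative usage sketch (not part of the original file): a client of
 * the conditional snapshot API does roughly the following; my_update() and
 * my_data are hypothetical.
 *
 *   static bool my_update(struct trace_array *tr, void *cond_data)
 *   {
 *           ...return true if the snapshot should really be taken...
 *   }
 *
 *   tracing_snapshot_cond_enable(tr, my_data, my_update);
 *   ...
 *   tracing_snapshot_cond(tr, my_data);     (my_update() decides the swap)
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */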
1355
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:         The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368         int ret = 0;
1369
1370         local_irq_disable();
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381         local_irq_enable();
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff), that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races of where it gets disabled but we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * If the parser has not finished with the last write,
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
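
/*
 * A rough sketch of the typical caller pattern (this is roughly what the
 * ftrace filter ->write() handlers do): feed the user buffer in, and only
 * act on the token once a complete word has been parsed:
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser)) {
 *		err = handle_token(parser.buffer);
 *		trace_parser_clear(&parser);
 *	}
 *
 * trace_parser_loaded()/trace_parser_cont() are the helpers from
 * kernel/trace/trace.h; handle_token() is just a placeholder here.
 */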
1676
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696
1697 #ifdef LATENCY_FS_NOTIFY
1698
1699 static struct workqueue_struct *fsnotify_wq;
1700
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703         struct trace_array *tr = container_of(work, struct trace_array,
1704                                               fsnotify_work);
1705         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710         struct trace_array *tr = container_of(iwork, struct trace_array,
1711                                               fsnotify_irqwork);
1712         queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716                                      struct dentry *d_tracer)
1717 {
1718         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720         tr->d_max_latency = trace_create_file("tracing_max_latency",
1721                                               TRACE_MODE_WRITE,
1722                                               d_tracer, &tr->max_latency,
1723                                               &tracing_max_lat_fops);
1724 }
1725
1726 __init static int latency_fsnotify_init(void)
1727 {
1728         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1730         if (!fsnotify_wq) {
1731                 pr_err("Unable to allocate tr_max_lat_wq\n");
1732                 return -ENOMEM;
1733         }
1734         return 0;
1735 }
1736
1737 late_initcall_sync(latency_fsnotify_init);
1738
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741         if (!fsnotify_wq)
1742                 return;
1743         /*
1744          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745          * possible that we are called from __schedule() or do_idle(), which
1746          * could cause a deadlock.
1747          */
1748         irq_work_queue(&tr->fsnotify_irqwork);
1749 }
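
/*
 * Summary of the LATENCY_FS_NOTIFY path above: a new max latency is
 * recorded in contexts where neither fsnotify nor the workqueue can be
 * called directly (scheduler, idle), so the update is bounced through an
 * irq_work, which queues a regular work item, which finally issues the
 * FS_MODIFY notification on the tracing_max_latency file so that user
 * space fsnotify/inotify watchers see the change.
 */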
1750
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1752         || defined(CONFIG_OSNOISE_TRACER)
1753
1754 #define trace_create_maxlat_file(tr, d_tracer)                          \
1755         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1756                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1760 #endif
1761
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771         struct array_buffer *trace_buf = &tr->array_buffer;
1772         struct array_buffer *max_buf = &tr->max_buffer;
1773         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775
1776         max_buf->cpu = cpu;
1777         max_buf->time_start = data->preempt_timestamp;
1778
1779         max_data->saved_latency = tr->max_latency;
1780         max_data->critical_start = data->critical_start;
1781         max_data->critical_end = data->critical_end;
1782
1783         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784         max_data->pid = tsk->pid;
1785         /*
1786          * If tsk == current, then use current_uid(), as that does not use
1787          * RCU. The irq tracer can be called out of RCU scope.
1788          */
1789         if (tsk == current)
1790                 max_data->uid = current_uid();
1791         else
1792                 max_data->uid = task_uid(tsk);
1793
1794         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795         max_data->policy = tsk->policy;
1796         max_data->rt_priority = tsk->rt_priority;
1797
1798         /* record this task's comm */
1799         tracing_record_cmdline(tsk);
1800         latency_fsnotify(tr);
1801 }
1802
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815               void *cond_data)
1816 {
1817         if (tr->stop_count)
1818                 return;
1819
1820         WARN_ON_ONCE(!irqs_disabled());
1821
1822         if (!tr->allocated_snapshot) {
1823                 /* Only the nop tracer should hit this when disabling */
1824                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825                 return;
1826         }
1827
1828         arch_spin_lock(&tr->max_lock);
1829
1830         /* Inherit the recordable setting from array_buffer */
1831         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832                 ring_buffer_record_on(tr->max_buffer.buffer);
1833         else
1834                 ring_buffer_record_off(tr->max_buffer.buffer);
1835
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838                 goto out_unlock;
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844  out_unlock:
1845         arch_spin_unlock(&tr->max_lock);
1846 }
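
/*
 * Note the difference with update_max_tr_single() below: here the complete
 * array_buffer and max_buffer ring buffers are exchanged with a simple
 * pointer swap, whereas the single-CPU variant has to use
 * ring_buffer_swap_cpu() and cope with the swap failing because a commit
 * is in flight on that CPU.
 */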
1847
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859         int ret;
1860
1861         if (tr->stop_count)
1862                 return;
1863
1864         WARN_ON_ONCE(!irqs_disabled());
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874
1875         if (ret == -EBUSY) {
1876                 /*
1877                  * We failed to swap the buffer due to a commit taking
1878                  * place on this CPU. We fail to record, but we reset
1879                  * the max trace buffer (no one writes directly to it)
1880                  * and flag that it failed.
1881                  */
1882                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883                         "Failed to swap buffers due to commit in progress\n");
1884         }
1885
1886         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888         __update_max_tr(tr, tsk, cpu);
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static, they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftests yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017                  * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* disable other selftests, since running this tracer will break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
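
/*
 * A minimal sketch of how an in-tree tracer hooks in, following the
 * pattern used by the built-in tracers (the struct tracer callbacks
 * beyond .name and .init are elided; see kernel/trace/trace.h for the
 * full interface):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 *
 * my_tracer and my_tracer_init are placeholders, not existing symbols.
 */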
2146
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus_unlocked(void)
2184 {
2185         struct trace_array *tr;
2186
2187         lockdep_assert_held(&trace_types_lock);
2188
2189         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2190                 if (!tr->clear_trace)
2191                         continue;
2192                 tr->clear_trace = false;
2193                 tracing_reset_online_cpus(&tr->array_buffer);
2194 #ifdef CONFIG_TRACER_MAX_TRACE
2195                 tracing_reset_online_cpus(&tr->max_buffer);
2196 #endif
2197         }
2198 }
2199
2200 void tracing_reset_all_online_cpus(void)
2201 {
2202         mutex_lock(&trace_types_lock);
2203         tracing_reset_all_online_cpus_unlocked();
2204         mutex_unlock(&trace_types_lock);
2205 }
2206
2207 /*
2208  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2209  * is the tgid last observed corresponding to pid=i.
2210  */
2211 static int *tgid_map;
2212
2213 /* The maximum valid index into tgid_map. */
2214 static size_t tgid_map_max;
2215
2216 #define SAVED_CMDLINES_DEFAULT 128
2217 #define NO_CMDLINE_MAP UINT_MAX
2218 /*
2219  * Preemption must be disabled before acquiring trace_cmdline_lock.
2220  * The various trace_arrays' max_lock must be acquired in a context
2221  * where interrupts are disabled.
2222  */
2223 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2224 struct saved_cmdlines_buffer {
2225         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2226         unsigned *map_cmdline_to_pid;
2227         unsigned cmdline_num;
2228         int cmdline_idx;
2229         char *saved_cmdlines;
2230 };
2231 static struct saved_cmdlines_buffer *savedcmd;
2232
2233 static inline char *get_saved_cmdlines(int idx)
2234 {
2235         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2236 }
2237
2238 static inline void set_cmdline(int idx, const char *cmdline)
2239 {
2240         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2241 }
2242
2243 static int allocate_cmdlines_buffer(unsigned int val,
2244                                     struct saved_cmdlines_buffer *s)
2245 {
2246         s->map_cmdline_to_pid = kmalloc_array(val,
2247                                               sizeof(*s->map_cmdline_to_pid),
2248                                               GFP_KERNEL);
2249         if (!s->map_cmdline_to_pid)
2250                 return -ENOMEM;
2251
2252         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2253         if (!s->saved_cmdlines) {
2254                 kfree(s->map_cmdline_to_pid);
2255                 return -ENOMEM;
2256         }
2257
2258         s->cmdline_idx = 0;
2259         s->cmdline_num = val;
2260         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2261                sizeof(s->map_pid_to_cmdline));
2262         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2263                val * sizeof(*s->map_cmdline_to_pid));
2264
2265         return 0;
2266 }
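
/*
 * Note on the memset()s above: NO_CMDLINE_MAP is UINT_MAX, i.e. every byte
 * is 0xff, so filling the maps byte-wise with that value leaves each
 * unsigned entry equal to NO_CMDLINE_MAP.
 */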
2267
2268 static int trace_create_savedcmd(void)
2269 {
2270         int ret;
2271
2272         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2273         if (!savedcmd)
2274                 return -ENOMEM;
2275
2276         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2277         if (ret < 0) {
2278                 kfree(savedcmd);
2279                 savedcmd = NULL;
2280                 return -ENOMEM;
2281         }
2282
2283         return 0;
2284 }
2285
2286 int is_tracing_stopped(void)
2287 {
2288         return global_trace.stop_count;
2289 }
2290
2291 /**
2292  * tracing_start - quick start of the tracer
2293  *
2294  * If tracing is enabled but was stopped by tracing_stop,
2295  * this will start the tracer back up.
2296  */
2297 void tracing_start(void)
2298 {
2299         struct trace_buffer *buffer;
2300         unsigned long flags;
2301
2302         if (tracing_disabled)
2303                 return;
2304
2305         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2306         if (--global_trace.stop_count) {
2307                 if (global_trace.stop_count < 0) {
2308                         /* Someone screwed up their debugging */
2309                         WARN_ON_ONCE(1);
2310                         global_trace.stop_count = 0;
2311                 }
2312                 goto out;
2313         }
2314
2315         /* Prevent the buffers from switching */
2316         arch_spin_lock(&global_trace.max_lock);
2317
2318         buffer = global_trace.array_buffer.buffer;
2319         if (buffer)
2320                 ring_buffer_record_enable(buffer);
2321
2322 #ifdef CONFIG_TRACER_MAX_TRACE
2323         buffer = global_trace.max_buffer.buffer;
2324         if (buffer)
2325                 ring_buffer_record_enable(buffer);
2326 #endif
2327
2328         arch_spin_unlock(&global_trace.max_lock);
2329
2330  out:
2331         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2332 }
2333
2334 static void tracing_start_tr(struct trace_array *tr)
2335 {
2336         struct trace_buffer *buffer;
2337         unsigned long flags;
2338
2339         if (tracing_disabled)
2340                 return;
2341
2342         /* If global, we need to also start the max tracer */
2343         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2344                 return tracing_start();
2345
2346         raw_spin_lock_irqsave(&tr->start_lock, flags);
2347
2348         if (--tr->stop_count) {
2349                 if (tr->stop_count < 0) {
2350                         /* Someone screwed up their debugging */
2351                         WARN_ON_ONCE(1);
2352                         tr->stop_count = 0;
2353                 }
2354                 goto out;
2355         }
2356
2357         buffer = tr->array_buffer.buffer;
2358         if (buffer)
2359                 ring_buffer_record_enable(buffer);
2360
2361  out:
2362         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2363 }
2364
2365 /**
2366  * tracing_stop - quick stop of the tracer
2367  *
2368  * Light weight way to stop tracing. Use in conjunction with
2369  * tracing_start.
2370  */
2371 void tracing_stop(void)
2372 {
2373         struct trace_buffer *buffer;
2374         unsigned long flags;
2375
2376         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2377         if (global_trace.stop_count++)
2378                 goto out;
2379
2380         /* Prevent the buffers from switching */
2381         arch_spin_lock(&global_trace.max_lock);
2382
2383         buffer = global_trace.array_buffer.buffer;
2384         if (buffer)
2385                 ring_buffer_record_disable(buffer);
2386
2387 #ifdef CONFIG_TRACER_MAX_TRACE
2388         buffer = global_trace.max_buffer.buffer;
2389         if (buffer)
2390                 ring_buffer_record_disable(buffer);
2391 #endif
2392
2393         arch_spin_unlock(&global_trace.max_lock);
2394
2395  out:
2396         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2397 }
2398
2399 static void tracing_stop_tr(struct trace_array *tr)
2400 {
2401         struct trace_buffer *buffer;
2402         unsigned long flags;
2403
2404         /* If global, we need to also stop the max tracer */
2405         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2406                 return tracing_stop();
2407
2408         raw_spin_lock_irqsave(&tr->start_lock, flags);
2409         if (tr->stop_count++)
2410                 goto out;
2411
2412         buffer = tr->array_buffer.buffer;
2413         if (buffer)
2414                 ring_buffer_record_disable(buffer);
2415
2416  out:
2417         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2418 }
2419
2420 static int trace_save_cmdline(struct task_struct *tsk)
2421 {
2422         unsigned tpid, idx;
2423
2424         /* treat recording of idle task as a success */
2425         if (!tsk->pid)
2426                 return 1;
2427
2428         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2429
2430         /*
2431          * It's not the end of the world if we don't get
2432          * the lock, but we also don't want to spin
2433          * nor do we want to disable interrupts,
2434          * so if we miss here, then better luck next time.
2435          *
2436          * This is called from within the scheduler and the wakeup path, so
2437          * interrupts had better be disabled and the run queue lock held.
2438          */
2439         lockdep_assert_preemption_disabled();
2440         if (!arch_spin_trylock(&trace_cmdline_lock))
2441                 return 0;
2442
2443         idx = savedcmd->map_pid_to_cmdline[tpid];
2444         if (idx == NO_CMDLINE_MAP) {
2445                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2446
2447                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2448                 savedcmd->cmdline_idx = idx;
2449         }
2450
2451         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2452         set_cmdline(idx, tsk->comm);
2453
2454         arch_spin_unlock(&trace_cmdline_lock);
2455
2456         return 1;
2457 }
2458
2459 static void __trace_find_cmdline(int pid, char comm[])
2460 {
2461         unsigned map;
2462         int tpid;
2463
2464         if (!pid) {
2465                 strcpy(comm, "<idle>");
2466                 return;
2467         }
2468
2469         if (WARN_ON_ONCE(pid < 0)) {
2470                 strcpy(comm, "<XXX>");
2471                 return;
2472         }
2473
2474         tpid = pid & (PID_MAX_DEFAULT - 1);
2475         map = savedcmd->map_pid_to_cmdline[tpid];
2476         if (map != NO_CMDLINE_MAP) {
2477                 tpid = savedcmd->map_cmdline_to_pid[map];
2478                 if (tpid == pid) {
2479                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2480                         return;
2481                 }
2482         }
2483         strcpy(comm, "<...>");
2484 }
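
/*
 * Because the map is indexed by pid & (PID_MAX_DEFAULT - 1), two PIDs can
 * hash to the same slot; the map_cmdline_to_pid[] check above is what
 * detects a stale slot and makes the lookup fall back to "<...>" instead
 * of reporting another task's comm.
 */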
2485
2486 void trace_find_cmdline(int pid, char comm[])
2487 {
2488         preempt_disable();
2489         arch_spin_lock(&trace_cmdline_lock);
2490
2491         __trace_find_cmdline(pid, comm);
2492
2493         arch_spin_unlock(&trace_cmdline_lock);
2494         preempt_enable();
2495 }
2496
2497 static int *trace_find_tgid_ptr(int pid)
2498 {
2499         /*
2500          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2501          * if we observe a non-NULL tgid_map then we also observe the correct
2502          * tgid_map_max.
2503          */
2504         int *map = smp_load_acquire(&tgid_map);
2505
2506         if (unlikely(!map || pid > tgid_map_max))
2507                 return NULL;
2508
2509         return &map[pid];
2510 }
2511
2512 int trace_find_tgid(int pid)
2513 {
2514         int *ptr = trace_find_tgid_ptr(pid);
2515
2516         return ptr ? *ptr : 0;
2517 }
2518
2519 static int trace_save_tgid(struct task_struct *tsk)
2520 {
2521         int *ptr;
2522
2523         /* treat recording of idle task as a success */
2524         if (!tsk->pid)
2525                 return 1;
2526
2527         ptr = trace_find_tgid_ptr(tsk->pid);
2528         if (!ptr)
2529                 return 0;
2530
2531         *ptr = tsk->tgid;
2532         return 1;
2533 }
2534
2535 static bool tracing_record_taskinfo_skip(int flags)
2536 {
2537         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2538                 return true;
2539         if (!__this_cpu_read(trace_taskinfo_save))
2540                 return true;
2541         return false;
2542 }
2543
2544 /**
2545  * tracing_record_taskinfo - record the task info of a task
2546  *
2547  * @task:  task to record
2548  * @flags: TRACE_RECORD_CMDLINE for recording comm
2549  *         TRACE_RECORD_TGID for recording tgid
2550  */
2551 void tracing_record_taskinfo(struct task_struct *task, int flags)
2552 {
2553         bool done;
2554
2555         if (tracing_record_taskinfo_skip(flags))
2556                 return;
2557
2558         /*
2559          * Record as much task information as possible. If some fail, continue
2560          * to try to record the others.
2561          */
2562         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2563         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2564
2565         /* If recording any information failed, retry again soon. */
2566         if (!done)
2567                 return;
2568
2569         __this_cpu_write(trace_taskinfo_save, false);
2570 }
2571
2572 /**
2573  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2574  *
2575  * @prev: previous task during sched_switch
2576  * @next: next task during sched_switch
2577  * @flags: TRACE_RECORD_CMDLINE for recording comm
2578  *         TRACE_RECORD_TGID for recording tgid
2579  */
2580 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2581                                           struct task_struct *next, int flags)
2582 {
2583         bool done;
2584
2585         if (tracing_record_taskinfo_skip(flags))
2586                 return;
2587
2588         /*
2589          * Record as much task information as possible. If some fail, continue
2590          * to try to record the others.
2591          */
2592         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2593         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2594         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2595         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2596
2597         /* If recording any information failed, retry again soon. */
2598         if (!done)
2599                 return;
2600
2601         __this_cpu_write(trace_taskinfo_save, false);
2602 }
2603
2604 /* Helpers to record a specific task information */
2605 void tracing_record_cmdline(struct task_struct *task)
2606 {
2607         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2608 }
2609
2610 void tracing_record_tgid(struct task_struct *task)
2611 {
2612         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2613 }
2614
2615 /*
2616  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2617  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2618  * simplifies those functions and keeps them in sync.
2619  */
2620 enum print_line_t trace_handle_return(struct trace_seq *s)
2621 {
2622         return trace_seq_has_overflowed(s) ?
2623                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2624 }
2625 EXPORT_SYMBOL_GPL(trace_handle_return);
2626
2627 static unsigned short migration_disable_value(void)
2628 {
2629 #if defined(CONFIG_SMP)
2630         return current->migration_disabled;
2631 #else
2632         return 0;
2633 #endif
2634 }
2635
2636 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2637 {
2638         unsigned int trace_flags = irqs_status;
2639         unsigned int pc;
2640
2641         pc = preempt_count();
2642
2643         if (pc & NMI_MASK)
2644                 trace_flags |= TRACE_FLAG_NMI;
2645         if (pc & HARDIRQ_MASK)
2646                 trace_flags |= TRACE_FLAG_HARDIRQ;
2647         if (in_serving_softirq())
2648                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2649         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2650                 trace_flags |= TRACE_FLAG_BH_OFF;
2651
2652         if (tif_need_resched())
2653                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2654         if (test_preempt_need_resched())
2655                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2656         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2657                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2658 }
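
/*
 * Layout of the trace_ctx word packed above:
 *	bits  0..3	preemption depth (clamped to 15)
 *	bits  4..7	migration-disable depth (clamped to 15)
 *	bits 16..	the TRACE_FLAG_* bits derived from irqs_status and
 *			the current preempt_count()
 */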
2659
2660 struct ring_buffer_event *
2661 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2662                           int type,
2663                           unsigned long len,
2664                           unsigned int trace_ctx)
2665 {
2666         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2667 }
2668
2669 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2670 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2671 static int trace_buffered_event_ref;
2672
2673 /**
2674  * trace_buffered_event_enable - enable buffering events
2675  *
2676  * When events are being filtered, it is quicker to use a temporary
2677  * buffer to write the event data into if there's a likely chance
2678  * that it will not be committed. Discarding an event from the ring
2679  * buffer is not as fast as committing one, and is much slower than
2680  * copying the data into the ring buffer and committing it.
2681  *
2682  * When an event is to be filtered, allocate per cpu buffers to
2683  * write the event data into, and if the event is filtered and discarded
2684  * it is simply dropped, otherwise, the entire data is to be committed
2685  * in one shot.
2686  */
2687 void trace_buffered_event_enable(void)
2688 {
2689         struct ring_buffer_event *event;
2690         struct page *page;
2691         int cpu;
2692
2693         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2694
2695         if (trace_buffered_event_ref++)
2696                 return;
2697
2698         for_each_tracing_cpu(cpu) {
2699                 page = alloc_pages_node(cpu_to_node(cpu),
2700                                         GFP_KERNEL | __GFP_NORETRY, 0);
2701                 if (!page)
2702                         goto failed;
2703
2704                 event = page_address(page);
2705                 memset(event, 0, sizeof(*event));
2706
2707                 per_cpu(trace_buffered_event, cpu) = event;
2708
2709                 preempt_disable();
2710                 if (cpu == smp_processor_id() &&
2711                     __this_cpu_read(trace_buffered_event) !=
2712                     per_cpu(trace_buffered_event, cpu))
2713                         WARN_ON_ONCE(1);
2714                 preempt_enable();
2715         }
2716
2717         return;
2718  failed:
2719         trace_buffered_event_disable();
2720 }
2721
2722 static void enable_trace_buffered_event(void *data)
2723 {
2724         /* Probably not needed, but do it anyway */
2725         smp_rmb();
2726         this_cpu_dec(trace_buffered_event_cnt);
2727 }
2728
2729 static void disable_trace_buffered_event(void *data)
2730 {
2731         this_cpu_inc(trace_buffered_event_cnt);
2732 }
2733
2734 /**
2735  * trace_buffered_event_disable - disable buffering events
2736  *
2737  * When a filter is removed, it is faster to not use the buffered
2738  * events, and to commit directly into the ring buffer. Free up
2739  * the temp buffers when there are no more users. This requires
2740  * special synchronization with current events.
2741  */
2742 void trace_buffered_event_disable(void)
2743 {
2744         int cpu;
2745
2746         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2747
2748         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2749                 return;
2750
2751         if (--trace_buffered_event_ref)
2752                 return;
2753
2754         preempt_disable();
2755         /* For each CPU, set the buffer as used. */
2756         smp_call_function_many(tracing_buffer_mask,
2757                                disable_trace_buffered_event, NULL, 1);
2758         preempt_enable();
2759
2760         /* Wait for all current users to finish */
2761         synchronize_rcu();
2762
2763         for_each_tracing_cpu(cpu) {
2764                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2765                 per_cpu(trace_buffered_event, cpu) = NULL;
2766         }
2767         /*
2768          * Make sure trace_buffered_event is NULL before clearing
2769          * trace_buffered_event_cnt.
2770          */
2771         smp_wmb();
2772
2773         preempt_disable();
2774         /* Do the work on each cpu */
2775         smp_call_function_many(tracing_buffer_mask,
2776                                enable_trace_buffered_event, NULL, 1);
2777         preempt_enable();
2778 }
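
/*
 * The per-cpu trace_buffered_event_cnt acts as a "buffer busy" marker: the
 * reserve path only uses the per-cpu event when its increment is the first
 * one (val == 1).  Bumping the count on every CPU above therefore blocks
 * new users, synchronize_rcu() lets current users drain, and only then are
 * the pages freed and the counts dropped back down.
 */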
2779
2780 static struct trace_buffer *temp_buffer;
2781
2782 struct ring_buffer_event *
2783 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2784                           struct trace_event_file *trace_file,
2785                           int type, unsigned long len,
2786                           unsigned int trace_ctx)
2787 {
2788         struct ring_buffer_event *entry;
2789         struct trace_array *tr = trace_file->tr;
2790         int val;
2791
2792         *current_rb = tr->array_buffer.buffer;
2793
2794         if (!tr->no_filter_buffering_ref &&
2795             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2796                 preempt_disable_notrace();
2797                 /*
2798                  * Filtering is on, so try to use the per cpu buffer first.
2799                  * This buffer will simulate a ring_buffer_event,
2800                  * where the type_len is zero and the array[0] will
2801                  * hold the full length.
2802                  * (see include/linux/ring_buffer.h for details on
2803                  *  how the ring_buffer_event is structured).
2804                  *
2805                  * Using a temp buffer during filtering and copying it
2806                  * on a matched filter is quicker than writing directly
2807                  * into the ring buffer and then discarding it when
2808                  * it doesn't match. That is because the discard
2809                  * requires several atomic operations to get right.
2810                  * Copying on match and doing nothing on a failed match
2811                  * is still quicker than no copy on match, but having
2812                  * to discard out of the ring buffer on a failed match.
2813                  */
2814                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2815                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2816
2817                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2818
2819                         /*
2820                          * Preemption is disabled, but interrupts and NMIs
2821                          * can still come in now. If that happens after
2822                          * the above increment, then it will have to go
2823                          * back to the old method of allocating the event
2824                          * on the ring buffer, and if the filter fails, it
2825                          * will have to call ring_buffer_discard_commit()
2826                          * to remove it.
2827                          *
2828                          * Need to also check the unlikely case that the
2829                          * length is bigger than the temp buffer size.
2830                          * If that happens, then the reserve is pretty much
2831                          * guaranteed to fail, as the ring buffer currently
2832                          * only allows events less than a page. But that may
2833                          * change in the future, so let the ring buffer reserve
2834                          * handle the failure in that case.
2835                          */
2836                         if (val == 1 && likely(len <= max_len)) {
2837                                 trace_event_setup(entry, type, trace_ctx);
2838                                 entry->array[0] = len;
2839                                 /* Return with preemption disabled */
2840                                 return entry;
2841                         }
2842                         this_cpu_dec(trace_buffered_event_cnt);
2843                 }
2844                 /* __trace_buffer_lock_reserve() disables preemption */
2845                 preempt_enable_notrace();
2846         }
2847
2848         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2849                                             trace_ctx);
2850         /*
2851          * If tracing is off, but we have triggers enabled
2852          * we still need to look at the event data. Use the temp_buffer
2853          * to store the trace event for the trigger to use. It's recursion
2854          * safe and will not be recorded anywhere.
2855          */
2856         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2857                 *current_rb = temp_buffer;
2858                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2859                                                     trace_ctx);
2860         }
2861         return entry;
2862 }
2863 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2864
2865 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2866 static DEFINE_MUTEX(tracepoint_printk_mutex);
2867
2868 static void output_printk(struct trace_event_buffer *fbuffer)
2869 {
2870         struct trace_event_call *event_call;
2871         struct trace_event_file *file;
2872         struct trace_event *event;
2873         unsigned long flags;
2874         struct trace_iterator *iter = tracepoint_print_iter;
2875
2876         /* We should never get here if iter is NULL */
2877         if (WARN_ON_ONCE(!iter))
2878                 return;
2879
2880         event_call = fbuffer->trace_file->event_call;
2881         if (!event_call || !event_call->event.funcs ||
2882             !event_call->event.funcs->trace)
2883                 return;
2884
2885         file = fbuffer->trace_file;
2886         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2887             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2888              !filter_match_preds(file->filter, fbuffer->entry)))
2889                 return;
2890
2891         event = &fbuffer->trace_file->event_call->event;
2892
2893         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2894         trace_seq_init(&iter->seq);
2895         iter->ent = fbuffer->entry;
2896         event_call->event.funcs->trace(iter, 0, event);
2897         trace_seq_putc(&iter->seq, 0);
2898         printk("%s", iter->seq.buffer);
2899
2900         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2901 }
2902
2903 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2904                              void *buffer, size_t *lenp,
2905                              loff_t *ppos)
2906 {
2907         int save_tracepoint_printk;
2908         int ret;
2909
2910         mutex_lock(&tracepoint_printk_mutex);
2911         save_tracepoint_printk = tracepoint_printk;
2912
2913         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2914
2915         /*
2916          * This will force exiting early, as tracepoint_printk
2917          * is always zero when tracepoint_print_iter is not allocated.
2918          */
2919         if (!tracepoint_print_iter)
2920                 tracepoint_printk = 0;
2921
2922         if (save_tracepoint_printk == tracepoint_printk)
2923                 goto out;
2924
2925         if (tracepoint_printk)
2926                 static_key_enable(&tracepoint_printk_key.key);
2927         else
2928                 static_key_disable(&tracepoint_printk_key.key);
2929
2930  out:
2931         mutex_unlock(&tracepoint_printk_mutex);
2932
2933         return ret;
2934 }
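
/*
 * This handler backs the kernel.tracepoint_printk sysctl; flipping it at
 * run time toggles the static key that routes trace events through
 * output_printk() above.
 */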
2935
2936 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2937 {
2938         enum event_trigger_type tt = ETT_NONE;
2939         struct trace_event_file *file = fbuffer->trace_file;
2940
2941         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2942                         fbuffer->entry, &tt))
2943                 goto discard;
2944
2945         if (static_key_false(&tracepoint_printk_key.key))
2946                 output_printk(fbuffer);
2947
2948         if (static_branch_unlikely(&trace_event_exports_enabled))
2949                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2950
2951         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2952                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2953
2954 discard:
2955         if (tt)
2956                 event_triggers_post_call(file, tt);
2957
2958 }
2959 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2960
2961 /*
2962  * Skip 3:
2963  *
2964  *   trace_buffer_unlock_commit_regs()
2965  *   trace_event_buffer_commit()
2966  *   trace_event_raw_event_xxx()
2967  */
2968 # define STACK_SKIP 3
2969
2970 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2971                                      struct trace_buffer *buffer,
2972                                      struct ring_buffer_event *event,
2973                                      unsigned int trace_ctx,
2974                                      struct pt_regs *regs)
2975 {
2976         __buffer_unlock_commit(buffer, event);
2977
2978         /*
2979          * If regs is not set, then skip the necessary functions.
2980          * Note, we can still get here via blktrace, wakeup tracer
2981          * and mmiotrace, but that's ok if they lose a function or
2982          * two. They are not that meaningful.
2983          */
2984         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2985         ftrace_trace_userstack(tr, buffer, trace_ctx);
2986 }
2987
2988 /*
2989  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2990  */
2991 void
2992 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2993                                    struct ring_buffer_event *event)
2994 {
2995         __buffer_unlock_commit(buffer, event);
2996 }
2997
2998 void
2999 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3000                parent_ip, unsigned int trace_ctx)
3001 {
3002         struct trace_event_call *call = &event_function;
3003         struct trace_buffer *buffer = tr->array_buffer.buffer;
3004         struct ring_buffer_event *event;
3005         struct ftrace_entry *entry;
3006
3007         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3008                                             trace_ctx);
3009         if (!event)
3010                 return;
3011         entry   = ring_buffer_event_data(event);
3012         entry->ip                       = ip;
3013         entry->parent_ip                = parent_ip;
3014
3015         if (!call_filter_check_discard(call, entry, buffer, event)) {
3016                 if (static_branch_unlikely(&trace_function_exports_enabled))
3017                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3018                 __buffer_unlock_commit(buffer, event);
3019         }
3020 }
3021
3022 #ifdef CONFIG_STACKTRACE
3023
3024 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3025 #define FTRACE_KSTACK_NESTING   4
3026
3027 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3028
3029 struct ftrace_stack {
3030         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3031 };
3032
3033
3034 struct ftrace_stacks {
3035         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3036 };
3037
3038 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3039 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3040
3041 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3042                                  unsigned int trace_ctx,
3043                                  int skip, struct pt_regs *regs)
3044 {
3045         struct trace_event_call *call = &event_kernel_stack;
3046         struct ring_buffer_event *event;
3047         unsigned int size, nr_entries;
3048         struct ftrace_stack *fstack;
3049         struct stack_entry *entry;
3050         int stackidx;
3051
3052         /*
3053          * Add one, for this function and the call to stack_trace_save().
3054          * If regs is set, then these functions will not be in the way.
3055          */
3056 #ifndef CONFIG_UNWINDER_ORC
3057         if (!regs)
3058                 skip++;
3059 #endif
3060
3061         preempt_disable_notrace();
3062
3063         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3064
3065         /* This should never happen. If it does, yell once and skip */
3066         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3067                 goto out;
3068
3069         /*
3070          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3071          * interrupt will either see the value pre increment or post
3072          * increment. If the interrupt happens pre increment it will have
3073          * restored the counter when it returns.  We just need a barrier to
3074          * keep gcc from moving things around.
3075          */
3076         barrier();
3077
3078         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3079         size = ARRAY_SIZE(fstack->calls);
3080
3081         if (regs) {
3082                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3083                                                    size, skip);
3084         } else {
3085                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3086         }
3087
3088         size = nr_entries * sizeof(unsigned long);
3089         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3090                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3091                                     trace_ctx);
3092         if (!event)
3093                 goto out;
3094         entry = ring_buffer_event_data(event);
3095
3096         memcpy(&entry->caller, fstack->calls, size);
3097         entry->size = nr_entries;
3098
3099         if (!call_filter_check_discard(call, entry, buffer, event))
3100                 __buffer_unlock_commit(buffer, event);
3101
3102  out:
3103         /* Again, don't let gcc optimize things here */
3104         barrier();
3105         __this_cpu_dec(ftrace_stack_reserve);
3106         preempt_enable_notrace();
3107
3108 }
3109
3110 static inline void ftrace_trace_stack(struct trace_array *tr,
3111                                       struct trace_buffer *buffer,
3112                                       unsigned int trace_ctx,
3113                                       int skip, struct pt_regs *regs)
3114 {
3115         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3116                 return;
3117
3118         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3119 }
3120
3121 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3122                    int skip)
3123 {
3124         struct trace_buffer *buffer = tr->array_buffer.buffer;
3125
3126         if (rcu_is_watching()) {
3127                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3128                 return;
3129         }
3130
3131         /*
3132          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3133          * but if the above rcu_is_watching() failed, then the NMI
3134          * triggered someplace critical, and ct_irq_enter() should
3135          * not be called from NMI.
3136          */
3137         if (unlikely(in_nmi()))
3138                 return;
3139
3140         ct_irq_enter_irqson();
3141         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3142         ct_irq_exit_irqson();
3143 }
3144
3145 /**
3146  * trace_dump_stack - record a stack back trace in the trace buffer
3147  * @skip: Number of functions to skip (helper handlers)
3148  */
3149 void trace_dump_stack(int skip)
3150 {
3151         if (tracing_disabled || tracing_selftest_running)
3152                 return;
3153
3154 #ifndef CONFIG_UNWINDER_ORC
3155         /* Skip 1 to skip this function. */
3156         skip++;
3157 #endif
3158         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3159                              tracing_gen_ctx(), skip, NULL);
3160 }
3161 EXPORT_SYMBOL_GPL(trace_dump_stack);
3162
3163 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3164 static DEFINE_PER_CPU(int, user_stack_count);
3165
3166 static void
3167 ftrace_trace_userstack(struct trace_array *tr,
3168                        struct trace_buffer *buffer, unsigned int trace_ctx)
3169 {
3170         struct trace_event_call *call = &event_user_stack;
3171         struct ring_buffer_event *event;
3172         struct userstack_entry *entry;
3173
3174         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3175                 return;
3176
3177         /*
3178          * NMIs can not handle page faults, even with fixups.
3179          * Saving the user stack can (and often does) fault.
3180          */
3181         if (unlikely(in_nmi()))
3182                 return;
3183
3184         /*
3185          * Prevent recursion, since the user stack tracing may
3186          * trigger other kernel events.
3187          */
3188         preempt_disable();
3189         if (__this_cpu_read(user_stack_count))
3190                 goto out;
3191
3192         __this_cpu_inc(user_stack_count);
3193
3194         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3195                                             sizeof(*entry), trace_ctx);
3196         if (!event)
3197                 goto out_drop_count;
3198         entry   = ring_buffer_event_data(event);
3199
3200         entry->tgid             = current->tgid;
3201         memset(&entry->caller, 0, sizeof(entry->caller));
3202
3203         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3204         if (!call_filter_check_discard(call, entry, buffer, event))
3205                 __buffer_unlock_commit(buffer, event);
3206
3207  out_drop_count:
3208         __this_cpu_dec(user_stack_count);
3209  out:
3210         preempt_enable();
3211 }
3212 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3213 static void ftrace_trace_userstack(struct trace_array *tr,
3214                                    struct trace_buffer *buffer,
3215                                    unsigned int trace_ctx)
3216 {
3217 }
3218 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3219
3220 #endif /* CONFIG_STACKTRACE */
3221
3222 static inline void
3223 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3224                           unsigned long long delta)
3225 {
3226         entry->bottom_delta_ts = delta & U32_MAX;
3227         entry->top_delta_ts = (delta >> 32);
3228 }
3229
3230 void trace_last_func_repeats(struct trace_array *tr,
3231                              struct trace_func_repeats *last_info,
3232                              unsigned int trace_ctx)
3233 {
3234         struct trace_buffer *buffer = tr->array_buffer.buffer;
3235         struct func_repeats_entry *entry;
3236         struct ring_buffer_event *event;
3237         u64 delta;
3238
3239         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3240                                             sizeof(*entry), trace_ctx);
3241         if (!event)
3242                 return;
3243
3244         delta = ring_buffer_event_time_stamp(buffer, event) -
3245                 last_info->ts_last_call;
3246
3247         entry = ring_buffer_event_data(event);
3248         entry->ip = last_info->ip;
3249         entry->parent_ip = last_info->parent_ip;
3250         entry->count = last_info->count;
3251         func_repeats_set_delta_ts(entry, delta);
3252
3253         __buffer_unlock_commit(buffer, event);
3254 }
3255
3256 /* created for use with alloc_percpu */
3257 struct trace_buffer_struct {
3258         int nesting;
3259         char buffer[4][TRACE_BUF_SIZE];
3260 };
3261
3262 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3263
3264 /*
 * This allows for lockless recording.  The four per-CPU buffers correspond
 * to the four context levels a CPU can nest through (normal, softirq, irq
 * and NMI).  If we're nested deeper than that, this returns NULL.
3267  */
3268 static char *get_trace_buf(void)
3269 {
3270         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3271
3272         if (!trace_percpu_buffer || buffer->nesting >= 4)
3273                 return NULL;
3274
3275         buffer->nesting++;
3276
3277         /* Interrupts must see nesting incremented before we use the buffer */
3278         barrier();
3279         return &buffer->buffer[buffer->nesting - 1][0];
3280 }
3281
3282 static void put_trace_buf(void)
3283 {
3284         /* Don't let the decrement of nesting leak before this */
3285         barrier();
3286         this_cpu_dec(trace_percpu_buffer->nesting);
3287 }
3288
3289 static int alloc_percpu_trace_buffer(void)
3290 {
3291         struct trace_buffer_struct __percpu *buffers;
3292
3293         if (trace_percpu_buffer)
3294                 return 0;
3295
3296         buffers = alloc_percpu(struct trace_buffer_struct);
3297         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3298                 return -ENOMEM;
3299
3300         trace_percpu_buffer = buffers;
3301         return 0;
3302 }
3303
3304 static int buffers_allocated;
3305
3306 void trace_printk_init_buffers(void)
3307 {
3308         if (buffers_allocated)
3309                 return;
3310
3311         if (alloc_percpu_trace_buffer())
3312                 return;
3313
3314         /* trace_printk() is for debug use only. Don't use it in production. */
3315
3316         pr_warn("\n");
3317         pr_warn("**********************************************************\n");
3318         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3319         pr_warn("**                                                      **\n");
3320         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3321         pr_warn("**                                                      **\n");
3322         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3323         pr_warn("** unsafe for production use.                           **\n");
3324         pr_warn("**                                                      **\n");
3325         pr_warn("** If you see this message and you are not debugging    **\n");
3326         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3327         pr_warn("**                                                      **\n");
3328         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3329         pr_warn("**********************************************************\n");
3330
3331         /* Expand the buffers to set size */
3332         tracing_update_buffers();
3333
3334         buffers_allocated = 1;
3335
3336         /*
3337          * trace_printk_init_buffers() can be called by modules.
3338          * If that happens, then we need to start cmdline recording
3339          * directly here. If the global_trace.buffer is already
3340          * allocated here, then this was called by module code.
3341          */
3342         if (global_trace.array_buffer.buffer)
3343                 tracing_start_cmdline_record();
3344 }
3345 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3346
3347 void trace_printk_start_comm(void)
3348 {
3349         /* Start tracing comms if trace printk is set */
3350         if (!buffers_allocated)
3351                 return;
3352         tracing_start_cmdline_record();
3353 }
3354
3355 static void trace_printk_start_stop_comm(int enabled)
3356 {
3357         if (!buffers_allocated)
3358                 return;
3359
3360         if (enabled)
3361                 tracing_start_cmdline_record();
3362         else
3363                 tracing_stop_cmdline_record();
3364 }
3365
3366 /**
3367  * trace_vbprintk - write binary msg to tracing buffer
3368  * @ip:    The address of the caller
3369  * @fmt:   The string format to write to the buffer
3370  * @args:  Arguments for @fmt
3371  */
3372 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3373 {
3374         struct trace_event_call *call = &event_bprint;
3375         struct ring_buffer_event *event;
3376         struct trace_buffer *buffer;
3377         struct trace_array *tr = &global_trace;
3378         struct bprint_entry *entry;
3379         unsigned int trace_ctx;
3380         char *tbuffer;
3381         int len = 0, size;
3382
3383         if (unlikely(tracing_selftest_running || tracing_disabled))
3384                 return 0;
3385
3386         /* Don't pollute graph traces with trace_vprintk internals */
3387         pause_graph_tracing();
3388
3389         trace_ctx = tracing_gen_ctx();
3390         preempt_disable_notrace();
3391
3392         tbuffer = get_trace_buf();
3393         if (!tbuffer) {
3394                 len = 0;
3395                 goto out_nobuffer;
3396         }
3397
3398         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3399
3400         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3401                 goto out_put;
3402
3403         size = sizeof(*entry) + sizeof(u32) * len;
3404         buffer = tr->array_buffer.buffer;
3405         ring_buffer_nest_start(buffer);
3406         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3407                                             trace_ctx);
3408         if (!event)
3409                 goto out;
3410         entry = ring_buffer_event_data(event);
3411         entry->ip                       = ip;
3412         entry->fmt                      = fmt;
3413
3414         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3415         if (!call_filter_check_discard(call, entry, buffer, event)) {
3416                 __buffer_unlock_commit(buffer, event);
3417                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3418         }
3419
3420 out:
3421         ring_buffer_nest_end(buffer);
3422 out_put:
3423         put_trace_buf();
3424
3425 out_nobuffer:
3426         preempt_enable_notrace();
3427         unpause_graph_tracing();
3428
3429         return len;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_vbprintk);
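
/*
 * Note: callers normally do not invoke trace_vbprintk() directly. A rough,
 * simplified sketch of how it is reached from the trace_printk() macro:
 *
 *	trace_printk(fmt, ...)
 *		-> __trace_bprintk(_THIS_IP_, fmt, ...)   // constant fmt
 *			-> trace_vbprintk(ip, fmt, args)
 *		-> __trace_printk(_THIS_IP_, fmt, ...)    // non-constant fmt
 *			-> trace_vprintk(ip, fmt, args)
 */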
3432
3433 __printf(3, 0)
3434 static int
3435 __trace_array_vprintk(struct trace_buffer *buffer,
3436                       unsigned long ip, const char *fmt, va_list args)
3437 {
3438         struct trace_event_call *call = &event_print;
3439         struct ring_buffer_event *event;
3440         int len = 0, size;
3441         struct print_entry *entry;
3442         unsigned int trace_ctx;
3443         char *tbuffer;
3444
3445         if (tracing_disabled || tracing_selftest_running)
3446                 return 0;
3447
3448         /* Don't pollute graph traces with trace_vprintk internals */
3449         pause_graph_tracing();
3450
3451         trace_ctx = tracing_gen_ctx();
        preempt_disable_notrace();

3455         tbuffer = get_trace_buf();
3456         if (!tbuffer) {
3457                 len = 0;
3458                 goto out_nobuffer;
3459         }
3460
3461         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3462
3463         size = sizeof(*entry) + len + 1;
3464         ring_buffer_nest_start(buffer);
3465         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3466                                             trace_ctx);
3467         if (!event)
3468                 goto out;
3469         entry = ring_buffer_event_data(event);
3470         entry->ip = ip;
3471
3472         memcpy(&entry->buf, tbuffer, len + 1);
3473         if (!call_filter_check_discard(call, entry, buffer, event)) {
3474                 __buffer_unlock_commit(buffer, event);
3475                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3476         }
3477
3478 out:
3479         ring_buffer_nest_end(buffer);
3480         put_trace_buf();
3481
3482 out_nobuffer:
3483         preempt_enable_notrace();
3484         unpause_graph_tracing();
3485
3486         return len;
3487 }
3488
3489 __printf(3, 0)
3490 int trace_array_vprintk(struct trace_array *tr,
3491                         unsigned long ip, const char *fmt, va_list args)
3492 {
3493         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3494 }
3495
3496 /**
3497  * trace_array_printk - Print a message to a specific instance
3498  * @tr: The instance trace_array descriptor
3499  * @ip: The instruction pointer that this is called from.
3500  * @fmt: The format to print (printf format)
3501  *
 * If a subsystem sets up its own instance, it may printk strings into its
 * tracing instance buffer using this function. Note, this function will
 * not write into the top level buffer (use trace_printk() for that), as
 * the top level buffer should only contain events that can be individually
 * disabled. trace_printk() is only used for debugging a kernel and should
 * never be incorporated into normal use.
3509  *
3510  * trace_array_printk() can be used, as it will not add noise to the
3511  * top level tracing buffer.
3512  *
3513  * Note, trace_array_init_printk() must be called on @tr before this
3514  * can be used.
3515  */
3516 __printf(3, 0)
3517 int trace_array_printk(struct trace_array *tr,
3518                        unsigned long ip, const char *fmt, ...)
3519 {
3520         int ret;
3521         va_list ap;
3522
3523         if (!tr)
3524                 return -ENOENT;
3525
3526         /* This is only allowed for created instances */
3527         if (tr == &global_trace)
3528                 return 0;
3529
3530         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3531                 return 0;
3532
3533         va_start(ap, fmt);
3534         ret = trace_array_vprintk(tr, ip, fmt, ap);
3535         va_end(ap);
3536         return ret;
3537 }
3538 EXPORT_SYMBOL_GPL(trace_array_printk);
3539
3540 /**
3541  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3542  * @tr: The trace array to initialize the buffers for
3543  *
 * As trace_array_printk() only writes into instances, such callers are OK
 * to have in the kernel (unlike trace_printk()). This needs to be called
3546  * before trace_array_printk() can be used on a trace_array.
3547  */
3548 int trace_array_init_printk(struct trace_array *tr)
3549 {
3550         if (!tr)
3551                 return -ENOENT;
3552
3553         /* This is only allowed for created instances */
3554         if (tr == &global_trace)
3555                 return -EINVAL;
3556
3557         return alloc_percpu_trace_buffer();
3558 }
3559 EXPORT_SYMBOL_GPL(trace_array_init_printk);
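
/*
 * A minimal usage sketch (illustrative; the instance name and error
 * handling are made up -- see samples/ftrace/sample-trace-array.c for a
 * complete example):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my-subsys");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_,
 *				   "hello from %s\n", "my-subsys");
 */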
3560
3561 __printf(3, 4)
3562 int trace_array_printk_buf(struct trace_buffer *buffer,
3563                            unsigned long ip, const char *fmt, ...)
3564 {
3565         int ret;
3566         va_list ap;
3567
3568         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3569                 return 0;
3570
3571         va_start(ap, fmt);
3572         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3573         va_end(ap);
3574         return ret;
3575 }
3576
3577 __printf(2, 0)
3578 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3579 {
3580         return trace_array_vprintk(&global_trace, ip, fmt, args);
3581 }
3582 EXPORT_SYMBOL_GPL(trace_vprintk);
3583
3584 static void trace_iterator_increment(struct trace_iterator *iter)
3585 {
3586         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3587
3588         iter->idx++;
3589         if (buf_iter)
3590                 ring_buffer_iter_advance(buf_iter);
3591 }
3592
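/*
 * Peek at the next entry on @cpu without consuming it. When a ring buffer
 * iterator exists (non-consuming reads) it is used, otherwise fall back to
 * ring_buffer_peek() for consuming readers. @lost_events reports whether
 * events were dropped on this CPU.
 */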
3593 static struct trace_entry *
3594 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3595                 unsigned long *lost_events)
3596 {
3597         struct ring_buffer_event *event;
3598         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3599
3600         if (buf_iter) {
3601                 event = ring_buffer_iter_peek(buf_iter, ts);
3602                 if (lost_events)
3603                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3604                                 (unsigned long)-1 : 0;
3605         } else {
3606                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3607                                          lost_events);
3608         }
3609
3610         if (event) {
3611                 iter->ent_size = ring_buffer_event_length(event);
3612                 return ring_buffer_event_data(event);
3613         }
3614         iter->ent_size = 0;
3615         return NULL;
3616 }
3617
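/*
 * Find the oldest pending entry: when the iterator is bound to a single
 * CPU, peek only at that buffer; otherwise peek at every non-empty per-CPU
 * buffer and return the entry with the smallest timestamp, along with its
 * CPU, timestamp and lost-event count.
 */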
3618 static struct trace_entry *
3619 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3620                   unsigned long *missing_events, u64 *ent_ts)
3621 {
3622         struct trace_buffer *buffer = iter->array_buffer->buffer;
3623         struct trace_entry *ent, *next = NULL;
3624         unsigned long lost_events = 0, next_lost = 0;
3625         int cpu_file = iter->cpu_file;
3626         u64 next_ts = 0, ts;
3627         int next_cpu = -1;
3628         int next_size = 0;
3629         int cpu;
3630
3631         /*
         * If we are in a per_cpu trace file, don't bother iterating over
         * all CPUs; just peek at that one CPU directly.
3634          */
3635         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3636                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3637                         return NULL;
3638                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3639                 if (ent_cpu)
3640                         *ent_cpu = cpu_file;
3641
3642                 return ent;
3643         }
3644
3645         for_each_tracing_cpu(cpu) {
3646
3647                 if (ring_buffer_empty_cpu(buffer, cpu))
3648                         continue;
3649
3650                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3651
3652                 /*
3653                  * Pick the entry with the smallest timestamp:
3654                  */
3655                 if (ent && (!next || ts < next_ts)) {
3656                         next = ent;
3657                         next_cpu = cpu;
3658                         next_ts = ts;
3659                         next_lost = lost_events;
3660                         next_size = iter->ent_size;
3661                 }
3662         }
3663
3664         iter->ent_size = next_size;
3665
3666         if (ent_cpu)
3667                 *ent_cpu = next_cpu;
3668
3669         if (ent_ts)
3670                 *ent_ts = next_ts;
3671
3672         if (missing_events)
3673                 *missing_events = next_lost;
3674
3675         return next;
3676 }
3677
3678 #define STATIC_FMT_BUF_SIZE     128
3679 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3680
3681 static char *trace_iter_expand_format(struct trace_iterator *iter)
3682 {
3683         char *tmp;
3684
3685         /*
         * iter->tr is NULL when used with tp_printk, which means this can
         * get called where it is not safe to call krealloc().
3688          */
3689         if (!iter->tr || iter->fmt == static_fmt_buf)
3690                 return NULL;
3691
3692         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3693                        GFP_KERNEL);
3694         if (tmp) {
3695                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3696                 iter->fmt = tmp;
3697         }
3698
3699         return tmp;
3700 }
3701
3702 /* Returns true if the string is safe to dereference from an event */
3703 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3704                            bool star, int len)
3705 {
3706         unsigned long addr = (unsigned long)str;
3707         struct trace_event *trace_event;
3708         struct trace_event_call *event;
3709
3710         /* Ignore strings with no length */
3711         if (star && !len)
3712                 return true;
3713
3714         /* OK if part of the event data */
3715         if ((addr >= (unsigned long)iter->ent) &&
3716             (addr < (unsigned long)iter->ent + iter->ent_size))
3717                 return true;
3718
3719         /* OK if part of the temp seq buffer */
3720         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3721             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3722                 return true;
3723
3724         /* Core rodata can not be freed */
3725         if (is_kernel_rodata(addr))
3726                 return true;
3727
3728         if (trace_is_tracepoint_string(str))
3729                 return true;
3730
3731         /*
3732          * Now this could be a module event, referencing core module
3733          * data, which is OK.
3734          */
3735         if (!iter->ent)
3736                 return false;
3737
3738         trace_event = ftrace_find_event(iter->ent->type);
3739         if (!trace_event)
3740                 return false;
3741
3742         event = container_of(trace_event, struct trace_event_call, event);
3743         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3744                 return false;
3745
3746         /* Would rather have rodata, but this will suffice */
3747         if (within_module_core(addr, event->module))
3748                 return true;
3749
3750         return false;
3751 }
3752
3753 static const char *show_buffer(struct trace_seq *s)
3754 {
3755         struct seq_buf *seq = &s->seq;
3756
3757         seq_buf_terminate(seq);
3758
3759         return seq->buffer;
3760 }
3761
3762 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3763
3764 static int test_can_verify_check(const char *fmt, ...)
3765 {
3766         char buf[16];
3767         va_list ap;
3768         int ret;
3769
3770         /*
         * The verifier depends on vsnprintf() modifying the va_list passed
         * to it, i.e. on the va_list being passed by reference. Some
         * architectures (like x86_32) pass it by value, which means that
         * vsnprintf() does not modify the caller's va_list, and the verifier
         * would then need to understand every value that vsnprintf() can
         * consume. If the va_list is passed by value, the verifier is
         * disabled.
3778          */
3779         va_start(ap, fmt);
3780         vsnprintf(buf, 16, "%d", ap);
3781         ret = va_arg(ap, int);
3782         va_end(ap);
3783
3784         return ret;
3785 }
3786
3787 static void test_can_verify(void)
3788 {
3789         if (!test_can_verify_check("%d %d", 0, 1)) {
3790                 pr_info("trace event string verifier disabled\n");
3791                 static_branch_inc(&trace_no_verify);
3792         }
3793 }
3794
3795 /**
3796  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3797  * @iter: The iterator that holds the seq buffer and the event being printed
3798  * @fmt: The format used to print the event
3799  * @ap: The va_list holding the data to print from @fmt.
3800  *
3801  * This writes the data into the @iter->seq buffer using the data from
3802  * @fmt and @ap. If the format has a %s, then the source of the string
3803  * is examined to make sure it is safe to print, otherwise it will
3804  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3805  * pointer.
3806  */
3807 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3808                          va_list ap)
3809 {
3810         const char *p = fmt;
3811         const char *str;
3812         int i, j;
3813
3814         if (WARN_ON_ONCE(!fmt))
3815                 return;
3816
3817         if (static_branch_unlikely(&trace_no_verify))
3818                 goto print;
3819
3820         /* Don't bother checking when doing a ftrace_dump() */
3821         if (iter->fmt == static_fmt_buf)
3822                 goto print;
3823
3824         while (*p) {
3825                 bool star = false;
3826                 int len = 0;
3827
3828                 j = 0;
3829
3830                 /* We only care about %s and variants */
3831                 for (i = 0; p[i]; i++) {
3832                         if (i + 1 >= iter->fmt_size) {
3833                                 /*
3834                                  * If we can't expand the copy buffer,
3835                                  * just print it.
3836                                  */
3837                                 if (!trace_iter_expand_format(iter))
3838                                         goto print;
3839                         }
3840
3841                         if (p[i] == '\\' && p[i+1]) {
3842                                 i++;
3843                                 continue;
3844                         }
3845                         if (p[i] == '%') {
3846                                 /* Need to test cases like %08.*s */
3847                                 for (j = 1; p[i+j]; j++) {
3848                                         if (isdigit(p[i+j]) ||
3849                                             p[i+j] == '.')
3850                                                 continue;
3851                                         if (p[i+j] == '*') {
3852                                                 star = true;
3853                                                 continue;
3854                                         }
3855                                         break;
3856                                 }
3857                                 if (p[i+j] == 's')
3858                                         break;
3859                                 star = false;
3860                         }
3861                         j = 0;
3862                 }
3863                 /* If no %s found then just print normally */
3864                 if (!p[i])
3865                         break;
3866
3867                 /* Copy up to the %s, and print that */
3868                 strncpy(iter->fmt, p, i);
3869                 iter->fmt[i] = '\0';
3870                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3871
3872                 /*
3873                  * If iter->seq is full, the above call no longer guarantees
3874                  * that ap is in sync with fmt processing, and further calls
3875                  * to va_arg() can return wrong positional arguments.
3876                  *
3877                  * Ensure that ap is no longer used in this case.
3878                  */
3879                 if (iter->seq.full) {
3880                         p = "";
3881                         break;
3882                 }
3883
3884                 if (star)
3885                         len = va_arg(ap, int);
3886
3887                 /* The ap now points to the string data of the %s */
3888                 str = va_arg(ap, const char *);
3889
3890                 /*
3891                  * If you hit this warning, it is likely that the
3892                  * trace event in question used %s on a string that
3893                  * was saved at the time of the event, but may not be
3894                  * around when the trace is read. Use __string(),
3895                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3896                  * instead. See samples/trace_events/trace-events-sample.h
3897                  * for reference.
3898                  */
3899                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3900                               "fmt: '%s' current_buffer: '%s'",
3901                               fmt, show_buffer(&iter->seq))) {
3902                         int ret;
3903
3904                         /* Try to safely read the string */
3905                         if (star) {
3906                                 if (len + 1 > iter->fmt_size)
3907                                         len = iter->fmt_size - 1;
3908                                 if (len < 0)
3909                                         len = 0;
3910                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3911                                 iter->fmt[len] = 0;
3912                                 star = false;
3913                         } else {
3914                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3915                                                                   iter->fmt_size);
3916                         }
3917                         if (ret < 0)
3918                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3919                         else
3920                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3921                                                  str, iter->fmt);
3922                         str = "[UNSAFE-MEMORY]";
3923                         strcpy(iter->fmt, "%s");
3924                 } else {
3925                         strncpy(iter->fmt, p + i, j + 1);
3926                         iter->fmt[j+1] = '\0';
3927                 }
3928                 if (star)
3929                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3930                 else
3931                         trace_seq_printf(&iter->seq, iter->fmt, str);
3932
3933                 p += i + j + 1;
3934         }
3935  print:
3936         if (*p)
3937                 trace_seq_vprintf(&iter->seq, p, ap);
3938 }
3939
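/*
 * When the 'hash-ptr' trace option is cleared for this instance, rewrite
 * the event's format string so that every bare %p becomes %px, printing
 * real addresses instead of hashed values. Returns the rewritten copy in
 * iter->fmt, or the original @fmt when no rewrite is needed or possible.
 */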
3940 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3941 {
3942         const char *p, *new_fmt;
3943         char *q;
3944
3945         if (WARN_ON_ONCE(!fmt))
3946                 return fmt;
3947
3948         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3949                 return fmt;
3950
3951         p = fmt;
3952         new_fmt = q = iter->fmt;
3953         while (*p) {
3954                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3955                         if (!trace_iter_expand_format(iter))
3956                                 return fmt;
3957
3958                         q += iter->fmt - new_fmt;
3959                         new_fmt = iter->fmt;
3960                 }
3961
3962                 *q++ = *p++;
3963
3964                 /* Replace %p with %px */
3965                 if (p[-1] == '%') {
3966                         if (p[0] == '%') {
3967                                 *q++ = *p++;
3968                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3969                                 *q++ = *p++;
3970                                 *q++ = 'x';
3971                         }
3972                 }
3973         }
3974         *q = '\0';
3975
3976         return new_fmt;
3977 }
3978
3979 #define STATIC_TEMP_BUF_SIZE    128
3980 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3981
3982 /* Find the next real entry, without updating the iterator itself */
3983 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3984                                           int *ent_cpu, u64 *ent_ts)
3985 {
3986         /* __find_next_entry will reset ent_size */
3987         int ent_size = iter->ent_size;
3988         struct trace_entry *entry;
3989
3990         /*
3991          * If called from ftrace_dump(), then the iter->temp buffer
3992          * will be the static_temp_buf and not created from kmalloc.
         * If the entry size is greater than the buffer, we cannot
         * save it. Just return NULL in that case. This is only
         * used to add markers when two consecutive events' time
         * stamps have a large delta. See trace_print_lat_context().
3997          */
3998         if (iter->temp == static_temp_buf &&
3999             STATIC_TEMP_BUF_SIZE < ent_size)
4000                 return NULL;
4001
4002         /*
         * __find_next_entry() may call peek_next_entry(), which may
         * call ring_buffer_peek(), which can make the contents of iter->ent
         * undefined. Copy iter->ent now.
4006          */
4007         if (iter->ent && iter->ent != iter->temp) {
4008                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4009                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4010                         void *temp;
4011                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4012                         if (!temp)
4013                                 return NULL;
4014                         kfree(iter->temp);
4015                         iter->temp = temp;
4016                         iter->temp_size = iter->ent_size;
4017                 }
4018                 memcpy(iter->temp, iter->ent, iter->ent_size);
4019                 iter->ent = iter->temp;
4020         }
4021         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4022         /* Put back the original ent_size */
4023         iter->ent_size = ent_size;
4024
4025         return entry;
4026 }
4027
4028 /* Find the next real entry, and increment the iterator to the next entry */
4029 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4030 {
4031         iter->ent = __find_next_entry(iter, &iter->cpu,
4032                                       &iter->lost_events, &iter->ts);
4033
4034         if (iter->ent)
4035                 trace_iterator_increment(iter);
4036
4037         return iter->ent ? iter : NULL;
4038 }
4039
4040 static void trace_consume(struct trace_iterator *iter)
4041 {
4042         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4043                             &iter->lost_events);
4044 }
4045
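/*
 * seq_file ->next() callback for the "trace" file: advance the trace
 * iterator to the entry at the requested position. The iterator can only
 * move forward; a request for an earlier position returns NULL.
 */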
4046 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4047 {
4048         struct trace_iterator *iter = m->private;
4049         int i = (int)*pos;
4050         void *ent;
4051
4052         WARN_ON_ONCE(iter->leftover);
4053
4054         (*pos)++;
4055
4056         /* can't go backwards */
4057         if (iter->idx > i)
4058                 return NULL;
4059
4060         if (iter->idx < 0)
4061                 ent = trace_find_next_entry_inc(iter);
4062         else
4063                 ent = iter;
4064
4065         while (ent && iter->idx < i)
4066                 ent = trace_find_next_entry_inc(iter);
4067
4068         iter->pos = *pos;
4069
4070         return ent;
4071 }
4072
4073 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4074 {
4075         struct ring_buffer_iter *buf_iter;
4076         unsigned long entries = 0;
4077         u64 ts;
4078
4079         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4080
4081         buf_iter = trace_buffer_iter(iter, cpu);
4082         if (!buf_iter)
4083                 return;
4084
4085         ring_buffer_iter_reset(buf_iter);
4086
4087         /*
         * With the max latency tracers, it is possible that a reset never
         * took place on a CPU. This is evident when the timestamp is
         * before the start of the buffer.
4091          */
4092         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4093                 if (ts >= iter->array_buffer->time_start)
4094                         break;
4095                 entries++;
4096                 ring_buffer_iter_advance(buf_iter);
4097         }
4098
4099         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4100 }
4101
4102 /*
 * The current tracer is copied to avoid taking a global lock
 * all around.
4105  */
4106 static void *s_start(struct seq_file *m, loff_t *pos)
4107 {
4108         struct trace_iterator *iter = m->private;
4109         struct trace_array *tr = iter->tr;
4110         int cpu_file = iter->cpu_file;
4111         void *p = NULL;
4112         loff_t l = 0;
4113         int cpu;
4114
4115         /*
         * Copy the tracer to avoid using a global lock all around.
         * iter->trace is a copy of current_trace, so the name pointer
         * may be compared instead of doing a strcmp(), as iter->trace->name
         * will point to the same string as current_trace->name.
4120          */
4121         mutex_lock(&trace_types_lock);
4122         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4123                 *iter->trace = *tr->current_trace;
4124         mutex_unlock(&trace_types_lock);
4125
4126 #ifdef CONFIG_TRACER_MAX_TRACE
4127         if (iter->snapshot && iter->trace->use_max_tr)
4128                 return ERR_PTR(-EBUSY);
4129 #endif
4130
4131         if (*pos != iter->pos) {
4132                 iter->ent = NULL;
4133                 iter->cpu = 0;
4134                 iter->idx = -1;
4135
4136                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4137                         for_each_tracing_cpu(cpu)
4138                                 tracing_iter_reset(iter, cpu);
4139                 } else
4140                         tracing_iter_reset(iter, cpu_file);
4141
4142                 iter->leftover = 0;
4143                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4144                         ;
4145
4146         } else {
4147                 /*
4148                  * If we overflowed the seq_file before, then we want
4149                  * to just reuse the trace_seq buffer again.
4150                  */
4151                 if (iter->leftover)
4152                         p = iter;
4153                 else {
4154                         l = *pos - 1;
4155                         p = s_next(m, p, &l);
4156                 }
4157         }
4158
4159         trace_event_read_lock();
4160         trace_access_lock(cpu_file);
4161         return p;
4162 }
4163
4164 static void s_stop(struct seq_file *m, void *p)
4165 {
4166         struct trace_iterator *iter = m->private;
4167
4168 #ifdef CONFIG_TRACER_MAX_TRACE
4169         if (iter->snapshot && iter->trace->use_max_tr)
4170                 return;
4171 #endif
4172
4173         trace_access_unlock(iter->cpu_file);
4174         trace_event_read_unlock();
4175 }
4176
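/*
 * Count the events in one per-CPU buffer. *entries is the number of
 * readable entries (minus any skipped after a latency-tracer reset);
 * *total additionally includes entries that were overwritten by ring
 * buffer overruns, unless entries were skipped, in which case the two
 * counts are the same.
 */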
4177 static void
4178 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4179                       unsigned long *entries, int cpu)
4180 {
4181         unsigned long count;
4182
4183         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4184         /*
4185          * If this buffer has skipped entries, then we hold all
4186          * entries for the trace and we need to ignore the
4187          * ones before the time stamp.
4188          */
4189         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4190                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4191                 /* total is the same as the entries */
4192                 *total = count;
4193         } else
4194                 *total = count +
4195                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4196         *entries = count;
4197 }
4198
4199 static void
4200 get_total_entries(struct array_buffer *buf,
4201                   unsigned long *total, unsigned long *entries)
4202 {
4203         unsigned long t, e;
4204         int cpu;
4205
4206         *total = 0;
4207         *entries = 0;
4208
4209         for_each_tracing_cpu(cpu) {
4210                 get_total_entries_cpu(buf, &t, &e, cpu);
4211                 *total += t;
4212                 *entries += e;
4213         }
4214 }
4215
4216 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4217 {
4218         unsigned long total, entries;
4219
4220         if (!tr)
4221                 tr = &global_trace;
4222
4223         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4224
4225         return entries;
4226 }
4227
4228 unsigned long trace_total_entries(struct trace_array *tr)
4229 {
4230         unsigned long total, entries;
4231
4232         if (!tr)
4233                 tr = &global_trace;
4234
4235         get_total_entries(&tr->array_buffer, &total, &entries);
4236
4237         return entries;
4238 }
4239
4240 static void print_lat_help_header(struct seq_file *m)
4241 {
4242         seq_puts(m, "#                    _------=> CPU#            \n"
4243                     "#                   / _-----=> irqs-off/BH-disabled\n"
4244                     "#                  | / _----=> need-resched    \n"
4245                     "#                  || / _---=> hardirq/softirq \n"
4246                     "#                  ||| / _--=> preempt-depth   \n"
4247                     "#                  |||| / _-=> migrate-disable \n"
4248                     "#                  ||||| /     delay           \n"
4249                     "#  cmd     pid     |||||| time  |   caller     \n"
4250                     "#     \\   /        ||||||  \\    |    /       \n");
4251 }
4252
4253 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4254 {
4255         unsigned long total;
4256         unsigned long entries;
4257
4258         get_total_entries(buf, &total, &entries);
4259         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4260                    entries, total, num_online_cpus());
4261         seq_puts(m, "#\n");
4262 }
4263
4264 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4265                                    unsigned int flags)
4266 {
4267         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4268
4269         print_event_info(buf, m);
4270
4271         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4272         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4273 }
4274
4275 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4276                                        unsigned int flags)
4277 {
4278         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4279         static const char space[] = "            ";
4280         int prec = tgid ? 12 : 2;
4281
4282         print_event_info(buf, m);
4283
4284         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4285         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4286         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4287         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4288         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4289         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4290         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4291         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4292 }
4293
4294 void
4295 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4296 {
4297         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4298         struct array_buffer *buf = iter->array_buffer;
4299         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4300         struct tracer *type = iter->trace;
4301         unsigned long entries;
4302         unsigned long total;
4303         const char *name = type->name;
4304
4305         get_total_entries(buf, &total, &entries);
4306
4307         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4308                    name, UTS_RELEASE);
4309         seq_puts(m, "# -----------------------------------"
4310                  "---------------------------------\n");
4311         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4312                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4313                    nsecs_to_usecs(data->saved_latency),
4314                    entries,
4315                    total,
4316                    buf->cpu,
4317                    preempt_model_none()      ? "server" :
4318                    preempt_model_voluntary() ? "desktop" :
4319                    preempt_model_full()      ? "preempt" :
4320                    preempt_model_rt()        ? "preempt_rt" :
4321                    "unknown",
4322                    /* These are reserved for later use */
4323                    0, 0, 0, 0);
4324 #ifdef CONFIG_SMP
4325         seq_printf(m, " #P:%d)\n", num_online_cpus());
4326 #else
4327         seq_puts(m, ")\n");
4328 #endif
4329         seq_puts(m, "#    -----------------\n");
4330         seq_printf(m, "#    | task: %.16s-%d "
4331                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4332                    data->comm, data->pid,
4333                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4334                    data->policy, data->rt_priority);
4335         seq_puts(m, "#    -----------------\n");
4336
4337         if (data->critical_start) {
4338                 seq_puts(m, "#  => started at: ");
4339                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4340                 trace_print_seq(m, &iter->seq);
4341                 seq_puts(m, "\n#  => ended at:   ");
4342                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4343                 trace_print_seq(m, &iter->seq);
4344                 seq_puts(m, "\n#\n");
4345         }
4346
4347         seq_puts(m, "#\n");
4348 }
4349
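/*
 * With the 'annotate' trace option set, emit a one-time
 * "##### CPU N buffer started ####" marker the first time output comes
 * from a given CPU's buffer, so readers can tell when a CPU starts
 * contributing to an interleaved trace.
 */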
4350 static void test_cpu_buff_start(struct trace_iterator *iter)
4351 {
4352         struct trace_seq *s = &iter->seq;
4353         struct trace_array *tr = iter->tr;
4354
4355         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4356                 return;
4357
4358         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4359                 return;
4360
4361         if (cpumask_available(iter->started) &&
4362             cpumask_test_cpu(iter->cpu, iter->started))
4363                 return;
4364
4365         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4366                 return;
4367
4368         if (cpumask_available(iter->started))
4369                 cpumask_set_cpu(iter->cpu, iter->started);
4370
4371         /* Don't print started cpu buffer for the first entry of the trace */
4372         if (iter->idx > 1)
4373                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4374                                 iter->cpu);
4375 }
4376
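/*
 * Default text output: print the context columns (comm, pid, CPU, flags,
 * timestamp) and then hand the entry to its registered trace_event
 * ->trace() callback for formatting.
 */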
4377 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4378 {
4379         struct trace_array *tr = iter->tr;
4380         struct trace_seq *s = &iter->seq;
4381         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4382         struct trace_entry *entry;
4383         struct trace_event *event;
4384
4385         entry = iter->ent;
4386
4387         test_cpu_buff_start(iter);
4388
4389         event = ftrace_find_event(entry->type);
4390
4391         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4392                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4393                         trace_print_lat_context(iter);
4394                 else
4395                         trace_print_context(iter);
4396         }
4397
4398         if (trace_seq_has_overflowed(s))
4399                 return TRACE_TYPE_PARTIAL_LINE;
4400
4401         if (event)
4402                 return event->funcs->trace(iter, sym_flags, event);
4403
4404         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4405
4406         return trace_handle_return(s);
4407 }
4408
4409 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4410 {
4411         struct trace_array *tr = iter->tr;
4412         struct trace_seq *s = &iter->seq;
4413         struct trace_entry *entry;
4414         struct trace_event *event;
4415
4416         entry = iter->ent;
4417
4418         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4419                 trace_seq_printf(s, "%d %d %llu ",
4420                                  entry->pid, iter->cpu, iter->ts);
4421
4422         if (trace_seq_has_overflowed(s))
4423                 return TRACE_TYPE_PARTIAL_LINE;
4424
4425         event = ftrace_find_event(entry->type);
4426         if (event)
4427                 return event->funcs->raw(iter, 0, event);
4428
4429         trace_seq_printf(s, "%d ?\n", entry->type);
4430
4431         return trace_handle_return(s);
4432 }
4433
4434 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4435 {
4436         struct trace_array *tr = iter->tr;
4437         struct trace_seq *s = &iter->seq;
4438         unsigned char newline = '\n';
4439         struct trace_entry *entry;
4440         struct trace_event *event;
4441
4442         entry = iter->ent;
4443
4444         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4445                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4446                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4447                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4448                 if (trace_seq_has_overflowed(s))
4449                         return TRACE_TYPE_PARTIAL_LINE;
4450         }
4451
4452         event = ftrace_find_event(entry->type);
4453         if (event) {
4454                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4455                 if (ret != TRACE_TYPE_HANDLED)
4456                         return ret;
4457         }
4458
4459         SEQ_PUT_FIELD(s, newline);
4460
4461         return trace_handle_return(s);
4462 }
4463
4464 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4465 {
4466         struct trace_array *tr = iter->tr;
4467         struct trace_seq *s = &iter->seq;
4468         struct trace_entry *entry;
4469         struct trace_event *event;
4470
4471         entry = iter->ent;
4472
4473         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4474                 SEQ_PUT_FIELD(s, entry->pid);
4475                 SEQ_PUT_FIELD(s, iter->cpu);
4476                 SEQ_PUT_FIELD(s, iter->ts);
4477                 if (trace_seq_has_overflowed(s))
4478                         return TRACE_TYPE_PARTIAL_LINE;
4479         }
4480
4481         event = ftrace_find_event(entry->type);
4482         return event ? event->funcs->binary(iter, 0, event) :
4483                 TRACE_TYPE_HANDLED;
4484 }
4485
4486 int trace_empty(struct trace_iterator *iter)
4487 {
4488         struct ring_buffer_iter *buf_iter;
4489         int cpu;
4490
4491         /* If we are looking at one CPU buffer, only check that one */
4492         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4493                 cpu = iter->cpu_file;
4494                 buf_iter = trace_buffer_iter(iter, cpu);
4495                 if (buf_iter) {
4496                         if (!ring_buffer_iter_empty(buf_iter))
4497                                 return 0;
4498                 } else {
4499                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4500                                 return 0;
4501                 }
4502                 return 1;
4503         }
4504
4505         for_each_tracing_cpu(cpu) {
4506                 buf_iter = trace_buffer_iter(iter, cpu);
4507                 if (buf_iter) {
4508                         if (!ring_buffer_iter_empty(buf_iter))
4509                                 return 0;
4510                 } else {
4511                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4512                                 return 0;
4513                 }
4514         }
4515
4516         return 1;
4517 }
4518
4519 /*  Called with trace_event_read_lock() held. */
4520 enum print_line_t print_trace_line(struct trace_iterator *iter)
4521 {
4522         struct trace_array *tr = iter->tr;
4523         unsigned long trace_flags = tr->trace_flags;
4524         enum print_line_t ret;
4525
4526         if (iter->lost_events) {
4527                 if (iter->lost_events == (unsigned long)-1)
4528                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4529                                          iter->cpu);
4530                 else
4531                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4532                                          iter->cpu, iter->lost_events);
4533                 if (trace_seq_has_overflowed(&iter->seq))
4534                         return TRACE_TYPE_PARTIAL_LINE;
4535         }
4536
4537         if (iter->trace && iter->trace->print_line) {
4538                 ret = iter->trace->print_line(iter);
4539                 if (ret != TRACE_TYPE_UNHANDLED)
4540                         return ret;
4541         }
4542
4543         if (iter->ent->type == TRACE_BPUTS &&
4544                         trace_flags & TRACE_ITER_PRINTK &&
4545                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4546                 return trace_print_bputs_msg_only(iter);
4547
4548         if (iter->ent->type == TRACE_BPRINT &&
4549                         trace_flags & TRACE_ITER_PRINTK &&
4550                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4551                 return trace_print_bprintk_msg_only(iter);
4552
4553         if (iter->ent->type == TRACE_PRINT &&
4554                         trace_flags & TRACE_ITER_PRINTK &&
4555                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4556                 return trace_print_printk_msg_only(iter);
4557
4558         if (trace_flags & TRACE_ITER_BIN)
4559                 return print_bin_fmt(iter);
4560
4561         if (trace_flags & TRACE_ITER_HEX)
4562                 return print_hex_fmt(iter);
4563
4564         if (trace_flags & TRACE_ITER_RAW)
4565                 return print_raw_fmt(iter);
4566
4567         return print_trace_fmt(iter);
4568 }
4569
4570 void trace_latency_header(struct seq_file *m)
4571 {
4572         struct trace_iterator *iter = m->private;
4573         struct trace_array *tr = iter->tr;
4574
4575         /* print nothing if the buffers are empty */
4576         if (trace_empty(iter))
4577                 return;
4578
4579         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4580                 print_trace_header(m, iter);
4581
4582         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4583                 print_lat_help_header(m);
4584 }
4585
4586 void trace_default_header(struct seq_file *m)
4587 {
4588         struct trace_iterator *iter = m->private;
4589         struct trace_array *tr = iter->tr;
4590         unsigned long trace_flags = tr->trace_flags;
4591
4592         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4593                 return;
4594
4595         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4596                 /* print nothing if the buffers are empty */
4597                 if (trace_empty(iter))
4598                         return;
4599                 print_trace_header(m, iter);
4600                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4601                         print_lat_help_header(m);
4602         } else {
4603                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4604                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4605                                 print_func_help_header_irq(iter->array_buffer,
4606                                                            m, trace_flags);
4607                         else
4608                                 print_func_help_header(iter->array_buffer, m,
4609                                                        trace_flags);
4610                 }
4611         }
4612 }
4613
4614 static void test_ftrace_alive(struct seq_file *m)
4615 {
4616         if (!ftrace_is_dead())
4617                 return;
4618         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4619                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4620 }
4621
4622 #ifdef CONFIG_TRACER_MAX_TRACE
4623 static void show_snapshot_main_help(struct seq_file *m)
4624 {
4625         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4626                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4627                     "#                      Takes a snapshot of the main buffer.\n"
4628                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
                    "#                      (Doesn't have to be '2'; works with any number that\n"
4630                     "#                       is not a '0' or '1')\n");
4631 }
4632
4633 static void show_snapshot_percpu_help(struct seq_file *m)
4634 {
4635         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4636 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4637         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4638                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4639 #else
4640         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4641                     "#                     Must use main snapshot file to allocate.\n");
4642 #endif
4643         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
                    "#                      (Doesn't have to be '2'; works with any number that\n"
4645                     "#                       is not a '0' or '1')\n");
4646 }
4647
4648 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4649 {
4650         if (iter->tr->allocated_snapshot)
4651                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4652         else
4653                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4654
4655         seq_puts(m, "# Snapshot commands:\n");
4656         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4657                 show_snapshot_main_help(m);
4658         else
4659                 show_snapshot_percpu_help(m);
4660 }
4661 #else
4662 /* Should never be called */
4663 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4664 #endif
4665
4666 static int s_show(struct seq_file *m, void *v)
4667 {
4668         struct trace_iterator *iter = v;
4669         int ret;
4670
4671         if (iter->ent == NULL) {
4672                 if (iter->tr) {
4673                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4674                         seq_puts(m, "#\n");
4675                         test_ftrace_alive(m);
4676                 }
4677                 if (iter->snapshot && trace_empty(iter))
4678                         print_snapshot_help(m, iter);
4679                 else if (iter->trace && iter->trace->print_header)
4680                         iter->trace->print_header(m);
4681                 else
4682                         trace_default_header(m);
4683
4684         } else if (iter->leftover) {
4685                 /*
4686                  * If we filled the seq_file buffer earlier, we
4687                  * want to just show it now.
4688                  */
4689                 ret = trace_print_seq(m, &iter->seq);
4690
4691                 /* ret should this time be zero, but you never know */
4692                 iter->leftover = ret;
4693
4694         } else {
4695                 print_trace_line(iter);
4696                 ret = trace_print_seq(m, &iter->seq);
4697                 /*
4698                  * If we overflow the seq_file buffer, then it will
4699                  * ask us for this data again at start up.
4700                  * Use that instead.
4701                  *  ret is 0 if seq_file write succeeded.
4702                  *        -1 otherwise.
4703                  */
4704                 iter->leftover = ret;
4705         }
4706
4707         return 0;
4708 }
4709
4710 /*
4711  * Should be used after trace_array_get(), trace_types_lock
4712  * ensures that i_cdev was already initialized.
4713  */
4714 static inline int tracing_get_cpu(struct inode *inode)
4715 {
4716         if (inode->i_cdev) /* See trace_create_cpu_file() */
4717                 return (long)inode->i_cdev - 1;
4718         return RING_BUFFER_ALL_CPUS;
4719 }
4720
4721 static const struct seq_operations tracer_seq_ops = {
4722         .start          = s_start,
4723         .next           = s_next,
4724         .stop           = s_stop,
4725         .show           = s_show,
4726 };
4727
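/*
 * Build the trace_iterator used to read the trace or snapshot file:
 * allocate the per-CPU buffer iterators and the temp buffer, copy the
 * current tracer, select the main or max (snapshot) buffer, and prepare
 * the ring buffer readers.  If the pause-on-trace option is set and this
 * is not the snapshot file, tracing is stopped while the file is open.
 */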
4728 static struct trace_iterator *
4729 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4730 {
4731         struct trace_array *tr = inode->i_private;
4732         struct trace_iterator *iter;
4733         int cpu;
4734
4735         if (tracing_disabled)
4736                 return ERR_PTR(-ENODEV);
4737
4738         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4739         if (!iter)
4740                 return ERR_PTR(-ENOMEM);
4741
4742         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4743                                     GFP_KERNEL);
4744         if (!iter->buffer_iter)
4745                 goto release;
4746
4747         /*
4748          * trace_find_next_entry() may need to save off iter->ent.
4749          * It will place it into the iter->temp buffer. As most
4750          * events are less than 128 bytes, allocate a buffer of that size.
4751          * If one is greater, then trace_find_next_entry() will
4752          * allocate a new buffer to adjust for the bigger iter->ent.
4753          * It's not critical if it fails to get allocated here.
4754          */
4755         iter->temp = kmalloc(128, GFP_KERNEL);
4756         if (iter->temp)
4757                 iter->temp_size = 128;
4758
4759         /*
4760          * trace_event_printf() may need to modify the given format
4761          * string to replace %p with %px so that it shows the real address
4762          * instead of a hash value. However, that is only needed for event
4763          * tracing; other tracers may not need it. Defer the allocation
4764          * until it is needed.
4765          */
4766         iter->fmt = NULL;
4767         iter->fmt_size = 0;
4768
4769         /*
4770          * We make a copy of the current tracer to avoid concurrent
4771          * changes on it while we are reading.
4772          */
4773         mutex_lock(&trace_types_lock);
4774         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4775         if (!iter->trace)
4776                 goto fail;
4777
4778         *iter->trace = *tr->current_trace;
4779
4780         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4781                 goto fail;
4782
4783         iter->tr = tr;
4784
4785 #ifdef CONFIG_TRACER_MAX_TRACE
4786         /* Currently only the top directory has a snapshot */
4787         if (tr->current_trace->print_max || snapshot)
4788                 iter->array_buffer = &tr->max_buffer;
4789         else
4790 #endif
4791                 iter->array_buffer = &tr->array_buffer;
4792         iter->snapshot = snapshot;
4793         iter->pos = -1;
4794         iter->cpu_file = tracing_get_cpu(inode);
4795         mutex_init(&iter->mutex);
4796
4797         /* Notify the tracer early; before we stop tracing. */
4798         if (iter->trace->open)
4799                 iter->trace->open(iter);
4800
4801         /* Annotate start of buffers if we had overruns */
4802         if (ring_buffer_overruns(iter->array_buffer->buffer))
4803                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4804
4805         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4806         if (trace_clocks[tr->clock_id].in_ns)
4807                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4808
4809         /*
4810          * If pause-on-trace is enabled, then stop the trace while
4811          * dumping, unless this is the "snapshot" file
4812          */
4813         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4814                 tracing_stop_tr(tr);
4815
4816         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4817                 for_each_tracing_cpu(cpu) {
4818                         iter->buffer_iter[cpu] =
4819                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4820                                                          cpu, GFP_KERNEL);
4821                 }
4822                 ring_buffer_read_prepare_sync();
4823                 for_each_tracing_cpu(cpu) {
4824                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                         tracing_iter_reset(iter, cpu);
4826                 }
4827         } else {
4828                 cpu = iter->cpu_file;
4829                 iter->buffer_iter[cpu] =
4830                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4831                                                  cpu, GFP_KERNEL);
4832                 ring_buffer_read_prepare_sync();
4833                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4834                 tracing_iter_reset(iter, cpu);
4835         }
4836
4837         mutex_unlock(&trace_types_lock);
4838
4839         return iter;
4840
4841  fail:
4842         mutex_unlock(&trace_types_lock);
4843         kfree(iter->trace);
4844         kfree(iter->temp);
4845         kfree(iter->buffer_iter);
4846 release:
4847         seq_release_private(inode, file);
4848         return ERR_PTR(-ENOMEM);
4849 }
4850
4851 int tracing_open_generic(struct inode *inode, struct file *filp)
4852 {
4853         int ret;
4854
4855         ret = tracing_check_open_get_tr(NULL);
4856         if (ret)
4857                 return ret;
4858
4859         filp->private_data = inode->i_private;
4860         return 0;
4861 }
4862
4863 bool tracing_is_disabled(void)
4864 {
4865         return tracing_disabled ? true : false;
4866 }
4867
4868 /*
4869  * Open and update trace_array ref count.
4870  * Must have the current trace_array passed to it.
4871  */
4872 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4873 {
4874         struct trace_array *tr = inode->i_private;
4875         int ret;
4876
4877         ret = tracing_check_open_get_tr(tr);
4878         if (ret)
4879                 return ret;
4880
4881         filp->private_data = inode->i_private;
4882
4883         return 0;
4884 }
4885
4886 static int tracing_mark_open(struct inode *inode, struct file *filp)
4887 {
4888         stream_open(inode, filp);
4889         return tracing_open_generic_tr(inode, filp);
4890 }
4891
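/*
 * Release callback for the trace file: tear down the iterator built by
 * __tracing_open(), finish the per-CPU ring buffer reads, let the tracer
 * clean up via ->close(), restart tracing if it was stopped at open time,
 * and drop the trace_array reference.
 */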
4892 static int tracing_release(struct inode *inode, struct file *file)
4893 {
4894         struct trace_array *tr = inode->i_private;
4895         struct seq_file *m = file->private_data;
4896         struct trace_iterator *iter;
4897         int cpu;
4898
4899         if (!(file->f_mode & FMODE_READ)) {
4900                 trace_array_put(tr);
4901                 return 0;
4902         }
4903
4904         /* Writes do not use seq_file */
4905         iter = m->private;
4906         mutex_lock(&trace_types_lock);
4907
4908         for_each_tracing_cpu(cpu) {
4909                 if (iter->buffer_iter[cpu])
4910                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4911         }
4912
4913         if (iter->trace && iter->trace->close)
4914                 iter->trace->close(iter);
4915
4916         if (!iter->snapshot && tr->stop_count)
4917                 /* reenable tracing if it was previously enabled */
4918                 tracing_start_tr(tr);
4919
4920         __trace_array_put(tr);
4921
4922         mutex_unlock(&trace_types_lock);
4923
4924         mutex_destroy(&iter->mutex);
4925         free_cpumask_var(iter->started);
4926         kfree(iter->fmt);
4927         kfree(iter->temp);
4928         kfree(iter->trace);
4929         kfree(iter->buffer_iter);
4930         seq_release_private(inode, file);
4931
4932         return 0;
4933 }
4934
4935 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4936 {
4937         struct trace_array *tr = inode->i_private;
4938
4939         trace_array_put(tr);
4940         return 0;
4941 }
4942
4943 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946
4947         trace_array_put(tr);
4948
4949         return single_release(inode, file);
4950 }
4951
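/*
 * Open callback used by tracing_fops for the trace file.  Opening for
 * write with O_TRUNC erases the buffer contents (one CPU or all CPUs);
 * opening for read builds a seq_file iterator via __tracing_open().
 */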
4952 static int tracing_open(struct inode *inode, struct file *file)
4953 {
4954         struct trace_array *tr = inode->i_private;
4955         struct trace_iterator *iter;
4956         int ret;
4957
4958         ret = tracing_check_open_get_tr(tr);
4959         if (ret)
4960                 return ret;
4961
4962         /* If this file was open for write, then erase contents */
4963         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4964                 int cpu = tracing_get_cpu(inode);
4965                 struct array_buffer *trace_buf = &tr->array_buffer;
4966
4967 #ifdef CONFIG_TRACER_MAX_TRACE
4968                 if (tr->current_trace->print_max)
4969                         trace_buf = &tr->max_buffer;
4970 #endif
4971
4972                 if (cpu == RING_BUFFER_ALL_CPUS)
4973                         tracing_reset_online_cpus(trace_buf);
4974                 else
4975                         tracing_reset_cpu(trace_buf, cpu);
4976         }
4977
4978         if (file->f_mode & FMODE_READ) {
4979                 iter = __tracing_open(inode, file, false);
4980                 if (IS_ERR(iter))
4981                         ret = PTR_ERR(iter);
4982                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4983                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4984         }
4985
4986         if (ret < 0)
4987                 trace_array_put(tr);
4988
4989         return ret;
4990 }
4991
4992 /*
4993  * Some tracers are not suitable for instance buffers.
4994  * A tracer is always available for the global array (toplevel)
4995  * or if it explicitly states that it is.
4996  */
4997 static bool
4998 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4999 {
5000         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5001 }
5002
5003 /* Find the next tracer that this trace array may use */
5004 static struct tracer *
5005 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5006 {
5007         while (t && !trace_ok_for_array(t, tr))
5008                 t = t->next;
5009
5010         return t;
5011 }
5012
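/*
 * seq_file callbacks used by show_traces_fops: walk the global trace_types
 * list under trace_types_lock, skipping tracers that may not be used by
 * this trace_array instance, and print the remaining tracer names.
 */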
5013 static void *
5014 t_next(struct seq_file *m, void *v, loff_t *pos)
5015 {
5016         struct trace_array *tr = m->private;
5017         struct tracer *t = v;
5018
5019         (*pos)++;
5020
5021         if (t)
5022                 t = get_tracer_for_array(tr, t->next);
5023
5024         return t;
5025 }
5026
5027 static void *t_start(struct seq_file *m, loff_t *pos)
5028 {
5029         struct trace_array *tr = m->private;
5030         struct tracer *t;
5031         loff_t l = 0;
5032
5033         mutex_lock(&trace_types_lock);
5034
5035         t = get_tracer_for_array(tr, trace_types);
5036         for (; t && l < *pos; t = t_next(m, t, &l))
5037                 ;
5038
5039         return t;
5040 }
5041
5042 static void t_stop(struct seq_file *m, void *p)
5043 {
5044         mutex_unlock(&trace_types_lock);
5045 }
5046
5047 static int t_show(struct seq_file *m, void *v)
5048 {
5049         struct tracer *t = v;
5050
5051         if (!t)
5052                 return 0;
5053
5054         seq_puts(m, t->name);
5055         if (t->next)
5056                 seq_putc(m, ' ');
5057         else
5058                 seq_putc(m, '\n');
5059
5060         return 0;
5061 }
5062
5063 static const struct seq_operations show_traces_seq_ops = {
5064         .start          = t_start,
5065         .next           = t_next,
5066         .stop           = t_stop,
5067         .show           = t_show,
5068 };
5069
5070 static int show_traces_open(struct inode *inode, struct file *file)
5071 {
5072         struct trace_array *tr = inode->i_private;
5073         struct seq_file *m;
5074         int ret;
5075
5076         ret = tracing_check_open_get_tr(tr);
5077         if (ret)
5078                 return ret;
5079
5080         ret = seq_open(file, &show_traces_seq_ops);
5081         if (ret) {
5082                 trace_array_put(tr);
5083                 return ret;
5084         }
5085
5086         m = file->private_data;
5087         m->private = tr;
5088
5089         return 0;
5090 }
5091
5092 static int show_traces_release(struct inode *inode, struct file *file)
5093 {
5094         struct trace_array *tr = inode->i_private;
5095
5096         trace_array_put(tr);
5097         return seq_release(inode, file);
5098 }
5099
5100 static ssize_t
5101 tracing_write_stub(struct file *filp, const char __user *ubuf,
5102                    size_t count, loff_t *ppos)
5103 {
5104         return count;
5105 }
5106
5107 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5108 {
5109         int ret;
5110
5111         if (file->f_mode & FMODE_READ)
5112                 ret = seq_lseek(file, offset, whence);
5113         else
5114                 file->f_pos = ret = 0;
5115
5116         return ret;
5117 }
5118
5119 static const struct file_operations tracing_fops = {
5120         .open           = tracing_open,
5121         .read           = seq_read,
5122         .write          = tracing_write_stub,
5123         .llseek         = tracing_lseek,
5124         .release        = tracing_release,
5125 };
5126
5127 static const struct file_operations show_traces_fops = {
5128         .open           = show_traces_open,
5129         .read           = seq_read,
5130         .llseek         = seq_lseek,
5131         .release        = show_traces_release,
5132 };
5133
5134 static ssize_t
5135 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5136                      size_t count, loff_t *ppos)
5137 {
5138         struct trace_array *tr = file_inode(filp)->i_private;
5139         char *mask_str;
5140         int len;
5141
5142         len = snprintf(NULL, 0, "%*pb\n",
5143                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5144         mask_str = kmalloc(len, GFP_KERNEL);
5145         if (!mask_str)
5146                 return -ENOMEM;
5147
5148         len = snprintf(mask_str, len, "%*pb\n",
5149                        cpumask_pr_args(tr->tracing_cpumask));
5150         if (len >= count) {
5151                 count = -EINVAL;
5152                 goto out_err;
5153         }
5154         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5155
5156 out_err:
5157         kfree(mask_str);
5158
5159         return count;
5160 }
5161
5162 int tracing_set_cpumask(struct trace_array *tr,
5163                         cpumask_var_t tracing_cpumask_new)
5164 {
5165         int cpu;
5166
5167         if (!tr)
5168                 return -EINVAL;
5169
5170         local_irq_disable();
5171         arch_spin_lock(&tr->max_lock);
5172         for_each_tracing_cpu(cpu) {
5173                 /*
5174                  * Increase/decrease the disabled counter if we are
5175                  * about to flip a bit in the cpumask:
5176                  */
5177                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5178                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5179                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5180                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5181                 }
5182                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5183                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5184                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5185                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5186                 }
5187         }
5188         arch_spin_unlock(&tr->max_lock);
5189         local_irq_enable();
5190
5191         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5192
5193         return 0;
5194 }
5195
5196 static ssize_t
5197 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5198                       size_t count, loff_t *ppos)
5199 {
5200         struct trace_array *tr = file_inode(filp)->i_private;
5201         cpumask_var_t tracing_cpumask_new;
5202         int err;
5203
5204         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5205                 return -ENOMEM;
5206
5207         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5208         if (err)
5209                 goto err_free;
5210
5211         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5212         if (err)
5213                 goto err_free;
5214
5215         free_cpumask_var(tracing_cpumask_new);
5216
5217         return count;
5218
5219 err_free:
5220         free_cpumask_var(tracing_cpumask_new);
5221
5222         return err;
5223 }
5224
5225 static const struct file_operations tracing_cpumask_fops = {
5226         .open           = tracing_open_generic_tr,
5227         .read           = tracing_cpumask_read,
5228         .write          = tracing_cpumask_write,
5229         .release        = tracing_release_generic_tr,
5230         .llseek         = generic_file_llseek,
5231 };
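
/*
 * Purely illustrative sketch (not part of this file): how a userspace
 * program might write the tracing_cpumask file handled above.  The tracefs
 * mount point and the helper name are assumptions; error handling is
 * minimal.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	// Limit tracing to CPUs 0 and 1 by writing the hex mask "3".
 *	static int limit_tracing_cpus(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_cpumask", O_WRONLY);
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "3\n", 2) != 2) {
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */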
5232
5233 static int tracing_trace_options_show(struct seq_file *m, void *v)
5234 {
5235         struct tracer_opt *trace_opts;
5236         struct trace_array *tr = m->private;
5237         u32 tracer_flags;
5238         int i;
5239
5240         mutex_lock(&trace_types_lock);
5241         tracer_flags = tr->current_trace->flags->val;
5242         trace_opts = tr->current_trace->flags->opts;
5243
5244         for (i = 0; trace_options[i]; i++) {
5245                 if (tr->trace_flags & (1 << i))
5246                         seq_printf(m, "%s\n", trace_options[i]);
5247                 else
5248                         seq_printf(m, "no%s\n", trace_options[i]);
5249         }
5250
5251         for (i = 0; trace_opts[i].name; i++) {
5252                 if (tracer_flags & trace_opts[i].bit)
5253                         seq_printf(m, "%s\n", trace_opts[i].name);
5254                 else
5255                         seq_printf(m, "no%s\n", trace_opts[i].name);
5256         }
5257         mutex_unlock(&trace_types_lock);
5258
5259         return 0;
5260 }
5261
5262 static int __set_tracer_option(struct trace_array *tr,
5263                                struct tracer_flags *tracer_flags,
5264                                struct tracer_opt *opts, int neg)
5265 {
5266         struct tracer *trace = tracer_flags->trace;
5267         int ret;
5268
5269         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5270         if (ret)
5271                 return ret;
5272
5273         if (neg)
5274                 tracer_flags->val &= ~opts->bit;
5275         else
5276                 tracer_flags->val |= opts->bit;
5277         return 0;
5278 }
5279
5280 /* Try to assign a tracer specific option */
5281 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5282 {
5283         struct tracer *trace = tr->current_trace;
5284         struct tracer_flags *tracer_flags = trace->flags;
5285         struct tracer_opt *opts = NULL;
5286         int i;
5287
5288         for (i = 0; tracer_flags->opts[i].name; i++) {
5289                 opts = &tracer_flags->opts[i];
5290
5291                 if (strcmp(cmp, opts->name) == 0)
5292                         return __set_tracer_option(tr, trace->flags, opts, neg);
5293         }
5294
5295         return -EINVAL;
5296 }
5297
5298 /* Some tracers require overwrite to stay enabled */
5299 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5300 {
5301         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5302                 return -1;
5303
5304         return 0;
5305 }
5306
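/*
 * Set or clear a TRACE_ITER_* flag on @tr.  The current tracer may veto
 * the change via ->flag_changed().  Flags with side effects (RECORD_CMD,
 * RECORD_TGID, EVENT_FORK, FUNC_FORK, OVERWRITE and PRINTK) have those
 * side effects applied here as well.
 */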
5307 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5308 {
5309         int *map;
5310
5311         if ((mask == TRACE_ITER_RECORD_TGID) ||
5312             (mask == TRACE_ITER_RECORD_CMD))
5313                 lockdep_assert_held(&event_mutex);
5314
5315         /* do nothing if flag is already set */
5316         if (!!(tr->trace_flags & mask) == !!enabled)
5317                 return 0;
5318
5319         /* Give the tracer a chance to approve the change */
5320         if (tr->current_trace->flag_changed)
5321                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5322                         return -EINVAL;
5323
5324         if (enabled)
5325                 tr->trace_flags |= mask;
5326         else
5327                 tr->trace_flags &= ~mask;
5328
5329         if (mask == TRACE_ITER_RECORD_CMD)
5330                 trace_event_enable_cmd_record(enabled);
5331
5332         if (mask == TRACE_ITER_RECORD_TGID) {
5333                 if (!tgid_map) {
5334                         tgid_map_max = pid_max;
5335                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5336                                        GFP_KERNEL);
5337
5338                         /*
5339                          * Pairs with smp_load_acquire() in
5340                          * trace_find_tgid_ptr() to ensure that if it observes
5341                          * the tgid_map we just allocated then it also observes
5342                          * the corresponding tgid_map_max value.
5343                          */
5344                         smp_store_release(&tgid_map, map);
5345                 }
5346                 if (!tgid_map) {
5347                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5348                         return -ENOMEM;
5349                 }
5350
5351                 trace_event_enable_tgid_record(enabled);
5352         }
5353
5354         if (mask == TRACE_ITER_EVENT_FORK)
5355                 trace_event_follow_fork(tr, enabled);
5356
5357         if (mask == TRACE_ITER_FUNC_FORK)
5358                 ftrace_pid_follow_fork(tr, enabled);
5359
5360         if (mask == TRACE_ITER_OVERWRITE) {
5361                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5362 #ifdef CONFIG_TRACER_MAX_TRACE
5363                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5364 #endif
5365         }
5366
5367         if (mask == TRACE_ITER_PRINTK) {
5368                 trace_printk_start_stop_comm(enabled);
5369                 trace_printk_control(enabled);
5370         }
5371
5372         return 0;
5373 }
5374
5375 int trace_set_options(struct trace_array *tr, char *option)
5376 {
5377         char *cmp;
5378         int neg = 0;
5379         int ret;
5380         size_t orig_len = strlen(option);
5381         int len;
5382
5383         cmp = strstrip(option);
5384
5385         len = str_has_prefix(cmp, "no");
5386         if (len)
5387                 neg = 1;
5388
5389         cmp += len;
5390
5391         mutex_lock(&event_mutex);
5392         mutex_lock(&trace_types_lock);
5393
5394         ret = match_string(trace_options, -1, cmp);
5395         /* If no option could be set, test the specific tracer options */
5396         if (ret < 0)
5397                 ret = set_tracer_option(tr, cmp, neg);
5398         else
5399                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5400
5401         mutex_unlock(&trace_types_lock);
5402         mutex_unlock(&event_mutex);
5403
5404         /*
5405          * If the first trailing whitespace is replaced with '\0' by strstrip,
5406          * turn it back into a space.
5407          */
5408         if (orig_len > strlen(option))
5409                 option[strlen(option)] = ' ';
5410
5411         return ret;
5412 }
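
/*
 * Illustrative usage (mirrors the readme text further below; the tracefs
 * mount point and the "overwrite" option name are assumptions here):
 *
 *   # echo overwrite > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 */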
5413
5414 static void __init apply_trace_boot_options(void)
5415 {
5416         char *buf = trace_boot_options_buf;
5417         char *option;
5418
5419         while (true) {
5420                 option = strsep(&buf, ",");
5421
5422                 if (!option)
5423                         break;
5424
5425                 if (*option)
5426                         trace_set_options(&global_trace, option);
5427
5428                 /* Put back the comma to allow this to be called again */
5429                 if (buf)
5430                         *(buf - 1) = ',';
5431         }
5432 }
5433
5434 static ssize_t
5435 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5436                         size_t cnt, loff_t *ppos)
5437 {
5438         struct seq_file *m = filp->private_data;
5439         struct trace_array *tr = m->private;
5440         char buf[64];
5441         int ret;
5442
5443         if (cnt >= sizeof(buf))
5444                 return -EINVAL;
5445
5446         if (copy_from_user(buf, ubuf, cnt))
5447                 return -EFAULT;
5448
5449         buf[cnt] = 0;
5450
5451         ret = trace_set_options(tr, buf);
5452         if (ret < 0)
5453                 return ret;
5454
5455         *ppos += cnt;
5456
5457         return cnt;
5458 }
5459
5460 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5461 {
5462         struct trace_array *tr = inode->i_private;
5463         int ret;
5464
5465         ret = tracing_check_open_get_tr(tr);
5466         if (ret)
5467                 return ret;
5468
5469         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5470         if (ret < 0)
5471                 trace_array_put(tr);
5472
5473         return ret;
5474 }
5475
5476 static const struct file_operations tracing_iter_fops = {
5477         .open           = tracing_trace_options_open,
5478         .read           = seq_read,
5479         .llseek         = seq_lseek,
5480         .release        = tracing_single_release_tr,
5481         .write          = tracing_trace_options_write,
5482 };
5483
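/*
 * Static help text returned by tracing_readme_read() below (typically
 * exposed as the "README" file in tracefs).
 */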
5484 static const char readme_msg[] =
5485         "tracing mini-HOWTO:\n\n"
5486         "# echo 0 > tracing_on : quick way to disable tracing\n"
5487         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5488         " Important files:\n"
5489         "  trace\t\t\t- The static contents of the buffer\n"
5490         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5491         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5492         "  current_tracer\t- function and latency tracers\n"
5493         "  available_tracers\t- list of configured tracers for current_tracer\n"
5494         "  error_log\t- error log for failed commands (that support it)\n"
5495         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5496         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5497         "  trace_clock\t\t- change the clock used to order events\n"
5498         "       local:   Per cpu clock but may not be synced across CPUs\n"
5499         "      global:   Synced across CPUs but slows tracing down.\n"
5500         "     counter:   Not a clock, but just an increment\n"
5501         "      uptime:   Jiffy counter from time of boot\n"
5502         "        perf:   Same clock that perf events use\n"
5503 #ifdef CONFIG_X86_64
5504         "     x86-tsc:   TSC cycle counter\n"
5505 #endif
5506         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5507         "       delta:   Delta difference against a buffer-wide timestamp\n"
5508         "    absolute:   Absolute (standalone) timestamp\n"
5509         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5510         "\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5511         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5512         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5513         "\t\t\t  Remove sub-buffer with rmdir\n"
5514         "  trace_options\t\t- Set format or modify how tracing happens\n"
5515         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5516         "\t\t\t  option name\n"
5517         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5518 #ifdef CONFIG_DYNAMIC_FTRACE
5519         "\n  available_filter_functions - list of functions that can be filtered on\n"
5520         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5521         "\t\t\t  functions\n"
5522         "\t     accepts: func_full_name or glob-matching-pattern\n"
5523         "\t     modules: Can select a group via module\n"
5524         "\t      Format: :mod:<module-name>\n"
5525         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5526         "\t    triggers: a command to perform when function is hit\n"
5527         "\t      Format: <function>:<trigger>[:count]\n"
5528         "\t     trigger: traceon, traceoff\n"
5529         "\t\t      enable_event:<system>:<event>\n"
5530         "\t\t      disable_event:<system>:<event>\n"
5531 #ifdef CONFIG_STACKTRACE
5532         "\t\t      stacktrace\n"
5533 #endif
5534 #ifdef CONFIG_TRACER_SNAPSHOT
5535         "\t\t      snapshot\n"
5536 #endif
5537         "\t\t      dump\n"
5538         "\t\t      cpudump\n"
5539         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5540         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5541         "\t     The first one will disable tracing every time do_fault is hit\n"
5542         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5543         "\t       The first time do_trap is hit and it disables tracing, the\n"
5544         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5545         "\t       the counter will not decrement. It only decrements when the\n"
5546         "\t       trigger did work\n"
5547         "\t     To remove trigger without count:\n"
5548         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5549         "\t     To remove trigger with a count:\n"
5550         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5551         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5552         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5553         "\t    modules: Can select a group via module command :mod:\n"
5554         "\t    Does not accept triggers\n"
5555 #endif /* CONFIG_DYNAMIC_FTRACE */
5556 #ifdef CONFIG_FUNCTION_TRACER
5557         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5558         "\t\t    (function)\n"
5559         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5560         "\t\t    (function)\n"
5561 #endif
5562 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5563         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5564         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5565         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5566 #endif
5567 #ifdef CONFIG_TRACER_SNAPSHOT
5568         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5569         "\t\t\t  snapshot buffer. Read the contents for more\n"
5570         "\t\t\t  information\n"
5571 #endif
5572 #ifdef CONFIG_STACK_TRACER
5573         "  stack_trace\t\t- Shows the max stack trace when active\n"
5574         "  stack_max_size\t- Shows current max stack size that was traced\n"
5575         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5576         "\t\t\t  new trace)\n"
5577 #ifdef CONFIG_DYNAMIC_FTRACE
5578         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5579         "\t\t\t  traces\n"
5580 #endif
5581 #endif /* CONFIG_STACK_TRACER */
5582 #ifdef CONFIG_DYNAMIC_EVENTS
5583         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5584         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5585 #endif
5586 #ifdef CONFIG_KPROBE_EVENTS
5587         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5588         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5589 #endif
5590 #ifdef CONFIG_UPROBE_EVENTS
5591         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5592         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5593 #endif
5594 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5595         "\t  accepts: event-definitions (one definition per line)\n"
5596         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5597         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5598 #ifdef CONFIG_HIST_TRIGGERS
5599         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5600 #endif
5601         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5602         "\t           -:[<group>/][<event>]\n"
5603 #ifdef CONFIG_KPROBE_EVENTS
5604         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5605   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5606 #endif
5607 #ifdef CONFIG_UPROBE_EVENTS
5608   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5609 #endif
5610         "\t     args: <name>=fetcharg[:type]\n"
5611         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5612 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5613         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5614 #else
5615         "\t           $stack<index>, $stack, $retval, $comm,\n"
5616 #endif
5617         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5618         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5619         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5620         "\t           <type>\\[<array-size>\\]\n"
5621 #ifdef CONFIG_HIST_TRIGGERS
5622         "\t    field: <stype> <name>;\n"
5623         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5624         "\t           [unsigned] char/int/long\n"
5625 #endif
5626         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5627         "\t            of the <attached-group>/<attached-event>.\n"
5628 #endif
5629         "  events/\t\t- Directory containing all trace event subsystems:\n"
5630         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5631         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5632         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5633         "\t\t\t  events\n"
5634         "      filter\t\t- If set, only events passing filter are traced\n"
5635         "  events/<system>/<event>/\t- Directory containing control files for\n"
5636         "\t\t\t  <event>:\n"
5637         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5638         "      filter\t\t- If set, only events passing filter are traced\n"
5639         "      trigger\t\t- If set, a command to perform when event is hit\n"
5640         "\t    Format: <trigger>[:count][if <filter>]\n"
5641         "\t   trigger: traceon, traceoff\n"
5642         "\t            enable_event:<system>:<event>\n"
5643         "\t            disable_event:<system>:<event>\n"
5644 #ifdef CONFIG_HIST_TRIGGERS
5645         "\t            enable_hist:<system>:<event>\n"
5646         "\t            disable_hist:<system>:<event>\n"
5647 #endif
5648 #ifdef CONFIG_STACKTRACE
5649         "\t\t    stacktrace\n"
5650 #endif
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652         "\t\t    snapshot\n"
5653 #endif
5654 #ifdef CONFIG_HIST_TRIGGERS
5655         "\t\t    hist (see below)\n"
5656 #endif
5657         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5658         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5659         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5660         "\t                  events/block/block_unplug/trigger\n"
5661         "\t   The first disables tracing every time block_unplug is hit.\n"
5662         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5663         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5664         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5665         "\t   Like function triggers, the counter is only decremented if it\n"
5666         "\t    enabled or disabled tracing.\n"
5667         "\t   To remove a trigger without a count:\n"
5668         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5669         "\t   To remove a trigger with a count:\n"
5670         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5671         "\t   Filters can be ignored when removing a trigger.\n"
5672 #ifdef CONFIG_HIST_TRIGGERS
5673         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5674         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5675         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5676         "\t            [:values=<field1[,field2,...]>]\n"
5677         "\t            [:sort=<field1[,field2,...]>]\n"
5678         "\t            [:size=#entries]\n"
5679         "\t            [:pause][:continue][:clear]\n"
5680         "\t            [:name=histname1]\n"
5681         "\t            [:<handler>.<action>]\n"
5682         "\t            [if <filter>]\n\n"
5683         "\t    Note, special fields can be used as well:\n"
5684         "\t            common_timestamp - to record current timestamp\n"
5685         "\t            common_cpu - to record the CPU the event happened on\n"
5686         "\n"
5687         "\t    A hist trigger variable can be:\n"
5688         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5689         "\t        - a reference to another variable e.g. y=$x,\n"
5690         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5691         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5692         "\n"
5693         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5694         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5695         "\t    variable reference, field or numeric literal.\n"
5696         "\n"
5697         "\t    When a matching event is hit, an entry is added to a hash\n"
5698         "\t    table using the key(s) and value(s) named, and the value of a\n"
5699         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5700         "\t    correspond to fields in the event's format description.  Keys\n"
5701         "\t    can be any field, or the special string 'stacktrace'.\n"
5702         "\t    Compound keys consisting of up to two fields can be specified\n"
5703         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5704         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5705         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5706         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5707         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5708         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5709         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5710         "\t    its histogram data will be shared with other triggers of the\n"
5711         "\t    same name, and trigger hits will update this common data.\n\n"
5712         "\t    Reading the 'hist' file for the event will dump the hash\n"
5713         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5714         "\t    triggers attached to an event, there will be a table for each\n"
5715         "\t    trigger in the output.  The table displayed for a named\n"
5716         "\t    trigger will be the same as any other instance having the\n"
5717         "\t    same name.  The default format used to display a given field\n"
5718         "\t    can be modified by appending any of the following modifiers\n"
5719         "\t    to the field name, as applicable:\n\n"
5720         "\t            .hex        display a number as a hex value\n"
5721         "\t            .sym        display an address as a symbol\n"
5722         "\t            .sym-offset display an address as a symbol and offset\n"
5723         "\t            .execname   display a common_pid as a program name\n"
5724         "\t            .syscall    display a syscall id as a syscall name\n"
5725         "\t            .log2       display log2 value rather than raw number\n"
5726         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5727         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5728         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5729         "\t    trigger or to start a hist trigger but not log any events\n"
5730         "\t    until told to do so.  'continue' can be used to start or\n"
5731         "\t    restart a paused hist trigger.\n\n"
5732         "\t    The 'clear' parameter will clear the contents of a running\n"
5733         "\t    hist trigger and leave its current paused/active state\n"
5734         "\t    unchanged.\n\n"
5735         "\t    The enable_hist and disable_hist triggers can be used to\n"
5736         "\t    have one event conditionally start and stop another event's\n"
5737         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5738         "\t    the enable_event and disable_event triggers.\n\n"
5739         "\t    Hist trigger handlers and actions are executed whenever a\n"
5740         "\t    histogram entry is added or updated.  They take the form:\n\n"
5741         "\t        <handler>.<action>\n\n"
5742         "\t    The available handlers are:\n\n"
5743         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5744         "\t        onmax(var)               - invoke if var exceeds current max\n"
5745         "\t        onchange(var)            - invoke action if var changes\n\n"
5746         "\t    The available actions are:\n\n"
5747         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5748         "\t        save(field,...)                      - save current event fields\n"
5749 #ifdef CONFIG_TRACER_SNAPSHOT
5750         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5751 #endif
5752 #ifdef CONFIG_SYNTH_EVENTS
5753         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5754         "\t  Write into this file to define/undefine new synthetic events.\n"
5755         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5756 #endif
5757 #endif
5758 ;
5759
5760 static ssize_t
5761 tracing_readme_read(struct file *filp, char __user *ubuf,
5762                        size_t cnt, loff_t *ppos)
5763 {
5764         return simple_read_from_buffer(ubuf, cnt, ppos,
5765                                         readme_msg, strlen(readme_msg));
5766 }
5767
5768 static const struct file_operations tracing_readme_fops = {
5769         .open           = tracing_open_generic,
5770         .read           = tracing_readme_read,
5771         .llseek         = generic_file_llseek,
5772 };
5773
5774 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5775 {
5776         int pid = ++(*pos);
5777
5778         return trace_find_tgid_ptr(pid);
5779 }
5780
5781 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5782 {
5783         int pid = *pos;
5784
5785         return trace_find_tgid_ptr(pid);
5786 }
5787
5788 static void saved_tgids_stop(struct seq_file *m, void *v)
5789 {
5790 }
5791
5792 static int saved_tgids_show(struct seq_file *m, void *v)
5793 {
5794         int *entry = (int *)v;
5795         int pid = entry - tgid_map;
5796         int tgid = *entry;
5797
5798         if (tgid == 0)
5799                 return SEQ_SKIP;
5800
5801         seq_printf(m, "%d %d\n", pid, tgid);
5802         return 0;
5803 }
5804
5805 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5806         .start          = saved_tgids_start,
5807         .stop           = saved_tgids_stop,
5808         .next           = saved_tgids_next,
5809         .show           = saved_tgids_show,
5810 };
5811
5812 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5813 {
5814         int ret;
5815
5816         ret = tracing_check_open_get_tr(NULL);
5817         if (ret)
5818                 return ret;
5819
5820         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5821 }
5822
5823
5824 static const struct file_operations tracing_saved_tgids_fops = {
5825         .open           = tracing_saved_tgids_open,
5826         .read           = seq_read,
5827         .llseek         = seq_lseek,
5828         .release        = seq_release,
5829 };
5830
5831 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5832 {
5833         unsigned int *ptr = v;
5834
5835         if (*pos || m->count)
5836                 ptr++;
5837
5838         (*pos)++;
5839
5840         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5841              ptr++) {
5842                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5843                         continue;
5844
5845                 return ptr;
5846         }
5847
5848         return NULL;
5849 }
5850
5851 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5852 {
5853         void *v;
5854         loff_t l = 0;
5855
5856         preempt_disable();
5857         arch_spin_lock(&trace_cmdline_lock);
5858
5859         v = &savedcmd->map_cmdline_to_pid[0];
5860         while (l <= *pos) {
5861                 v = saved_cmdlines_next(m, v, &l);
5862                 if (!v)
5863                         return NULL;
5864         }
5865
5866         return v;
5867 }
5868
5869 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5870 {
5871         arch_spin_unlock(&trace_cmdline_lock);
5872         preempt_enable();
5873 }
5874
5875 static int saved_cmdlines_show(struct seq_file *m, void *v)
5876 {
5877         char buf[TASK_COMM_LEN];
5878         unsigned int *pid = v;
5879
5880         __trace_find_cmdline(*pid, buf);
5881         seq_printf(m, "%d %s\n", *pid, buf);
5882         return 0;
5883 }
5884
5885 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5886         .start          = saved_cmdlines_start,
5887         .next           = saved_cmdlines_next,
5888         .stop           = saved_cmdlines_stop,
5889         .show           = saved_cmdlines_show,
5890 };
5891
5892 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5893 {
5894         int ret;
5895
5896         ret = tracing_check_open_get_tr(NULL);
5897         if (ret)
5898                 return ret;
5899
5900         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5901 }
5902
5903 static const struct file_operations tracing_saved_cmdlines_fops = {
5904         .open           = tracing_saved_cmdlines_open,
5905         .read           = seq_read,
5906         .llseek         = seq_lseek,
5907         .release        = seq_release,
5908 };
5909
5910 static ssize_t
5911 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5912                                  size_t cnt, loff_t *ppos)
5913 {
5914         char buf[64];
5915         int r;
5916
5917         preempt_disable();
5918         arch_spin_lock(&trace_cmdline_lock);
5919         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5920         arch_spin_unlock(&trace_cmdline_lock);
5921         preempt_enable();
5922
5923         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5924 }
5925
5926 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5927 {
5928         kfree(s->saved_cmdlines);
5929         kfree(s->map_cmdline_to_pid);
5930         kfree(s);
5931 }
5932
5933 static int tracing_resize_saved_cmdlines(unsigned int val)
5934 {
5935         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5936
5937         s = kmalloc(sizeof(*s), GFP_KERNEL);
5938         if (!s)
5939                 return -ENOMEM;
5940
5941         if (allocate_cmdlines_buffer(val, s) < 0) {
5942                 kfree(s);
5943                 return -ENOMEM;
5944         }
5945
5946         preempt_disable();
5947         arch_spin_lock(&trace_cmdline_lock);
5948         savedcmd_temp = savedcmd;
5949         savedcmd = s;
5950         arch_spin_unlock(&trace_cmdline_lock);
5951         preempt_enable();
5952         free_saved_cmdlines_buffer(savedcmd_temp);
5953
5954         return 0;
5955 }
5956
5957 static ssize_t
5958 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5959                                   size_t cnt, loff_t *ppos)
5960 {
5961         unsigned long val;
5962         int ret;
5963
5964         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5965         if (ret)
5966                 return ret;
5967
5968         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5969         if (!val || val > PID_MAX_DEFAULT)
5970                 return -EINVAL;
5971
5972         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5973         if (ret < 0)
5974                 return ret;
5975
5976         *ppos += cnt;
5977
5978         return cnt;
5979 }
5980
5981 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5982         .open           = tracing_open_generic,
5983         .read           = tracing_saved_cmdlines_size_read,
5984         .write          = tracing_saved_cmdlines_size_write,
5985 };
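
/*
 * Illustrative usage (tracefs mount point assumed): raise the number of
 * saved comm entries used for pid -> comm resolution, then read them back:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines
 */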
5986
5987 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5988 static union trace_eval_map_item *
5989 update_eval_map(union trace_eval_map_item *ptr)
5990 {
5991         if (!ptr->map.eval_string) {
5992                 if (ptr->tail.next) {
5993                         ptr = ptr->tail.next;
5994                         /* Set ptr to the next real item (skip head) */
5995                         ptr++;
5996                 } else
5997                         return NULL;
5998         }
5999         return ptr;
6000 }
6001
6002 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6003 {
6004         union trace_eval_map_item *ptr = v;
6005
6006         /*
6007          * Paranoid! If ptr points to end, we don't want to increment past it.
6008          * This really should never happen.
6009          */
6010         (*pos)++;
6011         ptr = update_eval_map(ptr);
6012         if (WARN_ON_ONCE(!ptr))
6013                 return NULL;
6014
6015         ptr++;
6016         ptr = update_eval_map(ptr);
6017
6018         return ptr;
6019 }
6020
6021 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6022 {
6023         union trace_eval_map_item *v;
6024         loff_t l = 0;
6025
6026         mutex_lock(&trace_eval_mutex);
6027
6028         v = trace_eval_maps;
6029         if (v)
6030                 v++;
6031
6032         while (v && l < *pos) {
6033                 v = eval_map_next(m, v, &l);
6034         }
6035
6036         return v;
6037 }
6038
6039 static void eval_map_stop(struct seq_file *m, void *v)
6040 {
6041         mutex_unlock(&trace_eval_mutex);
6042 }
6043
6044 static int eval_map_show(struct seq_file *m, void *v)
6045 {
6046         union trace_eval_map_item *ptr = v;
6047
6048         seq_printf(m, "%s %ld (%s)\n",
6049                    ptr->map.eval_string, ptr->map.eval_value,
6050                    ptr->map.system);
6051
6052         return 0;
6053 }
6054
6055 static const struct seq_operations tracing_eval_map_seq_ops = {
6056         .start          = eval_map_start,
6057         .next           = eval_map_next,
6058         .stop           = eval_map_stop,
6059         .show           = eval_map_show,
6060 };
6061
6062 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6063 {
6064         int ret;
6065
6066         ret = tracing_check_open_get_tr(NULL);
6067         if (ret)
6068                 return ret;
6069
6070         return seq_open(filp, &tracing_eval_map_seq_ops);
6071 }
6072
6073 static const struct file_operations tracing_eval_map_fops = {
6074         .open           = tracing_eval_map_open,
6075         .read           = seq_read,
6076         .llseek         = seq_lseek,
6077         .release        = seq_release,
6078 };
6079
6080 static inline union trace_eval_map_item *
6081 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6082 {
6083         /* Return tail of array given the head */
6084         return ptr + ptr->head.length + 1;
6085 }
6086
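/*
 * Append @len eval maps from @mod to the trace_eval_maps list backing the
 * eval_map file.  Each chunk is stored as a head item (module and length),
 * the map entries themselves, and a tail item that links to the next chunk.
 */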
6087 static void
6088 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6089                            int len)
6090 {
6091         struct trace_eval_map **stop;
6092         struct trace_eval_map **map;
6093         union trace_eval_map_item *map_array;
6094         union trace_eval_map_item *ptr;
6095
6096         stop = start + len;
6097
6098         /*
6099          * The trace_eval_maps contains the map plus a head and tail item,
6100          * where the head holds the module and length of array, and the
6101          * tail holds a pointer to the next list.
6102          */
6103         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6104         if (!map_array) {
6105                 pr_warn("Unable to allocate trace eval mapping\n");
6106                 return;
6107         }
6108
6109         mutex_lock(&trace_eval_mutex);
6110
6111         if (!trace_eval_maps)
6112                 trace_eval_maps = map_array;
6113         else {
6114                 ptr = trace_eval_maps;
6115                 for (;;) {
6116                         ptr = trace_eval_jmp_to_tail(ptr);
6117                         if (!ptr->tail.next)
6118                                 break;
6119                         ptr = ptr->tail.next;
6120
6121                 }
6122                 ptr->tail.next = map_array;
6123         }
6124         map_array->head.mod = mod;
6125         map_array->head.length = len;
6126         map_array++;
6127
6128         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6129                 map_array->map = **map;
6130                 map_array++;
6131         }
6132         memset(map_array, 0, sizeof(*map_array));
6133
6134         mutex_unlock(&trace_eval_mutex);
6135 }
6136
6137 static void trace_create_eval_file(struct dentry *d_tracer)
6138 {
6139         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6140                           NULL, &tracing_eval_map_fops);
6141 }
6142
6143 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6144 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6145 static inline void trace_insert_eval_map_file(struct module *mod,
6146                               struct trace_eval_map **start, int len) { }
6147 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6148
6149 static void trace_insert_eval_map(struct module *mod,
6150                                   struct trace_eval_map **start, int len)
6151 {
6152         struct trace_eval_map **map;
6153
6154         if (len <= 0)
6155                 return;
6156
6157         map = start;
6158
6159         trace_event_eval_update(map, len);
6160
6161         trace_insert_eval_map_file(mod, start, len);
6162 }
6163
6164 static ssize_t
6165 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6166                        size_t cnt, loff_t *ppos)
6167 {
6168         struct trace_array *tr = filp->private_data;
6169         char buf[MAX_TRACER_SIZE+2];
6170         int r;
6171
6172         mutex_lock(&trace_types_lock);
6173         r = sprintf(buf, "%s\n", tr->current_trace->name);
6174         mutex_unlock(&trace_types_lock);
6175
6176         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6177 }
6178
6179 int tracer_init(struct tracer *t, struct trace_array *tr)
6180 {
6181         tracing_reset_online_cpus(&tr->array_buffer);
6182         return t->init(tr);
6183 }
6184
6185 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6186 {
6187         int cpu;
6188
6189         for_each_tracing_cpu(cpu)
6190                 per_cpu_ptr(buf->data, cpu)->entries = val;
6191 }
6192
6193 #ifdef CONFIG_TRACER_MAX_TRACE
6194 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6196                                         struct array_buffer *size_buf, int cpu_id)
6197 {
6198         int cpu, ret = 0;
6199
6200         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6201                 for_each_tracing_cpu(cpu) {
6202                         ret = ring_buffer_resize(trace_buf->buffer,
6203                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6204                         if (ret < 0)
6205                                 break;
6206                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6207                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6208                 }
6209         } else {
6210                 ret = ring_buffer_resize(trace_buf->buffer,
6211                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6212                 if (ret == 0)
6213                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6214                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6215         }
6216
6217         return ret;
6218 }
6219 #endif /* CONFIG_TRACER_MAX_TRACE */
6220
6221 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6222                                         unsigned long size, int cpu)
6223 {
6224         int ret;
6225
6226         /*
6227          * If kernel or user changes the size of the ring buffer
6228          * we use the size that was given, and we can forget about
6229          * expanding it later.
6230          */
6231         ring_buffer_expanded = true;
6232
6233         /* May be called before buffers are initialized */
6234         if (!tr->array_buffer.buffer)
6235                 return 0;
6236
6237         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6238         if (ret < 0)
6239                 return ret;
6240
6241 #ifdef CONFIG_TRACER_MAX_TRACE
6242         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6243             !tr->current_trace->use_max_tr)
6244                 goto out;
6245
6246         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6247         if (ret < 0) {
6248                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6249                                                      &tr->array_buffer, cpu);
6250                 if (r < 0) {
6251                         /*
6252                          * AARGH! We are left with a max buffer of a
6253                          * different size!
6254                          * The max buffer is our "snapshot" buffer.
6255                          * When a tracer needs a snapshot (one of the
6256                          * latency tracers), it swaps the max buffer
6257                          * with the saved snapshot. We managed to
6258                          * resize the main buffer, but failed to
6259                          * resize the max buffer. And when we tried
6260                          * to reset the main buffer to its original
6261                          * size, that failed too. This is very unlikely
6262                          * to happen, but if it does, warn and kill
6263                          * all tracing.
6264                          */
6265                         WARN_ON(1);
6266                         tracing_disabled = 1;
6267                 }
6268                 return ret;
6269         }
6270
6271         if (cpu == RING_BUFFER_ALL_CPUS)
6272                 set_buffer_entries(&tr->max_buffer, size);
6273         else
6274                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6275
6276  out:
6277 #endif /* CONFIG_TRACER_MAX_TRACE */
6278
6279         if (cpu == RING_BUFFER_ALL_CPUS)
6280                 set_buffer_entries(&tr->array_buffer, size);
6281         else
6282                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6283
6284         return ret;
6285 }
6286
6287 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6288                                   unsigned long size, int cpu_id)
6289 {
6290         int ret;
6291
6292         mutex_lock(&trace_types_lock);
6293
6294         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6295                 /* make sure this cpu is enabled in the mask */
6296                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6297                         ret = -EINVAL;
6298                         goto out;
6299                 }
6300         }
6301
6302         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6303         if (ret < 0)
6304                 ret = -ENOMEM;
6305
6306 out:
6307         mutex_unlock(&trace_types_lock);
6308
6309         return ret;
6310 }
6311
6312
6313 /**
6314  * tracing_update_buffers - used by tracing facility to expand ring buffers
6315  *
6316  * To save memory on systems that have tracing configured in but never
6317  * use it, the ring buffers are set to a minimum size. Once a user
6318  * starts to use the tracing facility, they need to grow to their
6319  * default size.
6320  *
6321  * This function is to be called when a tracer is about to be used.
6322  */
6323 int tracing_update_buffers(void)
6324 {
6325         int ret = 0;
6326
6327         mutex_lock(&trace_types_lock);
6328         if (!ring_buffer_expanded)
6329                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6330                                                 RING_BUFFER_ALL_CPUS);
6331         mutex_unlock(&trace_types_lock);
6332
6333         return ret;
6334 }
6335
6336 struct trace_option_dentry;
6337
6338 static void
6339 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6340
6341 /*
6342  * Used to clear out the tracer before deletion of an instance.
6343  * Must have trace_types_lock held.
6344  */
6345 static void tracing_set_nop(struct trace_array *tr)
6346 {
6347         if (tr->current_trace == &nop_trace)
6348                 return;
6349
6350         tr->current_trace->enabled--;
6351
6352         if (tr->current_trace->reset)
6353                 tr->current_trace->reset(tr);
6354
6355         tr->current_trace = &nop_trace;
6356 }
6357
6358 static bool tracer_options_updated;
6359
6360 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6361 {
6362         /* Only enable if the directory has been created already. */
6363         if (!tr->dir)
6364                 return;
6365
6366         /* Only create trace option files after update_tracer_options finish */
6367         /* Only create trace option files after update_tracer_options finishes */
6368                 return;
6369
6370         create_trace_option_files(tr, t);
6371 }
6372
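/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it has not
 * been expanded yet, tear down the current tracer, and initialize the new
 * one. Fails with -EINVAL if the tracer is unknown or not allowed for this
 * trace array, and with -EBUSY while trace_pipe readers hold a reference.
 */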
6373 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6374 {
6375         struct tracer *t;
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377         bool had_max_tr;
6378 #endif
6379         int ret = 0;
6380
6381         mutex_lock(&trace_types_lock);
6382
6383         if (!ring_buffer_expanded) {
6384                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6385                                                 RING_BUFFER_ALL_CPUS);
6386                 if (ret < 0)
6387                         goto out;
6388                 ret = 0;
6389         }
6390
6391         for (t = trace_types; t; t = t->next) {
6392                 if (strcmp(t->name, buf) == 0)
6393                         break;
6394         }
6395         if (!t) {
6396                 ret = -EINVAL;
6397                 goto out;
6398         }
6399         if (t == tr->current_trace)
6400                 goto out;
6401
6402 #ifdef CONFIG_TRACER_SNAPSHOT
6403         if (t->use_max_tr) {
6404                 local_irq_disable();
6405                 arch_spin_lock(&tr->max_lock);
6406                 if (tr->cond_snapshot)
6407                         ret = -EBUSY;
6408                 arch_spin_unlock(&tr->max_lock);
6409                 local_irq_enable();
6410                 if (ret)
6411                         goto out;
6412         }
6413 #endif
6414         /* Some tracers won't work on kernel command line */
6415         if (system_state < SYSTEM_RUNNING && t->noboot) {
6416                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6417                         t->name);
6418                 goto out;
6419         }
6420
6421         /* Some tracers are only allowed for the top level buffer */
6422         if (!trace_ok_for_array(t, tr)) {
6423                 ret = -EINVAL;
6424                 goto out;
6425         }
6426
6427         /* If trace pipe files are being read, we can't change the tracer */
6428         if (tr->trace_ref) {
6429                 ret = -EBUSY;
6430                 goto out;
6431         }
6432
6433         trace_branch_disable();
6434
6435         tr->current_trace->enabled--;
6436
6437         if (tr->current_trace->reset)
6438                 tr->current_trace->reset(tr);
6439
6440 #ifdef CONFIG_TRACER_MAX_TRACE
6441         had_max_tr = tr->current_trace->use_max_tr;
6442
6443         /* Current trace needs to be nop_trace before synchronize_rcu */
6444         tr->current_trace = &nop_trace;
6445
6446         if (had_max_tr && !t->use_max_tr) {
6447                 /*
6448                  * We need to make sure that update_max_tr() sees that
6449                  * current_trace changed to nop_trace, to keep it from
6450                  * swapping the buffers after we resize it.
6451                  * update_max_tr() is called with interrupts disabled,
6452                  * so a synchronize_rcu() is sufficient.
6453                  */
6454                 synchronize_rcu();
6455                 free_snapshot(tr);
6456         }
6457
6458         if (t->use_max_tr && !tr->allocated_snapshot) {
6459                 ret = tracing_alloc_snapshot_instance(tr);
6460                 if (ret < 0)
6461                         goto out;
6462         }
6463 #else
6464         tr->current_trace = &nop_trace;
6465 #endif
6466
6467         if (t->init) {
6468                 ret = tracer_init(t, tr);
6469                 if (ret)
6470                         goto out;
6471         }
6472
6473         tr->current_trace = t;
6474         tr->current_trace->enabled++;
6475         trace_branch_enable(tr);
6476  out:
6477         mutex_unlock(&trace_types_lock);
6478
6479         return ret;
6480 }
6481
6482 static ssize_t
6483 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6484                         size_t cnt, loff_t *ppos)
6485 {
6486         struct trace_array *tr = filp->private_data;
6487         char buf[MAX_TRACER_SIZE+1];
6488         char *name;
6489         size_t ret;
6490         int err;
6491
6492         ret = cnt;
6493
6494         if (cnt > MAX_TRACER_SIZE)
6495                 cnt = MAX_TRACER_SIZE;
6496
6497         if (copy_from_user(buf, ubuf, cnt))
6498                 return -EFAULT;
6499
6500         buf[cnt] = 0;
6501
6502         name = strim(buf);
6503
6504         err = tracing_set_tracer(tr, name);
6505         if (err)
6506                 return err;
6507
6508         *ppos += ret;
6509
6510         return ret;
6511 }
6512
6513 static ssize_t
6514 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6515                    size_t cnt, loff_t *ppos)
6516 {
6517         char buf[64];
6518         int r;
6519
6520         r = snprintf(buf, sizeof(buf), "%ld\n",
6521                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6522         if (r > sizeof(buf))
6523                 r = sizeof(buf);
6524         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6525 }
6526
6527 static ssize_t
6528 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6529                     size_t cnt, loff_t *ppos)
6530 {
6531         unsigned long val;
6532         int ret;
6533
6534         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6535         if (ret)
6536                 return ret;
6537
6538         *ptr = val * 1000;
6539
6540         return cnt;
6541 }
6542
6543 static ssize_t
6544 tracing_thresh_read(struct file *filp, char __user *ubuf,
6545                     size_t cnt, loff_t *ppos)
6546 {
6547         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6548 }
6549
6550 static ssize_t
6551 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6552                      size_t cnt, loff_t *ppos)
6553 {
6554         struct trace_array *tr = filp->private_data;
6555         int ret;
6556
6557         mutex_lock(&trace_types_lock);
6558         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6559         if (ret < 0)
6560                 goto out;
6561
6562         if (tr->current_trace->update_thresh) {
6563                 ret = tr->current_trace->update_thresh(tr);
6564                 if (ret < 0)
6565                         goto out;
6566         }
6567
6568         ret = cnt;
6569 out:
6570         mutex_unlock(&trace_types_lock);
6571
6572         return ret;
6573 }
6574
6575 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6576
6577 static ssize_t
6578 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6579                      size_t cnt, loff_t *ppos)
6580 {
6581         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6582 }
6583
6584 static ssize_t
6585 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6586                       size_t cnt, loff_t *ppos)
6587 {
6588         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6589 }
6590
6591 #endif
6592
6593 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6594 {
6595         struct trace_array *tr = inode->i_private;
6596         struct trace_iterator *iter;
6597         int ret;
6598
6599         ret = tracing_check_open_get_tr(tr);
6600         if (ret)
6601                 return ret;
6602
6603         mutex_lock(&trace_types_lock);
6604
6605         /* create a buffer to store the information to pass to userspace */
6606         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6607         if (!iter) {
6608                 ret = -ENOMEM;
6609                 __trace_array_put(tr);
6610                 goto out;
6611         }
6612
6613         trace_seq_init(&iter->seq);
6614         iter->trace = tr->current_trace;
6615
6616         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6617                 ret = -ENOMEM;
6618                 goto fail;
6619         }
6620
6621         /* trace pipe does not show start of buffer */
6622         cpumask_setall(iter->started);
6623
6624         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6625                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6626
6627         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6628         if (trace_clocks[tr->clock_id].in_ns)
6629                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6630
6631         iter->tr = tr;
6632         iter->array_buffer = &tr->array_buffer;
6633         iter->cpu_file = tracing_get_cpu(inode);
6634         mutex_init(&iter->mutex);
6635         filp->private_data = iter;
6636
6637         if (iter->trace->pipe_open)
6638                 iter->trace->pipe_open(iter);
6639
6640         nonseekable_open(inode, filp);
6641
6642         tr->trace_ref++;
6643 out:
6644         mutex_unlock(&trace_types_lock);
6645         return ret;
6646
6647 fail:
6648         kfree(iter);
6649         __trace_array_put(tr);
6650         mutex_unlock(&trace_types_lock);
6651         return ret;
6652 }
6653
6654 static int tracing_release_pipe(struct inode *inode, struct file *file)
6655 {
6656         struct trace_iterator *iter = file->private_data;
6657         struct trace_array *tr = inode->i_private;
6658
6659         mutex_lock(&trace_types_lock);
6660
6661         tr->trace_ref--;
6662
6663         if (iter->trace->pipe_close)
6664                 iter->trace->pipe_close(iter);
6665
6666         mutex_unlock(&trace_types_lock);
6667
6668         free_cpumask_var(iter->started);
6669         kfree(iter->fmt);
6670         mutex_destroy(&iter->mutex);
6671         kfree(iter);
6672
6673         trace_array_put(tr);
6674
6675         return 0;
6676 }
6677
6678 static __poll_t
6679 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6680 {
6681         struct trace_array *tr = iter->tr;
6682
6683         /* Iterators are static, they should be filled or empty */
6684         if (trace_buffer_iter(iter, iter->cpu_file))
6685                 return EPOLLIN | EPOLLRDNORM;
6686
6687         if (tr->trace_flags & TRACE_ITER_BLOCK)
6688                 /*
6689                  * Always select as readable when in blocking mode
6690                  */
6691                 return EPOLLIN | EPOLLRDNORM;
6692         else
6693                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6694                                              filp, poll_table, iter->tr->buffer_percent);
6695 }
6696
6697 static __poll_t
6698 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6699 {
6700         struct trace_iterator *iter = filp->private_data;
6701
6702         return trace_poll(iter, filp, poll_table);
6703 }
6704
6705 /* Must be called with iter->mutex held. */
6706 static int tracing_wait_pipe(struct file *filp)
6707 {
6708         struct trace_iterator *iter = filp->private_data;
6709         int ret;
6710
6711         while (trace_empty(iter)) {
6712
6713                 if ((filp->f_flags & O_NONBLOCK)) {
6714                         return -EAGAIN;
6715                 }
6716
6717                 /*
6718                  * We keep blocking until something has been read and
6719                  * tracing is disabled. If tracing is disabled but nothing
6720                  * has been read yet, we still block. This allows a user to
6721                  * cat this file, and then enable tracing. But after we have
6722                  * read something, we give an EOF when tracing is disabled again.
6723                  *
6724                  * iter->pos will be 0 if we haven't read anything.
6725                  */
6726                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6727                         break;
6728
6729                 mutex_unlock(&iter->mutex);
6730
6731                 ret = wait_on_pipe(iter, 0);
6732
6733                 mutex_lock(&iter->mutex);
6734
6735                 if (ret)
6736                         return ret;
6737         }
6738
6739         return 1;
6740 }
6741
6742 /*
6743  * Consumer reader.
6744  */
6745 static ssize_t
6746 tracing_read_pipe(struct file *filp, char __user *ubuf,
6747                   size_t cnt, loff_t *ppos)
6748 {
6749         struct trace_iterator *iter = filp->private_data;
6750         ssize_t sret;
6751
6752         /*
6753          * Avoid more than one consumer on a single file descriptor
6754          * This is just a matter of traces coherency, the ring buffer itself
6755          * is protected.
6756          */
6757         mutex_lock(&iter->mutex);
6758
6759         /* return any leftover data */
6760         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6761         if (sret != -EBUSY)
6762                 goto out;
6763
6764         trace_seq_init(&iter->seq);
6765
6766         if (iter->trace->read) {
6767                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6768                 if (sret)
6769                         goto out;
6770         }
6771
6772 waitagain:
6773         sret = tracing_wait_pipe(filp);
6774         if (sret <= 0)
6775                 goto out;
6776
6777         /* stop when tracing is finished */
6778         if (trace_empty(iter)) {
6779                 sret = 0;
6780                 goto out;
6781         }
6782
6783         if (cnt >= PAGE_SIZE)
6784                 cnt = PAGE_SIZE - 1;
6785
6786         /* reset all but tr, trace, and overruns */
6787         trace_iterator_reset(iter);
6788         cpumask_clear(iter->started);
6789         trace_seq_init(&iter->seq);
6790
6791         trace_event_read_lock();
6792         trace_access_lock(iter->cpu_file);
6793         while (trace_find_next_entry_inc(iter) != NULL) {
6794                 enum print_line_t ret;
6795                 int save_len = iter->seq.seq.len;
6796
6797                 ret = print_trace_line(iter);
6798                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6799                         /* don't print partial lines */
6800                         iter->seq.seq.len = save_len;
6801                         break;
6802                 }
6803                 if (ret != TRACE_TYPE_NO_CONSUME)
6804                         trace_consume(iter);
6805
6806                 if (trace_seq_used(&iter->seq) >= cnt)
6807                         break;
6808
6809                 /*
6810                  * The full flag being set means we hit the end of the trace_seq
6811                  * buffer and should have left via the partial-line check above.
6812                  * If we get here, a trace_seq_* function was not used properly.
6813                  */
6814                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6815                           iter->ent->type);
6816         }
6817         trace_access_unlock(iter->cpu_file);
6818         trace_event_read_unlock();
6819
6820         /* Now copy what we have to the user */
6821         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6822         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6823                 trace_seq_init(&iter->seq);
6824
6825         /*
6826          * If there was nothing to send to user, in spite of consuming trace
6827          * entries, go back to wait for more entries.
6828          */
6829         if (sret == -EBUSY)
6830                 goto waitagain;
6831
6832 out:
6833         mutex_unlock(&iter->mutex);
6834
6835         return sret;
6836 }
6837
6838 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6839                                      unsigned int idx)
6840 {
6841         __free_page(spd->pages[idx]);
6842 }
6843
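/*
 * Fill iter->seq with up to one page of formatted trace output, consuming
 * entries as it goes. @rem is the remaining byte budget for the splice;
 * the budget left over after this page is returned.
 */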
6844 static size_t
6845 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6846 {
6847         size_t count;
6848         int save_len;
6849         int ret;
6850
6851         /* Seq buffer is page-sized, exactly what we need. */
6852         for (;;) {
6853                 save_len = iter->seq.seq.len;
6854                 ret = print_trace_line(iter);
6855
6856                 if (trace_seq_has_overflowed(&iter->seq)) {
6857                         iter->seq.seq.len = save_len;
6858                         break;
6859                 }
6860
6861                 /*
6862                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6863                  * should only be returned if iter->seq overflowed, which is
6864                  * checked above. But check it anyway to be safe.
6865                  */
6866                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6867                         iter->seq.seq.len = save_len;
6868                         break;
6869                 }
6870
6871                 count = trace_seq_used(&iter->seq) - save_len;
6872                 if (rem < count) {
6873                         rem = 0;
6874                         iter->seq.seq.len = save_len;
6875                         break;
6876                 }
6877
6878                 if (ret != TRACE_TYPE_NO_CONSUME)
6879                         trace_consume(iter);
6880                 rem -= count;
6881                 if (!trace_find_next_entry_inc(iter))   {
6882                         rem = 0;
6883                         iter->ent = NULL;
6884                         break;
6885                 }
6886         }
6887
6888         return rem;
6889 }
6890
6891 static ssize_t tracing_splice_read_pipe(struct file *filp,
6892                                         loff_t *ppos,
6893                                         struct pipe_inode_info *pipe,
6894                                         size_t len,
6895                                         unsigned int flags)
6896 {
6897         struct page *pages_def[PIPE_DEF_BUFFERS];
6898         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6899         struct trace_iterator *iter = filp->private_data;
6900         struct splice_pipe_desc spd = {
6901                 .pages          = pages_def,
6902                 .partial        = partial_def,
6903                 .nr_pages       = 0, /* This gets updated below. */
6904                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6905                 .ops            = &default_pipe_buf_ops,
6906                 .spd_release    = tracing_spd_release_pipe,
6907         };
6908         ssize_t ret;
6909         size_t rem;
6910         unsigned int i;
6911
6912         if (splice_grow_spd(pipe, &spd))
6913                 return -ENOMEM;
6914
6915         mutex_lock(&iter->mutex);
6916
6917         if (iter->trace->splice_read) {
6918                 ret = iter->trace->splice_read(iter, filp,
6919                                                ppos, pipe, len, flags);
6920                 if (ret)
6921                         goto out_err;
6922         }
6923
6924         ret = tracing_wait_pipe(filp);
6925         if (ret <= 0)
6926                 goto out_err;
6927
6928         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6929                 ret = -EFAULT;
6930                 goto out_err;
6931         }
6932
6933         trace_event_read_lock();
6934         trace_access_lock(iter->cpu_file);
6935
6936         /* Fill as many pages as possible. */
6937         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6938                 spd.pages[i] = alloc_page(GFP_KERNEL);
6939                 if (!spd.pages[i])
6940                         break;
6941
6942                 rem = tracing_fill_pipe_page(rem, iter);
6943
6944                 /* Copy the data into the page, so we can start over. */
6945                 ret = trace_seq_to_buffer(&iter->seq,
6946                                           page_address(spd.pages[i]),
6947                                           trace_seq_used(&iter->seq));
6948                 if (ret < 0) {
6949                         __free_page(spd.pages[i]);
6950                         break;
6951                 }
6952                 spd.partial[i].offset = 0;
6953                 spd.partial[i].len = trace_seq_used(&iter->seq);
6954
6955                 trace_seq_init(&iter->seq);
6956         }
6957
6958         trace_access_unlock(iter->cpu_file);
6959         trace_event_read_unlock();
6960         mutex_unlock(&iter->mutex);
6961
6962         spd.nr_pages = i;
6963
6964         if (i)
6965                 ret = splice_to_pipe(pipe, &spd);
6966         else
6967                 ret = 0;
6968 out:
6969         splice_shrink_spd(&spd);
6970         return ret;
6971
6972 out_err:
6973         mutex_unlock(&iter->mutex);
6974         goto out;
6975 }
6976
6977 static ssize_t
6978 tracing_entries_read(struct file *filp, char __user *ubuf,
6979                      size_t cnt, loff_t *ppos)
6980 {
6981         struct inode *inode = file_inode(filp);
6982         struct trace_array *tr = inode->i_private;
6983         int cpu = tracing_get_cpu(inode);
6984         char buf[64];
6985         int r = 0;
6986         ssize_t ret;
6987
6988         mutex_lock(&trace_types_lock);
6989
6990         if (cpu == RING_BUFFER_ALL_CPUS) {
6991                 int cpu, buf_size_same;
6992                 unsigned long size;
6993
6994                 size = 0;
6995                 buf_size_same = 1;
6996                 /* check if all cpu sizes are same */
6997                 for_each_tracing_cpu(cpu) {
6998                         /* fill in the size from first enabled cpu */
6999                         if (size == 0)
7000                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7001                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7002                                 buf_size_same = 0;
7003                                 break;
7004                         }
7005                 }
7006
7007                 if (buf_size_same) {
7008                         if (!ring_buffer_expanded)
7009                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7010                                             size >> 10,
7011                                             trace_buf_size >> 10);
7012                         else
7013                                 r = sprintf(buf, "%lu\n", size >> 10);
7014                 } else
7015                         r = sprintf(buf, "X\n");
7016         } else
7017                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7018
7019         mutex_unlock(&trace_types_lock);
7020
7021         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7022         return ret;
7023 }
7024
7025 static ssize_t
7026 tracing_entries_write(struct file *filp, const char __user *ubuf,
7027                       size_t cnt, loff_t *ppos)
7028 {
7029         struct inode *inode = file_inode(filp);
7030         struct trace_array *tr = inode->i_private;
7031         unsigned long val;
7032         int ret;
7033
7034         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7035         if (ret)
7036                 return ret;
7037
7038         /* must have at least 1 entry */
7039         if (!val)
7040                 return -EINVAL;
7041
7042         /* value is in KB */
7043         val <<= 10;
7044         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7045         if (ret < 0)
7046                 return ret;
7047
7048         *ppos += cnt;
7049
7050         return cnt;
7051 }
7052
7053 static ssize_t
7054 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7055                                 size_t cnt, loff_t *ppos)
7056 {
7057         struct trace_array *tr = filp->private_data;
7058         char buf[64];
7059         int r, cpu;
7060         unsigned long size = 0, expanded_size = 0;
7061
7062         mutex_lock(&trace_types_lock);
7063         for_each_tracing_cpu(cpu) {
7064                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7065                 if (!ring_buffer_expanded)
7066                         expanded_size += trace_buf_size >> 10;
7067         }
7068         if (ring_buffer_expanded)
7069                 r = sprintf(buf, "%lu\n", size);
7070         else
7071                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7072         mutex_unlock(&trace_types_lock);
7073
7074         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7075 }
7076
7077 static ssize_t
7078 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7079                           size_t cnt, loff_t *ppos)
7080 {
7081         /*
7082          * There is no need to read what the user has written; this function
7083          * exists only so that "echo" into this file does not return an error.
7084          */
7085
7086         *ppos += cnt;
7087
7088         return cnt;
7089 }
7090
7091 static int
7092 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7093 {
7094         struct trace_array *tr = inode->i_private;
7095
7096         /* disable tracing? */
7097         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7098                 tracer_tracing_off(tr);
7099         /* resize the ring buffer to 0 */
7100         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7101
7102         trace_array_put(tr);
7103
7104         return 0;
7105 }
7106
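/*
 * Write handler for the trace_marker file: the user supplied string is
 * recorded in the ring buffer as a TRACE_PRINT event ("<faulted>" is
 * substituted if the copy from user space faults), and any triggers
 * attached to the trace_marker event are run against it.
 */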
7107 static ssize_t
7108 tracing_mark_write(struct file *filp, const char __user *ubuf,
7109                                         size_t cnt, loff_t *fpos)
7110 {
7111         struct trace_array *tr = filp->private_data;
7112         struct ring_buffer_event *event;
7113         enum event_trigger_type tt = ETT_NONE;
7114         struct trace_buffer *buffer;
7115         struct print_entry *entry;
7116         ssize_t written;
7117         int size;
7118         int len;
7119
7120 /* Used in tracing_mark_raw_write() as well */
7121 #define FAULTED_STR "<faulted>"
7122 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7123
7124         if (tracing_disabled)
7125                 return -EINVAL;
7126
7127         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7128                 return -EINVAL;
7129
7130         if (cnt > TRACE_BUF_SIZE)
7131                 cnt = TRACE_BUF_SIZE;
7132
7133         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7134
7135         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7136
7137         /* If less than "<faulted>", then make sure we can still add that */
7138         if (cnt < FAULTED_SIZE)
7139                 size += FAULTED_SIZE - cnt;
7140
7141         buffer = tr->array_buffer.buffer;
7142         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7143                                             tracing_gen_ctx());
7144         if (unlikely(!event))
7145                 /* Ring buffer disabled, return as if not open for write */
7146                 return -EBADF;
7147
7148         entry = ring_buffer_event_data(event);
7149         entry->ip = _THIS_IP_;
7150
7151         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7152         if (len) {
7153                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7154                 cnt = FAULTED_SIZE;
7155                 written = -EFAULT;
7156         } else
7157                 written = cnt;
7158
7159         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7160                 /* do not add \n before testing triggers, but add \0 */
7161                 entry->buf[cnt] = '\0';
7162                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7163         }
7164
7165         if (entry->buf[cnt - 1] != '\n') {
7166                 entry->buf[cnt] = '\n';
7167                 entry->buf[cnt + 1] = '\0';
7168         } else
7169                 entry->buf[cnt] = '\0';
7170
7171         if (static_branch_unlikely(&trace_marker_exports_enabled))
7172                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7173         __buffer_unlock_commit(buffer, event);
7174
7175         if (tt)
7176                 event_triggers_post_call(tr->trace_marker_file, tt);
7177
7178         return written;
7179 }
7180
7181 /* Limit it for now to 3K (including tag) */
7182 #define RAW_DATA_MAX_SIZE (1024*3)
7183
7184 static ssize_t
7185 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7186                                         size_t cnt, loff_t *fpos)
7187 {
7188         struct trace_array *tr = filp->private_data;
7189         struct ring_buffer_event *event;
7190         struct trace_buffer *buffer;
7191         struct raw_data_entry *entry;
7192         ssize_t written;
7193         int size;
7194         int len;
7195
7196 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7197
7198         if (tracing_disabled)
7199                 return -EINVAL;
7200
7201         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7202                 return -EINVAL;
7203
7204         /* The marker must at least have a tag id */
7205         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7206                 return -EINVAL;
7207
7208         if (cnt > TRACE_BUF_SIZE)
7209                 cnt = TRACE_BUF_SIZE;
7210
7211         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7212
7213         size = sizeof(*entry) + cnt;
7214         if (cnt < FAULT_SIZE_ID)
7215                 size += FAULT_SIZE_ID - cnt;
7216
7217         buffer = tr->array_buffer.buffer;
7218         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7219                                             tracing_gen_ctx());
7220         if (!event)
7221                 /* Ring buffer disabled, return as if not open for write */
7222                 return -EBADF;
7223
7224         entry = ring_buffer_event_data(event);
7225
7226         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7227         if (len) {
7228                 entry->id = -1;
7229                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7230                 written = -EFAULT;
7231         } else
7232                 written = cnt;
7233
7234         __buffer_unlock_commit(buffer, event);
7235
7236         return written;
7237 }
7238
7239 static int tracing_clock_show(struct seq_file *m, void *v)
7240 {
7241         struct trace_array *tr = m->private;
7242         int i;
7243
7244         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7245                 seq_printf(m,
7246                         "%s%s%s%s", i ? " " : "",
7247                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7248                         i == tr->clock_id ? "]" : "");
7249         seq_putc(m, '\n');
7250
7251         return 0;
7252 }
7253
7254 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7255 {
7256         int i;
7257
7258         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7259                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7260                         break;
7261         }
7262         if (i == ARRAY_SIZE(trace_clocks))
7263                 return -EINVAL;
7264
7265         mutex_lock(&trace_types_lock);
7266
7267         tr->clock_id = i;
7268
7269         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7270
7271         /*
7272          * New clock may not be consistent with the previous clock.
7273          * Reset the buffer so that it doesn't have incomparable timestamps.
7274          */
7275         tracing_reset_online_cpus(&tr->array_buffer);
7276
7277 #ifdef CONFIG_TRACER_MAX_TRACE
7278         if (tr->max_buffer.buffer)
7279                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7280         tracing_reset_online_cpus(&tr->max_buffer);
7281 #endif
7282
7283         mutex_unlock(&trace_types_lock);
7284
7285         return 0;
7286 }
7287
7288 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7289                                    size_t cnt, loff_t *fpos)
7290 {
7291         struct seq_file *m = filp->private_data;
7292         struct trace_array *tr = m->private;
7293         char buf[64];
7294         const char *clockstr;
7295         int ret;
7296
7297         if (cnt >= sizeof(buf))
7298                 return -EINVAL;
7299
7300         if (copy_from_user(buf, ubuf, cnt))
7301                 return -EFAULT;
7302
7303         buf[cnt] = 0;
7304
7305         clockstr = strstrip(buf);
7306
7307         ret = tracing_set_clock(tr, clockstr);
7308         if (ret)
7309                 return ret;
7310
7311         *fpos += cnt;
7312
7313         return cnt;
7314 }
7315
7316 static int tracing_clock_open(struct inode *inode, struct file *file)
7317 {
7318         struct trace_array *tr = inode->i_private;
7319         int ret;
7320
7321         ret = tracing_check_open_get_tr(tr);
7322         if (ret)
7323                 return ret;
7324
7325         ret = single_open(file, tracing_clock_show, inode->i_private);
7326         if (ret < 0)
7327                 trace_array_put(tr);
7328
7329         return ret;
7330 }
7331
7332 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7333 {
7334         struct trace_array *tr = m->private;
7335
7336         mutex_lock(&trace_types_lock);
7337
7338         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7339                 seq_puts(m, "delta [absolute]\n");
7340         else
7341                 seq_puts(m, "[delta] absolute\n");
7342
7343         mutex_unlock(&trace_types_lock);
7344
7345         return 0;
7346 }
7347
7348 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7349 {
7350         struct trace_array *tr = inode->i_private;
7351         int ret;
7352
7353         ret = tracing_check_open_get_tr(tr);
7354         if (ret)
7355                 return ret;
7356
7357         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7358         if (ret < 0)
7359                 trace_array_put(tr);
7360
7361         return ret;
7362 }
7363
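/*
 * Return the time stamp for @rbe: if it is this CPU's buffered event, use
 * the current time stamp of @buffer; otherwise use the time stamp recorded
 * in the event itself.
 */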
7364 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7365 {
7366         if (rbe == this_cpu_read(trace_buffered_event))
7367                 return ring_buffer_time_stamp(buffer);
7368
7369         return ring_buffer_event_time_stamp(buffer, rbe);
7370 }
7371
7372 /*
7373  * Set or disable using the per CPU trace_buffer_event when possible.
7374  */
7375 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7376 {
7377         int ret = 0;
7378
7379         mutex_lock(&trace_types_lock);
7380
7381         if (set && tr->no_filter_buffering_ref++)
7382                 goto out;
7383
7384         if (!set) {
7385                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7386                         ret = -EINVAL;
7387                         goto out;
7388                 }
7389
7390                 --tr->no_filter_buffering_ref;
7391         }
7392  out:
7393         mutex_unlock(&trace_types_lock);
7394
7395         return ret;
7396 }
7397
7398 struct ftrace_buffer_info {
7399         struct trace_iterator   iter;
7400         void                    *spare;
7401         unsigned int            spare_cpu;
7402         unsigned int            read;
7403 };
7404
7405 #ifdef CONFIG_TRACER_SNAPSHOT
7406 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7407 {
7408         struct trace_array *tr = inode->i_private;
7409         struct trace_iterator *iter;
7410         struct seq_file *m;
7411         int ret;
7412
7413         ret = tracing_check_open_get_tr(tr);
7414         if (ret)
7415                 return ret;
7416
7417         if (file->f_mode & FMODE_READ) {
7418                 iter = __tracing_open(inode, file, true);
7419                 if (IS_ERR(iter))
7420                         ret = PTR_ERR(iter);
7421         } else {
7422                 /* Writes still need the seq_file to hold the private data */
7423                 ret = -ENOMEM;
7424                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7425                 if (!m)
7426                         goto out;
7427                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7428                 if (!iter) {
7429                         kfree(m);
7430                         goto out;
7431                 }
7432                 ret = 0;
7433
7434                 iter->tr = tr;
7435                 iter->array_buffer = &tr->max_buffer;
7436                 iter->cpu_file = tracing_get_cpu(inode);
7437                 m->private = iter;
7438                 file->private_data = m;
7439         }
7440 out:
7441         if (ret < 0)
7442                 trace_array_put(tr);
7443
7444         return ret;
7445 }
7446
7447 static ssize_t
7448 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7449                        loff_t *ppos)
7450 {
7451         struct seq_file *m = filp->private_data;
7452         struct trace_iterator *iter = m->private;
7453         struct trace_array *tr = iter->tr;
7454         unsigned long val;
7455         int ret;
7456
7457         ret = tracing_update_buffers();
7458         if (ret < 0)
7459                 return ret;
7460
7461         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7462         if (ret)
7463                 return ret;
7464
7465         mutex_lock(&trace_types_lock);
7466
7467         if (tr->current_trace->use_max_tr) {
7468                 ret = -EBUSY;
7469                 goto out;
7470         }
7471
7472         local_irq_disable();
7473         arch_spin_lock(&tr->max_lock);
7474         if (tr->cond_snapshot)
7475                 ret = -EBUSY;
7476         arch_spin_unlock(&tr->max_lock);
7477         local_irq_enable();
7478         if (ret)
7479                 goto out;
7480
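        /*
         * Writing 0 frees the snapshot buffer, 1 allocates it if needed and
         * takes a snapshot by swapping it with the main buffer, and any
         * other value simply clears the snapshot buffer's contents.
         */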
7481         switch (val) {
7482         case 0:
7483                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7484                         ret = -EINVAL;
7485                         break;
7486                 }
7487                 if (tr->allocated_snapshot)
7488                         free_snapshot(tr);
7489                 break;
7490         case 1:
7491 /* Only allow per-cpu swap if the ring buffer supports it */
7492 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7493                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7494                         ret = -EINVAL;
7495                         break;
7496                 }
7497 #endif
7498                 if (tr->allocated_snapshot)
7499                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7500                                         &tr->array_buffer, iter->cpu_file);
7501                 else
7502                         ret = tracing_alloc_snapshot_instance(tr);
7503                 if (ret < 0)
7504                         break;
7505                 local_irq_disable();
7506                 /* Now, we're going to swap */
7507                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7508                         update_max_tr(tr, current, smp_processor_id(), NULL);
7509                 else
7510                         update_max_tr_single(tr, current, iter->cpu_file);
7511                 local_irq_enable();
7512                 break;
7513         default:
7514                 if (tr->allocated_snapshot) {
7515                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7516                                 tracing_reset_online_cpus(&tr->max_buffer);
7517                         else
7518                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7519                 }
7520                 break;
7521         }
7522
7523         if (ret >= 0) {
7524                 *ppos += cnt;
7525                 ret = cnt;
7526         }
7527 out:
7528         mutex_unlock(&trace_types_lock);
7529         return ret;
7530 }
7531
7532 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7533 {
7534         struct seq_file *m = file->private_data;
7535         int ret;
7536
7537         ret = tracing_release(inode, file);
7538
7539         if (file->f_mode & FMODE_READ)
7540                 return ret;
7541
7542         /* If write only, the seq_file is just a stub */
7543         if (m)
7544                 kfree(m->private);
7545         kfree(m);
7546
7547         return 0;
7548 }
7549
7550 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7551 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7552                                     size_t count, loff_t *ppos);
7553 static int tracing_buffers_release(struct inode *inode, struct file *file);
7554 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7555                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7556
7557 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7558 {
7559         struct ftrace_buffer_info *info;
7560         int ret;
7561
7562         /* The following checks for tracefs lockdown */
7563         ret = tracing_buffers_open(inode, filp);
7564         if (ret < 0)
7565                 return ret;
7566
7567         info = filp->private_data;
7568
7569         if (info->iter.trace->use_max_tr) {
7570                 tracing_buffers_release(inode, filp);
7571                 return -EBUSY;
7572         }
7573
7574         info->iter.snapshot = true;
7575         info->iter.array_buffer = &info->iter.tr->max_buffer;
7576
7577         return ret;
7578 }
7579
7580 #endif /* CONFIG_TRACER_SNAPSHOT */
7581
7582
7583 static const struct file_operations tracing_thresh_fops = {
7584         .open           = tracing_open_generic,
7585         .read           = tracing_thresh_read,
7586         .write          = tracing_thresh_write,
7587         .llseek         = generic_file_llseek,
7588 };
7589
7590 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7591 static const struct file_operations tracing_max_lat_fops = {
7592         .open           = tracing_open_generic,
7593         .read           = tracing_max_lat_read,
7594         .write          = tracing_max_lat_write,
7595         .llseek         = generic_file_llseek,
7596 };
7597 #endif
7598
7599 static const struct file_operations set_tracer_fops = {
7600         .open           = tracing_open_generic,
7601         .read           = tracing_set_trace_read,
7602         .write          = tracing_set_trace_write,
7603         .llseek         = generic_file_llseek,
7604 };
7605
7606 static const struct file_operations tracing_pipe_fops = {
7607         .open           = tracing_open_pipe,
7608         .poll           = tracing_poll_pipe,
7609         .read           = tracing_read_pipe,
7610         .splice_read    = tracing_splice_read_pipe,
7611         .release        = tracing_release_pipe,
7612         .llseek         = no_llseek,
7613 };
7614
7615 static const struct file_operations tracing_entries_fops = {
7616         .open           = tracing_open_generic_tr,
7617         .read           = tracing_entries_read,
7618         .write          = tracing_entries_write,
7619         .llseek         = generic_file_llseek,
7620         .release        = tracing_release_generic_tr,
7621 };
7622
7623 static const struct file_operations tracing_total_entries_fops = {
7624         .open           = tracing_open_generic_tr,
7625         .read           = tracing_total_entries_read,
7626         .llseek         = generic_file_llseek,
7627         .release        = tracing_release_generic_tr,
7628 };
7629
7630 static const struct file_operations tracing_free_buffer_fops = {
7631         .open           = tracing_open_generic_tr,
7632         .write          = tracing_free_buffer_write,
7633         .release        = tracing_free_buffer_release,
7634 };
7635
7636 static const struct file_operations tracing_mark_fops = {
7637         .open           = tracing_mark_open,
7638         .write          = tracing_mark_write,
7639         .release        = tracing_release_generic_tr,
7640 };
7641
7642 static const struct file_operations tracing_mark_raw_fops = {
7643         .open           = tracing_mark_open,
7644         .write          = tracing_mark_raw_write,
7645         .release        = tracing_release_generic_tr,
7646 };
7647
7648 static const struct file_operations trace_clock_fops = {
7649         .open           = tracing_clock_open,
7650         .read           = seq_read,
7651         .llseek         = seq_lseek,
7652         .release        = tracing_single_release_tr,
7653         .write          = tracing_clock_write,
7654 };
7655
7656 static const struct file_operations trace_time_stamp_mode_fops = {
7657         .open           = tracing_time_stamp_mode_open,
7658         .read           = seq_read,
7659         .llseek         = seq_lseek,
7660         .release        = tracing_single_release_tr,
7661 };
7662
7663 #ifdef CONFIG_TRACER_SNAPSHOT
7664 static const struct file_operations snapshot_fops = {
7665         .open           = tracing_snapshot_open,
7666         .read           = seq_read,
7667         .write          = tracing_snapshot_write,
7668         .llseek         = tracing_lseek,
7669         .release        = tracing_snapshot_release,
7670 };
7671
7672 static const struct file_operations snapshot_raw_fops = {
7673         .open           = snapshot_raw_open,
7674         .read           = tracing_buffers_read,
7675         .release        = tracing_buffers_release,
7676         .splice_read    = tracing_buffers_splice_read,
7677         .llseek         = no_llseek,
7678 };
7679
7680 #endif /* CONFIG_TRACER_SNAPSHOT */
7681
7682 /*
7683  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7684  * @filp: The active open file structure
7685  * @ubuf: The user space buffer containing the value to write
7686  * @cnt: The number of bytes to read from user space
7687  * @ppos: The current "file" position
7688  *
7689  * This function implements the write interface for a struct trace_min_max_param.
7690  * The filp->private_data must point to a trace_min_max_param structure that
7691  * defines where to write the value, the min and the max acceptable values,
7692  * and a lock to protect the write.
7693  */
7694 static ssize_t
7695 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7696 {
7697         struct trace_min_max_param *param = filp->private_data;
7698         u64 val;
7699         int err;
7700
7701         if (!param)
7702                 return -EFAULT;
7703
7704         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7705         if (err)
7706                 return err;
7707
7708         if (param->lock)
7709                 mutex_lock(param->lock);
7710
7711         if (param->min && val < *param->min)
7712                 err = -EINVAL;
7713
7714         if (param->max && val > *param->max)
7715                 err = -EINVAL;
7716
7717         if (!err)
7718                 *param->val = val;
7719
7720         if (param->lock)
7721                 mutex_unlock(param->lock);
7722
7723         if (err)
7724                 return err;
7725
7726         return cnt;
7727 }
7728
7729 /*
7730  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7731  * @filp: The active open file structure
7732  * @ubuf: The userspace provided buffer to read value into
7733  * @cnt: The maximum number of bytes to read
7734  * @ppos: The current "file" position
7735  *
7736  * This function implements the read interface for a struct trace_min_max_param.
7737  * The filp->private_data must point to a trace_min_max_param struct with valid
7738  * data.
7739  */
7740 static ssize_t
7741 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7742 {
7743         struct trace_min_max_param *param = filp->private_data;
7744         char buf[U64_STR_SIZE];
7745         int len;
7746         u64 val;
7747
7748         if (!param)
7749                 return -EFAULT;
7750
7751         val = *param->val;
7752
7753         if (cnt > sizeof(buf))
7754                 cnt = sizeof(buf);
7755
7756         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7757
7758         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7759 }
7760
7761 const struct file_operations trace_min_max_fops = {
7762         .open           = tracing_open_generic,
7763         .read           = trace_min_max_read,
7764         .write          = trace_min_max_write,
7765 };
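/*
 * A minimal usage sketch (names below are hypothetical, not from this file):
 * a tracefs file using these fops takes a struct trace_min_max_param as its
 * data pointer; .lock may be NULL if no locking is needed:
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */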
7766
7767 #define TRACING_LOG_ERRS_MAX    8
7768 #define TRACING_LOG_LOC_MAX     128
7769
7770 #define CMD_PREFIX "  Command: "
7771
7772 struct err_info {
7773         const char      **errs; /* ptr to loc-specific array of err strings */
7774         u8              type;   /* index into errs -> specific err string */
7775         u16             pos;    /* caret position */
7776         u64             ts;
7777 };
7778
7779 struct tracing_log_err {
7780         struct list_head        list;
7781         struct err_info         info;
7782         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7783         char                    *cmd;                     /* what caused err */
7784 };
7785
7786 static DEFINE_MUTEX(tracing_err_log_lock);
7787
7788 static struct tracing_log_err *alloc_tracing_log_err(int len)
7789 {
7790         struct tracing_log_err *err;
7791
7792         err = kzalloc(sizeof(*err), GFP_KERNEL);
7793         if (!err)
7794                 return ERR_PTR(-ENOMEM);
7795
7796         err->cmd = kzalloc(len, GFP_KERNEL);
7797         if (!err->cmd) {
7798                 kfree(err);
7799                 return ERR_PTR(-ENOMEM);
7800         }
7801
7802         return err;
7803 }
7804
7805 static void free_tracing_log_err(struct tracing_log_err *err)
7806 {
7807         kfree(err->cmd);
7808         kfree(err);
7809 }
7810
7811 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7812                                                    int len)
7813 {
7814         struct tracing_log_err *err;
7815         char *cmd;
7816
7817         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7818                 err = alloc_tracing_log_err(len);
7819                 if (PTR_ERR(err) != -ENOMEM)
7820                         tr->n_err_log_entries++;
7821
7822                 return err;
7823         }
7824         cmd = kzalloc(len, GFP_KERNEL);
7825         if (!cmd)
7826                 return ERR_PTR(-ENOMEM);
7827         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7828         kfree(err->cmd);
7829         err->cmd = cmd;
7830         list_del(&err->list);
7831
7832         return err;
7833 }
7834
7835 /**
7836  * err_pos - find the position of a string within a command for error careting
7837  * @cmd: The tracing command that caused the error
7838  * @str: The string to position the caret at within @cmd
7839  *
7840  * Finds the position of the first occurrence of @str within @cmd.  The
7841  * return value can be passed to tracing_log_err() for caret placement
7842  * within @cmd.
7843  *
7844  * Returns the index within @cmd of the first occurrence of @str or 0
7845  * if @str was not found.
7846  */
7847 unsigned int err_pos(char *cmd, const char *str)
7848 {
7849         char *found;
7850
7851         if (WARN_ON(!strlen(cmd)))
7852                 return 0;
7853
7854         found = strstr(cmd, str);
7855         if (found)
7856                 return found - cmd;
7857
7858         return 0;
7859 }
7860
7861 /**
7862  * tracing_log_err - write an error to the tracing error log
7863  * @tr: The associated trace array for the error (NULL for top level array)
7864  * @loc: A string describing where the error occurred
7865  * @cmd: The tracing command that caused the error
7866  * @errs: The array of loc-specific static error strings
7867  * @type: The index into errs[], which produces the specific static err string
7868  * @pos: The position the caret should be placed in the cmd
7869  *
7870  * Writes an error into tracing/error_log of the form:
7871  *
7872  * <loc>: error: <text>
7873  *   Command: <cmd>
7874  *              ^
7875  *
7876  * tracing/error_log is a small log file containing the last
7877  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7878  * unless there has been a tracing error, and the error log can be
7879  * cleared and have its memory freed by writing the empty string in
7880  * truncation mode to it, i.e. echo > tracing/error_log.
7881  *
7882  * NOTE: the @errs array along with the @type param are used to
7883  * produce a static error string - this string is not copied and saved
7884  * when the error is logged - only a pointer to it is saved.  See
7885  * existing callers for examples of how static strings are typically
7886  * defined for use with tracing_log_err().
7887  */
7888 void tracing_log_err(struct trace_array *tr,
7889                      const char *loc, const char *cmd,
7890                      const char **errs, u8 type, u16 pos)
7891 {
7892         struct tracing_log_err *err;
7893         int len = 0;
7894
7895         if (!tr)
7896                 tr = &global_trace;
7897
7898         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7899
7900         mutex_lock(&tracing_err_log_lock);
7901         err = get_tracing_log_err(tr, len);
7902         if (PTR_ERR(err) == -ENOMEM) {
7903                 mutex_unlock(&tracing_err_log_lock);
7904                 return;
7905         }
7906
7907         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7908         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7909
7910         err->info.errs = errs;
7911         err->info.type = type;
7912         err->info.pos = pos;
7913         err->info.ts = local_clock();
7914
7915         list_add_tail(&err->list, &tr->err_log);
7916         mutex_unlock(&tracing_err_log_lock);
7917 }
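/*
 * Illustrative sketch (not part of the kernel source): how a command
 * parser is expected to combine err_pos() and tracing_log_err().  The
 * "myparser" location, the error strings and the bad_token variable are
 * made up; real callers keep their own static array and index it with an
 * enum.
 *
 *	static const char *myparser_errs[] = {
 *		"Duplicate field name",
 *		"Missing field value",
 *	};
 *
 *	tracing_log_err(tr, "myparser", cmd, myparser_errs,
 *			0, err_pos(cmd, bad_token));
 *
 * With type 0 selecting the first string, this produces an entry like:
 *
 *	[  123.456789] myparser: error: Duplicate field name
 *	  Command: event_name field field
 *	                            ^
 *
 * Only the pointer to the static string array is stored, so the strings
 * must stay valid for the lifetime of the kernel.
 */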
7918
7919 static void clear_tracing_err_log(struct trace_array *tr)
7920 {
7921         struct tracing_log_err *err, *next;
7922
7923         mutex_lock(&tracing_err_log_lock);
7924         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7925                 list_del(&err->list);
7926                 free_tracing_log_err(err);
7927         }
7928
7929         tr->n_err_log_entries = 0;
7930         mutex_unlock(&tracing_err_log_lock);
7931 }
7932
7933 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7934 {
7935         struct trace_array *tr = m->private;
7936
7937         mutex_lock(&tracing_err_log_lock);
7938
7939         return seq_list_start(&tr->err_log, *pos);
7940 }
7941
7942 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7943 {
7944         struct trace_array *tr = m->private;
7945
7946         return seq_list_next(v, &tr->err_log, pos);
7947 }
7948
7949 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7950 {
7951         mutex_unlock(&tracing_err_log_lock);
7952 }
7953
7954 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7955 {
7956         u16 i;
7957
7958         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7959                 seq_putc(m, ' ');
7960         for (i = 0; i < pos; i++)
7961                 seq_putc(m, ' ');
7962         seq_puts(m, "^\n");
7963 }
7964
7965 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7966 {
7967         struct tracing_log_err *err = v;
7968
7969         if (err) {
7970                 const char *err_text = err->info.errs[err->info.type];
7971                 u64 sec = err->info.ts;
7972                 u32 nsec;
7973
7974                 nsec = do_div(sec, NSEC_PER_SEC);
7975                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7976                            err->loc, err_text);
7977                 seq_printf(m, "%s", err->cmd);
7978                 tracing_err_log_show_pos(m, err->info.pos);
7979         }
7980
7981         return 0;
7982 }
7983
7984 static const struct seq_operations tracing_err_log_seq_ops = {
7985         .start  = tracing_err_log_seq_start,
7986         .next   = tracing_err_log_seq_next,
7987         .stop   = tracing_err_log_seq_stop,
7988         .show   = tracing_err_log_seq_show
7989 };
7990
7991 static int tracing_err_log_open(struct inode *inode, struct file *file)
7992 {
7993         struct trace_array *tr = inode->i_private;
7994         int ret = 0;
7995
7996         ret = tracing_check_open_get_tr(tr);
7997         if (ret)
7998                 return ret;
7999
8000         /* If this file was opened for write, then erase contents */
8001         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8002                 clear_tracing_err_log(tr);
8003
8004         if (file->f_mode & FMODE_READ) {
8005                 ret = seq_open(file, &tracing_err_log_seq_ops);
8006                 if (!ret) {
8007                         struct seq_file *m = file->private_data;
8008                         m->private = tr;
8009                 } else {
8010                         trace_array_put(tr);
8011                 }
8012         }
8013         return ret;
8014 }
8015
8016 static ssize_t tracing_err_log_write(struct file *file,
8017                                      const char __user *buffer,
8018                                      size_t count, loff_t *ppos)
8019 {
8020         return count;
8021 }
8022
8023 static int tracing_err_log_release(struct inode *inode, struct file *file)
8024 {
8025         struct trace_array *tr = inode->i_private;
8026
8027         trace_array_put(tr);
8028
8029         if (file->f_mode & FMODE_READ)
8030                 seq_release(inode, file);
8031
8032         return 0;
8033 }
8034
8035 static const struct file_operations tracing_err_log_fops = {
8036         .open           = tracing_err_log_open,
8037         .write          = tracing_err_log_write,
8038         .read           = seq_read,
8039         .llseek         = seq_lseek,
8040         .release        = tracing_err_log_release,
8041 };
8042
8043 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8044 {
8045         struct trace_array *tr = inode->i_private;
8046         struct ftrace_buffer_info *info;
8047         int ret;
8048
8049         ret = tracing_check_open_get_tr(tr);
8050         if (ret)
8051                 return ret;
8052
8053         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8054         if (!info) {
8055                 trace_array_put(tr);
8056                 return -ENOMEM;
8057         }
8058
8059         mutex_lock(&trace_types_lock);
8060
8061         info->iter.tr           = tr;
8062         info->iter.cpu_file     = tracing_get_cpu(inode);
8063         info->iter.trace        = tr->current_trace;
8064         info->iter.array_buffer = &tr->array_buffer;
8065         info->spare             = NULL;
8066         /* Force reading ring buffer for first read */
8067         info->read              = (unsigned int)-1;
8068
8069         filp->private_data = info;
8070
8071         tr->trace_ref++;
8072
8073         mutex_unlock(&trace_types_lock);
8074
8075         ret = nonseekable_open(inode, filp);
8076         if (ret < 0)
8077                 trace_array_put(tr);
8078
8079         return ret;
8080 }
8081
8082 static __poll_t
8083 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8084 {
8085         struct ftrace_buffer_info *info = filp->private_data;
8086         struct trace_iterator *iter = &info->iter;
8087
8088         return trace_poll(iter, filp, poll_table);
8089 }
8090
8091 static ssize_t
8092 tracing_buffers_read(struct file *filp, char __user *ubuf,
8093                      size_t count, loff_t *ppos)
8094 {
8095         struct ftrace_buffer_info *info = filp->private_data;
8096         struct trace_iterator *iter = &info->iter;
8097         ssize_t ret = 0;
8098         ssize_t size;
8099
8100         if (!count)
8101                 return 0;
8102
8103 #ifdef CONFIG_TRACER_MAX_TRACE
8104         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8105                 return -EBUSY;
8106 #endif
8107
8108         if (!info->spare) {
8109                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8110                                                           iter->cpu_file);
8111                 if (IS_ERR(info->spare)) {
8112                         ret = PTR_ERR(info->spare);
8113                         info->spare = NULL;
8114                 } else {
8115                         info->spare_cpu = iter->cpu_file;
8116                 }
8117         }
8118         if (!info->spare)
8119                 return ret;
8120
8121         /* Do we have previous read data to read? */
8122         if (info->read < PAGE_SIZE)
8123                 goto read;
8124
8125  again:
8126         trace_access_lock(iter->cpu_file);
8127         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8128                                     &info->spare,
8129                                     count,
8130                                     iter->cpu_file, 0);
8131         trace_access_unlock(iter->cpu_file);
8132
8133         if (ret < 0) {
8134                 if (trace_empty(iter)) {
8135                         if ((filp->f_flags & O_NONBLOCK))
8136                                 return -EAGAIN;
8137
8138                         ret = wait_on_pipe(iter, 0);
8139                         if (ret)
8140                                 return ret;
8141
8142                         goto again;
8143                 }
8144                 return 0;
8145         }
8146
8147         info->read = 0;
8148  read:
8149         size = PAGE_SIZE - info->read;
8150         if (size > count)
8151                 size = count;
8152
8153         ret = copy_to_user(ubuf, info->spare + info->read, size);
8154         if (ret == size)
8155                 return -EFAULT;
8156
8157         size -= ret;
8158
8159         *ppos += size;
8160         info->read += size;
8161
8162         return size;
8163 }
8164
8165 static int tracing_buffers_release(struct inode *inode, struct file *file)
8166 {
8167         struct ftrace_buffer_info *info = file->private_data;
8168         struct trace_iterator *iter = &info->iter;
8169
8170         mutex_lock(&trace_types_lock);
8171
8172         iter->tr->trace_ref--;
8173
8174         __trace_array_put(iter->tr);
8175
8176         iter->wait_index++;
8177         /* Make sure the waiters see the new wait_index */
8178         smp_wmb();
8179
8180         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8181
8182         if (info->spare)
8183                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8184                                            info->spare_cpu, info->spare);
8185         kvfree(info);
8186
8187         mutex_unlock(&trace_types_lock);
8188
8189         return 0;
8190 }
8191
8192 struct buffer_ref {
8193         struct trace_buffer     *buffer;
8194         void                    *page;
8195         int                     cpu;
8196         refcount_t              refcount;
8197 };
8198
8199 static void buffer_ref_release(struct buffer_ref *ref)
8200 {
8201         if (!refcount_dec_and_test(&ref->refcount))
8202                 return;
8203         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8204         kfree(ref);
8205 }
8206
8207 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8208                                     struct pipe_buffer *buf)
8209 {
8210         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8211
8212         buffer_ref_release(ref);
8213         buf->private = 0;
8214 }
8215
8216 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8217                                 struct pipe_buffer *buf)
8218 {
8219         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8220
8221         if (refcount_read(&ref->refcount) > INT_MAX/2)
8222                 return false;
8223
8224         refcount_inc(&ref->refcount);
8225         return true;
8226 }
8227
8228 /* Pipe buffer operations for a buffer. */
8229 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8230         .release                = buffer_pipe_buf_release,
8231         .get                    = buffer_pipe_buf_get,
8232 };
8233
8234 /*
8235  * Callback from splice_to_pipe(); release any pages left in the spd
8236  * if we errored out while filling the pipe.
8237  */
8238 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8239 {
8240         struct buffer_ref *ref =
8241                 (struct buffer_ref *)spd->partial[i].private;
8242
8243         buffer_ref_release(ref);
8244         spd->partial[i].private = 0;
8245 }
8246
8247 static ssize_t
8248 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8249                             struct pipe_inode_info *pipe, size_t len,
8250                             unsigned int flags)
8251 {
8252         struct ftrace_buffer_info *info = file->private_data;
8253         struct trace_iterator *iter = &info->iter;
8254         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8255         struct page *pages_def[PIPE_DEF_BUFFERS];
8256         struct splice_pipe_desc spd = {
8257                 .pages          = pages_def,
8258                 .partial        = partial_def,
8259                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8260                 .ops            = &buffer_pipe_buf_ops,
8261                 .spd_release    = buffer_spd_release,
8262         };
8263         struct buffer_ref *ref;
8264         int entries, i;
8265         ssize_t ret = 0;
8266
8267 #ifdef CONFIG_TRACER_MAX_TRACE
8268         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8269                 return -EBUSY;
8270 #endif
8271
8272         if (*ppos & (PAGE_SIZE - 1))
8273                 return -EINVAL;
8274
8275         if (len & (PAGE_SIZE - 1)) {
8276                 if (len < PAGE_SIZE)
8277                         return -EINVAL;
8278                 len &= PAGE_MASK;
8279         }
8280
8281         if (splice_grow_spd(pipe, &spd))
8282                 return -ENOMEM;
8283
8284  again:
8285         trace_access_lock(iter->cpu_file);
8286         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8287
8288         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8289                 struct page *page;
8290                 int r;
8291
8292                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8293                 if (!ref) {
8294                         ret = -ENOMEM;
8295                         break;
8296                 }
8297
8298                 refcount_set(&ref->refcount, 1);
8299                 ref->buffer = iter->array_buffer->buffer;
8300                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8301                 if (IS_ERR(ref->page)) {
8302                         ret = PTR_ERR(ref->page);
8303                         ref->page = NULL;
8304                         kfree(ref);
8305                         break;
8306                 }
8307                 ref->cpu = iter->cpu_file;
8308
8309                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8310                                           len, iter->cpu_file, 1);
8311                 if (r < 0) {
8312                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8313                                                    ref->page);
8314                         kfree(ref);
8315                         break;
8316                 }
8317
8318                 page = virt_to_page(ref->page);
8319
8320                 spd.pages[i] = page;
8321                 spd.partial[i].len = PAGE_SIZE;
8322                 spd.partial[i].offset = 0;
8323                 spd.partial[i].private = (unsigned long)ref;
8324                 spd.nr_pages++;
8325                 *ppos += PAGE_SIZE;
8326
8327                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8328         }
8329
8330         trace_access_unlock(iter->cpu_file);
8331         spd.nr_pages = i;
8332
8333         /* did we read anything? */
8334         if (!spd.nr_pages) {
8335                 long wait_index;
8336
8337                 if (ret)
8338                         goto out;
8339
8340                 ret = -EAGAIN;
8341                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8342                         goto out;
8343
8344                 wait_index = READ_ONCE(iter->wait_index);
8345
8346                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8347                 if (ret)
8348                         goto out;
8349
8350                 /* No need to wait after waking up when tracing is off */
8351                 if (!tracer_tracing_is_on(iter->tr))
8352                         goto out;
8353
8354                 /* Make sure we see the new wait_index */
8355                 smp_rmb();
8356                 if (wait_index != iter->wait_index)
8357                         goto out;
8358
8359                 goto again;
8360         }
8361
8362         ret = splice_to_pipe(pipe, &spd);
8363 out:
8364         splice_shrink_spd(&spd);
8365
8366         return ret;
8367 }
8368
8369 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8370 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8371 {
8372         struct ftrace_buffer_info *info = file->private_data;
8373         struct trace_iterator *iter = &info->iter;
8374
8375         if (cmd)
8376                 return -ENOIOCTLCMD;
8377
8378         mutex_lock(&trace_types_lock);
8379
8380         iter->wait_index++;
8381         /* Make sure the waiters see the new wait_index */
8382         smp_wmb();
8383
8384         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8385
8386         mutex_unlock(&trace_types_lock);
8387         return 0;
8388 }
8389
8390 static const struct file_operations tracing_buffers_fops = {
8391         .open           = tracing_buffers_open,
8392         .read           = tracing_buffers_read,
8393         .poll           = tracing_buffers_poll,
8394         .release        = tracing_buffers_release,
8395         .splice_read    = tracing_buffers_splice_read,
8396         .unlocked_ioctl = tracing_buffers_ioctl,
8397         .llseek         = no_llseek,
8398 };
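/*
 * Illustrative sketch (not part of the kernel source): a minimal
 * user-space reader for a per-CPU trace_pipe_raw file, which is backed
 * by the fops above.  The tracefs path and the page size are assumptions;
 * data is handed out in whole ring-buffer sub-pages.
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	char page[4096];
 *	ssize_t r;
 *
 *	while ((r = read(fd, page, sizeof(page))) > 0)
 *		consume_binary_page(page, r);
 *
 * consume_binary_page() stands in for a hypothetical consumer of the raw
 * page data.  A reader blocked in read() or splice() can be woken from
 * another thread with ioctl(fd, 0), as implemented by
 * tracing_buffers_ioctl() above.
 */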
8399
8400 static ssize_t
8401 tracing_stats_read(struct file *filp, char __user *ubuf,
8402                    size_t count, loff_t *ppos)
8403 {
8404         struct inode *inode = file_inode(filp);
8405         struct trace_array *tr = inode->i_private;
8406         struct array_buffer *trace_buf = &tr->array_buffer;
8407         int cpu = tracing_get_cpu(inode);
8408         struct trace_seq *s;
8409         unsigned long cnt;
8410         unsigned long long t;
8411         unsigned long usec_rem;
8412
8413         s = kmalloc(sizeof(*s), GFP_KERNEL);
8414         if (!s)
8415                 return -ENOMEM;
8416
8417         trace_seq_init(s);
8418
8419         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8420         trace_seq_printf(s, "entries: %ld\n", cnt);
8421
8422         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8423         trace_seq_printf(s, "overrun: %ld\n", cnt);
8424
8425         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8426         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8427
8428         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8429         trace_seq_printf(s, "bytes: %ld\n", cnt);
8430
8431         if (trace_clocks[tr->clock_id].in_ns) {
8432                 /* local or global for trace_clock */
8433                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8434                 usec_rem = do_div(t, USEC_PER_SEC);
8435                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8436                                                                 t, usec_rem);
8437
8438                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8439                 usec_rem = do_div(t, USEC_PER_SEC);
8440                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8441         } else {
8442                 /* counter or tsc mode for trace_clock */
8443                 trace_seq_printf(s, "oldest event ts: %llu\n",
8444                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8445
8446                 trace_seq_printf(s, "now ts: %llu\n",
8447                                 ring_buffer_time_stamp(trace_buf->buffer));
8448         }
8449
8450         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8451         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8452
8453         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8454         trace_seq_printf(s, "read events: %ld\n", cnt);
8455
8456         count = simple_read_from_buffer(ubuf, count, ppos,
8457                                         s->buffer, trace_seq_used(s));
8458
8459         kfree(s);
8460
8461         return count;
8462 }
8463
8464 static const struct file_operations tracing_stats_fops = {
8465         .open           = tracing_open_generic_tr,
8466         .read           = tracing_stats_read,
8467         .llseek         = generic_file_llseek,
8468         .release        = tracing_release_generic_tr,
8469 };
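/*
 * Example of what reading per_cpu/cpuN/stats produces (the numbers are
 * illustrative only); each line maps onto one trace_seq_printf() above:
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 6724
 *	oldest event ts: 35182.002345
 *	now ts: 35339.411563
 *	dropped events: 0
 *	read events: 129
 */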
8470
8471 #ifdef CONFIG_DYNAMIC_FTRACE
8472
8473 static ssize_t
8474 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8475                   size_t cnt, loff_t *ppos)
8476 {
8477         ssize_t ret;
8478         char *buf;
8479         int r;
8480
8481         /* 256 should be plenty to hold the amount needed */
8482         buf = kmalloc(256, GFP_KERNEL);
8483         if (!buf)
8484                 return -ENOMEM;
8485
8486         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8487                       ftrace_update_tot_cnt,
8488                       ftrace_number_of_pages,
8489                       ftrace_number_of_groups);
8490
8491         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8492         kfree(buf);
8493         return ret;
8494 }
8495
8496 static const struct file_operations tracing_dyn_info_fops = {
8497         .open           = tracing_open_generic,
8498         .read           = tracing_read_dyn_info,
8499         .llseek         = generic_file_llseek,
8500 };
8501 #endif /* CONFIG_DYNAMIC_FTRACE */
8502
8503 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8504 static void
8505 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8506                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8507                 void *data)
8508 {
8509         tracing_snapshot_instance(tr);
8510 }
8511
8512 static void
8513 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8514                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8515                       void *data)
8516 {
8517         struct ftrace_func_mapper *mapper = data;
8518         long *count = NULL;
8519
8520         if (mapper)
8521                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8522
8523         if (count) {
8524
8525                 if (*count <= 0)
8526                         return;
8527
8528                 (*count)--;
8529         }
8530
8531         tracing_snapshot_instance(tr);
8532 }
8533
8534 static int
8535 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8536                       struct ftrace_probe_ops *ops, void *data)
8537 {
8538         struct ftrace_func_mapper *mapper = data;
8539         long *count = NULL;
8540
8541         seq_printf(m, "%ps:", (void *)ip);
8542
8543         seq_puts(m, "snapshot");
8544
8545         if (mapper)
8546                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8547
8548         if (count)
8549                 seq_printf(m, ":count=%ld\n", *count);
8550         else
8551                 seq_puts(m, ":unlimited\n");
8552
8553         return 0;
8554 }
8555
8556 static int
8557 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8558                      unsigned long ip, void *init_data, void **data)
8559 {
8560         struct ftrace_func_mapper *mapper = *data;
8561
8562         if (!mapper) {
8563                 mapper = allocate_ftrace_func_mapper();
8564                 if (!mapper)
8565                         return -ENOMEM;
8566                 *data = mapper;
8567         }
8568
8569         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8570 }
8571
8572 static void
8573 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8574                      unsigned long ip, void *data)
8575 {
8576         struct ftrace_func_mapper *mapper = data;
8577
8578         if (!ip) {
8579                 if (!mapper)
8580                         return;
8581                 free_ftrace_func_mapper(mapper, NULL);
8582                 return;
8583         }
8584
8585         ftrace_func_mapper_remove_ip(mapper, ip);
8586 }
8587
8588 static struct ftrace_probe_ops snapshot_probe_ops = {
8589         .func                   = ftrace_snapshot,
8590         .print                  = ftrace_snapshot_print,
8591 };
8592
8593 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8594         .func                   = ftrace_count_snapshot,
8595         .print                  = ftrace_snapshot_print,
8596         .init                   = ftrace_snapshot_init,
8597         .free                   = ftrace_snapshot_free,
8598 };
8599
8600 static int
8601 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8602                                char *glob, char *cmd, char *param, int enable)
8603 {
8604         struct ftrace_probe_ops *ops;
8605         void *count = (void *)-1;
8606         char *number;
8607         int ret;
8608
8609         if (!tr)
8610                 return -ENODEV;
8611
8612         /* hash funcs only work with set_ftrace_filter */
8613         if (!enable)
8614                 return -EINVAL;
8615
8616         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8617
8618         if (glob[0] == '!')
8619                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8620
8621         if (!param)
8622                 goto out_reg;
8623
8624         number = strsep(&param, ":");
8625
8626         if (!strlen(number))
8627                 goto out_reg;
8628
8629         /*
8630          * We use the callback data field (which is a pointer)
8631          * as our counter.
8632          */
8633         ret = kstrtoul(number, 0, (unsigned long *)&count);
8634         if (ret)
8635                 return ret;
8636
8637  out_reg:
8638         ret = tracing_alloc_snapshot_instance(tr);
8639         if (ret < 0)
8640                 goto out;
8641
8642         ret = register_ftrace_function_probe(glob, tr, ops, count);
8643
8644  out:
8645         return ret < 0 ? ret : 0;
8646 }
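/*
 * Usage sketch for the "snapshot" command parsed above (the function name
 * is just an example and the path assumes the usual tracefs mount point):
 *
 *	echo 'do_sys_open:snapshot'   > /sys/kernel/tracing/set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *	echo '!do_sys_open:snapshot'  > /sys/kernel/tracing/set_ftrace_filter
 *
 * The first form snapshots on every hit of the function, the second only
 * for the first 5 hits (the number parsed into "count" above), and the
 * '!' form removes the probe again.
 */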
8647
8648 static struct ftrace_func_command ftrace_snapshot_cmd = {
8649         .name                   = "snapshot",
8650         .func                   = ftrace_trace_snapshot_callback,
8651 };
8652
8653 static __init int register_snapshot_cmd(void)
8654 {
8655         return register_ftrace_command(&ftrace_snapshot_cmd);
8656 }
8657 #else
8658 static inline __init int register_snapshot_cmd(void) { return 0; }
8659 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8660
8661 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8662 {
8663         if (WARN_ON(!tr->dir))
8664                 return ERR_PTR(-ENODEV);
8665
8666         /* Top directory uses NULL as the parent */
8667         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8668                 return NULL;
8669
8670         /* All sub buffers have a descriptor */
8671         return tr->dir;
8672 }
8673
8674 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8675 {
8676         struct dentry *d_tracer;
8677
8678         if (tr->percpu_dir)
8679                 return tr->percpu_dir;
8680
8681         d_tracer = tracing_get_dentry(tr);
8682         if (IS_ERR(d_tracer))
8683                 return NULL;
8684
8685         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8686
8687         MEM_FAIL(!tr->percpu_dir,
8688                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8689
8690         return tr->percpu_dir;
8691 }
8692
8693 static struct dentry *
8694 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8695                       void *data, long cpu, const struct file_operations *fops)
8696 {
8697         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8698
8699         if (ret) /* See tracing_get_cpu() */
8700                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8701         return ret;
8702 }
8703
8704 static void
8705 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8706 {
8707         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8708         struct dentry *d_cpu;
8709         char cpu_dir[30]; /* 30 characters should be more than enough */
8710
8711         if (!d_percpu)
8712                 return;
8713
8714         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8715         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8716         if (!d_cpu) {
8717                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8718                 return;
8719         }
8720
8721         /* per cpu trace_pipe */
8722         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8723                                 tr, cpu, &tracing_pipe_fops);
8724
8725         /* per cpu trace */
8726         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8727                                 tr, cpu, &tracing_fops);
8728
8729         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8730                                 tr, cpu, &tracing_buffers_fops);
8731
8732         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8733                                 tr, cpu, &tracing_stats_fops);
8734
8735         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8736                                 tr, cpu, &tracing_entries_fops);
8737
8738 #ifdef CONFIG_TRACER_SNAPSHOT
8739         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8740                                 tr, cpu, &snapshot_fops);
8741
8742         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8743                                 tr, cpu, &snapshot_raw_fops);
8744 #endif
8745 }
8746
8747 #ifdef CONFIG_FTRACE_SELFTEST
8748 /* Let selftest have access to static functions in this file */
8749 #include "trace_selftest.c"
8750 #endif
8751
8752 static ssize_t
8753 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8754                         loff_t *ppos)
8755 {
8756         struct trace_option_dentry *topt = filp->private_data;
8757         char *buf;
8758
8759         if (topt->flags->val & topt->opt->bit)
8760                 buf = "1\n";
8761         else
8762                 buf = "0\n";
8763
8764         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8765 }
8766
8767 static ssize_t
8768 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8769                          loff_t *ppos)
8770 {
8771         struct trace_option_dentry *topt = filp->private_data;
8772         unsigned long val;
8773         int ret;
8774
8775         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8776         if (ret)
8777                 return ret;
8778
8779         if (val != 0 && val != 1)
8780                 return -EINVAL;
8781
8782         if (!!(topt->flags->val & topt->opt->bit) != val) {
8783                 mutex_lock(&trace_types_lock);
8784                 ret = __set_tracer_option(topt->tr, topt->flags,
8785                                           topt->opt, !val);
8786                 mutex_unlock(&trace_types_lock);
8787                 if (ret)
8788                         return ret;
8789         }
8790
8791         *ppos += cnt;
8792
8793         return cnt;
8794 }
8795
8796
8797 static const struct file_operations trace_options_fops = {
8798         .open = tracing_open_generic,
8799         .read = trace_options_read,
8800         .write = trace_options_write,
8801         .llseek = generic_file_llseek,
8802 };
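/*
 * Usage sketch: every tracer-specific flag created with these fops shows
 * up as a file under the instance's options/ directory and accepts only
 * "0" or "1", e.g. (option name and mount point are examples):
 *
 *	cat /sys/kernel/tracing/options/func_stack_trace
 *	echo 1 > /sys/kernel/tracing/options/func_stack_trace
 */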
8803
8804 /*
8805  * In order to pass in both the trace_array descriptor as well as the index
8806  * to the flag that the trace option file represents, the trace_array
8807  * has a character array of trace_flags_index[], which holds the index
8808  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8809  * The address of the array element for a given flag is passed to that
8810  * flag's option file read/write callbacks.
8811  *
8812  * In order to extract both the index and the trace_array descriptor,
8813  * get_tr_index() uses the following algorithm.
8814  *
8815  *   idx = *ptr;
8816  *
8817  * Because index[i] == i, dereferencing the pointer yields the index
8818  * of the flag the file represents.
8819  *
8820  * Then, to get the trace_array descriptor, subtract that index from
8821  * the pointer to reach the start of the trace_flags_index[] array:
8822  *
8823  *   ptr - idx == &index[0]
8824  *
8825  * Then a simple container_of() from that pointer gets us to the
8826  * trace_array descriptor.
8827  */
8828 static void get_tr_index(void *data, struct trace_array **ptr,
8829                          unsigned int *pindex)
8830 {
8831         *pindex = *(unsigned char *)data;
8832
8833         *ptr = container_of(data - *pindex, struct trace_array,
8834                             trace_flags_index);
8835 }
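/*
 * Worked example of the lookup above (the index is illustrative): if the
 * option file was created with data == &tr->trace_flags_index[5], then,
 * because init_trace_flags_index() stored trace_flags_index[i] = i:
 *
 *	*pindex  == 5
 *	data - 5 == &tr->trace_flags_index[0]
 *	container_of(data - 5, struct trace_array, trace_flags_index) == tr
 */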
8836
8837 static ssize_t
8838 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8839                         loff_t *ppos)
8840 {
8841         void *tr_index = filp->private_data;
8842         struct trace_array *tr;
8843         unsigned int index;
8844         char *buf;
8845
8846         get_tr_index(tr_index, &tr, &index);
8847
8848         if (tr->trace_flags & (1 << index))
8849                 buf = "1\n";
8850         else
8851                 buf = "0\n";
8852
8853         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8854 }
8855
8856 static ssize_t
8857 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8858                          loff_t *ppos)
8859 {
8860         void *tr_index = filp->private_data;
8861         struct trace_array *tr;
8862         unsigned int index;
8863         unsigned long val;
8864         int ret;
8865
8866         get_tr_index(tr_index, &tr, &index);
8867
8868         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8869         if (ret)
8870                 return ret;
8871
8872         if (val != 0 && val != 1)
8873                 return -EINVAL;
8874
8875         mutex_lock(&event_mutex);
8876         mutex_lock(&trace_types_lock);
8877         ret = set_tracer_flag(tr, 1 << index, val);
8878         mutex_unlock(&trace_types_lock);
8879         mutex_unlock(&event_mutex);
8880
8881         if (ret < 0)
8882                 return ret;
8883
8884         *ppos += cnt;
8885
8886         return cnt;
8887 }
8888
8889 static const struct file_operations trace_options_core_fops = {
8890         .open = tracing_open_generic,
8891         .read = trace_options_core_read,
8892         .write = trace_options_core_write,
8893         .llseek = generic_file_llseek,
8894 };
8895
8896 struct dentry *trace_create_file(const char *name,
8897                                  umode_t mode,
8898                                  struct dentry *parent,
8899                                  void *data,
8900                                  const struct file_operations *fops)
8901 {
8902         struct dentry *ret;
8903
8904         ret = tracefs_create_file(name, mode, parent, data, fops);
8905         if (!ret)
8906                 pr_warn("Could not create tracefs '%s' entry\n", name);
8907
8908         return ret;
8909 }
8910
8911
8912 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8913 {
8914         struct dentry *d_tracer;
8915
8916         if (tr->options)
8917                 return tr->options;
8918
8919         d_tracer = tracing_get_dentry(tr);
8920         if (IS_ERR(d_tracer))
8921                 return NULL;
8922
8923         tr->options = tracefs_create_dir("options", d_tracer);
8924         if (!tr->options) {
8925                 pr_warn("Could not create tracefs directory 'options'\n");
8926                 return NULL;
8927         }
8928
8929         return tr->options;
8930 }
8931
8932 static void
8933 create_trace_option_file(struct trace_array *tr,
8934                          struct trace_option_dentry *topt,
8935                          struct tracer_flags *flags,
8936                          struct tracer_opt *opt)
8937 {
8938         struct dentry *t_options;
8939
8940         t_options = trace_options_init_dentry(tr);
8941         if (!t_options)
8942                 return;
8943
8944         topt->flags = flags;
8945         topt->opt = opt;
8946         topt->tr = tr;
8947
8948         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8949                                         t_options, topt, &trace_options_fops);
8950
8951 }
8952
8953 static void
8954 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8955 {
8956         struct trace_option_dentry *topts;
8957         struct trace_options *tr_topts;
8958         struct tracer_flags *flags;
8959         struct tracer_opt *opts;
8960         int cnt;
8961         int i;
8962
8963         if (!tracer)
8964                 return;
8965
8966         flags = tracer->flags;
8967
8968         if (!flags || !flags->opts)
8969                 return;
8970
8971         /*
8972          * If this is an instance, only create flags for tracers
8973          * the instance may have.
8974          */
8975         if (!trace_ok_for_array(tracer, tr))
8976                 return;
8977
8978         for (i = 0; i < tr->nr_topts; i++) {
8979                 /* Make sure there are no duplicate flags. */
8980                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8981                         return;
8982         }
8983
8984         opts = flags->opts;
8985
8986         for (cnt = 0; opts[cnt].name; cnt++)
8987                 ;
8988
8989         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8990         if (!topts)
8991                 return;
8992
8993         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8994                             GFP_KERNEL);
8995         if (!tr_topts) {
8996                 kfree(topts);
8997                 return;
8998         }
8999
9000         tr->topts = tr_topts;
9001         tr->topts[tr->nr_topts].tracer = tracer;
9002         tr->topts[tr->nr_topts].topts = topts;
9003         tr->nr_topts++;
9004
9005         for (cnt = 0; opts[cnt].name; cnt++) {
9006                 create_trace_option_file(tr, &topts[cnt], flags,
9007                                          &opts[cnt]);
9008                 MEM_FAIL(topts[cnt].entry == NULL,
9009                           "Failed to create trace option: %s",
9010                           opts[cnt].name);
9011         }
9012 }
9013
9014 static struct dentry *
9015 create_trace_option_core_file(struct trace_array *tr,
9016                               const char *option, long index)
9017 {
9018         struct dentry *t_options;
9019
9020         t_options = trace_options_init_dentry(tr);
9021         if (!t_options)
9022                 return NULL;
9023
9024         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9025                                  (void *)&tr->trace_flags_index[index],
9026                                  &trace_options_core_fops);
9027 }
9028
9029 static void create_trace_options_dir(struct trace_array *tr)
9030 {
9031         struct dentry *t_options;
9032         bool top_level = tr == &global_trace;
9033         int i;
9034
9035         t_options = trace_options_init_dentry(tr);
9036         if (!t_options)
9037                 return;
9038
9039         for (i = 0; trace_options[i]; i++) {
9040                 if (top_level ||
9041                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9042                         create_trace_option_core_file(tr, trace_options[i], i);
9043         }
9044 }
9045
9046 static ssize_t
9047 rb_simple_read(struct file *filp, char __user *ubuf,
9048                size_t cnt, loff_t *ppos)
9049 {
9050         struct trace_array *tr = filp->private_data;
9051         char buf[64];
9052         int r;
9053
9054         r = tracer_tracing_is_on(tr);
9055         r = sprintf(buf, "%d\n", r);
9056
9057         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9058 }
9059
9060 static ssize_t
9061 rb_simple_write(struct file *filp, const char __user *ubuf,
9062                 size_t cnt, loff_t *ppos)
9063 {
9064         struct trace_array *tr = filp->private_data;
9065         struct trace_buffer *buffer = tr->array_buffer.buffer;
9066         unsigned long val;
9067         int ret;
9068
9069         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9070         if (ret)
9071                 return ret;
9072
9073         if (buffer) {
9074                 mutex_lock(&trace_types_lock);
9075                 if (!!val == tracer_tracing_is_on(tr)) {
9076                         val = 0; /* do nothing */
9077                 } else if (val) {
9078                         tracer_tracing_on(tr);
9079                         if (tr->current_trace->start)
9080                                 tr->current_trace->start(tr);
9081                 } else {
9082                         tracer_tracing_off(tr);
9083                         if (tr->current_trace->stop)
9084                                 tr->current_trace->stop(tr);
9085                         /* Wake up any waiters */
9086                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9087                 }
9088                 mutex_unlock(&trace_types_lock);
9089         }
9090
9091         (*ppos)++;
9092
9093         return cnt;
9094 }
9095
9096 static const struct file_operations rb_simple_fops = {
9097         .open           = tracing_open_generic_tr,
9098         .read           = rb_simple_read,
9099         .write          = rb_simple_write,
9100         .release        = tracing_release_generic_tr,
9101         .llseek         = default_llseek,
9102 };
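/*
 * These fops back the tracing_on file created for each trace instance.
 * Writing 0 stops recording into the ring buffer (and wakes any blocked
 * readers), writing 1 starts it again, and reading reports the current
 * state, e.g. (assuming the usual tracefs mount point):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	cat /sys/kernel/tracing/tracing_on	(prints "0")
 */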
9103
9104 static ssize_t
9105 buffer_percent_read(struct file *filp, char __user *ubuf,
9106                     size_t cnt, loff_t *ppos)
9107 {
9108         struct trace_array *tr = filp->private_data;
9109         char buf[64];
9110         int r;
9111
9112         r = tr->buffer_percent;
9113         r = sprintf(buf, "%d\n", r);
9114
9115         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9116 }
9117
9118 static ssize_t
9119 buffer_percent_write(struct file *filp, const char __user *ubuf,
9120                      size_t cnt, loff_t *ppos)
9121 {
9122         struct trace_array *tr = filp->private_data;
9123         unsigned long val;
9124         int ret;
9125
9126         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9127         if (ret)
9128                 return ret;
9129
9130         if (val > 100)
9131                 return -EINVAL;
9132
9133         if (!val)
9134                 val = 1;
9135
9136         tr->buffer_percent = val;
9137
9138         (*ppos)++;
9139
9140         return cnt;
9141 }
9142
9143 static const struct file_operations buffer_percent_fops = {
9144         .open           = tracing_open_generic_tr,
9145         .read           = buffer_percent_read,
9146         .write          = buffer_percent_write,
9147         .release        = tracing_release_generic_tr,
9148         .llseek         = default_llseek,
9149 };
9150
9151 static struct dentry *trace_instance_dir;
9152
9153 static void
9154 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9155
9156 static int
9157 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9158 {
9159         enum ring_buffer_flags rb_flags;
9160
9161         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9162
9163         buf->tr = tr;
9164
9165         buf->buffer = ring_buffer_alloc(size, rb_flags);
9166         if (!buf->buffer)
9167                 return -ENOMEM;
9168
9169         buf->data = alloc_percpu(struct trace_array_cpu);
9170         if (!buf->data) {
9171                 ring_buffer_free(buf->buffer);
9172                 buf->buffer = NULL;
9173                 return -ENOMEM;
9174         }
9175
9176         /* Allocate the first page for all buffers */
9177         set_buffer_entries(&tr->array_buffer,
9178                            ring_buffer_size(tr->array_buffer.buffer, 0));
9179
9180         return 0;
9181 }
9182
9183 static void free_trace_buffer(struct array_buffer *buf)
9184 {
9185         if (buf->buffer) {
9186                 ring_buffer_free(buf->buffer);
9187                 buf->buffer = NULL;
9188                 free_percpu(buf->data);
9189                 buf->data = NULL;
9190         }
9191 }
9192
9193 static int allocate_trace_buffers(struct trace_array *tr, int size)
9194 {
9195         int ret;
9196
9197         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9198         if (ret)
9199                 return ret;
9200
9201 #ifdef CONFIG_TRACER_MAX_TRACE
9202         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9203                                     allocate_snapshot ? size : 1);
9204         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9205                 free_trace_buffer(&tr->array_buffer);
9206                 return -ENOMEM;
9207         }
9208         tr->allocated_snapshot = allocate_snapshot;
9209
9210         /*
9211          * Only the top level trace array gets its snapshot allocated
9212          * from the kernel command line.
9213          */
9214         allocate_snapshot = false;
9215 #endif
9216
9217         return 0;
9218 }
9219
9220 static void free_trace_buffers(struct trace_array *tr)
9221 {
9222         if (!tr)
9223                 return;
9224
9225         free_trace_buffer(&tr->array_buffer);
9226
9227 #ifdef CONFIG_TRACER_MAX_TRACE
9228         free_trace_buffer(&tr->max_buffer);
9229 #endif
9230 }
9231
9232 static void init_trace_flags_index(struct trace_array *tr)
9233 {
9234         int i;
9235
9236         /* Used by the trace options files */
9237         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9238                 tr->trace_flags_index[i] = i;
9239 }
9240
9241 static void __update_tracer_options(struct trace_array *tr)
9242 {
9243         struct tracer *t;
9244
9245         for (t = trace_types; t; t = t->next)
9246                 add_tracer_options(tr, t);
9247 }
9248
9249 static void update_tracer_options(struct trace_array *tr)
9250 {
9251         mutex_lock(&trace_types_lock);
9252         tracer_options_updated = true;
9253         __update_tracer_options(tr);
9254         mutex_unlock(&trace_types_lock);
9255 }
9256
9257 /* Must have trace_types_lock held */
9258 struct trace_array *trace_array_find(const char *instance)
9259 {
9260         struct trace_array *tr, *found = NULL;
9261
9262         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9263                 if (tr->name && strcmp(tr->name, instance) == 0) {
9264                         found = tr;
9265                         break;
9266                 }
9267         }
9268
9269         return found;
9270 }
9271
9272 struct trace_array *trace_array_find_get(const char *instance)
9273 {
9274         struct trace_array *tr;
9275
9276         mutex_lock(&trace_types_lock);
9277         tr = trace_array_find(instance);
9278         if (tr)
9279                 tr->ref++;
9280         mutex_unlock(&trace_types_lock);
9281
9282         return tr;
9283 }
9284
9285 static int trace_array_create_dir(struct trace_array *tr)
9286 {
9287         int ret;
9288
9289         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9290         if (!tr->dir)
9291                 return -EINVAL;
9292
9293         ret = event_trace_add_tracer(tr->dir, tr);
9294         if (ret) {
9295                 tracefs_remove(tr->dir);
9296                 return ret;
9297         }
9298
9299         init_tracer_tracefs(tr, tr->dir);
9300         __update_tracer_options(tr);
9301
9302         return ret;
9303 }
9304
9305 static struct trace_array *trace_array_create(const char *name)
9306 {
9307         struct trace_array *tr;
9308         int ret;
9309
9310         ret = -ENOMEM;
9311         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9312         if (!tr)
9313                 return ERR_PTR(ret);
9314
9315         tr->name = kstrdup(name, GFP_KERNEL);
9316         if (!tr->name)
9317                 goto out_free_tr;
9318
9319         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9320                 goto out_free_tr;
9321
9322         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9323
9324         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9325
9326         raw_spin_lock_init(&tr->start_lock);
9327
9328         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9329
9330         tr->current_trace = &nop_trace;
9331
9332         INIT_LIST_HEAD(&tr->systems);
9333         INIT_LIST_HEAD(&tr->events);
9334         INIT_LIST_HEAD(&tr->hist_vars);
9335         INIT_LIST_HEAD(&tr->err_log);
9336
9337         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9338                 goto out_free_tr;
9339
9340         if (ftrace_allocate_ftrace_ops(tr) < 0)
9341                 goto out_free_tr;
9342
9343         ftrace_init_trace_array(tr);
9344
9345         init_trace_flags_index(tr);
9346
9347         if (trace_instance_dir) {
9348                 ret = trace_array_create_dir(tr);
9349                 if (ret)
9350                         goto out_free_tr;
9351         } else
9352                 __trace_early_add_events(tr);
9353
9354         list_add(&tr->list, &ftrace_trace_arrays);
9355
9356         tr->ref++;
9357
9358         return tr;
9359
9360  out_free_tr:
9361         ftrace_free_ftrace_ops(tr);
9362         free_trace_buffers(tr);
9363         free_cpumask_var(tr->tracing_cpumask);
9364         kfree(tr->name);
9365         kfree(tr);
9366
9367         return ERR_PTR(ret);
9368 }
9369
9370 static int instance_mkdir(const char *name)
9371 {
9372         struct trace_array *tr;
9373         int ret;
9374
9375         mutex_lock(&event_mutex);
9376         mutex_lock(&trace_types_lock);
9377
9378         ret = -EEXIST;
9379         if (trace_array_find(name))
9380                 goto out_unlock;
9381
9382         tr = trace_array_create(name);
9383
9384         ret = PTR_ERR_OR_ZERO(tr);
9385
9386 out_unlock:
9387         mutex_unlock(&trace_types_lock);
9388         mutex_unlock(&event_mutex);
9389         return ret;
9390 }
9391
9392 /**
9393  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9394  * @name: The name of the trace array to be looked up/created.
9395  *
9396  * Returns a pointer to the trace array with the given name, or
9397  * NULL if it cannot be created.
9398  *
9399  * NOTE: This function increments the reference counter associated with the
9400  * trace array returned. This makes sure it cannot be freed while in use.
9401  * Use trace_array_put() once the trace array is no longer needed.
9402  * If the trace_array is to be freed, trace_array_destroy() needs to
9403  * be called after the trace_array_put(), or simply let user space delete
9404  * it from the tracefs instances directory. But until the
9405  * trace_array_put() is called, user space cannot delete it.
9406  *
9407  */
9408 struct trace_array *trace_array_get_by_name(const char *name)
9409 {
9410         struct trace_array *tr;
9411
9412         mutex_lock(&event_mutex);
9413         mutex_lock(&trace_types_lock);
9414
9415         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9416                 if (tr->name && strcmp(tr->name, name) == 0)
9417                         goto out_unlock;
9418         }
9419
9420         tr = trace_array_create(name);
9421
9422         if (IS_ERR(tr))
9423                 tr = NULL;
9424 out_unlock:
9425         if (tr)
9426                 tr->ref++;
9427
9428         mutex_unlock(&trace_types_lock);
9429         mutex_unlock(&event_mutex);
9430         return tr;
9431 }
9432 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
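/*
 * Minimal usage sketch for the exported instance API (the instance name
 * and the error handling are the caller's choice):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *
 *	(... use the instance: enable events, write to its buffer, ...)
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 *
 * As noted in the kernel-doc above, trace_array_destroy() is only needed
 * if the instance should actually be removed, and it must come after the
 * trace_array_put().
 */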
9433
9434 static int __remove_instance(struct trace_array *tr)
9435 {
9436         int i;
9437
9438         /* Reference counter for a newly created trace array = 1. */
9439         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9440                 return -EBUSY;
9441
9442         list_del(&tr->list);
9443
9444         /* Disable all the flags that were enabled coming in */
9445         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9446                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9447                         set_tracer_flag(tr, 1 << i, 0);
9448         }
9449
9450         tracing_set_nop(tr);
9451         clear_ftrace_function_probes(tr);
9452         event_trace_del_tracer(tr);
9453         ftrace_clear_pids(tr);
9454         ftrace_destroy_function_files(tr);
9455         tracefs_remove(tr->dir);
9456         free_percpu(tr->last_func_repeats);
9457         free_trace_buffers(tr);
9458
9459         for (i = 0; i < tr->nr_topts; i++) {
9460                 kfree(tr->topts[i].topts);
9461         }
9462         kfree(tr->topts);
9463
9464         free_cpumask_var(tr->tracing_cpumask);
9465         kfree(tr->name);
9466         kfree(tr);
9467
9468         return 0;
9469 }
9470
9471 int trace_array_destroy(struct trace_array *this_tr)
9472 {
9473         struct trace_array *tr;
9474         int ret;
9475
9476         if (!this_tr)
9477                 return -EINVAL;
9478
9479         mutex_lock(&event_mutex);
9480         mutex_lock(&trace_types_lock);
9481
9482         ret = -ENODEV;
9483
9484         /* Make sure the trace array exists before destroying it. */
9485         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9486                 if (tr == this_tr) {
9487                         ret = __remove_instance(tr);
9488                         break;
9489                 }
9490         }
9491
9492         mutex_unlock(&trace_types_lock);
9493         mutex_unlock(&event_mutex);
9494
9495         return ret;
9496 }
9497 EXPORT_SYMBOL_GPL(trace_array_destroy);
9498
9499 static int instance_rmdir(const char *name)
9500 {
9501         struct trace_array *tr;
9502         int ret;
9503
9504         mutex_lock(&event_mutex);
9505         mutex_lock(&trace_types_lock);
9506
9507         ret = -ENODEV;
9508         tr = trace_array_find(name);
9509         if (tr)
9510                 ret = __remove_instance(tr);
9511
9512         mutex_unlock(&trace_types_lock);
9513         mutex_unlock(&event_mutex);
9514
9515         return ret;
9516 }
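
/*
 * Illustrative sketch (not part of the original file): instance_rmdir() backs
 * rmdir on an instance directory, e.g.:
 *
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with ENODEV if no such instance exists, and with EBUSY if
 * the instance still has references (see __remove_instance() above), for
 * example while a kernel user holds it via trace_array_get_by_name().
 */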
9517
9518 static __init void create_trace_instances(struct dentry *d_tracer)
9519 {
9520         struct trace_array *tr;
9521
9522         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9523                                                          instance_mkdir,
9524                                                          instance_rmdir);
9525         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9526                 return;
9527
9528         mutex_lock(&event_mutex);
9529         mutex_lock(&trace_types_lock);
9530
9531         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9532                 if (!tr->name)
9533                         continue;
9534                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9535                              "Failed to create instance directory\n"))
9536                         break;
9537         }
9538
9539         mutex_unlock(&trace_types_lock);
9540         mutex_unlock(&event_mutex);
9541 }
9542
9543 static void
9544 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9545 {
9546         struct trace_event_file *file;
9547         int cpu;
9548
9549         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9550                         tr, &show_traces_fops);
9551
9552         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9553                         tr, &set_tracer_fops);
9554
9555         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9556                           tr, &tracing_cpumask_fops);
9557
9558         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9559                           tr, &tracing_iter_fops);
9560
9561         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9562                           tr, &tracing_fops);
9563
9564         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9565                           tr, &tracing_pipe_fops);
9566
9567         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9568                           tr, &tracing_entries_fops);
9569
9570         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9571                           tr, &tracing_total_entries_fops);
9572
9573         trace_create_file("free_buffer", 0200, d_tracer,
9574                           tr, &tracing_free_buffer_fops);
9575
9576         trace_create_file("trace_marker", 0220, d_tracer,
9577                           tr, &tracing_mark_fops);
9578
9579         file = __find_event_file(tr, "ftrace", "print");
9580         if (file && file->dir)
9581                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9582                                   file, &event_trigger_fops);
9583         tr->trace_marker_file = file;
9584
9585         trace_create_file("trace_marker_raw", 0220, d_tracer,
9586                           tr, &tracing_mark_raw_fops);
9587
9588         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9589                           &trace_clock_fops);
9590
9591         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9592                           tr, &rb_simple_fops);
9593
9594         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9595                           &trace_time_stamp_mode_fops);
9596
9597         tr->buffer_percent = 50;
9598
9599         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9600                         tr, &buffer_percent_fops);
9601
9602         create_trace_options_dir(tr);
9603
9604         trace_create_maxlat_file(tr, d_tracer);
9605
9606         if (ftrace_create_function_files(tr, d_tracer))
9607                 MEM_FAIL(1, "Could not allocate function filter files");
9608
9609 #ifdef CONFIG_TRACER_SNAPSHOT
9610         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9611                           tr, &snapshot_fops);
9612 #endif
9613
9614         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9615                           tr, &tracing_err_log_fops);
9616
9617         for_each_tracing_cpu(cpu)
9618                 tracing_init_tracefs_percpu(tr, cpu);
9619
9620         ftrace_init_tracefs(tr, d_tracer);
9621 }
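
/*
 * Illustrative sketch (not part of the original file): after
 * init_tracer_tracefs() runs for an instance, its directory contains the
 * control files created above, roughly:
 *
 *   # ls /sys/kernel/tracing/instances/foo
 *   available_tracers  buffer_size_kb  current_tracer  error_log
 *   trace  trace_clock  trace_marker  trace_options  trace_pipe
 *   tracing_cpumask  tracing_on  options/  per_cpu/  ...
 *
 * The exact listing depends on the kernel configuration (for example,
 * "snapshot" only appears with CONFIG_TRACER_SNAPSHOT).
 */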
9622
9623 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9624 {
9625         struct vfsmount *mnt;
9626         struct file_system_type *type;
9627
9628         /*
9629          * To maintain backward compatibility for tools that mount
9630          * debugfs to get to the tracing facility, tracefs is automatically
9631          * mounted to the debugfs/tracing directory.
9632          */
9633         type = get_fs_type("tracefs");
9634         if (!type)
9635                 return NULL;
9636         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9637         put_filesystem(type);
9638         if (IS_ERR(mnt))
9639                 return NULL;
9640         mntget(mnt);
9641
9642         return mnt;
9643 }
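
/*
 * Illustrative sketch (not part of the original file): with the automount in
 * place, older tooling that only knows about debugfs keeps working:
 *
 *   # mount -t debugfs nodev /sys/kernel/debug
 *   # cat /sys/kernel/debug/tracing/trace    # triggers the tracefs automount
 *
 * Newer tooling can mount tracefs directly:
 *
 *   # mount -t tracefs nodev /sys/kernel/tracing
 */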
9644
9645 /**
9646  * tracing_init_dentry - initialize top level trace array
9647  *
9648  * This is called when creating files or directories in the tracing
9649  * directory. It is called via fs_initcall() by any of the boot up code
9650  * and returns 0 on success, or an error code on failure.
9651  */
9652 int tracing_init_dentry(void)
9653 {
9654         struct trace_array *tr = &global_trace;
9655
9656         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9657                 pr_warn("Tracing disabled due to lockdown\n");
9658                 return -EPERM;
9659         }
9660
9661         /* The top level trace array uses NULL as parent */
9662         if (tr->dir)
9663                 return 0;
9664
9665         if (WARN_ON(!tracefs_initialized()))
9666                 return -ENODEV;
9667
9668         /*
9669          * As there may still be users that expect the tracing
9670          * files to exist in debugfs/tracing, we must automount
9671          * the tracefs file system there, so older tools still
9672          * work with the newer kernel.
9673          */
9674         tr->dir = debugfs_create_automount("tracing", NULL,
9675                                            trace_automount, NULL);
9676
9677         return 0;
9678 }
9679
9680 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9681 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9682
9683 static struct workqueue_struct *eval_map_wq __initdata;
9684 static struct work_struct eval_map_work __initdata;
9685 static struct work_struct tracerfs_init_work __initdata;
9686
9687 static void __init eval_map_work_func(struct work_struct *work)
9688 {
9689         int len;
9690
9691         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9692         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9693 }
9694
9695 static int __init trace_eval_init(void)
9696 {
9697         INIT_WORK(&eval_map_work, eval_map_work_func);
9698
9699         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9700         if (!eval_map_wq) {
9701                 pr_err("Unable to allocate eval_map_wq\n");
9702                 /* Do the work synchronously here instead */
9703                 eval_map_work_func(&eval_map_work);
9704                 return -ENOMEM;
9705         }
9706
9707         queue_work(eval_map_wq, &eval_map_work);
9708         return 0;
9709 }
9710
9711 subsys_initcall(trace_eval_init);
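
/*
 * Illustrative sketch (not part of the original file): the eval maps inserted
 * above typically come from TRACE_DEFINE_ENUM() in trace event headers, which
 * lets an enum used in a TP_printk() format be resolved to its value, e.g.
 * (hypothetical event header):
 *
 *   TRACE_DEFINE_ENUM(EXAMPLE_STATE_RUNNING);
 *
 *   TRACE_EVENT(example_event,
 *           ...
 *           TP_printk("state=%s",
 *                     __print_symbolic(__entry->state,
 *                                      { EXAMPLE_STATE_RUNNING, "running" }))
 *   );
 */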
9712
9713 static int __init trace_eval_sync(void)
9714 {
9715         /* Make sure the eval map updates are finished */
9716         if (eval_map_wq)
9717                 destroy_workqueue(eval_map_wq);
9718         return 0;
9719 }
9720
9721 late_initcall_sync(trace_eval_sync);
9722
9723
9724 #ifdef CONFIG_MODULES
9725 static void trace_module_add_evals(struct module *mod)
9726 {
9727         if (!mod->num_trace_evals)
9728                 return;
9729
9730         /*
9731          * Modules with a bad taint do not have events created, so do
9732          * not bother with their enums (eval maps) either.
9733          */
9734         if (trace_module_has_bad_taint(mod))
9735                 return;
9736
9737         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9738 }
9739
9740 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9741 static void trace_module_remove_evals(struct module *mod)
9742 {
9743         union trace_eval_map_item *map;
9744         union trace_eval_map_item **last = &trace_eval_maps;
9745
9746         if (!mod->num_trace_evals)
9747                 return;
9748
9749         mutex_lock(&trace_eval_mutex);
9750
9751         map = trace_eval_maps;
9752
9753         while (map) {
9754                 if (map->head.mod == mod)
9755                         break;
9756                 map = trace_eval_jmp_to_tail(map);
9757                 last = &map->tail.next;
9758                 map = map->tail.next;
9759         }
9760         if (!map)
9761                 goto out;
9762
9763         *last = trace_eval_jmp_to_tail(map)->tail.next;
9764         kfree(map);
9765  out:
9766         mutex_unlock(&trace_eval_mutex);
9767 }
9768 #else
9769 static inline void trace_module_remove_evals(struct module *mod) { }
9770 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9771
9772 static int trace_module_notify(struct notifier_block *self,
9773                                unsigned long val, void *data)
9774 {
9775         struct module *mod = data;
9776
9777         switch (val) {
9778         case MODULE_STATE_COMING:
9779                 trace_module_add_evals(mod);
9780                 break;
9781         case MODULE_STATE_GOING:
9782                 trace_module_remove_evals(mod);
9783                 break;
9784         }
9785
9786         return NOTIFY_OK;
9787 }
9788
9789 static struct notifier_block trace_module_nb = {
9790         .notifier_call = trace_module_notify,
9791         .priority = 0,
9792 };
9793 #endif /* CONFIG_MODULES */
9794
9795 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9796 {
9797
9798         event_trace_init();
9799
9800         init_tracer_tracefs(&global_trace, NULL);
9801         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9802
9803         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9804                         &global_trace, &tracing_thresh_fops);
9805
9806         trace_create_file("README", TRACE_MODE_READ, NULL,
9807                         NULL, &tracing_readme_fops);
9808
9809         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9810                         NULL, &tracing_saved_cmdlines_fops);
9811
9812         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9813                           NULL, &tracing_saved_cmdlines_size_fops);
9814
9815         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9816                         NULL, &tracing_saved_tgids_fops);
9817
9818         trace_create_eval_file(NULL);
9819
9820 #ifdef CONFIG_MODULES
9821         register_module_notifier(&trace_module_nb);
9822 #endif
9823
9824 #ifdef CONFIG_DYNAMIC_FTRACE
9825         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9826                         NULL, &tracing_dyn_info_fops);
9827 #endif
9828
9829         create_trace_instances(NULL);
9830
9831         update_tracer_options(&global_trace);
9832 }
9833
9834 static __init int tracer_init_tracefs(void)
9835 {
9836         int ret;
9837
9838         trace_access_lock_init();
9839
9840         ret = tracing_init_dentry();
9841         if (ret)
9842                 return 0;
9843
9844         if (eval_map_wq) {
9845                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9846                 queue_work(eval_map_wq, &tracerfs_init_work);
9847         } else {
9848                 tracer_init_tracefs_work_func(NULL);
9849         }
9850
9851         rv_init_interface();
9852
9853         return 0;
9854 }
9855
9856 fs_initcall(tracer_init_tracefs);
9857
9858 static int trace_panic_handler(struct notifier_block *this,
9859                                unsigned long event, void *unused)
9860 {
9861         if (ftrace_dump_on_oops)
9862                 ftrace_dump(ftrace_dump_on_oops);
9863         return NOTIFY_OK;
9864 }
9865
9866 static struct notifier_block trace_panic_notifier = {
9867         .notifier_call  = trace_panic_handler,
9868         .next           = NULL,
9869         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9870 };
9871
9872 static int trace_die_handler(struct notifier_block *self,
9873                              unsigned long val,
9874                              void *data)
9875 {
9876         switch (val) {
9877         case DIE_OOPS:
9878                 if (ftrace_dump_on_oops)
9879                         ftrace_dump(ftrace_dump_on_oops);
9880                 break;
9881         default:
9882                 break;
9883         }
9884         return NOTIFY_OK;
9885 }
9886
9887 static struct notifier_block trace_die_notifier = {
9888         .notifier_call = trace_die_handler,
9889         .priority = 200
9890 };
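
/*
 * Illustrative sketch (not part of the original file): the panic/die dump set
 * up above is controlled by the ftrace_dump_on_oops kernel parameter, e.g.:
 *
 *   ftrace_dump_on_oops          # dump all CPU buffers on oops/panic
 *   ftrace_dump_on_oops=orig     # dump only the CPU that triggered the oops
 *
 * It can also be changed at run time through the kernel.ftrace_dump_on_oops
 * sysctl.
 */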
9891
9892 /*
9893  * printk is set to a max of 1024; we really don't need it that big.
9894  * Nothing should be printing 1000 characters anyway.
9895  */
9896 #define TRACE_MAX_PRINT         1000
9897
9898 /*
9899  * Define here KERN_TRACE so that we have one place to modify
9900  * it if we decide to change what log level the ftrace dump
9901  * should be at.
9902  */
9903 #define KERN_TRACE              KERN_EMERG
9904
9905 void
9906 trace_printk_seq(struct trace_seq *s)
9907 {
9908         /* Probably should print a warning here. */
9909         if (s->seq.len >= TRACE_MAX_PRINT)
9910                 s->seq.len = TRACE_MAX_PRINT;
9911
9912         /*
9913          * More paranoid code. Although the buffer size is set to
9914          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9915          * an extra layer of protection.
9916          */
9917         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9918                 s->seq.len = s->seq.size - 1;
9919
9920         /* Should be zero terminated, but just to be paranoid. */
9921         s->buffer[s->seq.len] = 0;
9922
9923         printk(KERN_TRACE "%s", s->buffer);
9924
9925         trace_seq_init(s);
9926 }
9927
9928 void trace_init_global_iter(struct trace_iterator *iter)
9929 {
9930         iter->tr = &global_trace;
9931         iter->trace = iter->tr->current_trace;
9932         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9933         iter->array_buffer = &global_trace.array_buffer;
9934
9935         if (iter->trace && iter->trace->open)
9936                 iter->trace->open(iter);
9937
9938         /* Annotate start of buffers if we had overruns */
9939         if (ring_buffer_overruns(iter->array_buffer->buffer))
9940                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9941
9942         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9943         if (trace_clocks[iter->tr->clock_id].in_ns)
9944                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9945
9946         /* Cannot use kmalloc for iter.temp and iter.fmt */
9947         iter->temp = static_temp_buf;
9948         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9949         iter->fmt = static_fmt_buf;
9950         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9951 }
9952
9953 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9954 {
9955         /* use static because iter can be a bit big for the stack */
9956         static struct trace_iterator iter;
9957         static atomic_t dump_running;
9958         struct trace_array *tr = &global_trace;
9959         unsigned int old_userobj;
9960         unsigned long flags;
9961         int cnt = 0, cpu;
9962
9963         /* Only allow one dump user at a time. */
9964         if (atomic_inc_return(&dump_running) != 1) {
9965                 atomic_dec(&dump_running);
9966                 return;
9967         }
9968
9969         /*
9970          * Always turn off tracing when we dump.
9971          * We don't need to show trace output of what happens
9972          * between multiple crashes.
9973          *
9974          * If the user does a sysrq-z, then they can re-enable
9975          * tracing with echo 1 > tracing_on.
9976          */
9977         tracing_off();
9978
9979         local_irq_save(flags);
9980
9981         /* Simulate the iterator */
9982         trace_init_global_iter(&iter);
9983
9984         for_each_tracing_cpu(cpu) {
9985                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9986         }
9987
9988         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9989
9990         /* don't look at user memory in panic mode */
9991         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9992
9993         switch (oops_dump_mode) {
9994         case DUMP_ALL:
9995                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9996                 break;
9997         case DUMP_ORIG:
9998                 iter.cpu_file = raw_smp_processor_id();
9999                 break;
10000         case DUMP_NONE:
10001                 goto out_enable;
10002         default:
10003                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10004                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10005         }
10006
10007         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10008
10009         /* Did function tracer already get disabled? */
10010         if (ftrace_is_dead()) {
10011                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10012                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10013         }
10014
10015         /*
10016          * We need to stop all tracing on all CPUs to read
10017          * the next buffer. This is a bit expensive, but is
10018          * not done often. We read everything we can,
10019          * and then release the locks again.
10020          */
10021
10022         while (!trace_empty(&iter)) {
10023
10024                 if (!cnt)
10025                         printk(KERN_TRACE "---------------------------------\n");
10026
10027                 cnt++;
10028
10029                 trace_iterator_reset(&iter);
10030                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10031
10032                 if (trace_find_next_entry_inc(&iter) != NULL) {
10033                         int ret;
10034
10035                         ret = print_trace_line(&iter);
10036                         if (ret != TRACE_TYPE_NO_CONSUME)
10037                                 trace_consume(&iter);
10038                 }
10039                 touch_nmi_watchdog();
10040
10041                 trace_printk_seq(&iter.seq);
10042         }
10043
10044         if (!cnt)
10045                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10046         else
10047                 printk(KERN_TRACE "---------------------------------\n");
10048
10049  out_enable:
10050         tr->trace_flags |= old_userobj;
10051
10052         for_each_tracing_cpu(cpu) {
10053                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10054         }
10055         atomic_dec(&dump_running);
10056         local_irq_restore(flags);
10057 }
10058 EXPORT_SYMBOL_GPL(ftrace_dump);
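
/*
 * Illustrative sketch (not part of the original file): ftrace_dump() is
 * exported so that other kernel code can dump the trace buffers to the
 * console from an error path, e.g. (hypothetical driver code):
 *
 *   if (WARN_ON(fatal_condition)) {
 *           ftrace_dump(DUMP_ALL);      // dump every CPU's buffer
 *           // or DUMP_ORIG for just the CPU that hit the error
 *   }
 *
 * Note that the dump calls tracing_off(), so tracing has to be re-enabled
 * (echo 1 > tracing_on) afterwards; sysrq-z triggers the same dump.
 */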
10059
10060 #define WRITE_BUFSIZE  4096
10061
10062 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10063                                 size_t count, loff_t *ppos,
10064                                 int (*createfn)(const char *))
10065 {
10066         char *kbuf, *buf, *tmp;
10067         int ret = 0;
10068         size_t done = 0;
10069         size_t size;
10070
10071         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10072         if (!kbuf)
10073                 return -ENOMEM;
10074
10075         while (done < count) {
10076                 size = count - done;
10077
10078                 if (size >= WRITE_BUFSIZE)
10079                         size = WRITE_BUFSIZE - 1;
10080
10081                 if (copy_from_user(kbuf, buffer + done, size)) {
10082                         ret = -EFAULT;
10083                         goto out;
10084                 }
10085                 kbuf[size] = '\0';
10086                 buf = kbuf;
10087                 do {
10088                         tmp = strchr(buf, '\n');
10089                         if (tmp) {
10090                                 *tmp = '\0';
10091                                 size = tmp - buf + 1;
10092                         } else {
10093                                 size = strlen(buf);
10094                                 if (done + size < count) {
10095                                         if (buf != kbuf)
10096                                                 break;
10097                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10098                                         pr_warn("Line length is too long: Should be less than %d\n",
10099                                                 WRITE_BUFSIZE - 2);
10100                                         ret = -EINVAL;
10101                                         goto out;
10102                                 }
10103                         }
10104                         done += size;
10105
10106                         /* Remove comments */
10107                         tmp = strchr(buf, '#');
10108
10109                         if (tmp)
10110                                 *tmp = '\0';
10111
10112                         ret = createfn(buf);
10113                         if (ret)
10114                                 goto out;
10115                         buf += size;
10116
10117                 } while (done < count);
10118         }
10119         ret = done;
10120
10121 out:
10122         kfree(kbuf);
10123
10124         return ret;
10125 }
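
/*
 * Illustrative sketch (not part of the original file) of a createfn callback
 * as used with trace_parse_run_command(): the parser above feeds it one
 * newline-separated command at a time, with anything after '#' stripped.
 * The names below are hypothetical.
 *
 *   static int example_create_cmd(const char *raw_command)
 *   {
 *           if (!strlen(raw_command))
 *                   return 0;               // blank or comment-only line
 *           pr_info("parsed command: %s\n", raw_command);
 *           return 0;                       // non-zero aborts the write
 *   }
 *
 *   static ssize_t example_write(struct file *file, const char __user *buf,
 *                                size_t count, loff_t *ppos)
 *   {
 *           return trace_parse_run_command(file, buf, count, ppos,
 *                                          example_create_cmd);
 *   }
 */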
10126
10127 __init static int tracer_alloc_buffers(void)
10128 {
10129         int ring_buf_size;
10130         int ret = -ENOMEM;
10131
10132
10133         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10134                 pr_warn("Tracing disabled due to lockdown\n");
10135                 return -EPERM;
10136         }
10137
10138         /*
10139          * Make sure we don't accidentally add more trace options
10140          * than we have bits for.
10141          */
10142         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10143
10144         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10145                 goto out;
10146
10147         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10148                 goto out_free_buffer_mask;
10149
10150         /* Only allocate trace_printk buffers if a trace_printk exists */
10151         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10152                 /* Must be called before global_trace.buffer is allocated */
10153                 trace_printk_init_buffers();
10154
10155         /* To save memory, keep the ring buffer size to its minimum */
10156         if (ring_buffer_expanded)
10157                 ring_buf_size = trace_buf_size;
10158         else
10159                 ring_buf_size = 1;
10160
10161         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10162         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10163
10164         raw_spin_lock_init(&global_trace.start_lock);
10165
10166         /*
10167          * The prepare callback allocates some memory for the ring buffer. We
10168          * don't free the buffer if the CPU goes down. If we were to free
10169          * the buffer, then the user would lose any trace that was in the
10170          * buffer. The memory will be removed once the "instance" is removed.
10171          */
10172         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10173                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10174                                       NULL);
10175         if (ret < 0)
10176                 goto out_free_cpumask;
10177         /* Used for event triggers */
10178         ret = -ENOMEM;
10179         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10180         if (!temp_buffer)
10181                 goto out_rm_hp_state;
10182
10183         if (trace_create_savedcmd() < 0)
10184                 goto out_free_temp_buffer;
10185
10186         /* TODO: make the number of buffers hot pluggable with CPUs */
10187         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10188                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10189                 goto out_free_savedcmd;
10190         }
10191
10192         if (global_trace.buffer_disabled)
10193                 tracing_off();
10194
10195         if (trace_boot_clock) {
10196                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10197                 if (ret < 0)
10198                         pr_warn("Trace clock %s not defined, going back to default\n",
10199                                 trace_boot_clock);
10200         }
10201
10202         /*
10203          * register_tracer() might reference current_trace, so it
10204          * needs to be set before we register anything. This is
10205          * just a bootstrap of current_trace anyway.
10206          */
10207         global_trace.current_trace = &nop_trace;
10208
10209         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10210
10211         ftrace_init_global_array_ops(&global_trace);
10212
10213         init_trace_flags_index(&global_trace);
10214
10215         register_tracer(&nop_trace);
10216
10217         /* Function tracing may start here (via kernel command line) */
10218         init_function_trace();
10219
10220         /* All seems OK, enable tracing */
10221         tracing_disabled = 0;
10222
10223         atomic_notifier_chain_register(&panic_notifier_list,
10224                                        &trace_panic_notifier);
10225
10226         register_die_notifier(&trace_die_notifier);
10227
10228         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10229
10230         INIT_LIST_HEAD(&global_trace.systems);
10231         INIT_LIST_HEAD(&global_trace.events);
10232         INIT_LIST_HEAD(&global_trace.hist_vars);
10233         INIT_LIST_HEAD(&global_trace.err_log);
10234         list_add(&global_trace.list, &ftrace_trace_arrays);
10235
10236         apply_trace_boot_options();
10237
10238         register_snapshot_cmd();
10239
10240         test_can_verify();
10241
10242         return 0;
10243
10244 out_free_savedcmd:
10245         free_saved_cmdlines_buffer(savedcmd);
10246 out_free_temp_buffer:
10247         ring_buffer_free(temp_buffer);
10248 out_rm_hp_state:
10249         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10250 out_free_cpumask:
10251         free_cpumask_var(global_trace.tracing_cpumask);
10252 out_free_buffer_mask:
10253         free_cpumask_var(tracing_buffer_mask);
10254 out:
10255         return ret;
10256 }
10257
10258 void __init ftrace_boot_snapshot(void)
10259 {
10260         if (snapshot_at_boot) {
10261                 tracing_snapshot();
10262                 internal_trace_puts("** Boot snapshot taken **\n");
10263         }
10264 }
10265
10266 void __init early_trace_init(void)
10267 {
10268         if (tracepoint_printk) {
10269                 tracepoint_print_iter =
10270                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10271                 if (MEM_FAIL(!tracepoint_print_iter,
10272                              "Failed to allocate trace iterator\n"))
10273                         tracepoint_printk = 0;
10274                 else
10275                         static_key_enable(&tracepoint_printk_key.key);
10276         }
10277         tracer_alloc_buffers();
10278 }
10279
10280 void __init trace_init(void)
10281 {
10282         trace_event_init();
10283 }
10284
10285 __init static void clear_boot_tracer(void)
10286 {
10287         /*
10288          * The default bootup tracer name points into an init section
10289          * buffer. This function is called in a late initcall. If the
10290          * boot tracer was never registered, clear the pointer to prevent
10291          * a later registration from accessing the buffer that is
10292          * about to be freed.
10293          */
10294         if (!default_bootup_tracer)
10295                 return;
10296
10297         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10298                default_bootup_tracer);
10299         default_bootup_tracer = NULL;
10300 }
10301
10302 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10303 __init static void tracing_set_default_clock(void)
10304 {
10305         /* sched_clock_stable() is determined in late_initcall */
10306         if (!trace_boot_clock && !sched_clock_stable()) {
10307                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10308                         pr_warn("Can not set tracing clock due to lockdown\n");
10309                         return;
10310                 }
10311
10312                 printk(KERN_WARNING
10313                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10314                        "If you want to keep using the local clock, then add:\n"
10315                        "  \"trace_clock=local\"\n"
10316                        "on the kernel command line\n");
10317                 tracing_set_clock(&global_trace, "global");
10318         }
10319 }
10320 #else
10321 static inline void tracing_set_default_clock(void) { }
10322 #endif
10323
10324 __init static int late_trace_init(void)
10325 {
10326         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10327                 static_key_disable(&tracepoint_printk_key.key);
10328                 tracepoint_printk = 0;
10329         }
10330
10331         tracing_set_default_clock();
10332         clear_boot_tracer();
10333         return 0;
10334 }
10335
10336 late_initcall_sync(late_trace_init);