platform/kernel/linux-starfive.git: kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring-buffer to count the
65  * entries inserted during the selftest, although some concurrent
66  * insertions into the ring-buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing (including tracers/events set up via the kernel
73  * cmdline) is running, we do not want to run the startup selftests.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" points to NULL as it must be different
154          * from "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
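/*
 * Summary of the parser above (note added for clarity, not in the original
 * file): the accepted kernel command-line forms are
 *
 *	ftrace_dump_on_oops		-> DUMP_ALL
 *	ftrace_dump_on_oops=1		-> DUMP_ALL (dump every CPU's buffer)
 *	ftrace_dump_on_oops=2		-> DUMP_ORIG (only the CPU that oopsed)
 *	ftrace_dump_on_oops=orig_cpu	-> DUMP_ORIG
 */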
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
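/*
 * Convert nanoseconds to microseconds, rounding to the nearest
 * microsecond (e.g. ns2usecs(1499) == 1, ns2usecs(1500) == 2).
 */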
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are adding export to the list, but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer inserted into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
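/*
 * Usage sketch (added for illustration, not part of the original file):
 * a minimal trace_export user. The names "demo_export" and
 * "demo_export_write" are hypothetical; only register_ftrace_export(),
 * unregister_ftrace_export() and the TRACE_EXPORT_* flags come from the
 * code above. See include/linux/trace.h for the exact callback prototype.
 */
#if 0	/* illustrative only */
static void demo_export_write(struct trace_export *export, const void *entry,
			      unsigned int size)
{
	/* Forward the raw trace entry (struct trace_entry + payload). */
}

static struct trace_export demo_export = {
	.write	= demo_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_FUNCTION,
};

/* On module init:	register_ftrace_export(&demo_export);	*/
/* On module exit:	unregister_ftrace_export(&demo_export);	*/
#endif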
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
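/*
 * Worked example of the logic above: with filtered_pids = {42} and
 * filtered_no_pids empty, only pid 42 is traced. With filtered_pids empty
 * and filtered_no_pids = {42}, every task except pid 42 is traced. With
 * both lists NULL, every task is traced.
 */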
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
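/*
 * Usage sketch (added for illustration, not part of the original file):
 * how the three helpers above are typically wired into seq_operations.
 * The "demo_pids_*" names and the way the pid_list is fetched from
 * m->private are hypothetical; real users (e.g. the set_event_pid file)
 * also handle locking in their .start/.stop callbacks.
 */
#if 0	/* illustrative only */
static void *demo_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* hypothetical storage */

	return trace_pid_start(pid_list, pos);
}

static void *demo_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void demo_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations demo_pids_seq_ops = {
	.start	= demo_pids_start,
	.next	= demo_pids_next,
	.stop	= demo_pids_stop,
	.show	= trace_pid_show,
};
#endif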
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always recreate a new array. The write is an all-or-nothing
698          * operation: a new array is always built when the user adds pids,
699          * so that if the operation fails, the current list is
700          * not modified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled", which can be checked in fast paths such
791  * as the irqsoff tracer's. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If a dump on oops happens, it is much nicer not to have to wait
813  * for all that output. In any case, this is configurable at both
814  * boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * serialize the access of the ring buffer
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be rewritten
839  *      by the events producer.
840  *   B) The page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
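/*
 * Usage sketch (added for illustration): the intended pattern for the
 * primitives above when consuming events from a single CPU's buffer.
 * Use RING_BUFFER_ALL_CPUS when the whole ring buffer is accessed at once.
 */
#if 0	/* illustrative only */
	trace_access_lock(cpu);
	/* ... read or consume events of @cpu here ... */
	trace_access_unlock(cpu);
#endif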
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned long trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff) that just want to
965          * know if the ring buffer has been disabled, but can handle
966          * races where it gets disabled while we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013         struct ring_buffer_event *event;
1014         struct trace_buffer *buffer;
1015         struct print_entry *entry;
1016         unsigned int trace_ctx;
1017         int alloc;
1018
1019         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020                 return 0;
1021
1022         if (unlikely(tracing_selftest_running || tracing_disabled))
1023                 return 0;
1024
1025         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026
1027         trace_ctx = tracing_gen_ctx();
1028         buffer = global_trace.array_buffer.buffer;
1029         ring_buffer_nest_start(buffer);
1030         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                                             trace_ctx);
1032         if (!event) {
1033                 size = 0;
1034                 goto out;
1035         }
1036
1037         entry = ring_buffer_event_data(event);
1038         entry->ip = ip;
1039
1040         memcpy(&entry->buf, str, size);
1041
1042         /* Add a newline if necessary */
1043         if (entry->buf[size - 1] != '\n') {
1044                 entry->buf[size] = '\n';
1045                 entry->buf[size + 1] = '\0';
1046         } else
1047                 entry->buf[size] = '\0';
1048
1049         __buffer_unlock_commit(buffer, event);
1050         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052         ring_buffer_nest_end(buffer);
1053         return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
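/*
 * Note added for clarity: __trace_puts() is not normally called directly.
 * The trace_puts() macro (declared next to trace_printk()) chooses between
 * __trace_puts() and __trace_bputs() depending on whether the string is a
 * build-time constant, e.g.:
 *
 *	trace_puts("reached the fast path\n");
 */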
1056
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string whose address is written into the buffer
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064         struct ring_buffer_event *event;
1065         struct trace_buffer *buffer;
1066         struct bputs_entry *entry;
1067         unsigned int trace_ctx;
1068         int size = sizeof(struct bputs_entry);
1069         int ret = 0;
1070
1071         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running || tracing_disabled))
1075                 return 0;
1076
1077         trace_ctx = tracing_gen_ctx();
1078         buffer = global_trace.array_buffer.buffer;
1079
1080         ring_buffer_nest_start(buffer);
1081         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                                             trace_ctx);
1083         if (!event)
1084                 goto out;
1085
1086         entry = ring_buffer_event_data(event);
1087         entry->ip                       = ip;
1088         entry->str                      = str;
1089
1090         __buffer_unlock_commit(buffer, event);
1091         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092
1093         ret = 1;
1094  out:
1095         ring_buffer_nest_end(buffer);
1096         return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                                            void *cond_data)
1103 {
1104         struct tracer *tracer = tr->current_trace;
1105         unsigned long flags;
1106
1107         if (in_nmi()) {
1108                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1110                 return;
1111         }
1112
1113         if (!tr->allocated_snapshot) {
1114                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115                 internal_trace_puts("*** stopping trace here!   ***\n");
1116                 tracing_off();
1117                 return;
1118         }
1119
1120         /* Note, snapshot can not be used when the tracer uses it */
1121         if (tracer->use_max_tr) {
1122                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124                 return;
1125         }
1126
1127         local_irq_save(flags);
1128         update_max_tr(tr, current, smp_processor_id(), cond_data);
1129         local_irq_restore(flags);
1130 }
1131
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134         tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot first, either with
1145  * tracing_snapshot_alloc(), or manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, this will stop tracing,
1149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153         struct trace_array *tr = &global_trace;
1154
1155         tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:         The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174         tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:         The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already taken.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194         void *cond_data = NULL;
1195
1196         local_irq_disable();
1197         arch_spin_lock(&tr->max_lock);
1198
1199         if (tr->cond_snapshot)
1200                 cond_data = tr->cond_snapshot->cond_data;
1201
1202         arch_spin_unlock(&tr->max_lock);
1203         local_irq_enable();
1204
1205         return cond_data;
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1208
1209 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1210                                         struct array_buffer *size_buf, int cpu_id);
1211 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1212
1213 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1214 {
1215         int ret;
1216
1217         if (!tr->allocated_snapshot) {
1218
1219                 /* allocate spare buffer */
1220                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1221                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1222                 if (ret < 0)
1223                         return ret;
1224
1225                 tr->allocated_snapshot = true;
1226         }
1227
1228         return 0;
1229 }
1230
1231 static void free_snapshot(struct trace_array *tr)
1232 {
1233         /*
1234          * We don't free the ring buffer; instead, we resize it because
1235          * the max_tr ring buffer has some state (e.g. ring->clock) and
1236          * we want to preserve it.
1237          */
1238         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1239         set_buffer_entries(&tr->max_buffer, 1);
1240         tracing_reset_online_cpus(&tr->max_buffer);
1241         tr->allocated_snapshot = false;
1242 }
1243
1244 /**
1245  * tracing_alloc_snapshot - allocate snapshot buffer.
1246  *
1247  * This only allocates the snapshot buffer if it isn't already
1248  * allocated - it doesn't also take a snapshot.
1249  *
1250  * This is meant to be used in cases where the snapshot buffer needs
1251  * to be set up for events that can't sleep but need to be able to
1252  * trigger a snapshot.
1253  */
1254 int tracing_alloc_snapshot(void)
1255 {
1256         struct trace_array *tr = &global_trace;
1257         int ret;
1258
1259         ret = tracing_alloc_snapshot_instance(tr);
1260         WARN_ON(ret < 0);
1261
1262         return ret;
1263 }
1264 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1265
1266 /**
1267  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1268  *
1269  * This is similar to tracing_snapshot(), but it will allocate the
1270  * snapshot buffer if it isn't already allocated. Use this only
1271  * where it is safe to sleep, as the allocation may sleep.
1272  *
1273  * This causes a swap between the snapshot buffer and the current live
1274  * tracing buffer. You can use this to take snapshots of the live
1275  * trace when some condition is triggered, but continue to trace.
1276  */
1277 void tracing_snapshot_alloc(void)
1278 {
1279         int ret;
1280
1281         ret = tracing_alloc_snapshot();
1282         if (ret < 0)
1283                 return;
1284
1285         tracing_snapshot();
1286 }
1287 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
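/*
 * Usage sketch (added for illustration) of the snapshot API documented
 * above; the trigger condition is hypothetical.
 */
#if 0	/* illustrative only */
	/* In a context that may sleep (e.g. init code): */
	if (tracing_alloc_snapshot() < 0)
		return;

	/* Later, when something interesting happens: */
	if (hit_interesting_condition)
		tracing_snapshot();	/* swap the live buffer with the spare */
#endif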
1288
1289 /**
1290  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1291  * @tr:         The tracing instance
1292  * @cond_data:  User data to associate with the snapshot
1293  * @update:     Implementation of the cond_snapshot update function
1294  *
1295  * Check whether the conditional snapshot for the given instance has
1296  * already been enabled, or if the current tracer is already using a
1297  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1298  * save the cond_data and update function inside.
1299  *
1300  * Returns 0 if successful, error otherwise.
1301  */
1302 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1303                                  cond_update_fn_t update)
1304 {
1305         struct cond_snapshot *cond_snapshot;
1306         int ret = 0;
1307
1308         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1309         if (!cond_snapshot)
1310                 return -ENOMEM;
1311
1312         cond_snapshot->cond_data = cond_data;
1313         cond_snapshot->update = update;
1314
1315         mutex_lock(&trace_types_lock);
1316
1317         ret = tracing_alloc_snapshot_instance(tr);
1318         if (ret)
1319                 goto fail_unlock;
1320
1321         if (tr->current_trace->use_max_tr) {
1322                 ret = -EBUSY;
1323                 goto fail_unlock;
1324         }
1325
1326         /*
1327          * The cond_snapshot can only change to NULL without the
1328          * trace_types_lock. We don't care if we race with it going
1329          * to NULL, but we want to make sure that it's not set to
1330          * something other than NULL when we get here, which we can
1331          * do safely with only holding the trace_types_lock and not
1332          * having to take the max_lock.
1333          */
1334         if (tr->cond_snapshot) {
1335                 ret = -EBUSY;
1336                 goto fail_unlock;
1337         }
1338
1339         local_irq_disable();
1340         arch_spin_lock(&tr->max_lock);
1341         tr->cond_snapshot = cond_snapshot;
1342         arch_spin_unlock(&tr->max_lock);
1343         local_irq_enable();
1344
1345         mutex_unlock(&trace_types_lock);
1346
1347         return ret;
1348
1349  fail_unlock:
1350         mutex_unlock(&trace_types_lock);
1351         kfree(cond_snapshot);
1352         return ret;
1353 }
1354 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1355
1356 /**
1357  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358  * @tr:         The tracing instance
1359  *
1360  * Check whether the conditional snapshot for the given instance is
1361  * enabled; if so, free the cond_snapshot associated with it,
1362  * otherwise return -EINVAL.
1363  *
1364  * Returns 0 if successful, error otherwise.
1365  */
1366 int tracing_snapshot_cond_disable(struct trace_array *tr)
1367 {
1368         int ret = 0;
1369
1370         local_irq_disable();
1371         arch_spin_lock(&tr->max_lock);
1372
1373         if (!tr->cond_snapshot)
1374                 ret = -EINVAL;
1375         else {
1376                 kfree(tr->cond_snapshot);
1377                 tr->cond_snapshot = NULL;
1378         }
1379
1380         arch_spin_unlock(&tr->max_lock);
1381         local_irq_enable();
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
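/*
 * Usage sketch (added for illustration): a conditional snapshot user. The
 * callback name and the latency check are hypothetical; the update callback
 * type is assumed to be bool (*)(struct trace_array *, void *), matching
 * the cond_update_fn_t parameter of tracing_snapshot_cond_enable() above.
 * Returning true from the callback lets the snapshot actually happen.
 */
#if 0	/* illustrative only */
static bool demo_snapshot_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *threshold = cond_data;

	return demo_measured_latency() > *threshold;
}

	/* enable:  tracing_snapshot_cond_enable(tr, &demo_threshold, demo_snapshot_update); */
	/* trigger: tracing_snapshot_cond(tr, &demo_threshold); */
	/* disable: tracing_snapshot_cond_disable(tr); */
#endif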
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff) that just want to
1433          * know if the ring buffer has been disabled, but can handle
1434          * races where it gets disabled while we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /*
1496          * nr_entries can not be zero and the startup
1497          * tests require some buffer space. Therefore
1498          * ensure we have at least 4096 bytes of buffer.
1499          */
1500         trace_buf_size = max(4096UL, buf_size);
1501         return 1;
1502 }
1503 __setup("trace_buf_size=", set_buf_size);
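/*
 * Note added for clarity: memparse() above accepts the usual K/M/G
 * suffixes, so the size of each CPU's trace buffer can be set on the
 * kernel command line with e.g.:
 *
 *	trace_buf_size=16M
 */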
1504
1505 static int __init set_tracing_thresh(char *str)
1506 {
1507         unsigned long threshold;
1508         int ret;
1509
1510         if (!str)
1511                 return 0;
1512         ret = kstrtoul(str, 0, &threshold);
1513         if (ret < 0)
1514                 return 0;
1515         tracing_thresh = threshold * 1000;
1516         return 1;
1517 }
1518 __setup("tracing_thresh=", set_tracing_thresh);
1519
1520 unsigned long nsecs_to_usecs(unsigned long nsecs)
1521 {
1522         return nsecs / 1000;
1523 }
1524
1525 /*
1526  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1527  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1528  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1529  * of strings in the order that the evals (enum) were defined.
1530  */
1531 #undef C
1532 #define C(a, b) b
1533
1534 /* These must match the bit positions in trace_iterator_flags */
1535 static const char *trace_options[] = {
1536         TRACE_FLAGS
1537         NULL
1538 };
1539
1540 static struct {
1541         u64 (*func)(void);
1542         const char *name;
1543         int in_ns;              /* is this clock in nanoseconds? */
1544 } trace_clocks[] = {
1545         { trace_clock_local,            "local",        1 },
1546         { trace_clock_global,           "global",       1 },
1547         { trace_clock_counter,          "counter",      0 },
1548         { trace_clock_jiffies,          "uptime",       0 },
1549         { trace_clock,                  "perf",         1 },
1550         { ktime_get_mono_fast_ns,       "mono",         1 },
1551         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1552         { ktime_get_boot_fast_ns,       "boot",         1 },
1553         { ktime_get_tai_fast_ns,        "tai",          1 },
1554         ARCH_TRACE_CLOCKS
1555 };
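/*
 * Note added for clarity: these names are what the tracefs "trace_clock"
 * file exposes. Reading that file lists the clocks (plus any
 * ARCH_TRACE_CLOCKS) with the active one in brackets, and writing a name
 * switches clocks, e.g.:
 *
 *	# echo global > /sys/kernel/tracing/trace_clock
 */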
1556
1557 bool trace_clock_in_ns(struct trace_array *tr)
1558 {
1559         if (trace_clocks[tr->clock_id].in_ns)
1560                 return true;
1561
1562         return false;
1563 }
1564
1565 /*
1566  * trace_parser_get_init - gets the buffer for trace parser
1567  */
1568 int trace_parser_get_init(struct trace_parser *parser, int size)
1569 {
1570         memset(parser, 0, sizeof(*parser));
1571
1572         parser->buffer = kmalloc(size, GFP_KERNEL);
1573         if (!parser->buffer)
1574                 return 1;
1575
1576         parser->size = size;
1577         return 0;
1578 }
1579
1580 /*
1581  * trace_parser_put - frees the buffer for trace parser
1582  */
1583 void trace_parser_put(struct trace_parser *parser)
1584 {
1585         kfree(parser->buffer);
1586         parser->buffer = NULL;
1587 }
1588
1589 /*
1590  * trace_get_user - reads the user input string separated by space
1591  * (matched by isspace(ch))
1592  *
1593  * For each string found, the 'struct trace_parser' is updated,
1594  * and the function returns.
1595  *
1596  * Returns number of bytes read.
1597  *
1598  * See kernel/trace/trace.h for 'struct trace_parser' details.
1599  */
1600 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1601         size_t cnt, loff_t *ppos)
1602 {
1603         char ch;
1604         size_t read = 0;
1605         ssize_t ret;
1606
1607         if (!*ppos)
1608                 trace_parser_clear(parser);
1609
1610         ret = get_user(ch, ubuf++);
1611         if (ret)
1612                 goto out;
1613
1614         read++;
1615         cnt--;
1616
1617         /*
1618          * The parser is not finished with the last write,
1619          * continue reading the user input without skipping spaces.
1620          */
1621         if (!parser->cont) {
1622                 /* skip white space */
1623                 while (cnt && isspace(ch)) {
1624                         ret = get_user(ch, ubuf++);
1625                         if (ret)
1626                                 goto out;
1627                         read++;
1628                         cnt--;
1629                 }
1630
1631                 parser->idx = 0;
1632
1633                 /* only spaces were written */
1634                 if (isspace(ch) || !ch) {
1635                         *ppos += read;
1636                         ret = read;
1637                         goto out;
1638                 }
1639         }
1640
1641         /* read the non-space input */
1642         while (cnt && !isspace(ch) && ch) {
1643                 if (parser->idx < parser->size - 1)
1644                         parser->buffer[parser->idx++] = ch;
1645                 else {
1646                         ret = -EINVAL;
1647                         goto out;
1648                 }
1649                 ret = get_user(ch, ubuf++);
1650                 if (ret)
1651                         goto out;
1652                 read++;
1653                 cnt--;
1654         }
1655
1656         /* We either got finished input or we have to wait for another call. */
1657         if (isspace(ch) || !ch) {
1658                 parser->buffer[parser->idx] = 0;
1659                 parser->cont = false;
1660         } else if (parser->idx < parser->size - 1) {
1661                 parser->cont = true;
1662                 parser->buffer[parser->idx++] = ch;
1663                 /* Make sure the parsed string always terminates with '\0'. */
1664                 parser->buffer[parser->idx] = 0;
1665         } else {
1666                 ret = -EINVAL;
1667                 goto out;
1668         }
1669
1670         *ppos += read;
1671         ret = read;
1672
1673 out:
1674         return ret;
1675 }
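
/*
 * Illustrative sketch (an assumption, not code from this file): a
 * hypothetical tracefs write() handler could consume one whitespace
 * separated token per call using the parser helpers above; 'ubuf',
 * 'cnt' and 'ppos' are the handler's own arguments.
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read > 0 && parser.idx)
 *		pr_debug("parsed token: %s\n", parser.buffer);
 *
 *	trace_parser_put(&parser);
 *	return read;
 */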
1676
1677 /* TODO add a seq_buf_to_buffer() */
1678 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1679 {
1680         int len;
1681
1682         if (trace_seq_used(s) <= s->seq.readpos)
1683                 return -EBUSY;
1684
1685         len = trace_seq_used(s) - s->seq.readpos;
1686         if (cnt > len)
1687                 cnt = len;
1688         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1689
1690         s->seq.readpos += cnt;
1691         return cnt;
1692 }
1693
1694 unsigned long __read_mostly     tracing_thresh;
1695 static const struct file_operations tracing_max_lat_fops;
1696
1697 #ifdef LATENCY_FS_NOTIFY
1698
1699 static struct workqueue_struct *fsnotify_wq;
1700
1701 static void latency_fsnotify_workfn(struct work_struct *work)
1702 {
1703         struct trace_array *tr = container_of(work, struct trace_array,
1704                                               fsnotify_work);
1705         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1706 }
1707
1708 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1709 {
1710         struct trace_array *tr = container_of(iwork, struct trace_array,
1711                                               fsnotify_irqwork);
1712         queue_work(fsnotify_wq, &tr->fsnotify_work);
1713 }
1714
1715 static void trace_create_maxlat_file(struct trace_array *tr,
1716                                      struct dentry *d_tracer)
1717 {
1718         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1719         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1720         tr->d_max_latency = trace_create_file("tracing_max_latency",
1721                                               TRACE_MODE_WRITE,
1722                                               d_tracer, &tr->max_latency,
1723                                               &tracing_max_lat_fops);
1724 }
1725
1726 __init static int latency_fsnotify_init(void)
1727 {
1728         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1729                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1730         if (!fsnotify_wq) {
1731                 pr_err("Unable to allocate tr_max_lat_wq\n");
1732                 return -ENOMEM;
1733         }
1734         return 0;
1735 }
1736
1737 late_initcall_sync(latency_fsnotify_init);
1738
1739 void latency_fsnotify(struct trace_array *tr)
1740 {
1741         if (!fsnotify_wq)
1742                 return;
1743         /*
1744          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1745          * possible that we are called from __schedule() or do_idle(), which
1746          * could cause a deadlock.
1747          */
1748         irq_work_queue(&tr->fsnotify_irqwork);
1749 }
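
/*
 * Descriptive note on the deferral chain above: a latency update in
 * scheduler or idle context only queues an irq_work; the irq_work
 * handler queues the workqueue item, and the workqueue finally calls
 * fsnotify_inode() on tracing_max_latency from a context that may block:
 *
 *	latency_fsnotify()
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	    -> latency_fsnotify_workfn_irq()
 *	      -> queue_work(fsnotify_wq, &tr->fsnotify_work)
 *	        -> latency_fsnotify_workfn()
 *	          -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
 */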
1750
1751 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1752         || defined(CONFIG_OSNOISE_TRACER)
1753
1754 #define trace_create_maxlat_file(tr, d_tracer)                          \
1755         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1756                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1757
1758 #else
1759 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1760 #endif
1761
1762 #ifdef CONFIG_TRACER_MAX_TRACE
1763 /*
1764  * Copy the new maximum trace into the separate maximum-trace
1765  * structure. (this way the maximum trace is permanently saved,
1766  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1767  */
1768 static void
1769 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1770 {
1771         struct array_buffer *trace_buf = &tr->array_buffer;
1772         struct array_buffer *max_buf = &tr->max_buffer;
1773         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1774         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1775
1776         max_buf->cpu = cpu;
1777         max_buf->time_start = data->preempt_timestamp;
1778
1779         max_data->saved_latency = tr->max_latency;
1780         max_data->critical_start = data->critical_start;
1781         max_data->critical_end = data->critical_end;
1782
1783         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1784         max_data->pid = tsk->pid;
1785         /*
1786          * If tsk == current, then use current_uid(), as that does not use
1787          * RCU. The irq tracer can be called out of RCU scope.
1788          */
1789         if (tsk == current)
1790                 max_data->uid = current_uid();
1791         else
1792                 max_data->uid = task_uid(tsk);
1793
1794         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1795         max_data->policy = tsk->policy;
1796         max_data->rt_priority = tsk->rt_priority;
1797
1798         /* record this task's comm */
1799         tracing_record_cmdline(tsk);
1800         latency_fsnotify(tr);
1801 }
1802
1803 /**
1804  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1805  * @tr: tracer
1806  * @tsk: the task with the latency
1807  * @cpu: The cpu that initiated the trace.
1808  * @cond_data: User data associated with a conditional snapshot
1809  *
1810  * Flip the buffers between the @tr and the max_tr and record information
1811  * about which task was the cause of this latency.
1812  */
1813 void
1814 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1815               void *cond_data)
1816 {
1817         if (tr->stop_count)
1818                 return;
1819
1820         WARN_ON_ONCE(!irqs_disabled());
1821
1822         if (!tr->allocated_snapshot) {
1823                 /* Only the nop tracer should hit this when disabling */
1824                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1825                 return;
1826         }
1827
1828         arch_spin_lock(&tr->max_lock);
1829
1830         /* Inherit the recordable setting from array_buffer */
1831         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1832                 ring_buffer_record_on(tr->max_buffer.buffer);
1833         else
1834                 ring_buffer_record_off(tr->max_buffer.buffer);
1835
1836 #ifdef CONFIG_TRACER_SNAPSHOT
1837         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1838                 goto out_unlock;
1839 #endif
1840         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1841
1842         __update_max_tr(tr, tsk, cpu);
1843
1844  out_unlock:
1845         arch_spin_unlock(&tr->max_lock);
1846 }
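
/*
 * Illustrative sketch (an assumption, not code from this file): a
 * latency tracer that has just measured 'delta' on this CPU, with
 * interrupts already disabled, would record a new maximum like this:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */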
1847
1848 /**
1849  * update_max_tr_single - only copy one trace over, and reset the rest
1850  * @tr: tracer
1851  * @tsk: task with the latency
1852  * @cpu: the cpu of the buffer to copy.
1853  *
1854  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1855  */
1856 void
1857 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1858 {
1859         int ret;
1860
1861         if (tr->stop_count)
1862                 return;
1863
1864         WARN_ON_ONCE(!irqs_disabled());
1865         if (!tr->allocated_snapshot) {
1866                 /* Only the nop tracer should hit this when disabling */
1867                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1868                 return;
1869         }
1870
1871         arch_spin_lock(&tr->max_lock);
1872
1873         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1874
1875         if (ret == -EBUSY) {
1876                 /*
1877                  * We failed to swap the buffer due to a commit taking
1878                  * place on this CPU. We fail to record, but we reset
1879                  * the max trace buffer (no one writes directly to it)
1880                  * and flag that it failed.
1881                  */
1882                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883                         "Failed to swap buffers due to commit in progress\n");
1884         }
1885
1886         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887
1888         __update_max_tr(tr, tsk, cpu);
1889         arch_spin_unlock(&tr->max_lock);
1890 }
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892
1893 static int wait_on_pipe(struct trace_iterator *iter, int full)
1894 {
1895         /* Iterators are static, they should be filled or empty */
1896         if (trace_buffer_iter(iter, iter->cpu_file))
1897                 return 0;
1898
1899         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1900                                 full);
1901 }
1902
1903 #ifdef CONFIG_FTRACE_STARTUP_TEST
1904 static bool selftests_can_run;
1905
1906 struct trace_selftests {
1907         struct list_head                list;
1908         struct tracer                   *type;
1909 };
1910
1911 static LIST_HEAD(postponed_selftests);
1912
1913 static int save_selftest(struct tracer *type)
1914 {
1915         struct trace_selftests *selftest;
1916
1917         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1918         if (!selftest)
1919                 return -ENOMEM;
1920
1921         selftest->type = type;
1922         list_add(&selftest->list, &postponed_selftests);
1923         return 0;
1924 }
1925
1926 static int run_tracer_selftest(struct tracer *type)
1927 {
1928         struct trace_array *tr = &global_trace;
1929         struct tracer *saved_tracer = tr->current_trace;
1930         int ret;
1931
1932         if (!type->selftest || tracing_selftest_disabled)
1933                 return 0;
1934
1935         /*
1936          * If a tracer registers early in boot up (before scheduling is
1937          * initialized and such), then do not run its selftest yet.
1938          * Instead, run it a little later in the boot process.
1939          */
1940         if (!selftests_can_run)
1941                 return save_selftest(type);
1942
1943         if (!tracing_is_on()) {
1944                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1945                         type->name);
1946                 return 0;
1947         }
1948
1949         /*
1950          * Run a selftest on this tracer.
1951          * Here we reset the trace buffer, and set the current
1952          * tracer to be this tracer. The tracer can then run some
1953          * internal tracing to verify that everything is in order.
1954          * If we fail, we do not register this tracer.
1955          */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958         tr->current_trace = type;
1959
1960 #ifdef CONFIG_TRACER_MAX_TRACE
1961         if (type->use_max_tr) {
1962                 /* If we expanded the buffers, make sure the max is expanded too */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1965                                            RING_BUFFER_ALL_CPUS);
1966                 tr->allocated_snapshot = true;
1967         }
1968 #endif
1969
1970         /* the test is responsible for initializing and enabling */
1971         pr_info("Testing tracer %s: ", type->name);
1972         ret = type->selftest(type, tr);
1973         /* the test is responsible for resetting too */
1974         tr->current_trace = saved_tracer;
1975         if (ret) {
1976                 printk(KERN_CONT "FAILED!\n");
1977                 /* Add the warning after printing 'FAILED' */
1978                 WARN_ON(1);
1979                 return -1;
1980         }
1981         /* Only reset on passing, to avoid touching corrupted buffers */
1982         tracing_reset_online_cpus(&tr->array_buffer);
1983
1984 #ifdef CONFIG_TRACER_MAX_TRACE
1985         if (type->use_max_tr) {
1986                 tr->allocated_snapshot = false;
1987
1988                 /* Shrink the max buffer again */
1989                 if (ring_buffer_expanded)
1990                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1991                                            RING_BUFFER_ALL_CPUS);
1992         }
1993 #endif
1994
1995         printk(KERN_CONT "PASSED\n");
1996         return 0;
1997 }
1998
1999 static __init int init_trace_selftests(void)
2000 {
2001         struct trace_selftests *p, *n;
2002         struct tracer *t, **last;
2003         int ret;
2004
2005         selftests_can_run = true;
2006
2007         mutex_lock(&trace_types_lock);
2008
2009         if (list_empty(&postponed_selftests))
2010                 goto out;
2011
2012         pr_info("Running postponed tracer tests:\n");
2013
2014         tracing_selftest_running = true;
2015         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2016                 /* This loop can take minutes when sanitizers are enabled, so
2017          * let's make sure we allow RCU processing.
2018                  */
2019                 cond_resched();
2020                 ret = run_tracer_selftest(p->type);
2021                 /* If the test fails, then warn and remove from available_tracers */
2022                 if (ret < 0) {
2023                         WARN(1, "tracer: %s failed selftest, disabling\n",
2024                              p->type->name);
2025                         last = &trace_types;
2026                         for (t = trace_types; t; t = t->next) {
2027                                 if (t == p->type) {
2028                                         *last = t->next;
2029                                         break;
2030                                 }
2031                                 last = &t->next;
2032                         }
2033                 }
2034                 list_del(&p->list);
2035                 kfree(p);
2036         }
2037         tracing_selftest_running = false;
2038
2039  out:
2040         mutex_unlock(&trace_types_lock);
2041
2042         return 0;
2043 }
2044 core_initcall(init_trace_selftests);
2045 #else
2046 static inline int run_tracer_selftest(struct tracer *type)
2047 {
2048         return 0;
2049 }
2050 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2051
2052 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2053
2054 static void __init apply_trace_boot_options(void);
2055
2056 /**
2057  * register_tracer - register a tracer with the ftrace system.
2058  * @type: the plugin for the tracer
2059  *
2060  * Register a new plugin tracer.
2061  */
2062 int __init register_tracer(struct tracer *type)
2063 {
2064         struct tracer *t;
2065         int ret = 0;
2066
2067         if (!type->name) {
2068                 pr_info("Tracer must have a name\n");
2069                 return -1;
2070         }
2071
2072         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2073                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2074                 return -1;
2075         }
2076
2077         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2078                 pr_warn("Can not register tracer %s due to lockdown\n",
2079                            type->name);
2080                 return -EPERM;
2081         }
2082
2083         mutex_lock(&trace_types_lock);
2084
2085         tracing_selftest_running = true;
2086
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(type->name, t->name) == 0) {
2089                         /* already found */
2090                         pr_info("Tracer %s already registered\n",
2091                                 type->name);
2092                         ret = -1;
2093                         goto out;
2094                 }
2095         }
2096
2097         if (!type->set_flag)
2098                 type->set_flag = &dummy_set_flag;
2099         if (!type->flags) {
2100                 /* allocate a dummy tracer_flags */
2101                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2102                 if (!type->flags) {
2103                         ret = -ENOMEM;
2104                         goto out;
2105                 }
2106                 type->flags->val = 0;
2107                 type->flags->opts = dummy_tracer_opt;
2108         } else
2109                 if (!type->flags->opts)
2110                         type->flags->opts = dummy_tracer_opt;
2111
2112         /* store the tracer for __set_tracer_option */
2113         type->flags->trace = type;
2114
2115         ret = run_tracer_selftest(type);
2116         if (ret < 0)
2117                 goto out;
2118
2119         type->next = trace_types;
2120         trace_types = type;
2121         add_tracer_options(&global_trace, type);
2122
2123  out:
2124         tracing_selftest_running = false;
2125         mutex_unlock(&trace_types_lock);
2126
2127         if (ret || !default_bootup_tracer)
2128                 goto out_unlock;
2129
2130         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2131                 goto out_unlock;
2132
2133         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2134         /* Do we want this tracer to start on bootup? */
2135         tracing_set_tracer(&global_trace, type->name);
2136         default_bootup_tracer = NULL;
2137
2138         apply_trace_boot_options();
2139
2140         /* disable other selftests, since this will break them. */
2141         disable_tracing_selftest("running a tracer");
2142
2143  out_unlock:
2144         return ret;
2145 }
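
/*
 * Illustrative sketch (an assumption; the field names are taken from
 * struct tracer in kernel/trace/trace.h of this tree): a minimal tracer
 * would be registered from an early initcall roughly like this:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer = {
 *		.name	= "mytracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */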
2146
2147 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2148 {
2149         struct trace_buffer *buffer = buf->buffer;
2150
2151         if (!buffer)
2152                 return;
2153
2154         ring_buffer_record_disable(buffer);
2155
2156         /* Make sure all commits have finished */
2157         synchronize_rcu();
2158         ring_buffer_reset_cpu(buffer, cpu);
2159
2160         ring_buffer_record_enable(buffer);
2161 }
2162
2163 void tracing_reset_online_cpus(struct array_buffer *buf)
2164 {
2165         struct trace_buffer *buffer = buf->buffer;
2166
2167         if (!buffer)
2168                 return;
2169
2170         ring_buffer_record_disable(buffer);
2171
2172         /* Make sure all commits have finished */
2173         synchronize_rcu();
2174
2175         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2176
2177         ring_buffer_reset_online_cpus(buffer);
2178
2179         ring_buffer_record_enable(buffer);
2180 }
2181
2182 /* Must have trace_types_lock held */
2183 void tracing_reset_all_online_cpus(void)
2184 {
2185         struct trace_array *tr;
2186
2187         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2188                 if (!tr->clear_trace)
2189                         continue;
2190                 tr->clear_trace = false;
2191                 tracing_reset_online_cpus(&tr->array_buffer);
2192 #ifdef CONFIG_TRACER_MAX_TRACE
2193                 tracing_reset_online_cpus(&tr->max_buffer);
2194 #endif
2195         }
2196 }
2197
2198 /*
2199  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2200  * is the tgid last observed corresponding to pid=i.
2201  */
2202 static int *tgid_map;
2203
2204 /* The maximum valid index into tgid_map. */
2205 static size_t tgid_map_max;
2206
2207 #define SAVED_CMDLINES_DEFAULT 128
2208 #define NO_CMDLINE_MAP UINT_MAX
2209 /*
2210  * Preemption must be disabled before acquiring trace_cmdline_lock.
2211  * The various trace_arrays' max_lock must be acquired in a context
2212  * where interrupts are disabled.
2213  */
2214 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2215 struct saved_cmdlines_buffer {
2216         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2217         unsigned *map_cmdline_to_pid;
2218         unsigned cmdline_num;
2219         int cmdline_idx;
2220         char *saved_cmdlines;
2221 };
2222 static struct saved_cmdlines_buffer *savedcmd;
2223
2224 static inline char *get_saved_cmdlines(int idx)
2225 {
2226         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2227 }
2228
2229 static inline void set_cmdline(int idx, const char *cmdline)
2230 {
2231         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2232 }
2233
2234 static int allocate_cmdlines_buffer(unsigned int val,
2235                                     struct saved_cmdlines_buffer *s)
2236 {
2237         s->map_cmdline_to_pid = kmalloc_array(val,
2238                                               sizeof(*s->map_cmdline_to_pid),
2239                                               GFP_KERNEL);
2240         if (!s->map_cmdline_to_pid)
2241                 return -ENOMEM;
2242
2243         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2244         if (!s->saved_cmdlines) {
2245                 kfree(s->map_cmdline_to_pid);
2246                 return -ENOMEM;
2247         }
2248
2249         s->cmdline_idx = 0;
2250         s->cmdline_num = val;
2251         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2252                sizeof(s->map_pid_to_cmdline));
2253         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2254                val * sizeof(*s->map_cmdline_to_pid));
2255
2256         return 0;
2257 }
2258
2259 static int trace_create_savedcmd(void)
2260 {
2261         int ret;
2262
2263         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2264         if (!savedcmd)
2265                 return -ENOMEM;
2266
2267         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2268         if (ret < 0) {
2269                 kfree(savedcmd);
2270                 savedcmd = NULL;
2271                 return -ENOMEM;
2272         }
2273
2274         return 0;
2275 }
2276
2277 int is_tracing_stopped(void)
2278 {
2279         return global_trace.stop_count;
2280 }
2281
2282 /**
2283  * tracing_start - quick start of the tracer
2284  *
2285  * If tracing is enabled but was stopped by tracing_stop,
2286  * this will start the tracer back up.
2287  */
2288 void tracing_start(void)
2289 {
2290         struct trace_buffer *buffer;
2291         unsigned long flags;
2292
2293         if (tracing_disabled)
2294                 return;
2295
2296         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2297         if (--global_trace.stop_count) {
2298                 if (global_trace.stop_count < 0) {
2299                         /* Someone screwed up their debugging */
2300                         WARN_ON_ONCE(1);
2301                         global_trace.stop_count = 0;
2302                 }
2303                 goto out;
2304         }
2305
2306         /* Prevent the buffers from switching */
2307         arch_spin_lock(&global_trace.max_lock);
2308
2309         buffer = global_trace.array_buffer.buffer;
2310         if (buffer)
2311                 ring_buffer_record_enable(buffer);
2312
2313 #ifdef CONFIG_TRACER_MAX_TRACE
2314         buffer = global_trace.max_buffer.buffer;
2315         if (buffer)
2316                 ring_buffer_record_enable(buffer);
2317 #endif
2318
2319         arch_spin_unlock(&global_trace.max_lock);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2323 }
2324
2325 static void tracing_start_tr(struct trace_array *tr)
2326 {
2327         struct trace_buffer *buffer;
2328         unsigned long flags;
2329
2330         if (tracing_disabled)
2331                 return;
2332
2333         /* If global, we need to also start the max tracer */
2334         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2335                 return tracing_start();
2336
2337         raw_spin_lock_irqsave(&tr->start_lock, flags);
2338
2339         if (--tr->stop_count) {
2340                 if (tr->stop_count < 0) {
2341                         /* Someone screwed up their debugging */
2342                         WARN_ON_ONCE(1);
2343                         tr->stop_count = 0;
2344                 }
2345                 goto out;
2346         }
2347
2348         buffer = tr->array_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_enable(buffer);
2351
2352  out:
2353         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2354 }
2355
2356 /**
2357  * tracing_stop - quick stop of the tracer
2358  *
2359  * Light weight way to stop tracing. Use in conjunction with
2360  * tracing_start.
2361  */
2362 void tracing_stop(void)
2363 {
2364         struct trace_buffer *buffer;
2365         unsigned long flags;
2366
2367         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2368         if (global_trace.stop_count++)
2369                 goto out;
2370
2371         /* Prevent the buffers from switching */
2372         arch_spin_lock(&global_trace.max_lock);
2373
2374         buffer = global_trace.array_buffer.buffer;
2375         if (buffer)
2376                 ring_buffer_record_disable(buffer);
2377
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379         buffer = global_trace.max_buffer.buffer;
2380         if (buffer)
2381                 ring_buffer_record_disable(buffer);
2382 #endif
2383
2384         arch_spin_unlock(&global_trace.max_lock);
2385
2386  out:
2387         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2388 }
2389
2390 static void tracing_stop_tr(struct trace_array *tr)
2391 {
2392         struct trace_buffer *buffer;
2393         unsigned long flags;
2394
2395         /* If global, we need to also stop the max tracer */
2396         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2397                 return tracing_stop();
2398
2399         raw_spin_lock_irqsave(&tr->start_lock, flags);
2400         if (tr->stop_count++)
2401                 goto out;
2402
2403         buffer = tr->array_buffer.buffer;
2404         if (buffer)
2405                 ring_buffer_record_disable(buffer);
2406
2407  out:
2408         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2409 }
2410
2411 static int trace_save_cmdline(struct task_struct *tsk)
2412 {
2413         unsigned tpid, idx;
2414
2415         /* treat recording of idle task as a success */
2416         if (!tsk->pid)
2417                 return 1;
2418
2419         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2420
2421         /*
2422          * It's not the end of the world if we don't get
2423          * the lock, but we also don't want to spin
2424          * nor do we want to disable interrupts,
2425          * so if we miss here, then better luck next time.
2426          *
2427          * This is called from within the scheduler and wakeup paths, so
2428          * interrupts had better be disabled and the run queue lock held.
2429          */
2430         lockdep_assert_preemption_disabled();
2431         if (!arch_spin_trylock(&trace_cmdline_lock))
2432                 return 0;
2433
2434         idx = savedcmd->map_pid_to_cmdline[tpid];
2435         if (idx == NO_CMDLINE_MAP) {
2436                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2437
2438                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2439                 savedcmd->cmdline_idx = idx;
2440         }
2441
2442         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443         set_cmdline(idx, tsk->comm);
2444
2445         arch_spin_unlock(&trace_cmdline_lock);
2446
2447         return 1;
2448 }
2449
2450 static void __trace_find_cmdline(int pid, char comm[])
2451 {
2452         unsigned map;
2453         int tpid;
2454
2455         if (!pid) {
2456                 strcpy(comm, "<idle>");
2457                 return;
2458         }
2459
2460         if (WARN_ON_ONCE(pid < 0)) {
2461                 strcpy(comm, "<XXX>");
2462                 return;
2463         }
2464
2465         tpid = pid & (PID_MAX_DEFAULT - 1);
2466         map = savedcmd->map_pid_to_cmdline[tpid];
2467         if (map != NO_CMDLINE_MAP) {
2468                 tpid = savedcmd->map_cmdline_to_pid[map];
2469                 if (tpid == pid) {
2470                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2471                         return;
2472                 }
2473         }
2474         strcpy(comm, "<...>");
2475 }
2476
2477 void trace_find_cmdline(int pid, char comm[])
2478 {
2479         preempt_disable();
2480         arch_spin_lock(&trace_cmdline_lock);
2481
2482         __trace_find_cmdline(pid, comm);
2483
2484         arch_spin_unlock(&trace_cmdline_lock);
2485         preempt_enable();
2486 }
2487
2488 static int *trace_find_tgid_ptr(int pid)
2489 {
2490         /*
2491          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492          * if we observe a non-NULL tgid_map then we also observe the correct
2493          * tgid_map_max.
2494          */
2495         int *map = smp_load_acquire(&tgid_map);
2496
2497         if (unlikely(!map || pid > tgid_map_max))
2498                 return NULL;
2499
2500         return &map[pid];
2501 }
2502
2503 int trace_find_tgid(int pid)
2504 {
2505         int *ptr = trace_find_tgid_ptr(pid);
2506
2507         return ptr ? *ptr : 0;
2508 }
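
/*
 * Illustrative sketch (an assumption, not code from this file): output
 * code that wants to turn a recorded pid back into human-readable task
 * info would use the two lookup helpers above; 's' and 'ent' stand for
 * the caller's trace_seq and trace entry:
 *
 *	char comm[TASK_COMM_LEN];
 *	int tgid;
 *
 *	trace_find_cmdline(ent->pid, comm);
 *	tgid = trace_find_tgid(ent->pid);
 *	trace_seq_printf(s, "%s-%d (tgid %d)", comm, ent->pid, tgid);
 */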
2509
2510 static int trace_save_tgid(struct task_struct *tsk)
2511 {
2512         int *ptr;
2513
2514         /* treat recording of idle task as a success */
2515         if (!tsk->pid)
2516                 return 1;
2517
2518         ptr = trace_find_tgid_ptr(tsk->pid);
2519         if (!ptr)
2520                 return 0;
2521
2522         *ptr = tsk->tgid;
2523         return 1;
2524 }
2525
2526 static bool tracing_record_taskinfo_skip(int flags)
2527 {
2528         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2529                 return true;
2530         if (!__this_cpu_read(trace_taskinfo_save))
2531                 return true;
2532         return false;
2533 }
2534
2535 /**
2536  * tracing_record_taskinfo - record the task info of a task
2537  *
2538  * @task:  task to record
2539  * @flags: TRACE_RECORD_CMDLINE for recording comm
2540  *         TRACE_RECORD_TGID for recording tgid
2541  */
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2543 {
2544         bool done;
2545
2546         if (tracing_record_taskinfo_skip(flags))
2547                 return;
2548
2549         /*
2550          * Record as much task information as possible. If some fail, continue
2551          * to try to record the others.
2552          */
2553         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2555
2556         /* If recording any information failed, retry again soon. */
2557         if (!done)
2558                 return;
2559
2560         __this_cpu_write(trace_taskinfo_save, false);
2561 }
2562
2563 /**
2564  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2565  *
2566  * @prev: previous task during sched_switch
2567  * @next: next task during sched_switch
2568  * @flags: TRACE_RECORD_CMDLINE for recording comm
2569  *         TRACE_RECORD_TGID for recording tgid
2570  */
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572                                           struct task_struct *next, int flags)
2573 {
2574         bool done;
2575
2576         if (tracing_record_taskinfo_skip(flags))
2577                 return;
2578
2579         /*
2580          * Record as much task information as possible. If some fail, continue
2581          * to try to record the others.
2582          */
2583         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2587
2588         /* If recording any information failed, retry again soon. */
2589         if (!done)
2590                 return;
2591
2592         __this_cpu_write(trace_taskinfo_save, false);
2593 }
2594
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2597 {
2598         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2599 }
2600
2601 void tracing_record_tgid(struct task_struct *task)
2602 {
2603         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2604 }
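
/*
 * Illustrative sketch (an assumption, not code from this file): a
 * sched_switch probe that wants both comms and tgids cached for later
 * output could pass both flags to the sched_switch helper; 'prev' and
 * 'next' are the probe's own arguments:
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *			TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */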
2605
2606 /*
2607  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609  * simplifies those functions and keeps them in sync.
2610  */
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2612 {
2613         return trace_seq_has_overflowed(s) ?
2614                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2615 }
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
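
/*
 * Illustrative sketch (an assumption; the callback signature here is
 * simplified): an event output callback typically ends with
 * trace_handle_return() so that a trace_seq overflow is reported as a
 * partial line:
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my event fired\n");
 *		return trace_handle_return(s);
 *	}
 */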
2617
2618 static unsigned short migration_disable_value(void)
2619 {
2620 #if defined(CONFIG_SMP)
2621         return current->migration_disabled;
2622 #else
2623         return 0;
2624 #endif
2625 }
2626
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2628 {
2629         unsigned int trace_flags = irqs_status;
2630         unsigned int pc;
2631
2632         pc = preempt_count();
2633
2634         if (pc & NMI_MASK)
2635                 trace_flags |= TRACE_FLAG_NMI;
2636         if (pc & HARDIRQ_MASK)
2637                 trace_flags |= TRACE_FLAG_HARDIRQ;
2638         if (in_serving_softirq())
2639                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2640         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641                 trace_flags |= TRACE_FLAG_BH_OFF;
2642
2643         if (tif_need_resched())
2644                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645         if (test_preempt_need_resched())
2646                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2649 }
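
/*
 * Descriptive note on the packing above: the returned word carries the
 * preempt count (capped at 0xf) in bits 0-3, the migration-disable depth
 * (also capped at 0xf) in bits 4-7, and the TRACE_FLAG_* bits shifted up
 * into bits 16 and above. For example, a preempt depth of 2, one level
 * of migrate_disable() and only TRACE_FLAG_HARDIRQ set in the flags
 * yields (TRACE_FLAG_HARDIRQ << 16) | (1 << 4) | 2.
 */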
2650
2651 struct ring_buffer_event *
2652 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2653                           int type,
2654                           unsigned long len,
2655                           unsigned int trace_ctx)
2656 {
2657         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2658 }
2659
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2662 static int trace_buffered_event_ref;
2663
2664 /**
2665  * trace_buffered_event_enable - enable buffering events
2666  *
2667  * When events are being filtered, it is quicker to write the event
2668  * data into a temporary buffer when there is a good chance that the
2669  * event will not be committed. Discarding a reserved event from the
2670  * ring buffer is not as fast as committing it, and is much slower
2671  * than copying a buffered event on commit.
2672  *
2673  * When events are to be filtered, allocate per-CPU buffers to write
2674  * the event data into. If the event is filtered, it is simply
2675  * dropped; otherwise the entire event data is committed to the ring
2676  * buffer in one shot.
2677  */
2678 void trace_buffered_event_enable(void)
2679 {
2680         struct ring_buffer_event *event;
2681         struct page *page;
2682         int cpu;
2683
2684         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2685
2686         if (trace_buffered_event_ref++)
2687                 return;
2688
2689         for_each_tracing_cpu(cpu) {
2690                 page = alloc_pages_node(cpu_to_node(cpu),
2691                                         GFP_KERNEL | __GFP_NORETRY, 0);
2692                 if (!page)
2693                         goto failed;
2694
2695                 event = page_address(page);
2696                 memset(event, 0, sizeof(*event));
2697
2698                 per_cpu(trace_buffered_event, cpu) = event;
2699
2700                 preempt_disable();
2701                 if (cpu == smp_processor_id() &&
2702                     __this_cpu_read(trace_buffered_event) !=
2703                     per_cpu(trace_buffered_event, cpu))
2704                         WARN_ON_ONCE(1);
2705                 preempt_enable();
2706         }
2707
2708         return;
2709  failed:
2710         trace_buffered_event_disable();
2711 }
2712
2713 static void enable_trace_buffered_event(void *data)
2714 {
2715         /* Probably not needed, but do it anyway */
2716         smp_rmb();
2717         this_cpu_dec(trace_buffered_event_cnt);
2718 }
2719
2720 static void disable_trace_buffered_event(void *data)
2721 {
2722         this_cpu_inc(trace_buffered_event_cnt);
2723 }
2724
2725 /**
2726  * trace_buffered_event_disable - disable buffering events
2727  *
2728  * When a filter is removed, it is faster to not use the buffered
2729  * events, and to commit directly into the ring buffer. Free up
2730  * the temp buffers when there are no more users. This requires
2731  * special synchronization with current events.
2732  */
2733 void trace_buffered_event_disable(void)
2734 {
2735         int cpu;
2736
2737         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2738
2739         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2740                 return;
2741
2742         if (--trace_buffered_event_ref)
2743                 return;
2744
2745         preempt_disable();
2746         /* For each CPU, set the buffer as used. */
2747         smp_call_function_many(tracing_buffer_mask,
2748                                disable_trace_buffered_event, NULL, 1);
2749         preempt_enable();
2750
2751         /* Wait for all current users to finish */
2752         synchronize_rcu();
2753
2754         for_each_tracing_cpu(cpu) {
2755                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756                 per_cpu(trace_buffered_event, cpu) = NULL;
2757         }
2758         /*
2759          * Make sure trace_buffered_event is NULL before clearing
2760          * trace_buffered_event_cnt.
2761          */
2762         smp_wmb();
2763
2764         preempt_disable();
2765         /* Do the work on each cpu */
2766         smp_call_function_many(tracing_buffer_mask,
2767                                enable_trace_buffered_event, NULL, 1);
2768         preempt_enable();
2769 }
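
/*
 * Illustrative sketch (an assumption, not code from this file): the two
 * calls above are meant to be paired under event_mutex by the code that
 * installs and removes event filters, e.g.:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	// ... install the filter on the event file ...
 *	mutex_unlock(&event_mutex);
 *
 * and later, when the filter is torn down:
 *
 *	mutex_lock(&event_mutex);
 *	// ... remove the filter ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */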
2770
2771 static struct trace_buffer *temp_buffer;
2772
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775                           struct trace_event_file *trace_file,
2776                           int type, unsigned long len,
2777                           unsigned int trace_ctx)
2778 {
2779         struct ring_buffer_event *entry;
2780         struct trace_array *tr = trace_file->tr;
2781         int val;
2782
2783         *current_rb = tr->array_buffer.buffer;
2784
2785         if (!tr->no_filter_buffering_ref &&
2786             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787                 preempt_disable_notrace();
2788                 /*
2789                  * Filtering is on, so try to use the per cpu buffer first.
2790                  * This buffer will simulate a ring_buffer_event,
2791                  * where the type_len is zero and the array[0] will
2792                  * hold the full length.
2793          * (see include/linux/ring_buffer.h for details on
2794                  *  how the ring_buffer_event is structured).
2795                  *
2796                  * Using a temp buffer during filtering and copying it
2797                  * on a matched filter is quicker than writing directly
2798                  * into the ring buffer and then discarding it when
2799                  * it doesn't match. That is because the discard
2800                  * requires several atomic operations to get right.
2801                  * Copying on match and doing nothing on a failed match
2802                  * is still quicker than no copy on match, but having
2803                  * to discard out of the ring buffer on a failed match.
2804                  */
2805                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2806                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2807
2808                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2809
2810                         /*
2811                          * Preemption is disabled, but interrupts and NMIs
2812                          * can still come in now. If that happens after
2813                          * the above increment, then it will have to go
2814                          * back to the old method of allocating the event
2815                          * on the ring buffer, and if the filter fails, it
2816                          * will have to call ring_buffer_discard_commit()
2817                          * to remove it.
2818                          *
2819                          * Need to also check the unlikely case that the
2820                          * length is bigger than the temp buffer size.
2821                          * If that happens, then the reserve is pretty much
2822                          * guaranteed to fail, as the ring buffer currently
2823                          * only allows events less than a page. But that may
2824                          * change in the future, so let the ring buffer reserve
2825                          * handle the failure in that case.
2826                          */
2827                         if (val == 1 && likely(len <= max_len)) {
2828                                 trace_event_setup(entry, type, trace_ctx);
2829                                 entry->array[0] = len;
2830                                 /* Return with preemption disabled */
2831                                 return entry;
2832                         }
2833                         this_cpu_dec(trace_buffered_event_cnt);
2834                 }
2835                 /* __trace_buffer_lock_reserve() disables preemption */
2836                 preempt_enable_notrace();
2837         }
2838
2839         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2840                                             trace_ctx);
2841         /*
2842          * If tracing is off, but we have triggers enabled
2843          * we still need to look at the event data. Use the temp_buffer
2844          * to store the trace event for the trigger to use. It's recursion
2845          * safe and will not be recorded anywhere.
2846          */
2847         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848                 *current_rb = temp_buffer;
2849                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2850                                                     trace_ctx);
2851         }
2852         return entry;
2853 }
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2855
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2858
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2860 {
2861         struct trace_event_call *event_call;
2862         struct trace_event_file *file;
2863         struct trace_event *event;
2864         unsigned long flags;
2865         struct trace_iterator *iter = tracepoint_print_iter;
2866
2867         /* We should never get here if iter is NULL */
2868         if (WARN_ON_ONCE(!iter))
2869                 return;
2870
2871         event_call = fbuffer->trace_file->event_call;
2872         if (!event_call || !event_call->event.funcs ||
2873             !event_call->event.funcs->trace)
2874                 return;
2875
2876         file = fbuffer->trace_file;
2877         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879              !filter_match_preds(file->filter, fbuffer->entry)))
2880                 return;
2881
2882         event = &fbuffer->trace_file->event_call->event;
2883
2884         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885         trace_seq_init(&iter->seq);
2886         iter->ent = fbuffer->entry;
2887         event_call->event.funcs->trace(iter, 0, event);
2888         trace_seq_putc(&iter->seq, 0);
2889         printk("%s", iter->seq.buffer);
2890
2891         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2892 }
2893
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895                              void *buffer, size_t *lenp,
2896                              loff_t *ppos)
2897 {
2898         int save_tracepoint_printk;
2899         int ret;
2900
2901         mutex_lock(&tracepoint_printk_mutex);
2902         save_tracepoint_printk = tracepoint_printk;
2903
2904         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2905
2906         /*
2907          * This will force exiting early, as tracepoint_printk
2908          * is always zero when tracepoint_print_iter is not allocated
2909          */
2910         if (!tracepoint_print_iter)
2911                 tracepoint_printk = 0;
2912
2913         if (save_tracepoint_printk == tracepoint_printk)
2914                 goto out;
2915
2916         if (tracepoint_printk)
2917                 static_key_enable(&tracepoint_printk_key.key);
2918         else
2919                 static_key_disable(&tracepoint_printk_key.key);
2920
2921  out:
2922         mutex_unlock(&tracepoint_printk_mutex);
2923
2924         return ret;
2925 }
2926
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2928 {
2929         enum event_trigger_type tt = ETT_NONE;
2930         struct trace_event_file *file = fbuffer->trace_file;
2931
2932         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933                         fbuffer->entry, &tt))
2934                 goto discard;
2935
2936         if (static_key_false(&tracepoint_printk_key.key))
2937                 output_printk(fbuffer);
2938
2939         if (static_branch_unlikely(&trace_event_exports_enabled))
2940                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2941
2942         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2944
2945 discard:
2946         if (tt)
2947                 event_triggers_post_call(file, tt);
2948
2949 }
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2951
2952 /*
2953  * Skip 3:
2954  *
2955  *   trace_buffer_unlock_commit_regs()
2956  *   trace_event_buffer_commit()
2957  *   trace_event_raw_event_xxx()
2958  */
2959 # define STACK_SKIP 3
2960
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962                                      struct trace_buffer *buffer,
2963                                      struct ring_buffer_event *event,
2964                                      unsigned int trace_ctx,
2965                                      struct pt_regs *regs)
2966 {
2967         __buffer_unlock_commit(buffer, event);
2968
2969         /*
2970          * If regs is not set, then skip the necessary functions.
2971          * Note, we can still get here via blktrace, wakeup tracer
2972          * and mmiotrace, but that's ok if they lose a function or
2973          * two. They are not that meaningful.
2974          */
2975         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976         ftrace_trace_userstack(tr, buffer, trace_ctx);
2977 }
2978
2979 /*
2980  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2981  */
2982 void
2983 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2984                                    struct ring_buffer_event *event)
2985 {
2986         __buffer_unlock_commit(buffer, event);
2987 }
2988
2989 void
2990 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2991                parent_ip, unsigned int trace_ctx)
2992 {
2993         struct trace_event_call *call = &event_function;
2994         struct trace_buffer *buffer = tr->array_buffer.buffer;
2995         struct ring_buffer_event *event;
2996         struct ftrace_entry *entry;
2997
2998         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2999                                             trace_ctx);
3000         if (!event)
3001                 return;
3002         entry   = ring_buffer_event_data(event);
3003         entry->ip                       = ip;
3004         entry->parent_ip                = parent_ip;
3005
3006         if (!call_filter_check_discard(call, entry, buffer, event)) {
3007                 if (static_branch_unlikely(&trace_function_exports_enabled))
3008                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009                 __buffer_unlock_commit(buffer, event);
3010         }
3011 }
3012
3013 #ifdef CONFIG_STACKTRACE
3014
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING   4
3017
3018 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3019
3020 struct ftrace_stack {
3021         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3022 };
3023
3024
3025 struct ftrace_stacks {
3026         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3027 };
3028
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3031
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033                                  unsigned int trace_ctx,
3034                                  int skip, struct pt_regs *regs)
3035 {
3036         struct trace_event_call *call = &event_kernel_stack;
3037         struct ring_buffer_event *event;
3038         unsigned int size, nr_entries;
3039         struct ftrace_stack *fstack;
3040         struct stack_entry *entry;
3041         int stackidx;
3042
3043         /*
3044          * Add one, for this function and the call to stack_trace_save().
3045          * If regs is set, then these functions will not be in the way.
3046          */
3047 #ifndef CONFIG_UNWINDER_ORC
3048         if (!regs)
3049                 skip++;
3050 #endif
3051
3052         preempt_disable_notrace();
3053
3054         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3055
3056         /* This should never happen. If it does, yell once and skip */
3057         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3058                 goto out;
3059
3060         /*
3061          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062          * interrupt will either see the value pre increment or post
3063          * increment. If the interrupt happens pre increment it will have
3064          * restored the counter when it returns.  We just need a barrier to
3065          * keep gcc from moving things around.
3066          */
3067         barrier();
3068
3069         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070         size = ARRAY_SIZE(fstack->calls);
3071
3072         if (regs) {
3073                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3074                                                    size, skip);
3075         } else {
3076                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3077         }
3078
3079         size = nr_entries * sizeof(unsigned long);
3080         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3082                                     trace_ctx);
3083         if (!event)
3084                 goto out;
3085         entry = ring_buffer_event_data(event);
3086
3087         memcpy(&entry->caller, fstack->calls, size);
3088         entry->size = nr_entries;
3089
3090         if (!call_filter_check_discard(call, entry, buffer, event))
3091                 __buffer_unlock_commit(buffer, event);
3092
3093  out:
3094         /* Again, don't let gcc optimize things here */
3095         barrier();
3096         __this_cpu_dec(ftrace_stack_reserve);
3097         preempt_enable_notrace();
3098
3099 }
3100
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102                                       struct trace_buffer *buffer,
3103                                       unsigned int trace_ctx,
3104                                       int skip, struct pt_regs *regs)
3105 {
3106         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3107                 return;
3108
3109         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3110 }
3111
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3113                    int skip)
3114 {
3115         struct trace_buffer *buffer = tr->array_buffer.buffer;
3116
3117         if (rcu_is_watching()) {
3118                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3119                 return;
3120         }
3121
3122         /*
3123          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124          * but if the above rcu_is_watching() failed, then the NMI
3125          * triggered someplace critical, and ct_irq_enter() should
3126          * not be called from NMI.
3127          */
3128         if (unlikely(in_nmi()))
3129                 return;
3130
3131         ct_irq_enter_irqson();
3132         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133         ct_irq_exit_irqson();
3134 }
3135
3136 /**
3137  * trace_dump_stack - record a stack back trace in the trace buffer
3138  * @skip: Number of functions to skip (helper handlers)
3139  */
3140 void trace_dump_stack(int skip)
3141 {
3142         if (tracing_disabled || tracing_selftest_running)
3143                 return;
3144
3145 #ifndef CONFIG_UNWINDER_ORC
3146         /* Skip 1 to skip this function. */
3147         skip++;
3148 #endif
3149         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3150                              tracing_gen_ctx(), skip, NULL);
3151 }
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
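
/*
 * Illustrative sketch (an assumption, not code from this file): a
 * debugging hack placed in a suspect code path can drop a kernel stack
 * trace into the trace buffer instead of the console:
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);
 *
 * Passing 0 records the backtrace starting roughly at the caller; a
 * positive skip value drops that many additional helper frames.
 */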
3153
3154 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3155 static DEFINE_PER_CPU(int, user_stack_count);
3156
3157 static void
3158 ftrace_trace_userstack(struct trace_array *tr,
3159                        struct trace_buffer *buffer, unsigned int trace_ctx)
3160 {
3161         struct trace_event_call *call = &event_user_stack;
3162         struct ring_buffer_event *event;
3163         struct userstack_entry *entry;
3164
3165         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3166                 return;
3167
3168         /*
3169          * NMIs cannot handle page faults, even with fixups.
3170          * Saving the user stack can (and often does) fault.
3171          */
3172         if (unlikely(in_nmi()))
3173                 return;
3174
3175         /*
3176          * prevent recursion, since the user stack tracing may
3177          * trigger other kernel events.
3178          */
3179         preempt_disable();
3180         if (__this_cpu_read(user_stack_count))
3181                 goto out;
3182
3183         __this_cpu_inc(user_stack_count);
3184
3185         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3186                                             sizeof(*entry), trace_ctx);
3187         if (!event)
3188                 goto out_drop_count;
3189         entry   = ring_buffer_event_data(event);
3190
3191         entry->tgid             = current->tgid;
3192         memset(&entry->caller, 0, sizeof(entry->caller));
3193
3194         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3195         if (!call_filter_check_discard(call, entry, buffer, event))
3196                 __buffer_unlock_commit(buffer, event);
3197
3198  out_drop_count:
3199         __this_cpu_dec(user_stack_count);
3200  out:
3201         preempt_enable();
3202 }
3203 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3204 static void ftrace_trace_userstack(struct trace_array *tr,
3205                                    struct trace_buffer *buffer,
3206                                    unsigned int trace_ctx)
3207 {
3208 }
3209 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3210
3211 #endif /* CONFIG_STACKTRACE */
3212
3213 static inline void
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215                           unsigned long long delta)
3216 {
3217         entry->bottom_delta_ts = delta & U32_MAX;
3218         entry->top_delta_ts = (delta >> 32);
3219 }
3220
3221 void trace_last_func_repeats(struct trace_array *tr,
3222                              struct trace_func_repeats *last_info,
3223                              unsigned int trace_ctx)
3224 {
3225         struct trace_buffer *buffer = tr->array_buffer.buffer;
3226         struct func_repeats_entry *entry;
3227         struct ring_buffer_event *event;
3228         u64 delta;
3229
3230         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231                                             sizeof(*entry), trace_ctx);
3232         if (!event)
3233                 return;
3234
3235         delta = ring_buffer_event_time_stamp(buffer, event) -
3236                 last_info->ts_last_call;
3237
3238         entry = ring_buffer_event_data(event);
3239         entry->ip = last_info->ip;
3240         entry->parent_ip = last_info->parent_ip;
3241         entry->count = last_info->count;
3242         func_repeats_set_delta_ts(entry, delta);
3243
3244         __buffer_unlock_commit(buffer, event);
3245 }
3246
3247 /* created for use with alloc_percpu */
3248 struct trace_buffer_struct {
3249         int nesting;
3250         char buffer[4][TRACE_BUF_SIZE];
3251 };
3252
3253 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3254
3255 /*
3256  * This allows for lockless recording: one buffer per context level
3257  * (task, softirq, hard irq, NMI). If nested deeper, this returns NULL.
3258  */
3259 static char *get_trace_buf(void)
3260 {
3261         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3262
3263         if (!trace_percpu_buffer || buffer->nesting >= 4)
3264                 return NULL;
3265
3266         buffer->nesting++;
3267
3268         /* Interrupts must see nesting incremented before we use the buffer */
3269         barrier();
3270         return &buffer->buffer[buffer->nesting - 1][0];
3271 }
3272
3273 static void put_trace_buf(void)
3274 {
3275         /* Don't let the decrement of nesting leak before this */
3276         barrier();
3277         this_cpu_dec(trace_percpu_buffer->nesting);
3278 }
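
/*
 * Usage pattern for get_trace_buf()/put_trace_buf() above (illustrative
 * sketch only, mirroring callers below such as trace_vbprintk()): the
 * buffer must be released in the same context that acquired it, with
 * preemption disabled around the pair.
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *		...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */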
3279
3280 static int alloc_percpu_trace_buffer(void)
3281 {
3282         struct trace_buffer_struct __percpu *buffers;
3283
3284         if (trace_percpu_buffer)
3285                 return 0;
3286
3287         buffers = alloc_percpu(struct trace_buffer_struct);
3288         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3289                 return -ENOMEM;
3290
3291         trace_percpu_buffer = buffers;
3292         return 0;
3293 }
3294
3295 static int buffers_allocated;
3296
3297 void trace_printk_init_buffers(void)
3298 {
3299         if (buffers_allocated)
3300                 return;
3301
3302         if (alloc_percpu_trace_buffer())
3303                 return;
3304
3305         /* trace_printk() is for debug use only. Don't use it in production. */
3306
3307         pr_warn("\n");
3308         pr_warn("**********************************************************\n");
3309         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3310         pr_warn("**                                                      **\n");
3311         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3312         pr_warn("**                                                      **\n");
3313         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3314         pr_warn("** unsafe for production use.                           **\n");
3315         pr_warn("**                                                      **\n");
3316         pr_warn("** If you see this message and you are not debugging    **\n");
3317         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3318         pr_warn("**                                                      **\n");
3319         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3320         pr_warn("**********************************************************\n");
3321
3322         /* Expand the buffers to set size */
3323         tracing_update_buffers();
3324
3325         buffers_allocated = 1;
3326
3327         /*
3328          * trace_printk_init_buffers() can be called by modules.
3329          * If that happens and global_trace.array_buffer.buffer is
3330          * already allocated, the core buffers are up and the caller is
3331          * module code, so start cmdline recording directly here.
3332          */
3333         if (global_trace.array_buffer.buffer)
3334                 tracing_start_cmdline_record();
3335 }
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3337
3338 void trace_printk_start_comm(void)
3339 {
3340         /* Start tracing comms if trace printk is set */
3341         if (!buffers_allocated)
3342                 return;
3343         tracing_start_cmdline_record();
3344 }
3345
3346 static void trace_printk_start_stop_comm(int enabled)
3347 {
3348         if (!buffers_allocated)
3349                 return;
3350
3351         if (enabled)
3352                 tracing_start_cmdline_record();
3353         else
3354                 tracing_stop_cmdline_record();
3355 }
3356
3357 /**
3358  * trace_vbprintk - write binary msg to tracing buffer
3359  * @ip:    The address of the caller
3360  * @fmt:   The string format to write to the buffer
3361  * @args:  Arguments for @fmt
3362  */
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3364 {
3365         struct trace_event_call *call = &event_bprint;
3366         struct ring_buffer_event *event;
3367         struct trace_buffer *buffer;
3368         struct trace_array *tr = &global_trace;
3369         struct bprint_entry *entry;
3370         unsigned int trace_ctx;
3371         char *tbuffer;
3372         int len = 0, size;
3373
3374         if (unlikely(tracing_selftest_running || tracing_disabled))
3375                 return 0;
3376
3377         /* Don't pollute graph traces with trace_vprintk internals */
3378         pause_graph_tracing();
3379
3380         trace_ctx = tracing_gen_ctx();
3381         preempt_disable_notrace();
3382
3383         tbuffer = get_trace_buf();
3384         if (!tbuffer) {
3385                 len = 0;
3386                 goto out_nobuffer;
3387         }
3388
3389         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3390
3391         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3392                 goto out_put;
3393
3394         size = sizeof(*entry) + sizeof(u32) * len;
3395         buffer = tr->array_buffer.buffer;
3396         ring_buffer_nest_start(buffer);
3397         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3398                                             trace_ctx);
3399         if (!event)
3400                 goto out;
3401         entry = ring_buffer_event_data(event);
3402         entry->ip                       = ip;
3403         entry->fmt                      = fmt;
3404
3405         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406         if (!call_filter_check_discard(call, entry, buffer, event)) {
3407                 __buffer_unlock_commit(buffer, event);
3408                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3409         }
3410
3411 out:
3412         ring_buffer_nest_end(buffer);
3413 out_put:
3414         put_trace_buf();
3415
3416 out_nobuffer:
3417         preempt_enable_notrace();
3418         unpause_graph_tracing();
3419
3420         return len;
3421 }
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
3423
3424 __printf(3, 0)
3425 static int
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427                       unsigned long ip, const char *fmt, va_list args)
3428 {
3429         struct trace_event_call *call = &event_print;
3430         struct ring_buffer_event *event;
3431         int len = 0, size;
3432         struct print_entry *entry;
3433         unsigned int trace_ctx;
3434         char *tbuffer;
3435
3436         if (tracing_disabled || tracing_selftest_running)
3437                 return 0;
3438
3439         /* Don't pollute graph traces with trace_vprintk internals */
3440         pause_graph_tracing();
3441
3442         trace_ctx = tracing_gen_ctx();
3443         preempt_disable_notrace();
3444
3445
3446         tbuffer = get_trace_buf();
3447         if (!tbuffer) {
3448                 len = 0;
3449                 goto out_nobuffer;
3450         }
3451
3452         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3453
3454         size = sizeof(*entry) + len + 1;
3455         ring_buffer_nest_start(buffer);
3456         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3457                                             trace_ctx);
3458         if (!event)
3459                 goto out;
3460         entry = ring_buffer_event_data(event);
3461         entry->ip = ip;
3462
3463         memcpy(&entry->buf, tbuffer, len + 1);
3464         if (!call_filter_check_discard(call, entry, buffer, event)) {
3465                 __buffer_unlock_commit(buffer, event);
3466                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3467         }
3468
3469 out:
3470         ring_buffer_nest_end(buffer);
3471         put_trace_buf();
3472
3473 out_nobuffer:
3474         preempt_enable_notrace();
3475         unpause_graph_tracing();
3476
3477         return len;
3478 }
3479
3480 __printf(3, 0)
3481 int trace_array_vprintk(struct trace_array *tr,
3482                         unsigned long ip, const char *fmt, va_list args)
3483 {
3484         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3485 }
3486
3487 /**
3488  * trace_array_printk - Print a message to a specific instance
3489  * @tr: The instance trace_array descriptor
3490  * @ip: The instruction pointer that this is called from.
3491  * @fmt: The format to print (printf format)
3492  *
3493  * If a subsystem sets up its own instance, it may printk strings
3494  * into that instance's buffer using this function. Note, this
3495  * function will not write into the top level buffer (use
3496  * trace_printk() for that), as the top level buffer should only
3497  * contain events that can be individually disabled.
3498  * trace_printk() is only used for debugging a kernel, and should
3499  * never be incorporated into normal use.
3500  *
3501  * trace_array_printk() can be used, as it will not add noise to the
3502  * top level tracing buffer.
3503  *
3504  * Note, trace_array_init_printk() must be called on @tr before this
3505  * can be used.
3506  */
3507 __printf(3, 0)
3508 int trace_array_printk(struct trace_array *tr,
3509                        unsigned long ip, const char *fmt, ...)
3510 {
3511         int ret;
3512         va_list ap;
3513
3514         if (!tr)
3515                 return -ENOENT;
3516
3517         /* This is only allowed for created instances */
3518         if (tr == &global_trace)
3519                 return 0;
3520
3521         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3522                 return 0;
3523
3524         va_start(ap, fmt);
3525         ret = trace_array_vprintk(tr, ip, fmt, ap);
3526         va_end(ap);
3527         return ret;
3528 }
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3530
3531 /**
3532  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533  * @tr: The trace array to initialize the buffers for
3534  *
3535  * As trace_array_printk() only writes into instances, calls to it
3536  * are OK to keep in the kernel (unlike trace_printk()). This must be
3537  * called before trace_array_printk() can be used on a trace_array.
3538  */
3539 int trace_array_init_printk(struct trace_array *tr)
3540 {
3541         if (!tr)
3542                 return -ENOENT;
3543
3544         /* This is only allowed for created instances */
3545         if (tr == &global_trace)
3546                 return -EINVAL;
3547
3548         return alloc_percpu_trace_buffer();
3549 }
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
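
/*
 * Illustrative example only (not part of the original file): a module
 * that created its own instance might use trace_array_init_printk() and
 * trace_array_printk() above like this. The instance name "example" and
 * the variable names are hypothetical, and helper signatures may vary
 * across kernel versions.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example");
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "value=%d\n", value);
 *	if (tr)
 *		trace_array_put(tr);
 */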
3551
3552 __printf(3, 4)
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554                            unsigned long ip, const char *fmt, ...)
3555 {
3556         int ret;
3557         va_list ap;
3558
3559         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3560                 return 0;
3561
3562         va_start(ap, fmt);
3563         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3564         va_end(ap);
3565         return ret;
3566 }
3567
3568 __printf(2, 0)
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3570 {
3571         return trace_array_vprintk(&global_trace, ip, fmt, args);
3572 }
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3574
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3576 {
3577         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3578
3579         iter->idx++;
3580         if (buf_iter)
3581                 ring_buffer_iter_advance(buf_iter);
3582 }
3583
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586                 unsigned long *lost_events)
3587 {
3588         struct ring_buffer_event *event;
3589         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3590
3591         if (buf_iter) {
3592                 event = ring_buffer_iter_peek(buf_iter, ts);
3593                 if (lost_events)
3594                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595                                 (unsigned long)-1 : 0;
3596         } else {
3597                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3598                                          lost_events);
3599         }
3600
3601         if (event) {
3602                 iter->ent_size = ring_buffer_event_length(event);
3603                 return ring_buffer_event_data(event);
3604         }
3605         iter->ent_size = 0;
3606         return NULL;
3607 }
3608
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611                   unsigned long *missing_events, u64 *ent_ts)
3612 {
3613         struct trace_buffer *buffer = iter->array_buffer->buffer;
3614         struct trace_entry *ent, *next = NULL;
3615         unsigned long lost_events = 0, next_lost = 0;
3616         int cpu_file = iter->cpu_file;
3617         u64 next_ts = 0, ts;
3618         int next_cpu = -1;
3619         int next_size = 0;
3620         int cpu;
3621
3622         /*
3623          * If we are in a per_cpu trace file, don't bother iterating over
3624          * all CPUs; peek at that CPU directly.
3625          */
3626         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3628                         return NULL;
3629                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3630                 if (ent_cpu)
3631                         *ent_cpu = cpu_file;
3632
3633                 return ent;
3634         }
3635
3636         for_each_tracing_cpu(cpu) {
3637
3638                 if (ring_buffer_empty_cpu(buffer, cpu))
3639                         continue;
3640
3641                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3642
3643                 /*
3644                  * Pick the entry with the smallest timestamp:
3645                  */
3646                 if (ent && (!next || ts < next_ts)) {
3647                         next = ent;
3648                         next_cpu = cpu;
3649                         next_ts = ts;
3650                         next_lost = lost_events;
3651                         next_size = iter->ent_size;
3652                 }
3653         }
3654
3655         iter->ent_size = next_size;
3656
3657         if (ent_cpu)
3658                 *ent_cpu = next_cpu;
3659
3660         if (ent_ts)
3661                 *ent_ts = next_ts;
3662
3663         if (missing_events)
3664                 *missing_events = next_lost;
3665
3666         return next;
3667 }
3668
3669 #define STATIC_FMT_BUF_SIZE     128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3671
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3673 {
3674         char *tmp;
3675
3676         /*
3677          * iter->tr is NULL when used with tp_printk, in which case
3678          * this can be called where it is not safe to call krealloc().
3679          */
3680         if (!iter->tr || iter->fmt == static_fmt_buf)
3681                 return NULL;
3682
3683         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3684                        GFP_KERNEL);
3685         if (tmp) {
3686                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3687                 iter->fmt = tmp;
3688         }
3689
3690         return tmp;
3691 }
3692
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3695                            bool star, int len)
3696 {
3697         unsigned long addr = (unsigned long)str;
3698         struct trace_event *trace_event;
3699         struct trace_event_call *event;
3700
3701         /* Ignore strings with no length */
3702         if (star && !len)
3703                 return true;
3704
3705         /* OK if part of the event data */
3706         if ((addr >= (unsigned long)iter->ent) &&
3707             (addr < (unsigned long)iter->ent + iter->ent_size))
3708                 return true;
3709
3710         /* OK if part of the temp seq buffer */
3711         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3713                 return true;
3714
3715         /* Core rodata can not be freed */
3716         if (is_kernel_rodata(addr))
3717                 return true;
3718
3719         if (trace_is_tracepoint_string(str))
3720                 return true;
3721
3722         /*
3723          * Now this could be a module event, referencing core module
3724          * data, which is OK.
3725          */
3726         if (!iter->ent)
3727                 return false;
3728
3729         trace_event = ftrace_find_event(iter->ent->type);
3730         if (!trace_event)
3731                 return false;
3732
3733         event = container_of(trace_event, struct trace_event_call, event);
3734         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3735                 return false;
3736
3737         /* Would rather have rodata, but this will suffice */
3738         if (within_module_core(addr, event->module))
3739                 return true;
3740
3741         return false;
3742 }
3743
3744 static const char *show_buffer(struct trace_seq *s)
3745 {
3746         struct seq_buf *seq = &s->seq;
3747
3748         seq_buf_terminate(seq);
3749
3750         return seq->buffer;
3751 }
3752
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3754
3755 static int test_can_verify_check(const char *fmt, ...)
3756 {
3757         char buf[16];
3758         va_list ap;
3759         int ret;
3760
3761         /*
3762          * The verifier depends on vsnprintf() modifying the va_list
3763          * passed to it, i.e. on the va_list being passed by reference.
3764          * Some architectures (like x86_32) pass it by value, in which
3765          * case vsnprintf() does not modify the caller's va_list, and the
3766          * verifier would then need to understand every value that
3767          * vsnprintf() can consume. If it is passed by value, the
3768          * verifier is disabled.
3769          */
3770         va_start(ap, fmt);
3771         vsnprintf(buf, 16, "%d", ap);
3772         ret = va_arg(ap, int);
3773         va_end(ap);
3774
3775         return ret;
3776 }
3777
3778 static void test_can_verify(void)
3779 {
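        /*
         * "%d %d" with arguments 0 and 1: vsnprintf() consumes the first
         * argument (0). If the following va_arg() returns the second
         * argument (1), the va_list was updated by reference and the
         * verifier can track it; a return of 0 means it was passed by
         * value and the verifier must be disabled.
         */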
3780         if (!test_can_verify_check("%d %d", 0, 1)) {
3781                 pr_info("trace event string verifier disabled\n");
3782                 static_branch_inc(&trace_no_verify);
3783         }
3784 }
3785
3786 /**
3787  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3788  * @iter: The iterator that holds the seq buffer and the event being printed
3789  * @fmt: The format used to print the event
3790  * @ap: The va_list holding the data to print from @fmt.
3791  *
3792  * This writes the data into the @iter->seq buffer using the data from
3793  * @fmt and @ap. If the format has a %s, then the source of the string
3794  * is examined to make sure it is safe to print, otherwise it will
3795  * is examined to make sure it is safe to print; otherwise it will
3796  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3797  */
3798 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3799                          va_list ap)
3800 {
3801         const char *p = fmt;
3802         const char *str;
3803         int i, j;
3804
3805         if (WARN_ON_ONCE(!fmt))
3806                 return;
3807
3808         if (static_branch_unlikely(&trace_no_verify))
3809                 goto print;
3810
3811         /* Don't bother checking when doing a ftrace_dump() */
3812         if (iter->fmt == static_fmt_buf)
3813                 goto print;
3814
3815         while (*p) {
3816                 bool star = false;
3817                 int len = 0;
3818
3819                 j = 0;
3820
3821                 /* We only care about %s and variants */
3822                 for (i = 0; p[i]; i++) {
3823                         if (i + 1 >= iter->fmt_size) {
3824                                 /*
3825                                  * If we can't expand the copy buffer,
3826                                  * just print it.
3827                                  */
3828                                 if (!trace_iter_expand_format(iter))
3829                                         goto print;
3830                         }
3831
3832                         if (p[i] == '\\' && p[i+1]) {
3833                                 i++;
3834                                 continue;
3835                         }
3836                         if (p[i] == '%') {
3837                                 /* Need to test cases like %08.*s */
3838                                 for (j = 1; p[i+j]; j++) {
3839                                         if (isdigit(p[i+j]) ||
3840                                             p[i+j] == '.')
3841                                                 continue;
3842                                         if (p[i+j] == '*') {
3843                                                 star = true;
3844                                                 continue;
3845                                         }
3846                                         break;
3847                                 }
3848                                 if (p[i+j] == 's')
3849                                         break;
3850                                 star = false;
3851                         }
3852                         j = 0;
3853                 }
3854                 /* If no %s found then just print normally */
3855                 if (!p[i])
3856                         break;
3857
3858                 /* Copy up to the %s, and print that */
3859                 strncpy(iter->fmt, p, i);
3860                 iter->fmt[i] = '\0';
3861                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3862
3863                 /*
3864                  * If iter->seq is full, the above call no longer guarantees
3865                  * that ap is in sync with fmt processing, and further calls
3866                  * to va_arg() can return wrong positional arguments.
3867                  *
3868                  * Ensure that ap is no longer used in this case.
3869                  */
3870                 if (iter->seq.full) {
3871                         p = "";
3872                         break;
3873                 }
3874
3875                 if (star)
3876                         len = va_arg(ap, int);
3877
3878                 /* The ap now points to the string data of the %s */
3879                 str = va_arg(ap, const char *);
3880
3881                 /*
3882                  * If you hit this warning, it is likely that the
3883                  * trace event in question used %s on a string that
3884                  * was saved at the time of the event, but may not be
3885                  * around when the trace is read. Use __string(),
3886                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3887                  * instead. See samples/trace_events/trace-events-sample.h
3888                  * for reference.
3889                  */
3890                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3891                               "fmt: '%s' current_buffer: '%s'",
3892                               fmt, show_buffer(&iter->seq))) {
3893                         int ret;
3894
3895                         /* Try to safely read the string */
3896                         if (star) {
3897                                 if (len + 1 > iter->fmt_size)
3898                                         len = iter->fmt_size - 1;
3899                                 if (len < 0)
3900                                         len = 0;
3901                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3902                                 iter->fmt[len] = 0;
3903                                 star = false;
3904                         } else {
3905                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3906                                                                   iter->fmt_size);
3907                         }
3908                         if (ret < 0)
3909                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3910                         else
3911                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3912                                                  str, iter->fmt);
3913                         str = "[UNSAFE-MEMORY]";
3914                         strcpy(iter->fmt, "%s");
3915                 } else {
3916                         strncpy(iter->fmt, p + i, j + 1);
3917                         iter->fmt[j+1] = '\0';
3918                 }
3919                 if (star)
3920                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3921                 else
3922                         trace_seq_printf(&iter->seq, iter->fmt, str);
3923
3924                 p += i + j + 1;
3925         }
3926  print:
3927         if (*p)
3928                 trace_seq_vprintf(&iter->seq, p, ap);
3929 }
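
/*
 * Illustrative example: an event that recorded only a pointer to a
 * transient string (e.g. a stack-local buffer) and printed it with "%s"
 * would trip the verification in trace_check_vprintf() above. The trace
 * output then shows the raw pointer value and a best-effort copy of the
 * string, followed by "[UNSAFE-MEMORY]", instead of blindly dereferencing
 * a stale pointer.
 */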
3930
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3932 {
3933         const char *p, *new_fmt;
3934         char *q;
3935
3936         if (WARN_ON_ONCE(!fmt))
3937                 return fmt;
3938
3939         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3940                 return fmt;
3941
3942         p = fmt;
3943         new_fmt = q = iter->fmt;
3944         while (*p) {
3945                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946                         if (!trace_iter_expand_format(iter))
3947                                 return fmt;
3948
3949                         q += iter->fmt - new_fmt;
3950                         new_fmt = iter->fmt;
3951                 }
3952
3953                 *q++ = *p++;
3954
3955                 /* Replace %p with %px */
3956                 if (p[-1] == '%') {
3957                         if (p[0] == '%') {
3958                                 *q++ = *p++;
3959                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3960                                 *q++ = *p++;
3961                                 *q++ = 'x';
3962                         }
3963                 }
3964         }
3965         *q = '\0';
3966
3967         return new_fmt;
3968 }
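
/*
 * For example, with the "hash-ptr" trace option cleared, a format such as
 * "ptr=%p caller=%pS" is rewritten by trace_event_format() above to
 * "ptr=%px caller=%pS": only a bare %p is converted (extended %p
 * specifiers and "%%p" are left alone), so the raw address is printed
 * instead of a hashed value.
 */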
3969
3970 #define STATIC_TEMP_BUF_SIZE    128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3972
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975                                           int *ent_cpu, u64 *ent_ts)
3976 {
3977         /* __find_next_entry will reset ent_size */
3978         int ent_size = iter->ent_size;
3979         struct trace_entry *entry;
3980
3981         /*
3982          * If called from ftrace_dump(), then the iter->temp buffer
3983          * will be the static_temp_buf and not created from kmalloc.
3984          * If the entry size is greater than the buffer, we cannot
3985          * save it. Just return NULL in that case. This is only
3986          * used to add markers when two consecutive events' time
3987          * stamps have a large delta. See trace_print_lat_context().
3988          */
3989         if (iter->temp == static_temp_buf &&
3990             STATIC_TEMP_BUF_SIZE < ent_size)
3991                 return NULL;
3992
3993         /*
3994          * __find_next_entry() may call peek_next_entry(), which may
3995          * call ring_buffer_peek() and make the contents of iter->ent
3996          * undefined. Need to copy iter->ent now.
3997          */
3998         if (iter->ent && iter->ent != iter->temp) {
3999                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4001                         void *temp;
4002                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4003                         if (!temp)
4004                                 return NULL;
4005                         kfree(iter->temp);
4006                         iter->temp = temp;
4007                         iter->temp_size = iter->ent_size;
4008                 }
4009                 memcpy(iter->temp, iter->ent, iter->ent_size);
4010                 iter->ent = iter->temp;
4011         }
4012         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013         /* Put back the original ent_size */
4014         iter->ent_size = ent_size;
4015
4016         return entry;
4017 }
4018
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4021 {
4022         iter->ent = __find_next_entry(iter, &iter->cpu,
4023                                       &iter->lost_events, &iter->ts);
4024
4025         if (iter->ent)
4026                 trace_iterator_increment(iter);
4027
4028         return iter->ent ? iter : NULL;
4029 }
4030
4031 static void trace_consume(struct trace_iterator *iter)
4032 {
4033         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034                             &iter->lost_events);
4035 }
4036
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4038 {
4039         struct trace_iterator *iter = m->private;
4040         int i = (int)*pos;
4041         void *ent;
4042
4043         WARN_ON_ONCE(iter->leftover);
4044
4045         (*pos)++;
4046
4047         /* can't go backwards */
4048         if (iter->idx > i)
4049                 return NULL;
4050
4051         if (iter->idx < 0)
4052                 ent = trace_find_next_entry_inc(iter);
4053         else
4054                 ent = iter;
4055
4056         while (ent && iter->idx < i)
4057                 ent = trace_find_next_entry_inc(iter);
4058
4059         iter->pos = *pos;
4060
4061         return ent;
4062 }
4063
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4065 {
4066         struct ring_buffer_iter *buf_iter;
4067         unsigned long entries = 0;
4068         u64 ts;
4069
4070         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4071
4072         buf_iter = trace_buffer_iter(iter, cpu);
4073         if (!buf_iter)
4074                 return;
4075
4076         ring_buffer_iter_reset(buf_iter);
4077
4078         /*
4079          * With the max latency tracers, a reset may never have taken
4080          * place on a CPU. This is evident when an entry's timestamp is
4081          * before the start of the buffer.
4082          */
4083         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084                 if (ts >= iter->array_buffer->time_start)
4085                         break;
4086                 entries++;
4087                 ring_buffer_iter_advance(buf_iter);
4088         }
4089
4090         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4091 }
4092
4093 /*
4094  * The current tracer is copied to avoid holding a global lock
4095  * around the whole read.
4096  */
4097 static void *s_start(struct seq_file *m, loff_t *pos)
4098 {
4099         struct trace_iterator *iter = m->private;
4100         struct trace_array *tr = iter->tr;
4101         int cpu_file = iter->cpu_file;
4102         void *p = NULL;
4103         loff_t l = 0;
4104         int cpu;
4105
4106         /*
4107          * Copy the tracer to avoid using a global lock all around.
4108          * iter->trace is a copy of current_trace, so the name pointer
4109          * may be compared instead of calling strcmp(), as iter->trace->name
4110          * will point to the same string as current_trace->name.
4111          */
4112         mutex_lock(&trace_types_lock);
4113         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4114                 *iter->trace = *tr->current_trace;
4115         mutex_unlock(&trace_types_lock);
4116
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118         if (iter->snapshot && iter->trace->use_max_tr)
4119                 return ERR_PTR(-EBUSY);
4120 #endif
4121
4122         if (*pos != iter->pos) {
4123                 iter->ent = NULL;
4124                 iter->cpu = 0;
4125                 iter->idx = -1;
4126
4127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4128                         for_each_tracing_cpu(cpu)
4129                                 tracing_iter_reset(iter, cpu);
4130                 } else
4131                         tracing_iter_reset(iter, cpu_file);
4132
4133                 iter->leftover = 0;
4134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4135                         ;
4136
4137         } else {
4138                 /*
4139                  * If we overflowed the seq_file before, then we want
4140                  * to just reuse the trace_seq buffer again.
4141                  */
4142                 if (iter->leftover)
4143                         p = iter;
4144                 else {
4145                         l = *pos - 1;
4146                         p = s_next(m, p, &l);
4147                 }
4148         }
4149
4150         trace_event_read_lock();
4151         trace_access_lock(cpu_file);
4152         return p;
4153 }
4154
4155 static void s_stop(struct seq_file *m, void *p)
4156 {
4157         struct trace_iterator *iter = m->private;
4158
4159 #ifdef CONFIG_TRACER_MAX_TRACE
4160         if (iter->snapshot && iter->trace->use_max_tr)
4161                 return;
4162 #endif
4163
4164         trace_access_unlock(iter->cpu_file);
4165         trace_event_read_unlock();
4166 }
4167
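/*
 * Report, for one CPU, the events still readable in the buffer (*entries)
 * and the total written (*total): normally the readable events plus any
 * that were overwritten, except when entries were skipped (see below).
 */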
4168 static void
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170                       unsigned long *entries, int cpu)
4171 {
4172         unsigned long count;
4173
4174         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4175         /*
4176          * If this buffer has skipped entries, then we hold all
4177          * entries for the trace and we need to ignore the
4178          * ones before the time stamp.
4179          */
4180         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182                 /* total is the same as the entries */
4183                 *total = count;
4184         } else
4185                 *total = count +
4186                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4187         *entries = count;
4188 }
4189
4190 static void
4191 get_total_entries(struct array_buffer *buf,
4192                   unsigned long *total, unsigned long *entries)
4193 {
4194         unsigned long t, e;
4195         int cpu;
4196
4197         *total = 0;
4198         *entries = 0;
4199
4200         for_each_tracing_cpu(cpu) {
4201                 get_total_entries_cpu(buf, &t, &e, cpu);
4202                 *total += t;
4203                 *entries += e;
4204         }
4205 }
4206
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4208 {
4209         unsigned long total, entries;
4210
4211         if (!tr)
4212                 tr = &global_trace;
4213
4214         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4215
4216         return entries;
4217 }
4218
4219 unsigned long trace_total_entries(struct trace_array *tr)
4220 {
4221         unsigned long total, entries;
4222
4223         if (!tr)
4224                 tr = &global_trace;
4225
4226         get_total_entries(&tr->array_buffer, &total, &entries);
4227
4228         return entries;
4229 }
4230
4231 static void print_lat_help_header(struct seq_file *m)
4232 {
4233         seq_puts(m, "#                    _------=> CPU#            \n"
4234                     "#                   / _-----=> irqs-off/BH-disabled\n"
4235                     "#                  | / _----=> need-resched    \n"
4236                     "#                  || / _---=> hardirq/softirq \n"
4237                     "#                  ||| / _--=> preempt-depth   \n"
4238                     "#                  |||| / _-=> migrate-disable \n"
4239                     "#                  ||||| /     delay           \n"
4240                     "#  cmd     pid     |||||| time  |   caller     \n"
4241                     "#     \\   /        ||||||  \\    |    /       \n");
4242 }
4243
4244 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4245 {
4246         unsigned long total;
4247         unsigned long entries;
4248
4249         get_total_entries(buf, &total, &entries);
4250         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4251                    entries, total, num_online_cpus());
4252         seq_puts(m, "#\n");
4253 }
4254
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4256                                    unsigned int flags)
4257 {
4258         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4259
4260         print_event_info(buf, m);
4261
4262         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4263         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4264 }
4265
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4267                                        unsigned int flags)
4268 {
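        /*
         * prec is the width of the optional TGID column: 12 characters
         * when TGID recording is enabled (wide enough for the "TGID"
         * label), otherwise just 2 columns of padding.
         */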
4269         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270         static const char space[] = "            ";
4271         int prec = tgid ? 12 : 2;
4272
4273         print_event_info(buf, m);
4274
4275         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4276         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4277         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4278         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4279         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4280         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4281         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4282         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4283 }
4284
4285 void
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4287 {
4288         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289         struct array_buffer *buf = iter->array_buffer;
4290         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291         struct tracer *type = iter->trace;
4292         unsigned long entries;
4293         unsigned long total;
4294         const char *name = type->name;
4295
4296         get_total_entries(buf, &total, &entries);
4297
4298         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4299                    name, UTS_RELEASE);
4300         seq_puts(m, "# -----------------------------------"
4301                  "---------------------------------\n");
4302         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304                    nsecs_to_usecs(data->saved_latency),
4305                    entries,
4306                    total,
4307                    buf->cpu,
4308                    preempt_model_none()      ? "server" :
4309                    preempt_model_voluntary() ? "desktop" :
4310                    preempt_model_full()      ? "preempt" :
4311                    preempt_model_rt()        ? "preempt_rt" :
4312                    "unknown",
4313                    /* These are reserved for later use */
4314                    0, 0, 0, 0);
4315 #ifdef CONFIG_SMP
4316         seq_printf(m, " #P:%d)\n", num_online_cpus());
4317 #else
4318         seq_puts(m, ")\n");
4319 #endif
4320         seq_puts(m, "#    -----------------\n");
4321         seq_printf(m, "#    | task: %.16s-%d "
4322                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323                    data->comm, data->pid,
4324                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325                    data->policy, data->rt_priority);
4326         seq_puts(m, "#    -----------------\n");
4327
4328         if (data->critical_start) {
4329                 seq_puts(m, "#  => started at: ");
4330                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331                 trace_print_seq(m, &iter->seq);
4332                 seq_puts(m, "\n#  => ended at:   ");
4333                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334                 trace_print_seq(m, &iter->seq);
4335                 seq_puts(m, "\n#\n");
4336         }
4337
4338         seq_puts(m, "#\n");
4339 }
4340
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4342 {
4343         struct trace_seq *s = &iter->seq;
4344         struct trace_array *tr = iter->tr;
4345
4346         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4347                 return;
4348
4349         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4350                 return;
4351
4352         if (cpumask_available(iter->started) &&
4353             cpumask_test_cpu(iter->cpu, iter->started))
4354                 return;
4355
4356         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4357                 return;
4358
4359         if (cpumask_available(iter->started))
4360                 cpumask_set_cpu(iter->cpu, iter->started);
4361
4362         /* Don't print started cpu buffer for the first entry of the trace */
4363         if (iter->idx > 1)
4364                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4365                                 iter->cpu);
4366 }
4367
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4369 {
4370         struct trace_array *tr = iter->tr;
4371         struct trace_seq *s = &iter->seq;
4372         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373         struct trace_entry *entry;
4374         struct trace_event *event;
4375
4376         entry = iter->ent;
4377
4378         test_cpu_buff_start(iter);
4379
4380         event = ftrace_find_event(entry->type);
4381
4382         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384                         trace_print_lat_context(iter);
4385                 else
4386                         trace_print_context(iter);
4387         }
4388
4389         if (trace_seq_has_overflowed(s))
4390                 return TRACE_TYPE_PARTIAL_LINE;
4391
4392         if (event)
4393                 return event->funcs->trace(iter, sym_flags, event);
4394
4395         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4396
4397         return trace_handle_return(s);
4398 }
4399
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4401 {
4402         struct trace_array *tr = iter->tr;
4403         struct trace_seq *s = &iter->seq;
4404         struct trace_entry *entry;
4405         struct trace_event *event;
4406
4407         entry = iter->ent;
4408
4409         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410                 trace_seq_printf(s, "%d %d %llu ",
4411                                  entry->pid, iter->cpu, iter->ts);
4412
4413         if (trace_seq_has_overflowed(s))
4414                 return TRACE_TYPE_PARTIAL_LINE;
4415
4416         event = ftrace_find_event(entry->type);
4417         if (event)
4418                 return event->funcs->raw(iter, 0, event);
4419
4420         trace_seq_printf(s, "%d ?\n", entry->type);
4421
4422         return trace_handle_return(s);
4423 }
4424
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4426 {
4427         struct trace_array *tr = iter->tr;
4428         struct trace_seq *s = &iter->seq;
4429         unsigned char newline = '\n';
4430         struct trace_entry *entry;
4431         struct trace_event *event;
4432
4433         entry = iter->ent;
4434
4435         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4437                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4439                 if (trace_seq_has_overflowed(s))
4440                         return TRACE_TYPE_PARTIAL_LINE;
4441         }
4442
4443         event = ftrace_find_event(entry->type);
4444         if (event) {
4445                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446                 if (ret != TRACE_TYPE_HANDLED)
4447                         return ret;
4448         }
4449
4450         SEQ_PUT_FIELD(s, newline);
4451
4452         return trace_handle_return(s);
4453 }
4454
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4456 {
4457         struct trace_array *tr = iter->tr;
4458         struct trace_seq *s = &iter->seq;
4459         struct trace_entry *entry;
4460         struct trace_event *event;
4461
4462         entry = iter->ent;
4463
4464         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465                 SEQ_PUT_FIELD(s, entry->pid);
4466                 SEQ_PUT_FIELD(s, iter->cpu);
4467                 SEQ_PUT_FIELD(s, iter->ts);
4468                 if (trace_seq_has_overflowed(s))
4469                         return TRACE_TYPE_PARTIAL_LINE;
4470         }
4471
4472         event = ftrace_find_event(entry->type);
4473         return event ? event->funcs->binary(iter, 0, event) :
4474                 TRACE_TYPE_HANDLED;
4475 }
4476
4477 int trace_empty(struct trace_iterator *iter)
4478 {
4479         struct ring_buffer_iter *buf_iter;
4480         int cpu;
4481
4482         /* If we are looking at one CPU buffer, only check that one */
4483         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484                 cpu = iter->cpu_file;
4485                 buf_iter = trace_buffer_iter(iter, cpu);
4486                 if (buf_iter) {
4487                         if (!ring_buffer_iter_empty(buf_iter))
4488                                 return 0;
4489                 } else {
4490                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4491                                 return 0;
4492                 }
4493                 return 1;
4494         }
4495
4496         for_each_tracing_cpu(cpu) {
4497                 buf_iter = trace_buffer_iter(iter, cpu);
4498                 if (buf_iter) {
4499                         if (!ring_buffer_iter_empty(buf_iter))
4500                                 return 0;
4501                 } else {
4502                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4503                                 return 0;
4504                 }
4505         }
4506
4507         return 1;
4508 }
4509
4510 /*  Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4512 {
4513         struct trace_array *tr = iter->tr;
4514         unsigned long trace_flags = tr->trace_flags;
4515         enum print_line_t ret;
4516
4517         if (iter->lost_events) {
4518                 if (iter->lost_events == (unsigned long)-1)
4519                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4520                                          iter->cpu);
4521                 else
4522                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523                                          iter->cpu, iter->lost_events);
4524                 if (trace_seq_has_overflowed(&iter->seq))
4525                         return TRACE_TYPE_PARTIAL_LINE;
4526         }
4527
4528         if (iter->trace && iter->trace->print_line) {
4529                 ret = iter->trace->print_line(iter);
4530                 if (ret != TRACE_TYPE_UNHANDLED)
4531                         return ret;
4532         }
4533
4534         if (iter->ent->type == TRACE_BPUTS &&
4535                         trace_flags & TRACE_ITER_PRINTK &&
4536                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537                 return trace_print_bputs_msg_only(iter);
4538
4539         if (iter->ent->type == TRACE_BPRINT &&
4540                         trace_flags & TRACE_ITER_PRINTK &&
4541                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542                 return trace_print_bprintk_msg_only(iter);
4543
4544         if (iter->ent->type == TRACE_PRINT &&
4545                         trace_flags & TRACE_ITER_PRINTK &&
4546                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547                 return trace_print_printk_msg_only(iter);
4548
4549         if (trace_flags & TRACE_ITER_BIN)
4550                 return print_bin_fmt(iter);
4551
4552         if (trace_flags & TRACE_ITER_HEX)
4553                 return print_hex_fmt(iter);
4554
4555         if (trace_flags & TRACE_ITER_RAW)
4556                 return print_raw_fmt(iter);
4557
4558         return print_trace_fmt(iter);
4559 }
4560
4561 void trace_latency_header(struct seq_file *m)
4562 {
4563         struct trace_iterator *iter = m->private;
4564         struct trace_array *tr = iter->tr;
4565
4566         /* print nothing if the buffers are empty */
4567         if (trace_empty(iter))
4568                 return;
4569
4570         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571                 print_trace_header(m, iter);
4572
4573         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574                 print_lat_help_header(m);
4575 }
4576
4577 void trace_default_header(struct seq_file *m)
4578 {
4579         struct trace_iterator *iter = m->private;
4580         struct trace_array *tr = iter->tr;
4581         unsigned long trace_flags = tr->trace_flags;
4582
4583         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4584                 return;
4585
4586         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587                 /* print nothing if the buffers are empty */
4588                 if (trace_empty(iter))
4589                         return;
4590                 print_trace_header(m, iter);
4591                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4592                         print_lat_help_header(m);
4593         } else {
4594                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4596                                 print_func_help_header_irq(iter->array_buffer,
4597                                                            m, trace_flags);
4598                         else
4599                                 print_func_help_header(iter->array_buffer, m,
4600                                                        trace_flags);
4601                 }
4602         }
4603 }
4604
4605 static void test_ftrace_alive(struct seq_file *m)
4606 {
4607         if (!ftrace_is_dead())
4608                 return;
4609         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4610                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4611 }
4612
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 static void show_snapshot_main_help(struct seq_file *m)
4615 {
4616         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4617                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4618                     "#                      Takes a snapshot of the main buffer.\n"
4619                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4620                     "#                      (Doesn't have to be '2' works with any number that\n"
4621                     "#                       is not a '0' or '1')\n");
4622 }
4623
4624 static void show_snapshot_percpu_help(struct seq_file *m)
4625 {
4626         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4627 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4628         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4629                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4630 #else
4631         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4632                     "#                     Must use main snapshot file to allocate.\n");
4633 #endif
4634         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4635                     "#                      (Doesn't have to be '2' works with any number that\n"
4636                     "#                       is not a '0' or '1')\n");
4637 }
4638
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4640 {
4641         if (iter->tr->allocated_snapshot)
4642                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4643         else
4644                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4645
4646         seq_puts(m, "# Snapshot commands:\n");
4647         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648                 show_snapshot_main_help(m);
4649         else
4650                 show_snapshot_percpu_help(m);
4651 }
4652 #else
4653 /* Should never be called */
4654 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4655 #endif
4656
4657 static int s_show(struct seq_file *m, void *v)
4658 {
4659         struct trace_iterator *iter = v;
4660         int ret;
4661
4662         if (iter->ent == NULL) {
4663                 if (iter->tr) {
4664                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4665                         seq_puts(m, "#\n");
4666                         test_ftrace_alive(m);
4667                 }
4668                 if (iter->snapshot && trace_empty(iter))
4669                         print_snapshot_help(m, iter);
4670                 else if (iter->trace && iter->trace->print_header)
4671                         iter->trace->print_header(m);
4672                 else
4673                         trace_default_header(m);
4674
4675         } else if (iter->leftover) {
4676                 /*
4677                  * If we filled the seq_file buffer earlier, we
4678                  * want to just show it now.
4679                  */
4680                 ret = trace_print_seq(m, &iter->seq);
4681
4682                 /* ret should this time be zero, but you never know */
4683                 iter->leftover = ret;
4684
4685         } else {
4686                 print_trace_line(iter);
4687                 ret = trace_print_seq(m, &iter->seq);
4688                 /*
4689                  * If we overflow the seq_file buffer, then it will
4690                  * ask us for this data again at start up.
4691                  * Use that instead.
4692                  *  ret is 0 if seq_file write succeeded.
4693                  *        -1 otherwise.
4694                  */
4695                 iter->leftover = ret;
4696         }
4697
4698         return 0;
4699 }
4700
4701 /*
4702  * Should be used after trace_array_get(); trace_types_lock
4703  * ensures that i_cdev was already initialized.
4704  */
4705 static inline int tracing_get_cpu(struct inode *inode)
4706 {
4707         if (inode->i_cdev) /* See trace_create_cpu_file() */
4708                 return (long)inode->i_cdev - 1;
4709         return RING_BUFFER_ALL_CPUS;
4710 }
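/*
 * Annotation derived from tracing_get_cpu() and trace_create_cpu_file():
 * per-CPU files stash "cpu + 1" in i_cdev when created, so a NULL i_cdev
 * (the top-level files) maps to RING_BUFFER_ALL_CPUS and a stored value of
 * N + 1 maps back to CPU N.
 */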
4711
4712 static const struct seq_operations tracer_seq_ops = {
4713         .start          = s_start,
4714         .next           = s_next,
4715         .stop           = s_stop,
4716         .show           = s_show,
4717 };
4718
4719 static struct trace_iterator *
4720 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4721 {
4722         struct trace_array *tr = inode->i_private;
4723         struct trace_iterator *iter;
4724         int cpu;
4725
4726         if (tracing_disabled)
4727                 return ERR_PTR(-ENODEV);
4728
4729         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4730         if (!iter)
4731                 return ERR_PTR(-ENOMEM);
4732
4733         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4734                                     GFP_KERNEL);
4735         if (!iter->buffer_iter)
4736                 goto release;
4737
4738         /*
4739          * trace_find_next_entry() may need to save off iter->ent.
4740          * It will place it into the iter->temp buffer. As most
4741          * events are less than 128 bytes, allocate a buffer of that size.
4742          * If one is greater, then trace_find_next_entry() will
4743          * allocate a new buffer to adjust for the bigger iter->ent.
4744          * It's not critical if it fails to get allocated here.
4745          */
4746         iter->temp = kmalloc(128, GFP_KERNEL);
4747         if (iter->temp)
4748                 iter->temp_size = 128;
4749
4750         /*
4751          * trace_event_printf() may need to modify given format
4752          * string to replace %p with %px so that it shows real address
4753          * instead of a hash value. However, that is only needed for event
4754          * tracing; other tracers may not need it. Defer the allocation
4755          * until it is needed.
4756          */
4757         iter->fmt = NULL;
4758         iter->fmt_size = 0;
4759
4760         /*
4761          * We make a copy of the current tracer to avoid concurrent
4762          * changes on it while we are reading.
4763          */
4764         mutex_lock(&trace_types_lock);
4765         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4766         if (!iter->trace)
4767                 goto fail;
4768
4769         *iter->trace = *tr->current_trace;
4770
4771         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4772                 goto fail;
4773
4774         iter->tr = tr;
4775
4776 #ifdef CONFIG_TRACER_MAX_TRACE
4777         /* Currently only the top directory has a snapshot */
4778         if (tr->current_trace->print_max || snapshot)
4779                 iter->array_buffer = &tr->max_buffer;
4780         else
4781 #endif
4782                 iter->array_buffer = &tr->array_buffer;
4783         iter->snapshot = snapshot;
4784         iter->pos = -1;
4785         iter->cpu_file = tracing_get_cpu(inode);
4786         mutex_init(&iter->mutex);
4787
4788         /* Notify the tracer early; before we stop tracing. */
4789         if (iter->trace->open)
4790                 iter->trace->open(iter);
4791
4792         /* Annotate start of buffers if we had overruns */
4793         if (ring_buffer_overruns(iter->array_buffer->buffer))
4794                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4795
4796         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4797         if (trace_clocks[tr->clock_id].in_ns)
4798                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4799
4800         /*
4801          * If pause-on-trace is enabled, then stop the trace while
4802          * dumping, unless this is the "snapshot" file
4803          */
4804         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4805                 tracing_stop_tr(tr);
4806
4807         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4808                 for_each_tracing_cpu(cpu) {
4809                         iter->buffer_iter[cpu] =
4810                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4811                                                          cpu, GFP_KERNEL);
4812                 }
4813                 ring_buffer_read_prepare_sync();
4814                 for_each_tracing_cpu(cpu) {
4815                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4816                         tracing_iter_reset(iter, cpu);
4817                 }
4818         } else {
4819                 cpu = iter->cpu_file;
4820                 iter->buffer_iter[cpu] =
4821                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4822                                                  cpu, GFP_KERNEL);
4823                 ring_buffer_read_prepare_sync();
4824                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4825                 tracing_iter_reset(iter, cpu);
4826         }
4827
4828         mutex_unlock(&trace_types_lock);
4829
4830         return iter;
4831
4832  fail:
4833         mutex_unlock(&trace_types_lock);
4834         kfree(iter->trace);
4835         kfree(iter->temp);
4836         kfree(iter->buffer_iter);
4837 release:
4838         seq_release_private(inode, file);
4839         return ERR_PTR(-ENOMEM);
4840 }
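/*
 * For context, __tracing_open() above backs the seq_file behind the "trace"
 * file. A minimal user-space reader might look like the sketch below
 * (illustrative only, not part of the kernel build; assumes tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *
 *      int main(void)
 *      {
 *              char buf[4096];
 *              ssize_t n;
 *              int fd = open("/sys/kernel/tracing/trace", O_RDONLY);
 *
 *              if (fd < 0)
 *                      return 1;
 *              while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                      fwrite(buf, 1, n, stdout);
 *              close(fd);
 *              return 0;
 *      }
 */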
4841
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4843 {
4844         int ret;
4845
4846         ret = tracing_check_open_get_tr(NULL);
4847         if (ret)
4848                 return ret;
4849
4850         filp->private_data = inode->i_private;
4851         return 0;
4852 }
4853
4854 bool tracing_is_disabled(void)
4855 {
4856         return tracing_disabled;
4857 }
4858
4859 /*
4860  * Open and update trace_array ref count.
4861  * Must have the current trace_array passed to it.
4862  */
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4864 {
4865         struct trace_array *tr = inode->i_private;
4866         int ret;
4867
4868         ret = tracing_check_open_get_tr(tr);
4869         if (ret)
4870                 return ret;
4871
4872         filp->private_data = inode->i_private;
4873
4874         return 0;
4875 }
4876
4877 static int tracing_mark_open(struct inode *inode, struct file *filp)
4878 {
4879         stream_open(inode, filp);
4880         return tracing_open_generic_tr(inode, filp);
4881 }
4882
4883 static int tracing_release(struct inode *inode, struct file *file)
4884 {
4885         struct trace_array *tr = inode->i_private;
4886         struct seq_file *m = file->private_data;
4887         struct trace_iterator *iter;
4888         int cpu;
4889
4890         if (!(file->f_mode & FMODE_READ)) {
4891                 trace_array_put(tr);
4892                 return 0;
4893         }
4894
4895         /* Writes do not use seq_file */
4896         iter = m->private;
4897         mutex_lock(&trace_types_lock);
4898
4899         for_each_tracing_cpu(cpu) {
4900                 if (iter->buffer_iter[cpu])
4901                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4902         }
4903
4904         if (iter->trace && iter->trace->close)
4905                 iter->trace->close(iter);
4906
4907         if (!iter->snapshot && tr->stop_count)
4908                 /* reenable tracing if it was previously enabled */
4909                 tracing_start_tr(tr);
4910
4911         __trace_array_put(tr);
4912
4913         mutex_unlock(&trace_types_lock);
4914
4915         mutex_destroy(&iter->mutex);
4916         free_cpumask_var(iter->started);
4917         kfree(iter->fmt);
4918         kfree(iter->temp);
4919         kfree(iter->trace);
4920         kfree(iter->buffer_iter);
4921         seq_release_private(inode, file);
4922
4923         return 0;
4924 }
4925
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4927 {
4928         struct trace_array *tr = inode->i_private;
4929
4930         trace_array_put(tr);
4931         return 0;
4932 }
4933
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4935 {
4936         struct trace_array *tr = inode->i_private;
4937
4938         trace_array_put(tr);
4939
4940         return single_release(inode, file);
4941 }
4942
4943 static int tracing_open(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946         struct trace_iterator *iter;
4947         int ret;
4948
4949         ret = tracing_check_open_get_tr(tr);
4950         if (ret)
4951                 return ret;
4952
4953         /* If this file was open for write, then erase contents */
4954         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955                 int cpu = tracing_get_cpu(inode);
4956                 struct array_buffer *trace_buf = &tr->array_buffer;
4957
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959                 if (tr->current_trace->print_max)
4960                         trace_buf = &tr->max_buffer;
4961 #endif
4962
4963                 if (cpu == RING_BUFFER_ALL_CPUS)
4964                         tracing_reset_online_cpus(trace_buf);
4965                 else
4966                         tracing_reset_cpu(trace_buf, cpu);
4967         }
4968
4969         if (file->f_mode & FMODE_READ) {
4970                 iter = __tracing_open(inode, file, false);
4971                 if (IS_ERR(iter))
4972                         ret = PTR_ERR(iter);
4973                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4975         }
4976
4977         if (ret < 0)
4978                 trace_array_put(tr);
4979
4980         return ret;
4981 }
4982
4983 /*
4984  * Some tracers are not suitable for instance buffers.
4985  * A tracer is always available for the global array (toplevel)
4986  * or if it explicitly states that it is.
4987  */
4988 static bool
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4990 {
4991         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4992 }
4993
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4997 {
4998         while (t && !trace_ok_for_array(t, tr))
4999                 t = t->next;
5000
5001         return t;
5002 }
5003
5004 static void *
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5006 {
5007         struct trace_array *tr = m->private;
5008         struct tracer *t = v;
5009
5010         (*pos)++;
5011
5012         if (t)
5013                 t = get_tracer_for_array(tr, t->next);
5014
5015         return t;
5016 }
5017
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5019 {
5020         struct trace_array *tr = m->private;
5021         struct tracer *t;
5022         loff_t l = 0;
5023
5024         mutex_lock(&trace_types_lock);
5025
5026         t = get_tracer_for_array(tr, trace_types);
5027         for (; t && l < *pos; t = t_next(m, t, &l))
5028                 ;
5029
5030         return t;
5031 }
5032
5033 static void t_stop(struct seq_file *m, void *p)
5034 {
5035         mutex_unlock(&trace_types_lock);
5036 }
5037
5038 static int t_show(struct seq_file *m, void *v)
5039 {
5040         struct tracer *t = v;
5041
5042         if (!t)
5043                 return 0;
5044
5045         seq_puts(m, t->name);
5046         if (t->next)
5047                 seq_putc(m, ' ');
5048         else
5049                 seq_putc(m, '\n');
5050
5051         return 0;
5052 }
5053
5054 static const struct seq_operations show_traces_seq_ops = {
5055         .start          = t_start,
5056         .next           = t_next,
5057         .stop           = t_stop,
5058         .show           = t_show,
5059 };
5060
5061 static int show_traces_open(struct inode *inode, struct file *file)
5062 {
5063         struct trace_array *tr = inode->i_private;
5064         struct seq_file *m;
5065         int ret;
5066
5067         ret = tracing_check_open_get_tr(tr);
5068         if (ret)
5069                 return ret;
5070
5071         ret = seq_open(file, &show_traces_seq_ops);
5072         if (ret) {
5073                 trace_array_put(tr);
5074                 return ret;
5075         }
5076
5077         m = file->private_data;
5078         m->private = tr;
5079
5080         return 0;
5081 }
5082
5083 static int show_traces_release(struct inode *inode, struct file *file)
5084 {
5085         struct trace_array *tr = inode->i_private;
5086
5087         trace_array_put(tr);
5088         return seq_release(inode, file);
5089 }
5090
5091 static ssize_t
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093                    size_t count, loff_t *ppos)
5094 {
5095         return count;
5096 }
5097
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5099 {
5100         int ret;
5101
5102         if (file->f_mode & FMODE_READ)
5103                 ret = seq_lseek(file, offset, whence);
5104         else
5105                 file->f_pos = ret = 0;
5106
5107         return ret;
5108 }
5109
5110 static const struct file_operations tracing_fops = {
5111         .open           = tracing_open,
5112         .read           = seq_read,
5113         .write          = tracing_write_stub,
5114         .llseek         = tracing_lseek,
5115         .release        = tracing_release,
5116 };
5117
5118 static const struct file_operations show_traces_fops = {
5119         .open           = show_traces_open,
5120         .read           = seq_read,
5121         .llseek         = seq_lseek,
5122         .release        = show_traces_release,
5123 };
5124
5125 static ssize_t
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127                      size_t count, loff_t *ppos)
5128 {
5129         struct trace_array *tr = file_inode(filp)->i_private;
5130         char *mask_str;
5131         int len;
5132
5133         len = snprintf(NULL, 0, "%*pb\n",
5134                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135         mask_str = kmalloc(len, GFP_KERNEL);
5136         if (!mask_str)
5137                 return -ENOMEM;
5138
5139         len = snprintf(mask_str, len, "%*pb\n",
5140                        cpumask_pr_args(tr->tracing_cpumask));
5141         if (len >= count) {
5142                 count = -EINVAL;
5143                 goto out_err;
5144         }
5145         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5146
5147 out_err:
5148         kfree(mask_str);
5149
5150         return count;
5151 }
5152
5153 int tracing_set_cpumask(struct trace_array *tr,
5154                         cpumask_var_t tracing_cpumask_new)
5155 {
5156         int cpu;
5157
5158         if (!tr)
5159                 return -EINVAL;
5160
5161         local_irq_disable();
5162         arch_spin_lock(&tr->max_lock);
5163         for_each_tracing_cpu(cpu) {
5164                 /*
5165                  * Increase/decrease the disabled counter if we are
5166                  * about to flip a bit in the cpumask:
5167                  */
5168                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5172                 }
5173                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5177                 }
5178         }
5179         arch_spin_unlock(&tr->max_lock);
5180         local_irq_enable();
5181
5182         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5183
5184         return 0;
5185 }
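/*
 * Example of the user-facing side of the above (illustrative only; assumes
 * tracefs is mounted at /sys/kernel/tracing). The mask is a hex cpumask as
 * printed by tracing_cpumask_read() and parsed by cpumask_parse_user():
 *
 *      echo 3 > /sys/kernel/tracing/tracing_cpumask    # trace only CPUs 0-1
 *      cat /sys/kernel/tracing/tracing_cpumask         # read the current mask
 */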
5186
5187 static ssize_t
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189                       size_t count, loff_t *ppos)
5190 {
5191         struct trace_array *tr = file_inode(filp)->i_private;
5192         cpumask_var_t tracing_cpumask_new;
5193         int err;
5194
5195         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5196                 return -ENOMEM;
5197
5198         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5199         if (err)
5200                 goto err_free;
5201
5202         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5203         if (err)
5204                 goto err_free;
5205
5206         free_cpumask_var(tracing_cpumask_new);
5207
5208         return count;
5209
5210 err_free:
5211         free_cpumask_var(tracing_cpumask_new);
5212
5213         return err;
5214 }
5215
5216 static const struct file_operations tracing_cpumask_fops = {
5217         .open           = tracing_open_generic_tr,
5218         .read           = tracing_cpumask_read,
5219         .write          = tracing_cpumask_write,
5220         .release        = tracing_release_generic_tr,
5221         .llseek         = generic_file_llseek,
5222 };
5223
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5225 {
5226         struct tracer_opt *trace_opts;
5227         struct trace_array *tr = m->private;
5228         u32 tracer_flags;
5229         int i;
5230
5231         mutex_lock(&trace_types_lock);
5232         tracer_flags = tr->current_trace->flags->val;
5233         trace_opts = tr->current_trace->flags->opts;
5234
5235         for (i = 0; trace_options[i]; i++) {
5236                 if (tr->trace_flags & (1 << i))
5237                         seq_printf(m, "%s\n", trace_options[i]);
5238                 else
5239                         seq_printf(m, "no%s\n", trace_options[i]);
5240         }
5241
5242         for (i = 0; trace_opts[i].name; i++) {
5243                 if (tracer_flags & trace_opts[i].bit)
5244                         seq_printf(m, "%s\n", trace_opts[i].name);
5245                 else
5246                         seq_printf(m, "no%s\n", trace_opts[i].name);
5247         }
5248         mutex_unlock(&trace_types_lock);
5249
5250         return 0;
5251 }
5252
5253 static int __set_tracer_option(struct trace_array *tr,
5254                                struct tracer_flags *tracer_flags,
5255                                struct tracer_opt *opts, int neg)
5256 {
5257         struct tracer *trace = tracer_flags->trace;
5258         int ret;
5259
5260         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5261         if (ret)
5262                 return ret;
5263
5264         if (neg)
5265                 tracer_flags->val &= ~opts->bit;
5266         else
5267                 tracer_flags->val |= opts->bit;
5268         return 0;
5269 }
5270
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5273 {
5274         struct tracer *trace = tr->current_trace;
5275         struct tracer_flags *tracer_flags = trace->flags;
5276         struct tracer_opt *opts = NULL;
5277         int i;
5278
5279         for (i = 0; tracer_flags->opts[i].name; i++) {
5280                 opts = &tracer_flags->opts[i];
5281
5282                 if (strcmp(cmp, opts->name) == 0)
5283                         return __set_tracer_option(tr, trace->flags, opts, neg);
5284         }
5285
5286         return -EINVAL;
5287 }
5288
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5291 {
5292         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5293                 return -1;
5294
5295         return 0;
5296 }
5297
5298 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5299 {
5300         int *map;
5301
5302         if ((mask == TRACE_ITER_RECORD_TGID) ||
5303             (mask == TRACE_ITER_RECORD_CMD))
5304                 lockdep_assert_held(&event_mutex);
5305
5306         /* do nothing if the flag already has the requested state */
5307         if (!!(tr->trace_flags & mask) == !!enabled)
5308                 return 0;
5309
5310         /* Give the tracer a chance to approve the change */
5311         if (tr->current_trace->flag_changed)
5312                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5313                         return -EINVAL;
5314
5315         if (enabled)
5316                 tr->trace_flags |= mask;
5317         else
5318                 tr->trace_flags &= ~mask;
5319
5320         if (mask == TRACE_ITER_RECORD_CMD)
5321                 trace_event_enable_cmd_record(enabled);
5322
5323         if (mask == TRACE_ITER_RECORD_TGID) {
5324                 if (!tgid_map) {
5325                         tgid_map_max = pid_max;
5326                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5327                                        GFP_KERNEL);
5328
5329                         /*
5330                          * Pairs with smp_load_acquire() in
5331                          * trace_find_tgid_ptr() to ensure that if it observes
5332                          * the tgid_map we just allocated then it also observes
5333                          * the corresponding tgid_map_max value.
5334                          */
5335                         smp_store_release(&tgid_map, map);
5336                 }
5337                 if (!tgid_map) {
5338                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5339                         return -ENOMEM;
5340                 }
5341
5342                 trace_event_enable_tgid_record(enabled);
5343         }
5344
5345         if (mask == TRACE_ITER_EVENT_FORK)
5346                 trace_event_follow_fork(tr, enabled);
5347
5348         if (mask == TRACE_ITER_FUNC_FORK)
5349                 ftrace_pid_follow_fork(tr, enabled);
5350
5351         if (mask == TRACE_ITER_OVERWRITE) {
5352                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5353 #ifdef CONFIG_TRACER_MAX_TRACE
5354                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5355 #endif
5356         }
5357
5358         if (mask == TRACE_ITER_PRINTK) {
5359                 trace_printk_start_stop_comm(enabled);
5360                 trace_printk_control(enabled);
5361         }
5362
5363         return 0;
5364 }
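/*
 * A typical way these flags are flipped from user space (illustrative only;
 * the option file names are assumed from the trace_options list, e.g.
 * "record-tgid" for TRACE_ITER_RECORD_TGID and "overwrite" for
 * TRACE_ITER_OVERWRITE):
 *
 *      echo 1 > /sys/kernel/tracing/options/record-tgid
 *      echo 0 > /sys/kernel/tracing/options/overwrite
 */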
5365
5366 int trace_set_options(struct trace_array *tr, char *option)
5367 {
5368         char *cmp;
5369         int neg = 0;
5370         int ret;
5371         size_t orig_len = strlen(option);
5372         int len;
5373
5374         cmp = strstrip(option);
5375
5376         len = str_has_prefix(cmp, "no");
5377         if (len)
5378                 neg = 1;
5379
5380         cmp += len;
5381
5382         mutex_lock(&event_mutex);
5383         mutex_lock(&trace_types_lock);
5384
5385         ret = match_string(trace_options, -1, cmp);
5386         /* If no option could be set, test the specific tracer options */
5387         if (ret < 0)
5388                 ret = set_tracer_option(tr, cmp, neg);
5389         else
5390                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5391
5392         mutex_unlock(&trace_types_lock);
5393         mutex_unlock(&event_mutex);
5394
5395         /*
5396          * If the first trailing whitespace is replaced with '\0' by strstrip,
5397          * turn it back into a space.
5398          */
5399         if (orig_len > strlen(option))
5400                 option[strlen(option)] = ' ';
5401
5402         return ret;
5403 }
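/*
 * Example of the strings trace_set_options() accepts (illustrative only):
 * a plain option name sets the flag, a "no" prefix clears it, and anything
 * that does not match a global flag is tried as a tracer-specific option:
 *
 *      echo sym-offset > /sys/kernel/tracing/trace_options
 *      echo nosym-offset > /sys/kernel/tracing/trace_options
 */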
5404
5405 static void __init apply_trace_boot_options(void)
5406 {
5407         char *buf = trace_boot_options_buf;
5408         char *option;
5409
5410         while (true) {
5411                 option = strsep(&buf, ",");
5412
5413                 if (!option)
5414                         break;
5415
5416                 if (*option)
5417                         trace_set_options(&global_trace, option);
5418
5419                 /* Put back the comma to allow this to be called again */
5420                 if (buf)
5421                         *(buf - 1) = ',';
5422         }
5423 }
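/*
 * The buffer parsed above is filled from the "trace_options=" boot
 * parameter (usage sketch; the valid option names depend on the kernel
 * configuration), e.g. on the kernel command line:
 *
 *      trace_options=sym-offset,nooverwrite
 */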
5424
5425 static ssize_t
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427                         size_t cnt, loff_t *ppos)
5428 {
5429         struct seq_file *m = filp->private_data;
5430         struct trace_array *tr = m->private;
5431         char buf[64];
5432         int ret;
5433
5434         if (cnt >= sizeof(buf))
5435                 return -EINVAL;
5436
5437         if (copy_from_user(buf, ubuf, cnt))
5438                 return -EFAULT;
5439
5440         buf[cnt] = 0;
5441
5442         ret = trace_set_options(tr, buf);
5443         if (ret < 0)
5444                 return ret;
5445
5446         *ppos += cnt;
5447
5448         return cnt;
5449 }
5450
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5452 {
5453         struct trace_array *tr = inode->i_private;
5454         int ret;
5455
5456         ret = tracing_check_open_get_tr(tr);
5457         if (ret)
5458                 return ret;
5459
5460         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5461         if (ret < 0)
5462                 trace_array_put(tr);
5463
5464         return ret;
5465 }
5466
5467 static const struct file_operations tracing_iter_fops = {
5468         .open           = tracing_trace_options_open,
5469         .read           = seq_read,
5470         .llseek         = seq_lseek,
5471         .release        = tracing_single_release_tr,
5472         .write          = tracing_trace_options_write,
5473 };
5474
5475 static const char readme_msg[] =
5476         "tracing mini-HOWTO:\n\n"
5477         "# echo 0 > tracing_on : quick way to disable tracing\n"
5478         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5479         " Important files:\n"
5480         "  trace\t\t\t- The static contents of the buffer\n"
5481         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5482         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5483         "  current_tracer\t- function and latency tracers\n"
5484         "  available_tracers\t- list of configured tracers for current_tracer\n"
5485         "  error_log\t- error log for failed commands (that support it)\n"
5486         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5487         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5488         "  trace_clock\t\t- change the clock used to order events\n"
5489         "       local:   Per cpu clock but may not be synced across CPUs\n"
5490         "      global:   Synced across CPUs but slows tracing down.\n"
5491         "     counter:   Not a clock, but just an increment\n"
5492         "      uptime:   Jiffy counter from time of boot\n"
5493         "        perf:   Same clock that perf events use\n"
5494 #ifdef CONFIG_X86_64
5495         "     x86-tsc:   TSC cycle counter\n"
5496 #endif
5497         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5498         "       delta:   Delta difference against a buffer-wide timestamp\n"
5499         "    absolute:   Absolute (standalone) timestamp\n"
5500         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5501         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5502         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5503         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5504         "\t\t\t  Remove sub-buffer with rmdir\n"
5505         "  trace_options\t\t- Set format or modify how tracing happens\n"
5506         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5507         "\t\t\t  option name\n"
5508         "  saved_cmdlines_size\t- echo the number of comm-pid pairs to cache into here\n"
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510         "\n  available_filter_functions - list of functions that can be filtered on\n"
5511         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5512         "\t\t\t  functions\n"
5513         "\t     accepts: func_full_name or glob-matching-pattern\n"
5514         "\t     modules: Can select a group via module\n"
5515         "\t      Format: :mod:<module-name>\n"
5516         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5517         "\t    triggers: a command to perform when function is hit\n"
5518         "\t      Format: <function>:<trigger>[:count]\n"
5519         "\t     trigger: traceon, traceoff\n"
5520         "\t\t      enable_event:<system>:<event>\n"
5521         "\t\t      disable_event:<system>:<event>\n"
5522 #ifdef CONFIG_STACKTRACE
5523         "\t\t      stacktrace\n"
5524 #endif
5525 #ifdef CONFIG_TRACER_SNAPSHOT
5526         "\t\t      snapshot\n"
5527 #endif
5528         "\t\t      dump\n"
5529         "\t\t      cpudump\n"
5530         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5531         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5532         "\t     The first one will disable tracing every time do_fault is hit\n"
5533         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5534         "\t       The first time do_trap is hit and it disables tracing, the\n"
5535         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5536         "\t       the counter will not decrement. It only decrements when the\n"
5537         "\t       trigger did work\n"
5538         "\t     To remove trigger without count:\n"
5539         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5540         "\t     To remove trigger with a count:\n"
5541         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5542         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5543         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5544         "\t    modules: Can select a group via module command :mod:\n"
5545         "\t    Does not accept triggers\n"
5546 #endif /* CONFIG_DYNAMIC_FTRACE */
5547 #ifdef CONFIG_FUNCTION_TRACER
5548         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5549         "\t\t    (function)\n"
5550         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5551         "\t\t    (function)\n"
5552 #endif
5553 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5554         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5555         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5556         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5560         "\t\t\t  snapshot buffer. Read the contents for more\n"
5561         "\t\t\t  information\n"
5562 #endif
5563 #ifdef CONFIG_STACK_TRACER
5564         "  stack_trace\t\t- Shows the max stack trace when active\n"
5565         "  stack_max_size\t- Shows current max stack size that was traced\n"
5566         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5567         "\t\t\t  new trace)\n"
5568 #ifdef CONFIG_DYNAMIC_FTRACE
5569         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5570         "\t\t\t  traces\n"
5571 #endif
5572 #endif /* CONFIG_STACK_TRACER */
5573 #ifdef CONFIG_DYNAMIC_EVENTS
5574         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5575         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5576 #endif
5577 #ifdef CONFIG_KPROBE_EVENTS
5578         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5579         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5580 #endif
5581 #ifdef CONFIG_UPROBE_EVENTS
5582         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5583         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5584 #endif
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586         "\t  accepts: event-definitions (one definition per line)\n"
5587         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5588         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5591 #endif
5592         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5593         "\t           -:[<group>/][<event>]\n"
5594 #ifdef CONFIG_KPROBE_EVENTS
5595         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5596   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5597 #endif
5598 #ifdef CONFIG_UPROBE_EVENTS
5599   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5600 #endif
5601         "\t     args: <name>=fetcharg[:type]\n"
5602         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5603 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5604         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5605 #else
5606         "\t           $stack<index>, $stack, $retval, $comm,\n"
5607 #endif
5608         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5609         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5610         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5611         "\t           <type>\\[<array-size>\\]\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613         "\t    field: <stype> <name>;\n"
5614         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5615         "\t           [unsigned] char/int/long\n"
5616 #endif
5617         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5618         "\t            of the <attached-group>/<attached-event>.\n"
5619 #endif
5620         "  events/\t\t- Directory containing all trace event subsystems:\n"
5621         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5623         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5624         "\t\t\t  events\n"
5625         "      filter\t\t- If set, only events passing filter are traced\n"
5626         "  events/<system>/<event>/\t- Directory containing control files for\n"
5627         "\t\t\t  <event>:\n"
5628         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629         "      filter\t\t- If set, only events passing filter are traced\n"
5630         "      trigger\t\t- If set, a command to perform when event is hit\n"
5631         "\t    Format: <trigger>[:count][if <filter>]\n"
5632         "\t   trigger: traceon, traceoff\n"
5633         "\t            enable_event:<system>:<event>\n"
5634         "\t            disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636         "\t            enable_hist:<system>:<event>\n"
5637         "\t            disable_hist:<system>:<event>\n"
5638 #endif
5639 #ifdef CONFIG_STACKTRACE
5640         "\t\t    stacktrace\n"
5641 #endif
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5643         "\t\t    snapshot\n"
5644 #endif
5645 #ifdef CONFIG_HIST_TRIGGERS
5646         "\t\t    hist (see below)\n"
5647 #endif
5648         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5649         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5650         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651         "\t                  events/block/block_unplug/trigger\n"
5652         "\t   The first disables tracing every time block_unplug is hit.\n"
5653         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5654         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5655         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5656         "\t   Like function triggers, the counter is only decremented if it\n"
5657         "\t    enabled or disabled tracing.\n"
5658         "\t   To remove a trigger without a count:\n"
5659         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5660         "\t   To remove a trigger with a count:\n"
5661         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5662         "\t   Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5666         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667         "\t            [:values=<field1[,field2,...]>]\n"
5668         "\t            [:sort=<field1[,field2,...]>]\n"
5669         "\t            [:size=#entries]\n"
5670         "\t            [:pause][:continue][:clear]\n"
5671         "\t            [:name=histname1]\n"
5672         "\t            [:<handler>.<action>]\n"
5673         "\t            [if <filter>]\n\n"
5674         "\t    Note, special fields can be used as well:\n"
5675         "\t            common_timestamp - to record current timestamp\n"
5676         "\t            common_cpu - to record the CPU the event happened on\n"
5677         "\n"
5678         "\t    A hist trigger variable can be:\n"
5679         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5680         "\t        - a reference to another variable e.g. y=$x,\n"
5681         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5682         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5683         "\n"
5684         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5685         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5686         "\t    variable reference, field or numeric literal.\n"
5687         "\n"
5688         "\t    When a matching event is hit, an entry is added to a hash\n"
5689         "\t    table using the key(s) and value(s) named, and the value of a\n"
5690         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5691         "\t    correspond to fields in the event's format description.  Keys\n"
5692         "\t    can be any field, or the special string 'stacktrace'.\n"
5693         "\t    Compound keys consisting of up to two fields can be specified\n"
5694         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5695         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5696         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5697         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5698         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5699         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5700         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5701         "\t    its histogram data will be shared with other triggers of the\n"
5702         "\t    same name, and trigger hits will update this common data.\n\n"
5703         "\t    Reading the 'hist' file for the event will dump the hash\n"
5704         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5705         "\t    triggers attached to an event, there will be a table for each\n"
5706         "\t    trigger in the output.  The table displayed for a named\n"
5707         "\t    trigger will be the same as any other instance having the\n"
5708         "\t    same name.  The default format used to display a given field\n"
5709         "\t    can be modified by appending any of the following modifiers\n"
5710         "\t    to the field name, as applicable:\n\n"
5711         "\t            .hex        display a number as a hex value\n"
5712         "\t            .sym        display an address as a symbol\n"
5713         "\t            .sym-offset display an address as a symbol and offset\n"
5714         "\t            .execname   display a common_pid as a program name\n"
5715         "\t            .syscall    display a syscall id as a syscall name\n"
5716         "\t            .log2       display log2 value rather than raw number\n"
5717         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5718         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5719         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5720         "\t    trigger or to start a hist trigger but not log any events\n"
5721         "\t    until told to do so.  'continue' can be used to start or\n"
5722         "\t    restart a paused hist trigger.\n\n"
5723         "\t    The 'clear' parameter will clear the contents of a running\n"
5724         "\t    hist trigger and leave its current paused/active state\n"
5725         "\t    unchanged.\n\n"
5726         "\t    The enable_hist and disable_hist triggers can be used to\n"
5727         "\t    have one event conditionally start and stop another event's\n"
5728         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5729         "\t    the enable_event and disable_event triggers.\n\n"
5730         "\t    Hist trigger handlers and actions are executed whenever a\n"
5731         "\t    histogram entry is added or updated.  They take the form:\n\n"
5732         "\t        <handler>.<action>\n\n"
5733         "\t    The available handlers are:\n\n"
5734         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5735         "\t        onmax(var)               - invoke if var exceeds current max\n"
5736         "\t        onchange(var)            - invoke action if var changes\n\n"
5737         "\t    The available actions are:\n\n"
5738         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5739         "\t        save(field,...)                      - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5742 #endif
5743 #ifdef CONFIG_SYNTH_EVENTS
5744         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745         "\t  Write into this file to define/undefine new synthetic events.\n"
5746         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5747 #endif
5748 #endif
5749 ;
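/*
 * A few concrete probe definitions matching the formats documented in
 * readme_msg above (illustrative only; the symbol name is an example and
 * availability depends on CONFIG_KPROBE_EVENTS):
 *
 *      echo 'p:myprobe/open do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *      echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/tracing/kprobe_events
 *      echo '-:myprobe/open' >> /sys/kernel/tracing/kprobe_events
 */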
5750
5751 static ssize_t
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753                        size_t cnt, loff_t *ppos)
5754 {
5755         return simple_read_from_buffer(ubuf, cnt, ppos,
5756                                         readme_msg, strlen(readme_msg));
5757 }
5758
5759 static const struct file_operations tracing_readme_fops = {
5760         .open           = tracing_open_generic,
5761         .read           = tracing_readme_read,
5762         .llseek         = generic_file_llseek,
5763 };
5764
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5766 {
5767         int pid = ++(*pos);
5768
5769         return trace_find_tgid_ptr(pid);
5770 }
5771
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5773 {
5774         int pid = *pos;
5775
5776         return trace_find_tgid_ptr(pid);
5777 }
5778
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5780 {
5781 }
5782
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5784 {
5785         int *entry = (int *)v;
5786         int pid = entry - tgid_map;
5787         int tgid = *entry;
5788
5789         if (tgid == 0)
5790                 return SEQ_SKIP;
5791
5792         seq_printf(m, "%d %d\n", pid, tgid);
5793         return 0;
5794 }
5795
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797         .start          = saved_tgids_start,
5798         .stop           = saved_tgids_stop,
5799         .next           = saved_tgids_next,
5800         .show           = saved_tgids_show,
5801 };
5802
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5804 {
5805         int ret;
5806
5807         ret = tracing_check_open_get_tr(NULL);
5808         if (ret)
5809                 return ret;
5810
5811         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5812 }
5813
5814
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816         .open           = tracing_saved_tgids_open,
5817         .read           = seq_read,
5818         .llseek         = seq_lseek,
5819         .release        = seq_release,
5820 };
5821
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5823 {
5824         unsigned int *ptr = v;
5825
5826         if (*pos || m->count)
5827                 ptr++;
5828
5829         (*pos)++;
5830
5831         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5832              ptr++) {
5833                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5834                         continue;
5835
5836                 return ptr;
5837         }
5838
5839         return NULL;
5840 }
5841
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5843 {
5844         void *v;
5845         loff_t l = 0;
5846
5847         preempt_disable();
5848         arch_spin_lock(&trace_cmdline_lock);
5849
5850         v = &savedcmd->map_cmdline_to_pid[0];
5851         while (l <= *pos) {
5852                 v = saved_cmdlines_next(m, v, &l);
5853                 if (!v)
5854                         return NULL;
5855         }
5856
5857         return v;
5858 }
5859
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5861 {
5862         arch_spin_unlock(&trace_cmdline_lock);
5863         preempt_enable();
5864 }
5865
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5867 {
5868         char buf[TASK_COMM_LEN];
5869         unsigned int *pid = v;
5870
5871         __trace_find_cmdline(*pid, buf);
5872         seq_printf(m, "%d %s\n", *pid, buf);
5873         return 0;
5874 }
5875
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877         .start          = saved_cmdlines_start,
5878         .next           = saved_cmdlines_next,
5879         .stop           = saved_cmdlines_stop,
5880         .show           = saved_cmdlines_show,
5881 };
5882
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5884 {
5885         int ret;
5886
5887         ret = tracing_check_open_get_tr(NULL);
5888         if (ret)
5889                 return ret;
5890
5891         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5892 }
5893
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895         .open           = tracing_saved_cmdlines_open,
5896         .read           = seq_read,
5897         .llseek         = seq_lseek,
5898         .release        = seq_release,
5899 };
5900
5901 static ssize_t
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903                                  size_t cnt, loff_t *ppos)
5904 {
5905         char buf[64];
5906         int r;
5907
5908         preempt_disable();
5909         arch_spin_lock(&trace_cmdline_lock);
5910         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911         arch_spin_unlock(&trace_cmdline_lock);
5912         preempt_enable();
5913
5914         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5918 {
5919         kfree(s->saved_cmdlines);
5920         kfree(s->map_cmdline_to_pid);
5921         kfree(s);
5922 }
5923
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5925 {
5926         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5927
5928         s = kmalloc(sizeof(*s), GFP_KERNEL);
5929         if (!s)
5930                 return -ENOMEM;
5931
5932         if (allocate_cmdlines_buffer(val, s) < 0) {
5933                 kfree(s);
5934                 return -ENOMEM;
5935         }
5936
5937         preempt_disable();
5938         arch_spin_lock(&trace_cmdline_lock);
5939         savedcmd_temp = savedcmd;
5940         savedcmd = s;
5941         arch_spin_unlock(&trace_cmdline_lock);
5942         preempt_enable();
5943         free_saved_cmdlines_buffer(savedcmd_temp);
5944
5945         return 0;
5946 }
5947
5948 static ssize_t
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950                                   size_t cnt, loff_t *ppos)
5951 {
5952         unsigned long val;
5953         int ret;
5954
5955         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5956         if (ret)
5957                 return ret;
5958
5959         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5960         if (!val || val > PID_MAX_DEFAULT)
5961                 return -EINVAL;
5962
5963         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5964         if (ret < 0)
5965                 return ret;
5966
5967         *ppos += cnt;
5968
5969         return cnt;
5970 }
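/*
 * User-facing example for the read/write handlers above (illustrative only;
 * assumes tracefs is mounted at /sys/kernel/tracing). Growing the cache
 * keeps comms for more PIDs at the cost of a larger allocation:
 *
 *      cat /sys/kernel/tracing/saved_cmdlines_size
 *      echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 */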
5971
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973         .open           = tracing_open_generic,
5974         .read           = tracing_saved_cmdlines_size_read,
5975         .write          = tracing_saved_cmdlines_size_write,
5976 };
5977
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5981 {
5982         if (!ptr->map.eval_string) {
5983                 if (ptr->tail.next) {
5984                         ptr = ptr->tail.next;
5985                         /* Set ptr to the next real item (skip head) */
5986                         ptr++;
5987                 } else
5988                         return NULL;
5989         }
5990         return ptr;
5991 }
5992
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5994 {
5995         union trace_eval_map_item *ptr = v;
5996
5997         /*
5998          * Paranoid! If ptr points to end, we don't want to increment past it.
5999          * This really should never happen.
6000          */
6001         (*pos)++;
6002         ptr = update_eval_map(ptr);
6003         if (WARN_ON_ONCE(!ptr))
6004                 return NULL;
6005
6006         ptr++;
6007         ptr = update_eval_map(ptr);
6008
6009         return ptr;
6010 }
6011
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6013 {
6014         union trace_eval_map_item *v;
6015         loff_t l = 0;
6016
6017         mutex_lock(&trace_eval_mutex);
6018
6019         v = trace_eval_maps;
6020         if (v)
6021                 v++;
6022
6023         while (v && l < *pos) {
6024                 v = eval_map_next(m, v, &l);
6025         }
6026
6027         return v;
6028 }
6029
6030 static void eval_map_stop(struct seq_file *m, void *v)
6031 {
6032         mutex_unlock(&trace_eval_mutex);
6033 }
6034
6035 static int eval_map_show(struct seq_file *m, void *v)
6036 {
6037         union trace_eval_map_item *ptr = v;
6038
6039         seq_printf(m, "%s %ld (%s)\n",
6040                    ptr->map.eval_string, ptr->map.eval_value,
6041                    ptr->map.system);
6042
6043         return 0;
6044 }
6045
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047         .start          = eval_map_start,
6048         .next           = eval_map_next,
6049         .stop           = eval_map_stop,
6050         .show           = eval_map_show,
6051 };
6052
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6054 {
6055         int ret;
6056
6057         ret = tracing_check_open_get_tr(NULL);
6058         if (ret)
6059                 return ret;
6060
6061         return seq_open(filp, &tracing_eval_map_seq_ops);
6062 }
6063
6064 static const struct file_operations tracing_eval_map_fops = {
6065         .open           = tracing_eval_map_open,
6066         .read           = seq_read,
6067         .llseek         = seq_lseek,
6068         .release        = seq_release,
6069 };
6070
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6073 {
6074         /* Return tail of array given the head */
6075         return ptr + ptr->head.length + 1;
6076 }
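/*
 * Layout sketch of one chunk built by trace_insert_eval_map_file() below
 * (an annotation derived from the code, not a new structure):
 *
 *      map_array[0]            head  (mod, length = len)
 *      map_array[1..len]       the len eval maps copied from *start
 *      map_array[len + 1]      tail  (zeroed; tail.next chains to the next chunk)
 *
 * trace_eval_jmp_to_tail() therefore skips "length + 1" items from the head
 * to land on the tail entry.
 */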
6077
6078 static void
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6080                            int len)
6081 {
6082         struct trace_eval_map **stop;
6083         struct trace_eval_map **map;
6084         union trace_eval_map_item *map_array;
6085         union trace_eval_map_item *ptr;
6086
6087         stop = start + len;
6088
6089         /*
6090          * The trace_eval_maps contains the map plus a head and tail item,
6091          * where the head holds the module and length of array, and the
6092          * tail holds a pointer to the next list.
6093          */
6094         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6095         if (!map_array) {
6096                 pr_warn("Unable to allocate trace eval mapping\n");
6097                 return;
6098         }
6099
6100         mutex_lock(&trace_eval_mutex);
6101
6102         if (!trace_eval_maps)
6103                 trace_eval_maps = map_array;
6104         else {
6105                 ptr = trace_eval_maps;
6106                 for (;;) {
6107                         ptr = trace_eval_jmp_to_tail(ptr);
6108                         if (!ptr->tail.next)
6109                                 break;
6110                         ptr = ptr->tail.next;
6111
6112                 }
6113                 ptr->tail.next = map_array;
6114         }
6115         map_array->head.mod = mod;
6116         map_array->head.length = len;
6117         map_array++;
6118
6119         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120                 map_array->map = **map;
6121                 map_array++;
6122         }
6123         memset(map_array, 0, sizeof(*map_array));
6124
6125         mutex_unlock(&trace_eval_mutex);
6126 }
6127
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6129 {
6130         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131                           NULL, &tracing_eval_map_fops);
6132 }
6133
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6135 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6136 static inline void trace_insert_eval_map_file(struct module *mod,
6137                               struct trace_eval_map **start, int len) { }
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6139
6140 static void trace_insert_eval_map(struct module *mod,
6141                                   struct trace_eval_map **start, int len)
6142 {
6143         struct trace_eval_map **map;
6144
6145         if (len <= 0)
6146                 return;
6147
6148         map = start;
6149
6150         trace_event_eval_update(map, len);
6151
6152         trace_insert_eval_map_file(mod, start, len);
6153 }
6154
6155 static ssize_t
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157                        size_t cnt, loff_t *ppos)
6158 {
6159         struct trace_array *tr = filp->private_data;
6160         char buf[MAX_TRACER_SIZE+2];
6161         int r;
6162
6163         mutex_lock(&trace_types_lock);
6164         r = sprintf(buf, "%s\n", tr->current_trace->name);
6165         mutex_unlock(&trace_types_lock);
6166
6167         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6168 }
6169
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6171 {
6172         tracing_reset_online_cpus(&tr->array_buffer);
6173         return t->init(tr);
6174 }
6175
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6177 {
6178         int cpu;
6179
6180         for_each_tracing_cpu(cpu)
6181                 per_cpu_ptr(buf->data, cpu)->entries = val;
6182 }
6183
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187                                         struct array_buffer *size_buf, int cpu_id)
6188 {
6189         int cpu, ret = 0;
6190
6191         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192                 for_each_tracing_cpu(cpu) {
6193                         ret = ring_buffer_resize(trace_buf->buffer,
6194                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6195                         if (ret < 0)
6196                                 break;
6197                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6198                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6199                 }
6200         } else {
6201                 ret = ring_buffer_resize(trace_buf->buffer,
6202                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6203                 if (ret == 0)
6204                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6206         }
6207
6208         return ret;
6209 }
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6211
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213                                         unsigned long size, int cpu)
6214 {
6215         int ret;
6216
6217         /*
6218          * If kernel or user changes the size of the ring buffer
6219          * we use the size that was given, and we can forget about
6220          * expanding it later.
6221          */
6222         ring_buffer_expanded = true;
6223
6224         /* May be called before buffers are initialized */
6225         if (!tr->array_buffer.buffer)
6226                 return 0;
6227
6228         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6229         if (ret < 0)
6230                 return ret;
6231
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234             !tr->current_trace->use_max_tr)
6235                 goto out;
6236
6237         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6238         if (ret < 0) {
6239                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240                                                      &tr->array_buffer, cpu);
6241                 if (r < 0) {
6242                         /*
6243                          * AARGH! We are left with a different
6244                          * sized max buffer!
6245                          * The max buffer is our "snapshot" buffer.
6246                          * When a tracer needs a snapshot (one of the
6247                          * latency tracers), it swaps the max buffer
6248                          * with the saved snapshot. We succeeded in
6249                          * updating the size of the main buffer, but failed
6250                          * to update the size of the max buffer. And when we
6251                          * tried to reset the main buffer to its original
6252                          * size, we failed there too. This is very unlikely
6253                          * to happen, but if it does, warn and kill all
6254                          * tracing.
6255                          */
6256                         WARN_ON(1);
6257                         tracing_disabled = 1;
6258                 }
6259                 return ret;
6260         }
6261
6262         if (cpu == RING_BUFFER_ALL_CPUS)
6263                 set_buffer_entries(&tr->max_buffer, size);
6264         else
6265                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6266
6267  out:
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6269
6270         if (cpu == RING_BUFFER_ALL_CPUS)
6271                 set_buffer_entries(&tr->array_buffer, size);
6272         else
6273                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6274
6275         return ret;
6276 }
6277
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279                                   unsigned long size, int cpu_id)
6280 {
6281         int ret;
6282
6283         mutex_lock(&trace_types_lock);
6284
6285         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286                 /* make sure this cpu is enabled in the mask */
6287                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6288                         ret = -EINVAL;
6289                         goto out;
6290                 }
6291         }
6292
6293         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6294         if (ret < 0)
6295                 ret = -ENOMEM;
6296
6297 out:
6298         mutex_unlock(&trace_types_lock);
6299
6300         return ret;
6301 }
6302
6303
6304 /**
6305  * tracing_update_buffers - used by tracing facility to expand ring buffers
6306  *
6307  * To save memory when tracing is configured in but never used, the
6308  * ring buffers are initially set to a minimum size. Once a user starts
6309  * to use the tracing facility, they need to grow to their default
6310  * size.
6311  *
6312  * This function is to be called when a tracer is about to be used.
6313  */
6314 int tracing_update_buffers(void)
6315 {
6316         int ret = 0;
6317
6318         mutex_lock(&trace_types_lock);
6319         if (!ring_buffer_expanded)
6320                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321                                                 RING_BUFFER_ALL_CPUS);
6322         mutex_unlock(&trace_types_lock);
6323
6324         return ret;
6325 }
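/*
 * Typical caller pattern (sketch only, mirroring how this file itself uses
 * the function, e.g. in tracing_snapshot_write()):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * On success the ring buffers have at least their default size.
 */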
6326
6327 struct trace_option_dentry;
6328
6329 static void
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6331
6332 /*
6333  * Used to clear out the tracer before deletion of an instance.
6334  * Must have trace_types_lock held.
6335  */
6336 static void tracing_set_nop(struct trace_array *tr)
6337 {
6338         if (tr->current_trace == &nop_trace)
6339                 return;
6340
6341         tr->current_trace->enabled--;
6342
6343         if (tr->current_trace->reset)
6344                 tr->current_trace->reset(tr);
6345
6346         tr->current_trace = &nop_trace;
6347 }
6348
6349 static bool tracer_options_updated;
6350
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6352 {
6353         /* Only enable if the directory has been created already. */
6354         if (!tr->dir)
6355                 return;
6356
6357         /* Only create trace option files after update_tracer_options() finishes */
6358         if (!tracer_options_updated)
6359                 return;
6360
6361         create_trace_option_files(tr, t);
6362 }
6363
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6365 {
6366         struct tracer *t;
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6368         bool had_max_tr;
6369 #endif
6370         int ret = 0;
6371
6372         mutex_lock(&trace_types_lock);
6373
6374         if (!ring_buffer_expanded) {
6375                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376                                                 RING_BUFFER_ALL_CPUS);
6377                 if (ret < 0)
6378                         goto out;
6379                 ret = 0;
6380         }
6381
6382         for (t = trace_types; t; t = t->next) {
6383                 if (strcmp(t->name, buf) == 0)
6384                         break;
6385         }
6386         if (!t) {
6387                 ret = -EINVAL;
6388                 goto out;
6389         }
6390         if (t == tr->current_trace)
6391                 goto out;
6392
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394         if (t->use_max_tr) {
6395                 local_irq_disable();
6396                 arch_spin_lock(&tr->max_lock);
6397                 if (tr->cond_snapshot)
6398                         ret = -EBUSY;
6399                 arch_spin_unlock(&tr->max_lock);
6400                 local_irq_enable();
6401                 if (ret)
6402                         goto out;
6403         }
6404 #endif
6405         /* Some tracers won't work on kernel command line */
6406         if (system_state < SYSTEM_RUNNING && t->noboot) {
6407                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6408                         t->name);
6409                 goto out;
6410         }
6411
6412         /* Some tracers are only allowed for the top level buffer */
6413         if (!trace_ok_for_array(t, tr)) {
6414                 ret = -EINVAL;
6415                 goto out;
6416         }
6417
6418         /* If trace pipe files are being read, we can't change the tracer */
6419         if (tr->trace_ref) {
6420                 ret = -EBUSY;
6421                 goto out;
6422         }
6423
6424         trace_branch_disable();
6425
6426         tr->current_trace->enabled--;
6427
6428         if (tr->current_trace->reset)
6429                 tr->current_trace->reset(tr);
6430
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432         had_max_tr = tr->current_trace->use_max_tr;
6433
6434         /* Current trace needs to be nop_trace before synchronize_rcu */
6435         tr->current_trace = &nop_trace;
6436
6437         if (had_max_tr && !t->use_max_tr) {
6438                 /*
6439                  * We need to make sure that the update_max_tr sees that
6440                  * current_trace changed to nop_trace to keep it from
6441                  * swapping the buffers after we resize it.
6442                  * update_max_tr() is called with interrupts disabled,
6443                  * so a synchronize_rcu() is sufficient.
6444                  */
6445                 synchronize_rcu();
6446                 free_snapshot(tr);
6447         }
6448
6449         if (t->use_max_tr && !tr->allocated_snapshot) {
6450                 ret = tracing_alloc_snapshot_instance(tr);
6451                 if (ret < 0)
6452                         goto out;
6453         }
6454 #else
6455         tr->current_trace = &nop_trace;
6456 #endif
6457
6458         if (t->init) {
6459                 ret = tracer_init(t, tr);
6460                 if (ret)
6461                         goto out;
6462         }
6463
6464         tr->current_trace = t;
6465         tr->current_trace->enabled++;
6466         trace_branch_enable(tr);
6467  out:
6468         mutex_unlock(&trace_types_lock);
6469
6470         return ret;
6471 }
6472
6473 static ssize_t
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475                         size_t cnt, loff_t *ppos)
6476 {
6477         struct trace_array *tr = filp->private_data;
6478         char buf[MAX_TRACER_SIZE+1];
6479         char *name;
6480         size_t ret;
6481         int err;
6482
6483         ret = cnt;
6484
6485         if (cnt > MAX_TRACER_SIZE)
6486                 cnt = MAX_TRACER_SIZE;
6487
6488         if (copy_from_user(buf, ubuf, cnt))
6489                 return -EFAULT;
6490
6491         buf[cnt] = 0;
6492
6493         name = strim(buf);
6494
6495         err = tracing_set_tracer(tr, name);
6496         if (err)
6497                 return err;
6498
6499         *ppos += ret;
6500
6501         return ret;
6502 }
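/*
 * Illustrative example only (user-space side; the usual tracefs mount point
 * is assumed): tracing_set_trace_write() services a plain write to the
 * "current_tracer" file, e.g.:
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "nop\n", 4);	// trailing '\n' is removed by strim()
 *		close(fd);
 *	}
 */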
6503
6504 static ssize_t
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506                    size_t cnt, loff_t *ppos)
6507 {
6508         char buf[64];
6509         int r;
6510
6511         r = snprintf(buf, sizeof(buf), "%ld\n",
6512                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513         if (r > sizeof(buf))
6514                 r = sizeof(buf);
6515         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6516 }
6517
6518 static ssize_t
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520                     size_t cnt, loff_t *ppos)
6521 {
6522         unsigned long val;
6523         int ret;
6524
6525         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6526         if (ret)
6527                 return ret;
6528
6529         *ptr = val * 1000;
6530
6531         return cnt;
6532 }
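/*
 * Worked example: the written value is taken in microseconds and stored in
 * nanoseconds, so writing "50" results in *ptr = 50 * 1000 = 50000 ns.
 * tracing_nsecs_read() converts back with nsecs_to_usecs(), printing -1
 * for the "unset" value.
 */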
6533
6534 static ssize_t
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536                     size_t cnt, loff_t *ppos)
6537 {
6538         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6539 }
6540
6541 static ssize_t
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543                      size_t cnt, loff_t *ppos)
6544 {
6545         struct trace_array *tr = filp->private_data;
6546         int ret;
6547
6548         mutex_lock(&trace_types_lock);
6549         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6550         if (ret < 0)
6551                 goto out;
6552
6553         if (tr->current_trace->update_thresh) {
6554                 ret = tr->current_trace->update_thresh(tr);
6555                 if (ret < 0)
6556                         goto out;
6557         }
6558
6559         ret = cnt;
6560 out:
6561         mutex_unlock(&trace_types_lock);
6562
6563         return ret;
6564 }
6565
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6567
6568 static ssize_t
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570                      size_t cnt, loff_t *ppos)
6571 {
6572         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6573 }
6574
6575 static ssize_t
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577                       size_t cnt, loff_t *ppos)
6578 {
6579         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6580 }
6581
6582 #endif
6583
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6585 {
6586         struct trace_array *tr = inode->i_private;
6587         struct trace_iterator *iter;
6588         int ret;
6589
6590         ret = tracing_check_open_get_tr(tr);
6591         if (ret)
6592                 return ret;
6593
6594         mutex_lock(&trace_types_lock);
6595
6596         /* create a buffer to store the information to pass to userspace */
6597         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6598         if (!iter) {
6599                 ret = -ENOMEM;
6600                 __trace_array_put(tr);
6601                 goto out;
6602         }
6603
6604         trace_seq_init(&iter->seq);
6605         iter->trace = tr->current_trace;
6606
6607         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6608                 ret = -ENOMEM;
6609                 goto fail;
6610         }
6611
6612         /* trace pipe does not show start of buffer */
6613         cpumask_setall(iter->started);
6614
6615         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6617
6618         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619         if (trace_clocks[tr->clock_id].in_ns)
6620                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6621
6622         iter->tr = tr;
6623         iter->array_buffer = &tr->array_buffer;
6624         iter->cpu_file = tracing_get_cpu(inode);
6625         mutex_init(&iter->mutex);
6626         filp->private_data = iter;
6627
6628         if (iter->trace->pipe_open)
6629                 iter->trace->pipe_open(iter);
6630
6631         nonseekable_open(inode, filp);
6632
6633         tr->trace_ref++;
6634 out:
6635         mutex_unlock(&trace_types_lock);
6636         return ret;
6637
6638 fail:
6639         kfree(iter);
6640         __trace_array_put(tr);
6641         mutex_unlock(&trace_types_lock);
6642         return ret;
6643 }
6644
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6646 {
6647         struct trace_iterator *iter = file->private_data;
6648         struct trace_array *tr = inode->i_private;
6649
6650         mutex_lock(&trace_types_lock);
6651
6652         tr->trace_ref--;
6653
6654         if (iter->trace->pipe_close)
6655                 iter->trace->pipe_close(iter);
6656
6657         mutex_unlock(&trace_types_lock);
6658
6659         free_cpumask_var(iter->started);
6660         kfree(iter->fmt);
6661         mutex_destroy(&iter->mutex);
6662         kfree(iter);
6663
6664         trace_array_put(tr);
6665
6666         return 0;
6667 }
6668
6669 static __poll_t
6670 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6671 {
6672         struct trace_array *tr = iter->tr;
6673
6674         /* Iterators are static; they should be either filled or empty */
6675         if (trace_buffer_iter(iter, iter->cpu_file))
6676                 return EPOLLIN | EPOLLRDNORM;
6677
6678         if (tr->trace_flags & TRACE_ITER_BLOCK)
6679                 /*
6680                  * Always select as readable when in blocking mode
6681                  */
6682                 return EPOLLIN | EPOLLRDNORM;
6683         else
6684                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6685                                              filp, poll_table, iter->tr->buffer_percent);
6686 }
6687
6688 static __poll_t
6689 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6690 {
6691         struct trace_iterator *iter = filp->private_data;
6692
6693         return trace_poll(iter, filp, poll_table);
6694 }
6695
6696 /* Must be called with iter->mutex held. */
6697 static int tracing_wait_pipe(struct file *filp)
6698 {
6699         struct trace_iterator *iter = filp->private_data;
6700         int ret;
6701
6702         while (trace_empty(iter)) {
6703
6704                 if ((filp->f_flags & O_NONBLOCK)) {
6705                         return -EAGAIN;
6706                 }
6707
6708                 /*
6709                  * We block until we read something and tracing is disabled.
6710                  * We still block if tracing is disabled, but we have never
6711                  * read anything. This allows a user to cat this file, and
6712                  * then enable tracing. But after we have read something,
6713                  * we give an EOF when tracing is again disabled.
6714                  *
6715                  * iter->pos will be 0 if we haven't read anything.
6716                  */
6717                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6718                         break;
6719
6720                 mutex_unlock(&iter->mutex);
6721
6722                 ret = wait_on_pipe(iter, 0);
6723
6724                 mutex_lock(&iter->mutex);
6725
6726                 if (ret)
6727                         return ret;
6728         }
6729
6730         return 1;
6731 }
6732
6733 /*
6734  * Consumer reader.
6735  */
6736 static ssize_t
6737 tracing_read_pipe(struct file *filp, char __user *ubuf,
6738                   size_t cnt, loff_t *ppos)
6739 {
6740         struct trace_iterator *iter = filp->private_data;
6741         ssize_t sret;
6742
6743         /*
6744          * Avoid more than one consumer on a single file descriptor.
6745          * This is just a matter of trace coherency; the ring buffer itself
6746          * is protected.
6747          */
6748         mutex_lock(&iter->mutex);
6749
6750         /* return any leftover data */
6751         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6752         if (sret != -EBUSY)
6753                 goto out;
6754
6755         trace_seq_init(&iter->seq);
6756
6757         if (iter->trace->read) {
6758                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6759                 if (sret)
6760                         goto out;
6761         }
6762
6763 waitagain:
6764         sret = tracing_wait_pipe(filp);
6765         if (sret <= 0)
6766                 goto out;
6767
6768         /* stop when tracing is finished */
6769         if (trace_empty(iter)) {
6770                 sret = 0;
6771                 goto out;
6772         }
6773
6774         if (cnt >= PAGE_SIZE)
6775                 cnt = PAGE_SIZE - 1;
6776
6777         /* reset all but tr, trace, and overruns */
6778         trace_iterator_reset(iter);
6779         cpumask_clear(iter->started);
6780         trace_seq_init(&iter->seq);
6781
6782         trace_event_read_lock();
6783         trace_access_lock(iter->cpu_file);
6784         while (trace_find_next_entry_inc(iter) != NULL) {
6785                 enum print_line_t ret;
6786                 int save_len = iter->seq.seq.len;
6787
6788                 ret = print_trace_line(iter);
6789                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6790                         /* don't print partial lines */
6791                         iter->seq.seq.len = save_len;
6792                         break;
6793                 }
6794                 if (ret != TRACE_TYPE_NO_CONSUME)
6795                         trace_consume(iter);
6796
6797                 if (trace_seq_used(&iter->seq) >= cnt)
6798                         break;
6799
6800                 /*
6801                  * Setting the full flag means we reached the trace_seq buffer
6802                  * size and should have left via the partial-line condition above.
6803                  * One of the trace_seq_* functions is not being used properly.
6804                  */
6805                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6806                           iter->ent->type);
6807         }
6808         trace_access_unlock(iter->cpu_file);
6809         trace_event_read_unlock();
6810
6811         /* Now copy what we have to the user */
6812         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6813         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6814                 trace_seq_init(&iter->seq);
6815
6816         /*
6817          * If there was nothing to send to user, in spite of consuming trace
6818          * entries, go back to wait for more entries.
6819          */
6820         if (sret == -EBUSY)
6821                 goto waitagain;
6822
6823 out:
6824         mutex_unlock(&iter->mutex);
6825
6826         return sret;
6827 }
6828
6829 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6830                                      unsigned int idx)
6831 {
6832         __free_page(spd->pages[idx]);
6833 }
6834
6835 static size_t
6836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6837 {
6838         size_t count;
6839         int save_len;
6840         int ret;
6841
6842         /* Seq buffer is page-sized, exactly what we need. */
6843         for (;;) {
6844                 save_len = iter->seq.seq.len;
6845                 ret = print_trace_line(iter);
6846
6847                 if (trace_seq_has_overflowed(&iter->seq)) {
6848                         iter->seq.seq.len = save_len;
6849                         break;
6850                 }
6851
6852                 /*
6853                  * This should not be hit, because it should only
6854                  * be set if the iter->seq overflowed. But check it
6855                  * anyway to be safe.
6856                  */
6857                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6858                         iter->seq.seq.len = save_len;
6859                         break;
6860                 }
6861
6862                 count = trace_seq_used(&iter->seq) - save_len;
6863                 if (rem < count) {
6864                         rem = 0;
6865                         iter->seq.seq.len = save_len;
6866                         break;
6867                 }
6868
6869                 if (ret != TRACE_TYPE_NO_CONSUME)
6870                         trace_consume(iter);
6871                 rem -= count;
6872                 if (!trace_find_next_entry_inc(iter))   {
6873                         rem = 0;
6874                         iter->ent = NULL;
6875                         break;
6876                 }
6877         }
6878
6879         return rem;
6880 }
6881
6882 static ssize_t tracing_splice_read_pipe(struct file *filp,
6883                                         loff_t *ppos,
6884                                         struct pipe_inode_info *pipe,
6885                                         size_t len,
6886                                         unsigned int flags)
6887 {
6888         struct page *pages_def[PIPE_DEF_BUFFERS];
6889         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6890         struct trace_iterator *iter = filp->private_data;
6891         struct splice_pipe_desc spd = {
6892                 .pages          = pages_def,
6893                 .partial        = partial_def,
6894                 .nr_pages       = 0, /* This gets updated below. */
6895                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6896                 .ops            = &default_pipe_buf_ops,
6897                 .spd_release    = tracing_spd_release_pipe,
6898         };
6899         ssize_t ret;
6900         size_t rem;
6901         unsigned int i;
6902
6903         if (splice_grow_spd(pipe, &spd))
6904                 return -ENOMEM;
6905
6906         mutex_lock(&iter->mutex);
6907
6908         if (iter->trace->splice_read) {
6909                 ret = iter->trace->splice_read(iter, filp,
6910                                                ppos, pipe, len, flags);
6911                 if (ret)
6912                         goto out_err;
6913         }
6914
6915         ret = tracing_wait_pipe(filp);
6916         if (ret <= 0)
6917                 goto out_err;
6918
6919         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6920                 ret = -EFAULT;
6921                 goto out_err;
6922         }
6923
6924         trace_event_read_lock();
6925         trace_access_lock(iter->cpu_file);
6926
6927         /* Fill as many pages as possible. */
6928         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6929                 spd.pages[i] = alloc_page(GFP_KERNEL);
6930                 if (!spd.pages[i])
6931                         break;
6932
6933                 rem = tracing_fill_pipe_page(rem, iter);
6934
6935                 /* Copy the data into the page, so we can start over. */
6936                 ret = trace_seq_to_buffer(&iter->seq,
6937                                           page_address(spd.pages[i]),
6938                                           trace_seq_used(&iter->seq));
6939                 if (ret < 0) {
6940                         __free_page(spd.pages[i]);
6941                         break;
6942                 }
6943                 spd.partial[i].offset = 0;
6944                 spd.partial[i].len = trace_seq_used(&iter->seq);
6945
6946                 trace_seq_init(&iter->seq);
6947         }
6948
6949         trace_access_unlock(iter->cpu_file);
6950         trace_event_read_unlock();
6951         mutex_unlock(&iter->mutex);
6952
6953         spd.nr_pages = i;
6954
6955         if (i)
6956                 ret = splice_to_pipe(pipe, &spd);
6957         else
6958                 ret = 0;
6959 out:
6960         splice_shrink_spd(&spd);
6961         return ret;
6962
6963 out_err:
6964         mutex_unlock(&iter->mutex);
6965         goto out;
6966 }
6967
6968 static ssize_t
6969 tracing_entries_read(struct file *filp, char __user *ubuf,
6970                      size_t cnt, loff_t *ppos)
6971 {
6972         struct inode *inode = file_inode(filp);
6973         struct trace_array *tr = inode->i_private;
6974         int cpu = tracing_get_cpu(inode);
6975         char buf[64];
6976         int r = 0;
6977         ssize_t ret;
6978
6979         mutex_lock(&trace_types_lock);
6980
6981         if (cpu == RING_BUFFER_ALL_CPUS) {
6982                 int cpu, buf_size_same;
6983                 unsigned long size;
6984
6985                 size = 0;
6986                 buf_size_same = 1;
6987                 /* check if all cpu buffer sizes are the same */
6988                 for_each_tracing_cpu(cpu) {
6989                         /* fill in the size from first enabled cpu */
6990                         if (size == 0)
6991                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6992                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6993                                 buf_size_same = 0;
6994                                 break;
6995                         }
6996                 }
6997
6998                 if (buf_size_same) {
6999                         if (!ring_buffer_expanded)
7000                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7001                                             size >> 10,
7002                                             trace_buf_size >> 10);
7003                         else
7004                                 r = sprintf(buf, "%lu\n", size >> 10);
7005                 } else
7006                         r = sprintf(buf, "X\n");
7007         } else
7008                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7009
7010         mutex_unlock(&trace_types_lock);
7011
7012         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7013         return ret;
7014 }
7015
7016 static ssize_t
7017 tracing_entries_write(struct file *filp, const char __user *ubuf,
7018                       size_t cnt, loff_t *ppos)
7019 {
7020         struct inode *inode = file_inode(filp);
7021         struct trace_array *tr = inode->i_private;
7022         unsigned long val;
7023         int ret;
7024
7025         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7026         if (ret)
7027                 return ret;
7028
7029         /* must have at least 1 entry */
7030         if (!val)
7031                 return -EINVAL;
7032
7033         /* value is in KB */
7034         val <<= 10;
7035         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7036         if (ret < 0)
7037                 return ret;
7038
7039         *ppos += cnt;
7040
7041         return cnt;
7042 }
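/*
 * Worked example (sketch; file names as normally exposed by tracefs): the
 * value written to "buffer_size_kb" is in kilobytes, so writing "1408"
 * gives val = 1408 and val <<= 10 makes it 1441792 bytes, applied to every
 * CPU or only to the CPU selected via per_cpu/cpuN/buffer_size_kb.
 */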
7043
7044 static ssize_t
7045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7046                                 size_t cnt, loff_t *ppos)
7047 {
7048         struct trace_array *tr = filp->private_data;
7049         char buf[64];
7050         int r, cpu;
7051         unsigned long size = 0, expanded_size = 0;
7052
7053         mutex_lock(&trace_types_lock);
7054         for_each_tracing_cpu(cpu) {
7055                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7056                 if (!ring_buffer_expanded)
7057                         expanded_size += trace_buf_size >> 10;
7058         }
7059         if (ring_buffer_expanded)
7060                 r = sprintf(buf, "%lu\n", size);
7061         else
7062                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7063         mutex_unlock(&trace_types_lock);
7064
7065         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7066 }
7067
7068 static ssize_t
7069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7070                           size_t cnt, loff_t *ppos)
7071 {
7072         /*
7073          * There is no need to read what the user has written; this function
7074          * just makes sure that "echo" into the file does not return an error.
7075          */
7076
7077         *ppos += cnt;
7078
7079         return cnt;
7080 }
7081
7082 static int
7083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7084 {
7085         struct trace_array *tr = inode->i_private;
7086
7087         /* disable tracing? */
7088         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7089                 tracer_tracing_off(tr);
7090         /* resize the ring buffer to 0 */
7091         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7092
7093         trace_array_put(tr);
7094
7095         return 0;
7096 }
7097
7098 static ssize_t
7099 tracing_mark_write(struct file *filp, const char __user *ubuf,
7100                                         size_t cnt, loff_t *fpos)
7101 {
7102         struct trace_array *tr = filp->private_data;
7103         struct ring_buffer_event *event;
7104         enum event_trigger_type tt = ETT_NONE;
7105         struct trace_buffer *buffer;
7106         struct print_entry *entry;
7107         ssize_t written;
7108         int size;
7109         int len;
7110
7111 /* Used in tracing_mark_raw_write() as well */
7112 #define FAULTED_STR "<faulted>"
7113 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7114
7115         if (tracing_disabled)
7116                 return -EINVAL;
7117
7118         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7119                 return -EINVAL;
7120
7121         if (cnt > TRACE_BUF_SIZE)
7122                 cnt = TRACE_BUF_SIZE;
7123
7124         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7125
7126         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7127
7128         /* If less than "<faulted>", then make sure we can still add that */
7129         if (cnt < FAULTED_SIZE)
7130                 size += FAULTED_SIZE - cnt;
7131
7132         buffer = tr->array_buffer.buffer;
7133         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7134                                             tracing_gen_ctx());
7135         if (unlikely(!event))
7136                 /* Ring buffer disabled, return as if not open for write */
7137                 return -EBADF;
7138
7139         entry = ring_buffer_event_data(event);
7140         entry->ip = _THIS_IP_;
7141
7142         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7143         if (len) {
7144                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7145                 cnt = FAULTED_SIZE;
7146                 written = -EFAULT;
7147         } else
7148                 written = cnt;
7149
7150         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7151                 /* do not add \n before testing triggers, but add \0 */
7152                 entry->buf[cnt] = '\0';
7153                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7154         }
7155
7156         if (entry->buf[cnt - 1] != '\n') {
7157                 entry->buf[cnt] = '\n';
7158                 entry->buf[cnt + 1] = '\0';
7159         } else
7160                 entry->buf[cnt] = '\0';
7161
7162         if (static_branch_unlikely(&trace_marker_exports_enabled))
7163                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7164         __buffer_unlock_commit(buffer, event);
7165
7166         if (tt)
7167                 event_triggers_post_call(tr->trace_marker_file, tt);
7168
7169         return written;
7170 }
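/*
 * Illustrative user-space usage only (tracefs path assumed): this handler
 * backs the "trace_marker" file, so an application can annotate the trace
 * with something like:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0)
 *		write(fd, "hit checkpoint A", 16);
 *
 * The string becomes a TRACE_PRINT event; a '\n' is appended above when the
 * writer did not supply one.
 */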
7171
7172 /* Limit it for now to 3K (including tag) */
7173 #define RAW_DATA_MAX_SIZE (1024*3)
7174
7175 static ssize_t
7176 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7177                                         size_t cnt, loff_t *fpos)
7178 {
7179         struct trace_array *tr = filp->private_data;
7180         struct ring_buffer_event *event;
7181         struct trace_buffer *buffer;
7182         struct raw_data_entry *entry;
7183         ssize_t written;
7184         int size;
7185         int len;
7186
7187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7188
7189         if (tracing_disabled)
7190                 return -EINVAL;
7191
7192         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7193                 return -EINVAL;
7194
7195         /* The marker must at least have a tag id */
7196         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7197                 return -EINVAL;
7198
7199         if (cnt > TRACE_BUF_SIZE)
7200                 cnt = TRACE_BUF_SIZE;
7201
7202         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7203
7204         size = sizeof(*entry) + cnt;
7205         if (cnt < FAULT_SIZE_ID)
7206                 size += FAULT_SIZE_ID - cnt;
7207
7208         buffer = tr->array_buffer.buffer;
7209         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7210                                             tracing_gen_ctx());
7211         if (!event)
7212                 /* Ring buffer disabled, return as if not open for write */
7213                 return -EBADF;
7214
7215         entry = ring_buffer_event_data(event);
7216
7217         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7218         if (len) {
7219                 entry->id = -1;
7220                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7221                 written = -EFAULT;
7222         } else
7223                 written = cnt;
7224
7225         __buffer_unlock_commit(buffer, event);
7226
7227         return written;
7228 }
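/*
 * Payload sketch for "trace_marker_raw" (the user-side struct below is
 * hypothetical): the first sizeof(unsigned int) bytes are the tag id, the
 * rest is opaque binary data, e.g.:
 *
 *	struct {
 *		unsigned int	id;
 *		char		data[8];
 *	} raw = { .id = 0x1234, .data = "payload" };
 *
 *	write(fd, &raw, sizeof(raw));	// fd open on trace_marker_raw
 */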
7229
7230 static int tracing_clock_show(struct seq_file *m, void *v)
7231 {
7232         struct trace_array *tr = m->private;
7233         int i;
7234
7235         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7236                 seq_printf(m,
7237                         "%s%s%s%s", i ? " " : "",
7238                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7239                         i == tr->clock_id ? "]" : "");
7240         seq_putc(m, '\n');
7241
7242         return 0;
7243 }
7244
7245 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7246 {
7247         int i;
7248
7249         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7250                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7251                         break;
7252         }
7253         if (i == ARRAY_SIZE(trace_clocks))
7254                 return -EINVAL;
7255
7256         mutex_lock(&trace_types_lock);
7257
7258         tr->clock_id = i;
7259
7260         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7261
7262         /*
7263          * New clock may not be consistent with the previous clock.
7264          * Reset the buffer so that it doesn't have incomparable timestamps.
7265          */
7266         tracing_reset_online_cpus(&tr->array_buffer);
7267
7268 #ifdef CONFIG_TRACER_MAX_TRACE
7269         if (tr->max_buffer.buffer)
7270                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7271         tracing_reset_online_cpus(&tr->max_buffer);
7272 #endif
7273
7274         mutex_unlock(&trace_types_lock);
7275
7276         return 0;
7277 }
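/*
 * Example only: tracing_set_clock() is what a write to the "trace_clock"
 * file ends up calling. Kernel code holding a trace_array can also select
 * a clock directly, assuming the name exists in trace_clocks[]:
 *
 *	ret = tracing_set_clock(tr, "mono");
 *	if (ret)
 *		pr_warn("unknown trace clock\n");
 */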
7278
7279 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7280                                    size_t cnt, loff_t *fpos)
7281 {
7282         struct seq_file *m = filp->private_data;
7283         struct trace_array *tr = m->private;
7284         char buf[64];
7285         const char *clockstr;
7286         int ret;
7287
7288         if (cnt >= sizeof(buf))
7289                 return -EINVAL;
7290
7291         if (copy_from_user(buf, ubuf, cnt))
7292                 return -EFAULT;
7293
7294         buf[cnt] = 0;
7295
7296         clockstr = strstrip(buf);
7297
7298         ret = tracing_set_clock(tr, clockstr);
7299         if (ret)
7300                 return ret;
7301
7302         *fpos += cnt;
7303
7304         return cnt;
7305 }
7306
7307 static int tracing_clock_open(struct inode *inode, struct file *file)
7308 {
7309         struct trace_array *tr = inode->i_private;
7310         int ret;
7311
7312         ret = tracing_check_open_get_tr(tr);
7313         if (ret)
7314                 return ret;
7315
7316         ret = single_open(file, tracing_clock_show, inode->i_private);
7317         if (ret < 0)
7318                 trace_array_put(tr);
7319
7320         return ret;
7321 }
7322
7323 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7324 {
7325         struct trace_array *tr = m->private;
7326
7327         mutex_lock(&trace_types_lock);
7328
7329         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7330                 seq_puts(m, "delta [absolute]\n");
7331         else
7332                 seq_puts(m, "[delta] absolute\n");
7333
7334         mutex_unlock(&trace_types_lock);
7335
7336         return 0;
7337 }
7338
7339 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7340 {
7341         struct trace_array *tr = inode->i_private;
7342         int ret;
7343
7344         ret = tracing_check_open_get_tr(tr);
7345         if (ret)
7346                 return ret;
7347
7348         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7349         if (ret < 0)
7350                 trace_array_put(tr);
7351
7352         return ret;
7353 }
7354
7355 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7356 {
7357         if (rbe == this_cpu_read(trace_buffered_event))
7358                 return ring_buffer_time_stamp(buffer);
7359
7360         return ring_buffer_event_time_stamp(buffer, rbe);
7361 }
7362
7363 /*
7364  * Set or disable using the per CPU trace_buffered_event when possible.
7365  */
7366 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7367 {
7368         int ret = 0;
7369
7370         mutex_lock(&trace_types_lock);
7371
7372         if (set && tr->no_filter_buffering_ref++)
7373                 goto out;
7374
7375         if (!set) {
7376                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7377                         ret = -EINVAL;
7378                         goto out;
7379                 }
7380
7381                 --tr->no_filter_buffering_ref;
7382         }
7383  out:
7384         mutex_unlock(&trace_types_lock);
7385
7386         return ret;
7387 }
7388
7389 struct ftrace_buffer_info {
7390         struct trace_iterator   iter;
7391         void                    *spare;
7392         unsigned int            spare_cpu;
7393         unsigned int            read;
7394 };
7395
7396 #ifdef CONFIG_TRACER_SNAPSHOT
7397 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7398 {
7399         struct trace_array *tr = inode->i_private;
7400         struct trace_iterator *iter;
7401         struct seq_file *m;
7402         int ret;
7403
7404         ret = tracing_check_open_get_tr(tr);
7405         if (ret)
7406                 return ret;
7407
7408         if (file->f_mode & FMODE_READ) {
7409                 iter = __tracing_open(inode, file, true);
7410                 if (IS_ERR(iter))
7411                         ret = PTR_ERR(iter);
7412         } else {
7413                 /* Writes still need the seq_file to hold the private data */
7414                 ret = -ENOMEM;
7415                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7416                 if (!m)
7417                         goto out;
7418                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7419                 if (!iter) {
7420                         kfree(m);
7421                         goto out;
7422                 }
7423                 ret = 0;
7424
7425                 iter->tr = tr;
7426                 iter->array_buffer = &tr->max_buffer;
7427                 iter->cpu_file = tracing_get_cpu(inode);
7428                 m->private = iter;
7429                 file->private_data = m;
7430         }
7431 out:
7432         if (ret < 0)
7433                 trace_array_put(tr);
7434
7435         return ret;
7436 }
7437
7438 static ssize_t
7439 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7440                        loff_t *ppos)
7441 {
7442         struct seq_file *m = filp->private_data;
7443         struct trace_iterator *iter = m->private;
7444         struct trace_array *tr = iter->tr;
7445         unsigned long val;
7446         int ret;
7447
7448         ret = tracing_update_buffers();
7449         if (ret < 0)
7450                 return ret;
7451
7452         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7453         if (ret)
7454                 return ret;
7455
7456         mutex_lock(&trace_types_lock);
7457
7458         if (tr->current_trace->use_max_tr) {
7459                 ret = -EBUSY;
7460                 goto out;
7461         }
7462
7463         local_irq_disable();
7464         arch_spin_lock(&tr->max_lock);
7465         if (tr->cond_snapshot)
7466                 ret = -EBUSY;
7467         arch_spin_unlock(&tr->max_lock);
7468         local_irq_enable();
7469         if (ret)
7470                 goto out;
7471
7472         switch (val) {
7473         case 0:
7474                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7475                         ret = -EINVAL;
7476                         break;
7477                 }
7478                 if (tr->allocated_snapshot)
7479                         free_snapshot(tr);
7480                 break;
7481         case 1:
7482 /* Only allow per-cpu swap if the ring buffer supports it */
7483 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7484                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7485                         ret = -EINVAL;
7486                         break;
7487                 }
7488 #endif
7489                 if (tr->allocated_snapshot)
7490                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7491                                         &tr->array_buffer, iter->cpu_file);
7492                 else
7493                         ret = tracing_alloc_snapshot_instance(tr);
7494                 if (ret < 0)
7495                         break;
7496                 local_irq_disable();
7497                 /* Now, we're going to swap */
7498                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7499                         update_max_tr(tr, current, smp_processor_id(), NULL);
7500                 else
7501                         update_max_tr_single(tr, current, iter->cpu_file);
7502                 local_irq_enable();
7503                 break;
7504         default:
7505                 if (tr->allocated_snapshot) {
7506                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7507                                 tracing_reset_online_cpus(&tr->max_buffer);
7508                         else
7509                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7510                 }
7511                 break;
7512         }
7513
7514         if (ret >= 0) {
7515                 *ppos += cnt;
7516                 ret = cnt;
7517         }
7518 out:
7519         mutex_unlock(&trace_types_lock);
7520         return ret;
7521 }
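/*
 * Summary of the values handled by the switch above (descriptive only):
 * writing to the "snapshot" file with
 *
 *	0  - frees the allocated snapshot buffer (all-CPUs file only)
 *	1  - allocates the snapshot buffer if needed and swaps it with the
 *	     live buffer (per-CPU swap only if the ring buffer supports it)
 *	2+ - clears the snapshot buffer without freeing it
 */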
7522
7523 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7524 {
7525         struct seq_file *m = file->private_data;
7526         int ret;
7527
7528         ret = tracing_release(inode, file);
7529
7530         if (file->f_mode & FMODE_READ)
7531                 return ret;
7532
7533         /* If write only, the seq_file is just a stub */
7534         if (m)
7535                 kfree(m->private);
7536         kfree(m);
7537
7538         return 0;
7539 }
7540
7541 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7542 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7543                                     size_t count, loff_t *ppos);
7544 static int tracing_buffers_release(struct inode *inode, struct file *file);
7545 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7546                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7547
7548 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7549 {
7550         struct ftrace_buffer_info *info;
7551         int ret;
7552
7553         /* The following checks for tracefs lockdown */
7554         ret = tracing_buffers_open(inode, filp);
7555         if (ret < 0)
7556                 return ret;
7557
7558         info = filp->private_data;
7559
7560         if (info->iter.trace->use_max_tr) {
7561                 tracing_buffers_release(inode, filp);
7562                 return -EBUSY;
7563         }
7564
7565         info->iter.snapshot = true;
7566         info->iter.array_buffer = &info->iter.tr->max_buffer;
7567
7568         return ret;
7569 }
7570
7571 #endif /* CONFIG_TRACER_SNAPSHOT */
7572
7573
7574 static const struct file_operations tracing_thresh_fops = {
7575         .open           = tracing_open_generic,
7576         .read           = tracing_thresh_read,
7577         .write          = tracing_thresh_write,
7578         .llseek         = generic_file_llseek,
7579 };
7580
7581 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7582 static const struct file_operations tracing_max_lat_fops = {
7583         .open           = tracing_open_generic,
7584         .read           = tracing_max_lat_read,
7585         .write          = tracing_max_lat_write,
7586         .llseek         = generic_file_llseek,
7587 };
7588 #endif
7589
7590 static const struct file_operations set_tracer_fops = {
7591         .open           = tracing_open_generic,
7592         .read           = tracing_set_trace_read,
7593         .write          = tracing_set_trace_write,
7594         .llseek         = generic_file_llseek,
7595 };
7596
7597 static const struct file_operations tracing_pipe_fops = {
7598         .open           = tracing_open_pipe,
7599         .poll           = tracing_poll_pipe,
7600         .read           = tracing_read_pipe,
7601         .splice_read    = tracing_splice_read_pipe,
7602         .release        = tracing_release_pipe,
7603         .llseek         = no_llseek,
7604 };
7605
7606 static const struct file_operations tracing_entries_fops = {
7607         .open           = tracing_open_generic_tr,
7608         .read           = tracing_entries_read,
7609         .write          = tracing_entries_write,
7610         .llseek         = generic_file_llseek,
7611         .release        = tracing_release_generic_tr,
7612 };
7613
7614 static const struct file_operations tracing_total_entries_fops = {
7615         .open           = tracing_open_generic_tr,
7616         .read           = tracing_total_entries_read,
7617         .llseek         = generic_file_llseek,
7618         .release        = tracing_release_generic_tr,
7619 };
7620
7621 static const struct file_operations tracing_free_buffer_fops = {
7622         .open           = tracing_open_generic_tr,
7623         .write          = tracing_free_buffer_write,
7624         .release        = tracing_free_buffer_release,
7625 };
7626
7627 static const struct file_operations tracing_mark_fops = {
7628         .open           = tracing_mark_open,
7629         .write          = tracing_mark_write,
7630         .release        = tracing_release_generic_tr,
7631 };
7632
7633 static const struct file_operations tracing_mark_raw_fops = {
7634         .open           = tracing_mark_open,
7635         .write          = tracing_mark_raw_write,
7636         .release        = tracing_release_generic_tr,
7637 };
7638
7639 static const struct file_operations trace_clock_fops = {
7640         .open           = tracing_clock_open,
7641         .read           = seq_read,
7642         .llseek         = seq_lseek,
7643         .release        = tracing_single_release_tr,
7644         .write          = tracing_clock_write,
7645 };
7646
7647 static const struct file_operations trace_time_stamp_mode_fops = {
7648         .open           = tracing_time_stamp_mode_open,
7649         .read           = seq_read,
7650         .llseek         = seq_lseek,
7651         .release        = tracing_single_release_tr,
7652 };
7653
7654 #ifdef CONFIG_TRACER_SNAPSHOT
7655 static const struct file_operations snapshot_fops = {
7656         .open           = tracing_snapshot_open,
7657         .read           = seq_read,
7658         .write          = tracing_snapshot_write,
7659         .llseek         = tracing_lseek,
7660         .release        = tracing_snapshot_release,
7661 };
7662
7663 static const struct file_operations snapshot_raw_fops = {
7664         .open           = snapshot_raw_open,
7665         .read           = tracing_buffers_read,
7666         .release        = tracing_buffers_release,
7667         .splice_read    = tracing_buffers_splice_read,
7668         .llseek         = no_llseek,
7669 };
7670
7671 #endif /* CONFIG_TRACER_SNAPSHOT */
7672
7673 /*
7674  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7675  * @filp: The active open file structure
7676  * @ubuf: The userspace provided buffer holding the value to write
7677  * @cnt: The number of bytes to read from the user buffer
7678  * @ppos: The current "file" position
7679  *
7680  * This function implements the write interface for a struct trace_min_max_param.
7681  * The filp->private_data must point to a trace_min_max_param structure that
7682  * defines where to write the value, the min and the max acceptable values,
7683  * and a lock to protect the write.
7684  */
7685 static ssize_t
7686 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7687 {
7688         struct trace_min_max_param *param = filp->private_data;
7689         u64 val;
7690         int err;
7691
7692         if (!param)
7693                 return -EFAULT;
7694
7695         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7696         if (err)
7697                 return err;
7698
7699         if (param->lock)
7700                 mutex_lock(param->lock);
7701
7702         if (param->min && val < *param->min)
7703                 err = -EINVAL;
7704
7705         if (param->max && val > *param->max)
7706                 err = -EINVAL;
7707
7708         if (!err)
7709                 *param->val = val;
7710
7711         if (param->lock)
7712                 mutex_unlock(param->lock);
7713
7714         if (err)
7715                 return err;
7716
7717         return cnt;
7718 }
7719
7720 /*
7721  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7722  * @filp: The active open file structure
7723  * @ubuf: The userspace provided buffer to read value into
7724  * @cnt: The maximum number of bytes to read
7725  * @ppos: The current "file" position
7726  *
7727  * This function implements the read interface for a struct trace_min_max_param.
7728  * The filp->private_data must point to a trace_min_max_param struct with valid
7729  * data.
7730  */
7731 static ssize_t
7732 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7733 {
7734         struct trace_min_max_param *param = filp->private_data;
7735         char buf[U64_STR_SIZE];
7736         int len;
7737         u64 val;
7738
7739         if (!param)
7740                 return -EFAULT;
7741
7742         val = *param->val;
7743
7744         if (cnt > sizeof(buf))
7745                 cnt = sizeof(buf);
7746
7747         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7748
7749         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7750 }
7751
7752 const struct file_operations trace_min_max_fops = {
7753         .open           = tracing_open_generic,
7754         .read           = trace_min_max_read,
7755         .write          = trace_min_max_write,
7756 };
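/*
 * Minimal registration sketch (the names below are hypothetical, not from
 * this file): a user of trace_min_max_fops wires a trace_min_max_param up
 * to a tracefs file with trace_create_file():
 *
 *	static DEFINE_MUTEX(my_lock);
 *	static u64 my_val, my_min = 1, my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *			  &trace_min_max_fops);
 */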
7757
7758 #define TRACING_LOG_ERRS_MAX    8
7759 #define TRACING_LOG_LOC_MAX     128
7760
7761 #define CMD_PREFIX "  Command: "
7762
7763 struct err_info {
7764         const char      **errs; /* ptr to loc-specific array of err strings */
7765         u8              type;   /* index into errs -> specific err string */
7766         u16             pos;    /* caret position */
7767         u64             ts;
7768 };
7769
7770 struct tracing_log_err {
7771         struct list_head        list;
7772         struct err_info         info;
7773         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7774         char                    *cmd;                     /* what caused err */
7775 };
7776
7777 static DEFINE_MUTEX(tracing_err_log_lock);
7778
7779 static struct tracing_log_err *alloc_tracing_log_err(int len)
7780 {
7781         struct tracing_log_err *err;
7782
7783         err = kzalloc(sizeof(*err), GFP_KERNEL);
7784         if (!err)
7785                 return ERR_PTR(-ENOMEM);
7786
7787         err->cmd = kzalloc(len, GFP_KERNEL);
7788         if (!err->cmd) {
7789                 kfree(err);
7790                 return ERR_PTR(-ENOMEM);
7791         }
7792
7793         return err;
7794 }
7795
7796 static void free_tracing_log_err(struct tracing_log_err *err)
7797 {
7798         kfree(err->cmd);
7799         kfree(err);
7800 }
7801
7802 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7803                                                    int len)
7804 {
7805         struct tracing_log_err *err;
7806
7807         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7808                 err = alloc_tracing_log_err(len);
7809                 if (PTR_ERR(err) != -ENOMEM)
7810                         tr->n_err_log_entries++;
7811
7812                 return err;
7813         }
7814
7815         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7816         kfree(err->cmd);
7817         err->cmd = kzalloc(len, GFP_KERNEL);
7818         if (!err->cmd)
7819                 return ERR_PTR(-ENOMEM);
7820         list_del(&err->list);
7821
7822         return err;
7823 }
7824
7825 /**
7826  * err_pos - find the position of a string within a command for error careting
7827  * @cmd: The tracing command that caused the error
7828  * @str: The string to position the caret at within @cmd
7829  *
7830  * Finds the position of the first occurrence of @str within @cmd.  The
7831  * return value can be passed to tracing_log_err() for caret placement
7832  * within @cmd.
7833  *
7834  * Returns the index within @cmd of the first occurrence of @str or 0
7835  * if @str was not found.
7836  */
7837 unsigned int err_pos(char *cmd, const char *str)
7838 {
7839         char *found;
7840
7841         if (WARN_ON(!strlen(cmd)))
7842                 return 0;
7843
7844         found = strstr(cmd, str);
7845         if (found)
7846                 return found - cmd;
7847
7848         return 0;
7849 }
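/*
 * Worked example (hypothetical strings): with cmd = "foo:bar" and
 * str = "bar", err_pos() returns 4, so the caret printed by
 * tracing_log_err() below lands under the 'b' of "bar".
 */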
7850
7851 /**
7852  * tracing_log_err - write an error to the tracing error log
7853  * @tr: The associated trace array for the error (NULL for top level array)
7854  * @loc: A string describing where the error occurred
7855  * @cmd: The tracing command that caused the error
7856  * @errs: The array of loc-specific static error strings
7857  * @type: The index into errs[], which produces the specific static err string
7858  * @pos: The position the caret should be placed in the cmd
7859  *
7860  * Writes an error into tracing/error_log of the form:
7861  *
7862  * <loc>: error: <text>
7863  *   Command: <cmd>
7864  *              ^
7865  *
7866  * tracing/error_log is a small log file containing the last
7867  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7868  * unless there has been a tracing error, and the error log can be
7869  * cleared and have its memory freed by writing the empty string to it
7870  * in truncation mode, i.e. echo > tracing/error_log.

7871  *
7872  * NOTE: the @errs array along with the @type param are used to
7873  * produce a static error string - this string is not copied and saved
7874  * when the error is logged - only a pointer to it is saved.  See
7875  * existing callers for examples of how static strings are typically
7876  * defined for use with tracing_log_err().
7877  */
7878 void tracing_log_err(struct trace_array *tr,
7879                      const char *loc, const char *cmd,
7880                      const char **errs, u8 type, u16 pos)
7881 {
7882         struct tracing_log_err *err;
7883         int len = 0;
7884
7885         if (!tr)
7886                 tr = &global_trace;
7887
7888         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7889
7890         mutex_lock(&tracing_err_log_lock);
7891         err = get_tracing_log_err(tr, len);
7892         if (PTR_ERR(err) == -ENOMEM) {
7893                 mutex_unlock(&tracing_err_log_lock);
7894                 return;
7895         }
7896
7897         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7898         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7899
7900         err->info.errs = errs;
7901         err->info.type = type;
7902         err->info.pos = pos;
7903         err->info.ts = local_clock();
7904
7905         list_add_tail(&err->list, &tr->err_log);
7906         mutex_unlock(&tracing_err_log_lock);
7907 }
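/*
 * A minimal caller sketch following the NOTE above; the names below
 * (foo_errs, FOO_ERR_BAD_KEY, ...) are hypothetical and not existing
 * callers:
 *
 *	static const char *foo_errs[] = { "Invalid key", "Duplicate field" };
 *	enum { FOO_ERR_BAD_KEY, FOO_ERR_DUP_FIELD };
 *
 *	tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *			FOO_ERR_BAD_KEY, err_pos(cmd, "badkey"));
 *
 * which would appear in tracing/error_log roughly as:
 *
 *	[  123.456789] foo: parse: error: Invalid key
 *	  Command: <the offending cmd string>
 *	            ^
 */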
7908
7909 static void clear_tracing_err_log(struct trace_array *tr)
7910 {
7911         struct tracing_log_err *err, *next;
7912
7913         mutex_lock(&tracing_err_log_lock);
7914         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7915                 list_del(&err->list);
7916                 free_tracing_log_err(err);
7917         }
7918
7919         tr->n_err_log_entries = 0;
7920         mutex_unlock(&tracing_err_log_lock);
7921 }
7922
7923 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7924 {
7925         struct trace_array *tr = m->private;
7926
7927         mutex_lock(&tracing_err_log_lock);
7928
7929         return seq_list_start(&tr->err_log, *pos);
7930 }
7931
7932 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7933 {
7934         struct trace_array *tr = m->private;
7935
7936         return seq_list_next(v, &tr->err_log, pos);
7937 }
7938
7939 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7940 {
7941         mutex_unlock(&tracing_err_log_lock);
7942 }
7943
7944 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7945 {
7946         u16 i;
7947
7948         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7949                 seq_putc(m, ' ');
7950         for (i = 0; i < pos; i++)
7951                 seq_putc(m, ' ');
7952         seq_puts(m, "^\n");
7953 }
7954
7955 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7956 {
7957         struct tracing_log_err *err = v;
7958
7959         if (err) {
7960                 const char *err_text = err->info.errs[err->info.type];
7961                 u64 sec = err->info.ts;
7962                 u32 nsec;
7963
7964                 nsec = do_div(sec, NSEC_PER_SEC);
7965                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7966                            err->loc, err_text);
7967                 seq_printf(m, "%s", err->cmd);
7968                 tracing_err_log_show_pos(m, err->info.pos);
7969         }
7970
7971         return 0;
7972 }
7973
7974 static const struct seq_operations tracing_err_log_seq_ops = {
7975         .start  = tracing_err_log_seq_start,
7976         .next   = tracing_err_log_seq_next,
7977         .stop   = tracing_err_log_seq_stop,
7978         .show   = tracing_err_log_seq_show
7979 };
7980
7981 static int tracing_err_log_open(struct inode *inode, struct file *file)
7982 {
7983         struct trace_array *tr = inode->i_private;
7984         int ret = 0;
7985
7986         ret = tracing_check_open_get_tr(tr);
7987         if (ret)
7988                 return ret;
7989
7990         /* If this file was opened for write, then erase contents */
7991         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7992                 clear_tracing_err_log(tr);
7993
7994         if (file->f_mode & FMODE_READ) {
7995                 ret = seq_open(file, &tracing_err_log_seq_ops);
7996                 if (!ret) {
7997                         struct seq_file *m = file->private_data;
7998                         m->private = tr;
7999                 } else {
8000                         trace_array_put(tr);
8001                 }
8002         }
8003         return ret;
8004 }
8005
8006 static ssize_t tracing_err_log_write(struct file *file,
8007                                      const char __user *buffer,
8008                                      size_t count, loff_t *ppos)
8009 {
8010         return count;
8011 }
8012
8013 static int tracing_err_log_release(struct inode *inode, struct file *file)
8014 {
8015         struct trace_array *tr = inode->i_private;
8016
8017         trace_array_put(tr);
8018
8019         if (file->f_mode & FMODE_READ)
8020                 seq_release(inode, file);
8021
8022         return 0;
8023 }
8024
8025 static const struct file_operations tracing_err_log_fops = {
8026         .open           = tracing_err_log_open,
8027         .write          = tracing_err_log_write,
8028         .read           = seq_read,
8029         .llseek         = seq_lseek,
8030         .release        = tracing_err_log_release,
8031 };
8032
8033 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8034 {
8035         struct trace_array *tr = inode->i_private;
8036         struct ftrace_buffer_info *info;
8037         int ret;
8038
8039         ret = tracing_check_open_get_tr(tr);
8040         if (ret)
8041                 return ret;
8042
8043         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8044         if (!info) {
8045                 trace_array_put(tr);
8046                 return -ENOMEM;
8047         }
8048
8049         mutex_lock(&trace_types_lock);
8050
8051         info->iter.tr           = tr;
8052         info->iter.cpu_file     = tracing_get_cpu(inode);
8053         info->iter.trace        = tr->current_trace;
8054         info->iter.array_buffer = &tr->array_buffer;
8055         info->spare             = NULL;
8056         /* Force reading ring buffer for first read */
8057         info->read              = (unsigned int)-1;
8058
8059         filp->private_data = info;
8060
8061         tr->trace_ref++;
8062
8063         mutex_unlock(&trace_types_lock);
8064
8065         ret = nonseekable_open(inode, filp);
8066         if (ret < 0)
8067                 trace_array_put(tr);
8068
8069         return ret;
8070 }
8071
8072 static __poll_t
8073 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8074 {
8075         struct ftrace_buffer_info *info = filp->private_data;
8076         struct trace_iterator *iter = &info->iter;
8077
8078         return trace_poll(iter, filp, poll_table);
8079 }
8080
8081 static ssize_t
8082 tracing_buffers_read(struct file *filp, char __user *ubuf,
8083                      size_t count, loff_t *ppos)
8084 {
8085         struct ftrace_buffer_info *info = filp->private_data;
8086         struct trace_iterator *iter = &info->iter;
8087         ssize_t ret = 0;
8088         ssize_t size;
8089
8090         if (!count)
8091                 return 0;
8092
8093 #ifdef CONFIG_TRACER_MAX_TRACE
8094         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8095                 return -EBUSY;
8096 #endif
8097
8098         if (!info->spare) {
8099                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8100                                                           iter->cpu_file);
8101                 if (IS_ERR(info->spare)) {
8102                         ret = PTR_ERR(info->spare);
8103                         info->spare = NULL;
8104                 } else {
8105                         info->spare_cpu = iter->cpu_file;
8106                 }
8107         }
8108         if (!info->spare)
8109                 return ret;
8110
8111         /* Do we have previous read data to read? */
8112         if (info->read < PAGE_SIZE)
8113                 goto read;
8114
8115  again:
8116         trace_access_lock(iter->cpu_file);
8117         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8118                                     &info->spare,
8119                                     count,
8120                                     iter->cpu_file, 0);
8121         trace_access_unlock(iter->cpu_file);
8122
8123         if (ret < 0) {
8124                 if (trace_empty(iter)) {
8125                         if ((filp->f_flags & O_NONBLOCK))
8126                                 return -EAGAIN;
8127
8128                         ret = wait_on_pipe(iter, 0);
8129                         if (ret)
8130                                 return ret;
8131
8132                         goto again;
8133                 }
8134                 return 0;
8135         }
8136
8137         info->read = 0;
8138  read:
8139         size = PAGE_SIZE - info->read;
8140         if (size > count)
8141                 size = count;
8142
8143         ret = copy_to_user(ubuf, info->spare + info->read, size);
8144         if (ret == size)
8145                 return -EFAULT;
8146
8147         size -= ret;
8148
8149         *ppos += size;
8150         info->read += size;
8151
8152         return size;
8153 }
8154
8155 static int tracing_buffers_release(struct inode *inode, struct file *file)
8156 {
8157         struct ftrace_buffer_info *info = file->private_data;
8158         struct trace_iterator *iter = &info->iter;
8159
8160         mutex_lock(&trace_types_lock);
8161
8162         iter->tr->trace_ref--;
8163
8164         __trace_array_put(iter->tr);
8165
8166         iter->wait_index++;
8167         /* Make sure the waiters see the new wait_index */
8168         smp_wmb();
8169
8170         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8171
8172         if (info->spare)
8173                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8174                                            info->spare_cpu, info->spare);
8175         kvfree(info);
8176
8177         mutex_unlock(&trace_types_lock);
8178
8179         return 0;
8180 }
8181
8182 struct buffer_ref {
8183         struct trace_buffer     *buffer;
8184         void                    *page;
8185         int                     cpu;
8186         refcount_t              refcount;
8187 };
8188
8189 static void buffer_ref_release(struct buffer_ref *ref)
8190 {
8191         if (!refcount_dec_and_test(&ref->refcount))
8192                 return;
8193         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8194         kfree(ref);
8195 }
8196
8197 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8198                                     struct pipe_buffer *buf)
8199 {
8200         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8201
8202         buffer_ref_release(ref);
8203         buf->private = 0;
8204 }
8205
8206 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8207                                 struct pipe_buffer *buf)
8208 {
8209         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8210
8211         if (refcount_read(&ref->refcount) > INT_MAX/2)
8212                 return false;
8213
8214         refcount_inc(&ref->refcount);
8215         return true;
8216 }
8217
8218 /* Pipe buffer operations for a buffer. */
8219 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8220         .release                = buffer_pipe_buf_release,
8221         .get                    = buffer_pipe_buf_get,
8222 };
8223
8224 /*
8225  * Callback from splice_to_pipe(), if we need to release some pages
8226  * at the end of the spd in case we errored out while filling the pipe.
8227  */
8228 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8229 {
8230         struct buffer_ref *ref =
8231                 (struct buffer_ref *)spd->partial[i].private;
8232
8233         buffer_ref_release(ref);
8234         spd->partial[i].private = 0;
8235 }
8236
8237 static ssize_t
8238 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8239                             struct pipe_inode_info *pipe, size_t len,
8240                             unsigned int flags)
8241 {
8242         struct ftrace_buffer_info *info = file->private_data;
8243         struct trace_iterator *iter = &info->iter;
8244         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8245         struct page *pages_def[PIPE_DEF_BUFFERS];
8246         struct splice_pipe_desc spd = {
8247                 .pages          = pages_def,
8248                 .partial        = partial_def,
8249                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8250                 .ops            = &buffer_pipe_buf_ops,
8251                 .spd_release    = buffer_spd_release,
8252         };
8253         struct buffer_ref *ref;
8254         int entries, i;
8255         ssize_t ret = 0;
8256
8257 #ifdef CONFIG_TRACER_MAX_TRACE
8258         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8259                 return -EBUSY;
8260 #endif
8261
8262         if (*ppos & (PAGE_SIZE - 1))
8263                 return -EINVAL;
8264
8265         if (len & (PAGE_SIZE - 1)) {
8266                 if (len < PAGE_SIZE)
8267                         return -EINVAL;
8268                 len &= PAGE_MASK;
8269         }
8270
8271         if (splice_grow_spd(pipe, &spd))
8272                 return -ENOMEM;
8273
8274  again:
8275         trace_access_lock(iter->cpu_file);
8276         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8277
8278         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8279                 struct page *page;
8280                 int r;
8281
8282                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8283                 if (!ref) {
8284                         ret = -ENOMEM;
8285                         break;
8286                 }
8287
8288                 refcount_set(&ref->refcount, 1);
8289                 ref->buffer = iter->array_buffer->buffer;
8290                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8291                 if (IS_ERR(ref->page)) {
8292                         ret = PTR_ERR(ref->page);
8293                         ref->page = NULL;
8294                         kfree(ref);
8295                         break;
8296                 }
8297                 ref->cpu = iter->cpu_file;
8298
8299                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8300                                           len, iter->cpu_file, 1);
8301                 if (r < 0) {
8302                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8303                                                    ref->page);
8304                         kfree(ref);
8305                         break;
8306                 }
8307
8308                 page = virt_to_page(ref->page);
8309
8310                 spd.pages[i] = page;
8311                 spd.partial[i].len = PAGE_SIZE;
8312                 spd.partial[i].offset = 0;
8313                 spd.partial[i].private = (unsigned long)ref;
8314                 spd.nr_pages++;
8315                 *ppos += PAGE_SIZE;
8316
8317                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8318         }
8319
8320         trace_access_unlock(iter->cpu_file);
8321         spd.nr_pages = i;
8322
8323         /* did we read anything? */
8324         if (!spd.nr_pages) {
8325                 long wait_index;
8326
8327                 if (ret)
8328                         goto out;
8329
8330                 ret = -EAGAIN;
8331                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8332                         goto out;
8333
8334                 wait_index = READ_ONCE(iter->wait_index);
8335
8336                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8337                 if (ret)
8338                         goto out;
8339
8340                 /* No need to wait after waking up when tracing is off */
8341                 if (!tracer_tracing_is_on(iter->tr))
8342                         goto out;
8343
8344                 /* Make sure we see the new wait_index */
8345                 smp_rmb();
8346                 if (wait_index != iter->wait_index)
8347                         goto out;
8348
8349                 goto again;
8350         }
8351
8352         ret = splice_to_pipe(pipe, &spd);
8353 out:
8354         splice_shrink_spd(&spd);
8355
8356         return ret;
8357 }
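/*
 * User-space sketch of consuming trace_pipe_raw with splice(); per the
 * checks above, the length must be a whole number of pages and the file
 * offset page aligned (4096 below assumes PAGE_SIZE == 4096; out_fd is
 * wherever the data goes, e.g. a file or socket; error handling omitted):
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	for (;;) {
 *		ssize_t n = splice(fd, NULL, p[1], NULL, 4096, 0);
 *		if (n <= 0)
 *			break;
 *		splice(p[0], NULL, out_fd, NULL, n, 0);
 *	}
 */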
8358
8359 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8360 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8361 {
8362         struct ftrace_buffer_info *info = file->private_data;
8363         struct trace_iterator *iter = &info->iter;
8364
8365         if (cmd)
8366                 return -ENOIOCTLCMD;
8367
8368         mutex_lock(&trace_types_lock);
8369
8370         iter->wait_index++;
8371         /* Make sure the waiters see the new wait_index */
8372         smp_wmb();
8373
8374         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8375
8376         mutex_unlock(&trace_types_lock);
8377         return 0;
8378 }
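/*
 * User-space sketch of the wakeup ioctl described above (the path and
 * error handling are illustrative only): a thread blocked in read() or
 * splice() on trace_pipe_raw can be kicked loose by another thread doing
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	...
 *	ioctl(fd, 0);
 */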
8379
8380 static const struct file_operations tracing_buffers_fops = {
8381         .open           = tracing_buffers_open,
8382         .read           = tracing_buffers_read,
8383         .poll           = tracing_buffers_poll,
8384         .release        = tracing_buffers_release,
8385         .splice_read    = tracing_buffers_splice_read,
8386         .unlocked_ioctl = tracing_buffers_ioctl,
8387         .llseek         = no_llseek,
8388 };
8389
8390 static ssize_t
8391 tracing_stats_read(struct file *filp, char __user *ubuf,
8392                    size_t count, loff_t *ppos)
8393 {
8394         struct inode *inode = file_inode(filp);
8395         struct trace_array *tr = inode->i_private;
8396         struct array_buffer *trace_buf = &tr->array_buffer;
8397         int cpu = tracing_get_cpu(inode);
8398         struct trace_seq *s;
8399         unsigned long cnt;
8400         unsigned long long t;
8401         unsigned long usec_rem;
8402
8403         s = kmalloc(sizeof(*s), GFP_KERNEL);
8404         if (!s)
8405                 return -ENOMEM;
8406
8407         trace_seq_init(s);
8408
8409         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8410         trace_seq_printf(s, "entries: %ld\n", cnt);
8411
8412         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8413         trace_seq_printf(s, "overrun: %ld\n", cnt);
8414
8415         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8416         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8417
8418         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8419         trace_seq_printf(s, "bytes: %ld\n", cnt);
8420
8421         if (trace_clocks[tr->clock_id].in_ns) {
8422                 /* local or global for trace_clock */
8423                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8424                 usec_rem = do_div(t, USEC_PER_SEC);
8425                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8426                                                                 t, usec_rem);
8427
8428                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8429                 usec_rem = do_div(t, USEC_PER_SEC);
8430                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8431         } else {
8432                 /* counter or tsc mode for trace_clock */
8433                 trace_seq_printf(s, "oldest event ts: %llu\n",
8434                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8435
8436                 trace_seq_printf(s, "now ts: %llu\n",
8437                                 ring_buffer_time_stamp(trace_buf->buffer));
8438         }
8439
8440         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8441         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8442
8443         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8444         trace_seq_printf(s, "read events: %ld\n", cnt);
8445
8446         count = simple_read_from_buffer(ubuf, count, ppos,
8447                                         s->buffer, trace_seq_used(s));
8448
8449         kfree(s);
8450
8451         return count;
8452 }
8453
8454 static const struct file_operations tracing_stats_fops = {
8455         .open           = tracing_open_generic_tr,
8456         .read           = tracing_stats_read,
8457         .llseek         = generic_file_llseek,
8458         .release        = tracing_release_generic_tr,
8459 };
8460
8461 #ifdef CONFIG_DYNAMIC_FTRACE
8462
8463 static ssize_t
8464 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8465                   size_t cnt, loff_t *ppos)
8466 {
8467         ssize_t ret;
8468         char *buf;
8469         int r;
8470
8471         /* 256 should be plenty to hold the amount needed */
8472         buf = kmalloc(256, GFP_KERNEL);
8473         if (!buf)
8474                 return -ENOMEM;
8475
8476         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8477                       ftrace_update_tot_cnt,
8478                       ftrace_number_of_pages,
8479                       ftrace_number_of_groups);
8480
8481         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8482         kfree(buf);
8483         return ret;
8484 }
8485
8486 static const struct file_operations tracing_dyn_info_fops = {
8487         .open           = tracing_open_generic,
8488         .read           = tracing_read_dyn_info,
8489         .llseek         = generic_file_llseek,
8490 };
8491 #endif /* CONFIG_DYNAMIC_FTRACE */
8492
8493 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8494 static void
8495 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8496                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8497                 void *data)
8498 {
8499         tracing_snapshot_instance(tr);
8500 }
8501
8502 static void
8503 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8504                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8505                       void *data)
8506 {
8507         struct ftrace_func_mapper *mapper = data;
8508         long *count = NULL;
8509
8510         if (mapper)
8511                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8512
8513         if (count) {
8514
8515                 if (*count <= 0)
8516                         return;
8517
8518                 (*count)--;
8519         }
8520
8521         tracing_snapshot_instance(tr);
8522 }
8523
8524 static int
8525 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8526                       struct ftrace_probe_ops *ops, void *data)
8527 {
8528         struct ftrace_func_mapper *mapper = data;
8529         long *count = NULL;
8530
8531         seq_printf(m, "%ps:", (void *)ip);
8532
8533         seq_puts(m, "snapshot");
8534
8535         if (mapper)
8536                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8537
8538         if (count)
8539                 seq_printf(m, ":count=%ld\n", *count);
8540         else
8541                 seq_puts(m, ":unlimited\n");
8542
8543         return 0;
8544 }
8545
8546 static int
8547 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8548                      unsigned long ip, void *init_data, void **data)
8549 {
8550         struct ftrace_func_mapper *mapper = *data;
8551
8552         if (!mapper) {
8553                 mapper = allocate_ftrace_func_mapper();
8554                 if (!mapper)
8555                         return -ENOMEM;
8556                 *data = mapper;
8557         }
8558
8559         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8560 }
8561
8562 static void
8563 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8564                      unsigned long ip, void *data)
8565 {
8566         struct ftrace_func_mapper *mapper = data;
8567
8568         if (!ip) {
8569                 if (!mapper)
8570                         return;
8571                 free_ftrace_func_mapper(mapper, NULL);
8572                 return;
8573         }
8574
8575         ftrace_func_mapper_remove_ip(mapper, ip);
8576 }
8577
8578 static struct ftrace_probe_ops snapshot_probe_ops = {
8579         .func                   = ftrace_snapshot,
8580         .print                  = ftrace_snapshot_print,
8581 };
8582
8583 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8584         .func                   = ftrace_count_snapshot,
8585         .print                  = ftrace_snapshot_print,
8586         .init                   = ftrace_snapshot_init,
8587         .free                   = ftrace_snapshot_free,
8588 };
8589
8590 static int
8591 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8592                                char *glob, char *cmd, char *param, int enable)
8593 {
8594         struct ftrace_probe_ops *ops;
8595         void *count = (void *)-1;
8596         char *number;
8597         int ret;
8598
8599         if (!tr)
8600                 return -ENODEV;
8601
8602         /* hash funcs only work with set_ftrace_filter */
8603         if (!enable)
8604                 return -EINVAL;
8605
8606         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8607
8608         if (glob[0] == '!')
8609                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8610
8611         if (!param)
8612                 goto out_reg;
8613
8614         number = strsep(&param, ":");
8615
8616         if (!strlen(number))
8617                 goto out_reg;
8618
8619         /*
8620          * We use the callback data field (which is a pointer)
8621          * as our counter.
8622          */
8623         ret = kstrtoul(number, 0, (unsigned long *)&count);
8624         if (ret)
8625                 return ret;
8626
8627  out_reg:
8628         ret = tracing_alloc_snapshot_instance(tr);
8629         if (ret < 0)
8630                 goto out;
8631
8632         ret = register_ftrace_function_probe(glob, tr, ops, count);
8633
8634  out:
8635         return ret < 0 ? ret : 0;
8636 }
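/*
 * The "snapshot" command registered below is used through
 * set_ftrace_filter, e.g. (the function name is just an example):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The optional ":5" becomes @param above and limits the probe to five
 * snapshots; the '!' form takes the unregister path at the top of the
 * callback.
 */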
8637
8638 static struct ftrace_func_command ftrace_snapshot_cmd = {
8639         .name                   = "snapshot",
8640         .func                   = ftrace_trace_snapshot_callback,
8641 };
8642
8643 static __init int register_snapshot_cmd(void)
8644 {
8645         return register_ftrace_command(&ftrace_snapshot_cmd);
8646 }
8647 #else
8648 static inline __init int register_snapshot_cmd(void) { return 0; }
8649 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8650
8651 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8652 {
8653         if (WARN_ON(!tr->dir))
8654                 return ERR_PTR(-ENODEV);
8655
8656         /* Top directory uses NULL as the parent */
8657         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8658                 return NULL;
8659
8660         /* All sub buffers have a descriptor */
8661         return tr->dir;
8662 }
8663
8664 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8665 {
8666         struct dentry *d_tracer;
8667
8668         if (tr->percpu_dir)
8669                 return tr->percpu_dir;
8670
8671         d_tracer = tracing_get_dentry(tr);
8672         if (IS_ERR(d_tracer))
8673                 return NULL;
8674
8675         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8676
8677         MEM_FAIL(!tr->percpu_dir,
8678                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8679
8680         return tr->percpu_dir;
8681 }
8682
8683 static struct dentry *
8684 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8685                       void *data, long cpu, const struct file_operations *fops)
8686 {
8687         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8688
8689         if (ret) /* See tracing_get_cpu() */
8690                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8691         return ret;
8692 }
8693
8694 static void
8695 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8696 {
8697         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8698         struct dentry *d_cpu;
8699         char cpu_dir[30]; /* 30 characters should be more than enough */
8700
8701         if (!d_percpu)
8702                 return;
8703
8704         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8705         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8706         if (!d_cpu) {
8707                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8708                 return;
8709         }
8710
8711         /* per cpu trace_pipe */
8712         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8713                                 tr, cpu, &tracing_pipe_fops);
8714
8715         /* per cpu trace */
8716         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8717                                 tr, cpu, &tracing_fops);
8718
8719         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8720                                 tr, cpu, &tracing_buffers_fops);
8721
8722         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8723                                 tr, cpu, &tracing_stats_fops);
8724
8725         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8726                                 tr, cpu, &tracing_entries_fops);
8727
8728 #ifdef CONFIG_TRACER_SNAPSHOT
8729         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8730                                 tr, cpu, &snapshot_fops);
8731
8732         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8733                                 tr, cpu, &snapshot_raw_fops);
8734 #endif
8735 }
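/*
 * The files created above give each instance a per-CPU view, e.g. for
 * CPU 0:
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 */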
8736
8737 #ifdef CONFIG_FTRACE_SELFTEST
8738 /* Let selftest have access to static functions in this file */
8739 #include "trace_selftest.c"
8740 #endif
8741
8742 static ssize_t
8743 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8744                         loff_t *ppos)
8745 {
8746         struct trace_option_dentry *topt = filp->private_data;
8747         char *buf;
8748
8749         if (topt->flags->val & topt->opt->bit)
8750                 buf = "1\n";
8751         else
8752                 buf = "0\n";
8753
8754         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8755 }
8756
8757 static ssize_t
8758 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8759                          loff_t *ppos)
8760 {
8761         struct trace_option_dentry *topt = filp->private_data;
8762         unsigned long val;
8763         int ret;
8764
8765         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8766         if (ret)
8767                 return ret;
8768
8769         if (val != 0 && val != 1)
8770                 return -EINVAL;
8771
8772         if (!!(topt->flags->val & topt->opt->bit) != val) {
8773                 mutex_lock(&trace_types_lock);
8774                 ret = __set_tracer_option(topt->tr, topt->flags,
8775                                           topt->opt, !val);
8776                 mutex_unlock(&trace_types_lock);
8777                 if (ret)
8778                         return ret;
8779         }
8780
8781         *ppos += cnt;
8782
8783         return cnt;
8784 }
8785
8786
8787 static const struct file_operations trace_options_fops = {
8788         .open = tracing_open_generic,
8789         .read = trace_options_read,
8790         .write = trace_options_write,
8791         .llseek = generic_file_llseek,
8792 };
8793
8794 /*
8795  * In order to pass in both the trace_array descriptor as well as the index
8796  * to the flag that the trace option file represents, the trace_array
8797  * has a character array of trace_flags_index[], which holds the index
8798  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8799  * The address of this character array is passed to the flag option file
8800  * read/write callbacks.
8801  *
8802  * In order to extract both the index and the trace_array descriptor,
8803  * get_tr_index() uses the following algorithm.
8804  *
8805  *   idx = *ptr;
8806  *
8807  * The pointer points directly at the index value itself (remember,
8808  * index[1] == 1), so dereferencing it yields the index.
8809  *
8810  * Then, to get the trace_array descriptor, subtracting that index
8811  * from the pointer takes us back to the start of the index array.
8812  *
8813  *   ptr - idx == &index[0]
8814  *
8815  * Then a simple container_of() from that pointer gets us to the
8816  * trace_array descriptor.
8817  */
8818 static void get_tr_index(void *data, struct trace_array **ptr,
8819                          unsigned int *pindex)
8820 {
8821         *pindex = *(unsigned char *)data;
8822
8823         *ptr = container_of(data - *pindex, struct trace_array,
8824                             trace_flags_index);
8825 }
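/*
 * Worked example: a flag file created with data == &tr->trace_flags_index[3]
 * yields *pindex == 3 (since index[3] == 3), and data - 3 points back at
 * &tr->trace_flags_index[0], so the container_of() above recovers @tr.
 */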
8826
8827 static ssize_t
8828 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8829                         loff_t *ppos)
8830 {
8831         void *tr_index = filp->private_data;
8832         struct trace_array *tr;
8833         unsigned int index;
8834         char *buf;
8835
8836         get_tr_index(tr_index, &tr, &index);
8837
8838         if (tr->trace_flags & (1 << index))
8839                 buf = "1\n";
8840         else
8841                 buf = "0\n";
8842
8843         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8844 }
8845
8846 static ssize_t
8847 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8848                          loff_t *ppos)
8849 {
8850         void *tr_index = filp->private_data;
8851         struct trace_array *tr;
8852         unsigned int index;
8853         unsigned long val;
8854         int ret;
8855
8856         get_tr_index(tr_index, &tr, &index);
8857
8858         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8859         if (ret)
8860                 return ret;
8861
8862         if (val != 0 && val != 1)
8863                 return -EINVAL;
8864
8865         mutex_lock(&event_mutex);
8866         mutex_lock(&trace_types_lock);
8867         ret = set_tracer_flag(tr, 1 << index, val);
8868         mutex_unlock(&trace_types_lock);
8869         mutex_unlock(&event_mutex);
8870
8871         if (ret < 0)
8872                 return ret;
8873
8874         *ppos += cnt;
8875
8876         return cnt;
8877 }
8878
8879 static const struct file_operations trace_options_core_fops = {
8880         .open = tracing_open_generic,
8881         .read = trace_options_core_read,
8882         .write = trace_options_core_write,
8883         .llseek = generic_file_llseek,
8884 };
8885
8886 struct dentry *trace_create_file(const char *name,
8887                                  umode_t mode,
8888                                  struct dentry *parent,
8889                                  void *data,
8890                                  const struct file_operations *fops)
8891 {
8892         struct dentry *ret;
8893
8894         ret = tracefs_create_file(name, mode, parent, data, fops);
8895         if (!ret)
8896                 pr_warn("Could not create tracefs '%s' entry\n", name);
8897
8898         return ret;
8899 }
8900
8901
8902 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8903 {
8904         struct dentry *d_tracer;
8905
8906         if (tr->options)
8907                 return tr->options;
8908
8909         d_tracer = tracing_get_dentry(tr);
8910         if (IS_ERR(d_tracer))
8911                 return NULL;
8912
8913         tr->options = tracefs_create_dir("options", d_tracer);
8914         if (!tr->options) {
8915                 pr_warn("Could not create tracefs directory 'options'\n");
8916                 return NULL;
8917         }
8918
8919         return tr->options;
8920 }
8921
8922 static void
8923 create_trace_option_file(struct trace_array *tr,
8924                          struct trace_option_dentry *topt,
8925                          struct tracer_flags *flags,
8926                          struct tracer_opt *opt)
8927 {
8928         struct dentry *t_options;
8929
8930         t_options = trace_options_init_dentry(tr);
8931         if (!t_options)
8932                 return;
8933
8934         topt->flags = flags;
8935         topt->opt = opt;
8936         topt->tr = tr;
8937
8938         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8939                                         t_options, topt, &trace_options_fops);
8940
8941 }
8942
8943 static void
8944 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8945 {
8946         struct trace_option_dentry *topts;
8947         struct trace_options *tr_topts;
8948         struct tracer_flags *flags;
8949         struct tracer_opt *opts;
8950         int cnt;
8951         int i;
8952
8953         if (!tracer)
8954                 return;
8955
8956         flags = tracer->flags;
8957
8958         if (!flags || !flags->opts)
8959                 return;
8960
8961         /*
8962          * If this is an instance, only create flags for tracers
8963          * the instance may have.
8964          */
8965         if (!trace_ok_for_array(tracer, tr))
8966                 return;
8967
8968         for (i = 0; i < tr->nr_topts; i++) {
8969                 /* Make sure there are no duplicate flags. */
8970                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8971                         return;
8972         }
8973
8974         opts = flags->opts;
8975
8976         for (cnt = 0; opts[cnt].name; cnt++)
8977                 ;
8978
8979         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8980         if (!topts)
8981                 return;
8982
8983         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8984                             GFP_KERNEL);
8985         if (!tr_topts) {
8986                 kfree(topts);
8987                 return;
8988         }
8989
8990         tr->topts = tr_topts;
8991         tr->topts[tr->nr_topts].tracer = tracer;
8992         tr->topts[tr->nr_topts].topts = topts;
8993         tr->nr_topts++;
8994
8995         for (cnt = 0; opts[cnt].name; cnt++) {
8996                 create_trace_option_file(tr, &topts[cnt], flags,
8997                                          &opts[cnt]);
8998                 MEM_FAIL(topts[cnt].entry == NULL,
8999                           "Failed to create trace option: %s",
9000                           opts[cnt].name);
9001         }
9002 }
9003
9004 static struct dentry *
9005 create_trace_option_core_file(struct trace_array *tr,
9006                               const char *option, long index)
9007 {
9008         struct dentry *t_options;
9009
9010         t_options = trace_options_init_dentry(tr);
9011         if (!t_options)
9012                 return NULL;
9013
9014         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9015                                  (void *)&tr->trace_flags_index[index],
9016                                  &trace_options_core_fops);
9017 }
9018
9019 static void create_trace_options_dir(struct trace_array *tr)
9020 {
9021         struct dentry *t_options;
9022         bool top_level = tr == &global_trace;
9023         int i;
9024
9025         t_options = trace_options_init_dentry(tr);
9026         if (!t_options)
9027                 return;
9028
9029         for (i = 0; trace_options[i]; i++) {
9030                 if (top_level ||
9031                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9032                         create_trace_option_core_file(tr, trace_options[i], i);
9033         }
9034 }
9035
9036 static ssize_t
9037 rb_simple_read(struct file *filp, char __user *ubuf,
9038                size_t cnt, loff_t *ppos)
9039 {
9040         struct trace_array *tr = filp->private_data;
9041         char buf[64];
9042         int r;
9043
9044         r = tracer_tracing_is_on(tr);
9045         r = sprintf(buf, "%d\n", r);
9046
9047         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9048 }
9049
9050 static ssize_t
9051 rb_simple_write(struct file *filp, const char __user *ubuf,
9052                 size_t cnt, loff_t *ppos)
9053 {
9054         struct trace_array *tr = filp->private_data;
9055         struct trace_buffer *buffer = tr->array_buffer.buffer;
9056         unsigned long val;
9057         int ret;
9058
9059         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9060         if (ret)
9061                 return ret;
9062
9063         if (buffer) {
9064                 mutex_lock(&trace_types_lock);
9065                 if (!!val == tracer_tracing_is_on(tr)) {
9066                         val = 0; /* do nothing */
9067                 } else if (val) {
9068                         tracer_tracing_on(tr);
9069                         if (tr->current_trace->start)
9070                                 tr->current_trace->start(tr);
9071                 } else {
9072                         tracer_tracing_off(tr);
9073                         if (tr->current_trace->stop)
9074                                 tr->current_trace->stop(tr);
9075                         /* Wake up any waiters */
9076                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9077                 }
9078                 mutex_unlock(&trace_types_lock);
9079         }
9080
9081         (*ppos)++;
9082
9083         return cnt;
9084 }
9085
9086 static const struct file_operations rb_simple_fops = {
9087         .open           = tracing_open_generic_tr,
9088         .read           = rb_simple_read,
9089         .write          = rb_simple_write,
9090         .release        = tracing_release_generic_tr,
9091         .llseek         = default_llseek,
9092 };
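/*
 * These ops typically back the per-instance "tracing_on" file, so from
 * user space (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 *
 * Writing the state the buffer is already in is a no-op, per the
 * tracer_tracing_is_on() check in rb_simple_write().
 */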
9093
9094 static ssize_t
9095 buffer_percent_read(struct file *filp, char __user *ubuf,
9096                     size_t cnt, loff_t *ppos)
9097 {
9098         struct trace_array *tr = filp->private_data;
9099         char buf[64];
9100         int r;
9101
9102         r = tr->buffer_percent;
9103         r = sprintf(buf, "%d\n", r);
9104
9105         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9106 }
9107
9108 static ssize_t
9109 buffer_percent_write(struct file *filp, const char __user *ubuf,
9110                      size_t cnt, loff_t *ppos)
9111 {
9112         struct trace_array *tr = filp->private_data;
9113         unsigned long val;
9114         int ret;
9115
9116         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9117         if (ret)
9118                 return ret;
9119
9120         if (val > 100)
9121                 return -EINVAL;
9122
9123         if (!val)
9124                 val = 1;
9125
9126         tr->buffer_percent = val;
9127
9128         (*ppos)++;
9129
9130         return cnt;
9131 }
9132
9133 static const struct file_operations buffer_percent_fops = {
9134         .open           = tracing_open_generic_tr,
9135         .read           = buffer_percent_read,
9136         .write          = buffer_percent_write,
9137         .release        = tracing_release_generic_tr,
9138         .llseek         = default_llseek,
9139 };
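/*
 * These ops typically back the "buffer_percent" file. The value is
 * consumed via wait_on_pipe(iter, iter->tr->buffer_percent) in
 * tracing_buffers_splice_read() above, i.e. how full the ring buffer
 * must be before a blocked reader is woken. For example:
 *
 *	echo 0  > buffer_percent	(stored as 1 by the write handler)
 *	echo 50 > buffer_percent	(wake readers at roughly half full)
 */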
9140
9141 static struct dentry *trace_instance_dir;
9142
9143 static void
9144 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9145
9146 static int
9147 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9148 {
9149         enum ring_buffer_flags rb_flags;
9150
9151         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9152
9153         buf->tr = tr;
9154
9155         buf->buffer = ring_buffer_alloc(size, rb_flags);
9156         if (!buf->buffer)
9157                 return -ENOMEM;
9158
9159         buf->data = alloc_percpu(struct trace_array_cpu);
9160         if (!buf->data) {
9161                 ring_buffer_free(buf->buffer);
9162                 buf->buffer = NULL;
9163                 return -ENOMEM;
9164         }
9165
9166         /* Allocate the first page for all buffers */
9167         set_buffer_entries(&tr->array_buffer,
9168                            ring_buffer_size(tr->array_buffer.buffer, 0));
9169
9170         return 0;
9171 }
9172
9173 static void free_trace_buffer(struct array_buffer *buf)
9174 {
9175         if (buf->buffer) {
9176                 ring_buffer_free(buf->buffer);
9177                 buf->buffer = NULL;
9178                 free_percpu(buf->data);
9179                 buf->data = NULL;
9180         }
9181 }
9182
9183 static int allocate_trace_buffers(struct trace_array *tr, int size)
9184 {
9185         int ret;
9186
9187         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9188         if (ret)
9189                 return ret;
9190
9191 #ifdef CONFIG_TRACER_MAX_TRACE
9192         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9193                                     allocate_snapshot ? size : 1);
9194         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9195                 free_trace_buffer(&tr->array_buffer);
9196                 return -ENOMEM;
9197         }
9198         tr->allocated_snapshot = allocate_snapshot;
9199
9200         /*
9201          * Only the top level trace array gets its snapshot allocated
9202          * from the kernel command line.
9203          */
9204         allocate_snapshot = false;
9205 #endif
9206
9207         return 0;
9208 }
9209
9210 static void free_trace_buffers(struct trace_array *tr)
9211 {
9212         if (!tr)
9213                 return;
9214
9215         free_trace_buffer(&tr->array_buffer);
9216
9217 #ifdef CONFIG_TRACER_MAX_TRACE
9218         free_trace_buffer(&tr->max_buffer);
9219 #endif
9220 }
9221
9222 static void init_trace_flags_index(struct trace_array *tr)
9223 {
9224         int i;
9225
9226         /* Used by the trace options files */
9227         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9228                 tr->trace_flags_index[i] = i;
9229 }
9230
9231 static void __update_tracer_options(struct trace_array *tr)
9232 {
9233         struct tracer *t;
9234
9235         for (t = trace_types; t; t = t->next)
9236                 add_tracer_options(tr, t);
9237 }
9238
9239 static void update_tracer_options(struct trace_array *tr)
9240 {
9241         mutex_lock(&trace_types_lock);
9242         tracer_options_updated = true;
9243         __update_tracer_options(tr);
9244         mutex_unlock(&trace_types_lock);
9245 }
9246
9247 /* Must have trace_types_lock held */
9248 struct trace_array *trace_array_find(const char *instance)
9249 {
9250         struct trace_array *tr, *found = NULL;
9251
9252         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9253                 if (tr->name && strcmp(tr->name, instance) == 0) {
9254                         found = tr;
9255                         break;
9256                 }
9257         }
9258
9259         return found;
9260 }
9261
9262 struct trace_array *trace_array_find_get(const char *instance)
9263 {
9264         struct trace_array *tr;
9265
9266         mutex_lock(&trace_types_lock);
9267         tr = trace_array_find(instance);
9268         if (tr)
9269                 tr->ref++;
9270         mutex_unlock(&trace_types_lock);
9271
9272         return tr;
9273 }
9274
9275 static int trace_array_create_dir(struct trace_array *tr)
9276 {
9277         int ret;
9278
9279         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9280         if (!tr->dir)
9281                 return -EINVAL;
9282
9283         ret = event_trace_add_tracer(tr->dir, tr);
9284         if (ret) {
9285                 tracefs_remove(tr->dir);
9286                 return ret;
9287         }
9288
9289         init_tracer_tracefs(tr, tr->dir);
9290         __update_tracer_options(tr);
9291
9292         return ret;
9293 }
9294
9295 static struct trace_array *trace_array_create(const char *name)
9296 {
9297         struct trace_array *tr;
9298         int ret;
9299
9300         ret = -ENOMEM;
9301         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9302         if (!tr)
9303                 return ERR_PTR(ret);
9304
9305         tr->name = kstrdup(name, GFP_KERNEL);
9306         if (!tr->name)
9307                 goto out_free_tr;
9308
9309         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9310                 goto out_free_tr;
9311
9312         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9313
9314         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9315
9316         raw_spin_lock_init(&tr->start_lock);
9317
9318         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9319
9320         tr->current_trace = &nop_trace;
9321
9322         INIT_LIST_HEAD(&tr->systems);
9323         INIT_LIST_HEAD(&tr->events);
9324         INIT_LIST_HEAD(&tr->hist_vars);
9325         INIT_LIST_HEAD(&tr->err_log);
9326
9327         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9328                 goto out_free_tr;
9329
9330         if (ftrace_allocate_ftrace_ops(tr) < 0)
9331                 goto out_free_tr;
9332
9333         ftrace_init_trace_array(tr);
9334
9335         init_trace_flags_index(tr);
9336
9337         if (trace_instance_dir) {
9338                 ret = trace_array_create_dir(tr);
9339                 if (ret)
9340                         goto out_free_tr;
9341         } else
9342                 __trace_early_add_events(tr);
9343
9344         list_add(&tr->list, &ftrace_trace_arrays);
9345
9346         tr->ref++;
9347
9348         return tr;
9349
9350  out_free_tr:
9351         ftrace_free_ftrace_ops(tr);
9352         free_trace_buffers(tr);
9353         free_cpumask_var(tr->tracing_cpumask);
9354         kfree(tr->name);
9355         kfree(tr);
9356
9357         return ERR_PTR(ret);
9358 }
9359
9360 static int instance_mkdir(const char *name)
9361 {
9362         struct trace_array *tr;
9363         int ret;
9364
9365         mutex_lock(&event_mutex);
9366         mutex_lock(&trace_types_lock);
9367
9368         ret = -EEXIST;
9369         if (trace_array_find(name))
9370                 goto out_unlock;
9371
9372         tr = trace_array_create(name);
9373
9374         ret = PTR_ERR_OR_ZERO(tr);
9375
9376 out_unlock:
9377         mutex_unlock(&trace_types_lock);
9378         mutex_unlock(&event_mutex);
9379         return ret;
9380 }
9381
9382 /**
9383  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9384  * @name: The name of the trace array to be looked up/created.
9385  *
9386  * Returns a pointer to the trace array with the given name, or
9387  * NULL if it cannot be created.
9388  *
9389  * NOTE: This function increments the reference counter associated with the
9390  * trace array returned. This makes sure it cannot be freed while in use.
9391  * Use trace_array_put() once the trace array is no longer needed.
9392  * If the trace_array is to be freed, trace_array_destroy() needs to
9393  * be called after the trace_array_put(), or simply let user space delete
9394  * it from the tracefs instances directory. But until the
9395  * trace_array_put() is called, user space cannot delete it.
9396  *
9397  */
9398 struct trace_array *trace_array_get_by_name(const char *name)
9399 {
9400         struct trace_array *tr;
9401
9402         mutex_lock(&event_mutex);
9403         mutex_lock(&trace_types_lock);
9404
9405         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9406                 if (tr->name && strcmp(tr->name, name) == 0)
9407                         goto out_unlock;
9408         }
9409
9410         tr = trace_array_create(name);
9411
9412         if (IS_ERR(tr))
9413                 tr = NULL;
9414 out_unlock:
9415         if (tr)
9416                 tr->ref++;
9417
9418         mutex_unlock(&trace_types_lock);
9419         mutex_unlock(&event_mutex);
9420         return tr;
9421 }
9422 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
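/*
 * A minimal in-kernel usage sketch based on the description above
 * ("my_instance" and the error value returned are illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *
 *	... use the instance: enable events, write to its buffer, ...
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should be freed)
 */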
9423
9424 static int __remove_instance(struct trace_array *tr)
9425 {
9426         int i;
9427
9428         /* Reference counter for a newly created trace array = 1. */
9429         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9430                 return -EBUSY;
9431
9432         list_del(&tr->list);
9433
9434         /* Disable all the flags that were enabled coming in */
9435         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9436                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9437                         set_tracer_flag(tr, 1 << i, 0);
9438         }
9439
9440         tracing_set_nop(tr);
9441         clear_ftrace_function_probes(tr);
9442         event_trace_del_tracer(tr);
9443         ftrace_clear_pids(tr);
9444         ftrace_destroy_function_files(tr);
9445         tracefs_remove(tr->dir);
9446         free_percpu(tr->last_func_repeats);
9447         free_trace_buffers(tr);
9448
9449         for (i = 0; i < tr->nr_topts; i++) {
9450                 kfree(tr->topts[i].topts);
9451         }
9452         kfree(tr->topts);
9453
9454         free_cpumask_var(tr->tracing_cpumask);
9455         kfree(tr->name);
9456         kfree(tr);
9457
9458         return 0;
9459 }
9460
9461 int trace_array_destroy(struct trace_array *this_tr)
9462 {
9463         struct trace_array *tr;
9464         int ret;
9465
9466         if (!this_tr)
9467                 return -EINVAL;
9468
9469         mutex_lock(&event_mutex);
9470         mutex_lock(&trace_types_lock);
9471
9472         ret = -ENODEV;
9473
9474         /* Make sure the trace array exists before destroying it. */
9475         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9476                 if (tr == this_tr) {
9477                         ret = __remove_instance(tr);
9478                         break;
9479                 }
9480         }
9481
9482         mutex_unlock(&trace_types_lock);
9483         mutex_unlock(&event_mutex);
9484
9485         return ret;
9486 }
9487 EXPORT_SYMBOL_GPL(trace_array_destroy);
9488
9489 static int instance_rmdir(const char *name)
9490 {
9491         struct trace_array *tr;
9492         int ret;
9493
9494         mutex_lock(&event_mutex);
9495         mutex_lock(&trace_types_lock);
9496
9497         ret = -ENODEV;
9498         tr = trace_array_find(name);
9499         if (tr)
9500                 ret = __remove_instance(tr);
9501
9502         mutex_unlock(&trace_types_lock);
9503         mutex_unlock(&event_mutex);
9504
9505         return ret;
9506 }
9507
9508 static __init void create_trace_instances(struct dentry *d_tracer)
9509 {
9510         struct trace_array *tr;
9511
9512         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9513                                                          instance_mkdir,
9514                                                          instance_rmdir);
9515         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9516                 return;
9517
9518         mutex_lock(&event_mutex);
9519         mutex_lock(&trace_types_lock);
9520
9521         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9522                 if (!tr->name)
9523                         continue;
9524                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9525                              "Failed to create instance directory\n"))
9526                         break;
9527         }
9528
9529         mutex_unlock(&trace_types_lock);
9530         mutex_unlock(&event_mutex);
9531 }
9532
9533 static void
9534 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9535 {
9536         struct trace_event_file *file;
9537         int cpu;
9538
9539         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9540                         tr, &show_traces_fops);
9541
9542         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9543                         tr, &set_tracer_fops);
9544
9545         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9546                           tr, &tracing_cpumask_fops);
9547
9548         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9549                           tr, &tracing_iter_fops);
9550
9551         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9552                           tr, &tracing_fops);
9553
9554         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9555                           tr, &tracing_pipe_fops);
9556
9557         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9558                           tr, &tracing_entries_fops);
9559
9560         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9561                           tr, &tracing_total_entries_fops);
9562
9563         trace_create_file("free_buffer", 0200, d_tracer,
9564                           tr, &tracing_free_buffer_fops);
9565
9566         trace_create_file("trace_marker", 0220, d_tracer,
9567                           tr, &tracing_mark_fops);
9568
9569         file = __find_event_file(tr, "ftrace", "print");
9570         if (file && file->dir)
9571                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9572                                   file, &event_trigger_fops);
9573         tr->trace_marker_file = file;
9574
9575         trace_create_file("trace_marker_raw", 0220, d_tracer,
9576                           tr, &tracing_mark_raw_fops);
9577
9578         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9579                           &trace_clock_fops);
9580
9581         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9582                           tr, &rb_simple_fops);
9583
9584         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9585                           &trace_time_stamp_mode_fops);
9586
9587         tr->buffer_percent = 50;
9588
9589         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9590                         tr, &buffer_percent_fops);
9591
9592         create_trace_options_dir(tr);
9593
9594         trace_create_maxlat_file(tr, d_tracer);
9595
9596         if (ftrace_create_function_files(tr, d_tracer))
9597                 MEM_FAIL(1, "Could not allocate function filter files");
9598
9599 #ifdef CONFIG_TRACER_SNAPSHOT
9600         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9601                           tr, &snapshot_fops);
9602 #endif
9603
9604         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9605                           tr, &tracing_err_log_fops);
9606
9607         for_each_tracing_cpu(cpu)
9608                 tracing_init_tracefs_percpu(tr, cpu);
9609
9610         ftrace_init_tracefs(tr, d_tracer);
9611 }
9612
9613 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9614 {
9615         struct vfsmount *mnt;
9616         struct file_system_type *type;
9617
9618         /*
9619          * To maintain backward compatibility for tools that mount
9620          * debugfs to get to the tracing facility, tracefs is automatically
9621          * mounted to the debugfs/tracing directory.
9622          */
9623         type = get_fs_type("tracefs");
9624         if (!type)
9625                 return NULL;
9626         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9627         put_filesystem(type);
9628         if (IS_ERR(mnt))
9629                 return NULL;
9630         mntget(mnt);
9631
9632         return mnt;
9633 }
9634
9635 /**
9636  * tracing_init_dentry - initialize top level trace array
9637  *
9638  * This is called when creating files or directories in the tracing
9639  * directory. It is called via fs_initcall() by any of the boot up code
9640  * and returns 0 on success, or a negative error code otherwise.
9641  */
9642 int tracing_init_dentry(void)
9643 {
9644         struct trace_array *tr = &global_trace;
9645
9646         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9647                 pr_warn("Tracing disabled due to lockdown\n");
9648                 return -EPERM;
9649         }
9650
9651         /* The top level trace array uses NULL as parent */
9652         if (tr->dir)
9653                 return 0;
9654
9655         if (WARN_ON(!tracefs_initialized()))
9656                 return -ENODEV;
9657
9658         /*
9659          * As there may still be users that expect the tracing
9660          * files to exist in debugfs/tracing, we must automount
9661          * the tracefs file system there, so older tools still
9662          * work with the newer kernel.
9663          */
9664         tr->dir = debugfs_create_automount("tracing", NULL,
9665                                            trace_automount, NULL);
9666
9667         return 0;
9668 }
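/*
 * Example (sketch) of the expected caller pattern: check the return
 * value before creating any top level tracefs files, as
 * tracer_init_tracefs() below does. The function and file names here
 * are hypothetical; the README fops is reused only to keep the sketch
 * self-contained.
 */
#if 0
static __init int sample_tracefs_setup(void)
{
        if (tracing_init_dentry())
                return 0;       /* tracefs not available (e.g. lockdown) */

        /* Top level files are created with a NULL parent dentry */
        trace_create_file("sample_file", TRACE_MODE_READ, NULL,
                          NULL, &tracing_readme_fops);
        return 0;
}
#endif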
9669
9670 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9671 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9672
9673 static struct workqueue_struct *eval_map_wq __initdata;
9674 static struct work_struct eval_map_work __initdata;
9675 static struct work_struct tracerfs_init_work __initdata;
9676
9677 static void __init eval_map_work_func(struct work_struct *work)
9678 {
9679         int len;
9680
9681         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9682         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9683 }
9684
9685 static int __init trace_eval_init(void)
9686 {
9687         INIT_WORK(&eval_map_work, eval_map_work_func);
9688
9689         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9690         if (!eval_map_wq) {
9691                 pr_err("Unable to allocate eval_map_wq\n");
9692                 /* Fall back to doing the work synchronously */
9693                 eval_map_work_func(&eval_map_work);
9694                 return -ENOMEM;
9695         }
9696
9697         queue_work(eval_map_wq, &eval_map_work);
9698         return 0;
9699 }
9700
9701 subsys_initcall(trace_eval_init);
9702
9703 static int __init trace_eval_sync(void)
9704 {
9705         /* Make sure the eval map updates are finished */
9706         if (eval_map_wq)
9707                 destroy_workqueue(eval_map_wq);
9708         return 0;
9709 }
9710
9711 late_initcall_sync(trace_eval_sync);
9712
9713
9714 #ifdef CONFIG_MODULES
9715 static void trace_module_add_evals(struct module *mod)
9716 {
9717         if (!mod->num_trace_evals)
9718                 return;
9719
9720         /*
9721          * Modules with bad taint do not have events created, so do
9722          * not bother with their eval maps (enums) either.
9723          */
9724         if (trace_module_has_bad_taint(mod))
9725                 return;
9726
9727         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9728 }
9729
9730 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9731 static void trace_module_remove_evals(struct module *mod)
9732 {
9733         union trace_eval_map_item *map;
9734         union trace_eval_map_item **last = &trace_eval_maps;
9735
9736         if (!mod->num_trace_evals)
9737                 return;
9738
9739         mutex_lock(&trace_eval_mutex);
9740
9741         map = trace_eval_maps;
9742
9743         while (map) {
9744                 if (map->head.mod == mod)
9745                         break;
9746                 map = trace_eval_jmp_to_tail(map);
9747                 last = &map->tail.next;
9748                 map = map->tail.next;
9749         }
9750         if (!map)
9751                 goto out;
9752
9753         *last = trace_eval_jmp_to_tail(map)->tail.next;
9754         kfree(map);
9755  out:
9756         mutex_unlock(&trace_eval_mutex);
9757 }
9758 #else
9759 static inline void trace_module_remove_evals(struct module *mod) { }
9760 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9761
9762 static int trace_module_notify(struct notifier_block *self,
9763                                unsigned long val, void *data)
9764 {
9765         struct module *mod = data;
9766
9767         switch (val) {
9768         case MODULE_STATE_COMING:
9769                 trace_module_add_evals(mod);
9770                 break;
9771         case MODULE_STATE_GOING:
9772                 trace_module_remove_evals(mod);
9773                 break;
9774         }
9775
9776         return NOTIFY_OK;
9777 }
9778
9779 static struct notifier_block trace_module_nb = {
9780         .notifier_call = trace_module_notify,
9781         .priority = 0,
9782 };
9783 #endif /* CONFIG_MODULES */
9784
9785 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9786 {
9787
9788         event_trace_init();
9789
9790         init_tracer_tracefs(&global_trace, NULL);
9791         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9792
9793         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9794                         &global_trace, &tracing_thresh_fops);
9795
9796         trace_create_file("README", TRACE_MODE_READ, NULL,
9797                         NULL, &tracing_readme_fops);
9798
9799         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9800                         NULL, &tracing_saved_cmdlines_fops);
9801
9802         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9803                           NULL, &tracing_saved_cmdlines_size_fops);
9804
9805         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9806                         NULL, &tracing_saved_tgids_fops);
9807
9808         trace_create_eval_file(NULL);
9809
9810 #ifdef CONFIG_MODULES
9811         register_module_notifier(&trace_module_nb);
9812 #endif
9813
9814 #ifdef CONFIG_DYNAMIC_FTRACE
9815         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9816                         NULL, &tracing_dyn_info_fops);
9817 #endif
9818
9819         create_trace_instances(NULL);
9820
9821         update_tracer_options(&global_trace);
9822 }
9823
9824 static __init int tracer_init_tracefs(void)
9825 {
9826         int ret;
9827
9828         trace_access_lock_init();
9829
9830         ret = tracing_init_dentry();
9831         if (ret)
9832                 return 0;
9833
9834         if (eval_map_wq) {
9835                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9836                 queue_work(eval_map_wq, &tracerfs_init_work);
9837         } else {
9838                 tracer_init_tracefs_work_func(NULL);
9839         }
9840
9841         rv_init_interface();
9842
9843         return 0;
9844 }
9845
9846 fs_initcall(tracer_init_tracefs);
9847
9848 static int trace_panic_handler(struct notifier_block *this,
9849                                unsigned long event, void *unused)
9850 {
9851         if (ftrace_dump_on_oops)
9852                 ftrace_dump(ftrace_dump_on_oops);
9853         return NOTIFY_OK;
9854 }
9855
9856 static struct notifier_block trace_panic_notifier = {
9857         .notifier_call  = trace_panic_handler,
9858         .next           = NULL,
9859         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9860 };
9861
9862 static int trace_die_handler(struct notifier_block *self,
9863                              unsigned long val,
9864                              void *data)
9865 {
9866         switch (val) {
9867         case DIE_OOPS:
9868                 if (ftrace_dump_on_oops)
9869                         ftrace_dump(ftrace_dump_on_oops);
9870                 break;
9871         default:
9872                 break;
9873         }
9874         return NOTIFY_OK;
9875 }
9876
9877 static struct notifier_block trace_die_notifier = {
9878         .notifier_call = trace_die_handler,
9879         .priority = 200
9880 };
9881
9882 /*
9883  * printk is set to a max of 1024; we really don't need it that big.
9884  * Nothing should be printing 1000 characters anyway.
9885  */
9886 #define TRACE_MAX_PRINT         1000
9887
9888 /*
9889  * Define here KERN_TRACE so that we have one place to modify
9890  * it if we decide to change what log level the ftrace dump
9891  * should be at.
9892  */
9893 #define KERN_TRACE              KERN_EMERG
9894
9895 void
9896 trace_printk_seq(struct trace_seq *s)
9897 {
9898         /* Probably should print a warning here. */
9899         if (s->seq.len >= TRACE_MAX_PRINT)
9900                 s->seq.len = TRACE_MAX_PRINT;
9901
9902         /*
9903          * More paranoid code. Although the buffer size is set to
9904          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9905          * an extra layer of protection.
9906          */
9907         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9908                 s->seq.len = s->seq.size - 1;
9909
9910         /* Should already be NUL terminated, but we are paranoid. */
9911         s->buffer[s->seq.len] = 0;
9912
9913         printk(KERN_TRACE "%s", s->buffer);
9914
9915         trace_seq_init(s);
9916 }
9917
9918 void trace_init_global_iter(struct trace_iterator *iter)
9919 {
9920         iter->tr = &global_trace;
9921         iter->trace = iter->tr->current_trace;
9922         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9923         iter->array_buffer = &global_trace.array_buffer;
9924
9925         if (iter->trace && iter->trace->open)
9926                 iter->trace->open(iter);
9927
9928         /* Annotate start of buffers if we had overruns */
9929         if (ring_buffer_overruns(iter->array_buffer->buffer))
9930                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9931
9932         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9933         if (trace_clocks[iter->tr->clock_id].in_ns)
9934                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9935
9936         /* Cannot use kmalloc for iter.temp and iter.fmt */
9937         iter->temp = static_temp_buf;
9938         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9939         iter->fmt = static_fmt_buf;
9940         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9941 }
9942
9943 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9944 {
9945         /* use static because iter can be a bit big for the stack */
9946         static struct trace_iterator iter;
9947         static atomic_t dump_running;
9948         struct trace_array *tr = &global_trace;
9949         unsigned int old_userobj;
9950         unsigned long flags;
9951         int cnt = 0, cpu;
9952
9953         /* Only allow one dump user at a time. */
9954         if (atomic_inc_return(&dump_running) != 1) {
9955                 atomic_dec(&dump_running);
9956                 return;
9957         }
9958
9959         /*
9960          * Always turn off tracing when we dump.
9961          * We don't need to show trace output of what happens
9962          * between multiple crashes.
9963          *
9964          * If the user does a sysrq-z, then they can re-enable
9965          * tracing with echo 1 > tracing_on.
9966          */
9967         tracing_off();
9968
9969         local_irq_save(flags);
9970
9971         /* Simulate the iterator */
9972         trace_init_global_iter(&iter);
9973
9974         for_each_tracing_cpu(cpu) {
9975                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9976         }
9977
9978         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9979
9980         /* don't look at user memory in panic mode */
9981         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9982
9983         switch (oops_dump_mode) {
9984         case DUMP_ALL:
9985                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9986                 break;
9987         case DUMP_ORIG:
9988                 iter.cpu_file = raw_smp_processor_id();
9989                 break;
9990         case DUMP_NONE:
9991                 goto out_enable;
9992         default:
9993                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9994                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9995         }
9996
9997         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9998
9999         /* Did function tracer already get disabled? */
10000         if (ftrace_is_dead()) {
10001                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10002                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10003         }
10004
10005         /*
10006          * We need to stop all tracing on all CPUs to read
10007          * the next buffer. This is a bit expensive, but it is
10008          * not done often. We read everything we can,
10009          * and then release the locks again.
10010          */
10011
10012         while (!trace_empty(&iter)) {
10013
10014                 if (!cnt)
10015                         printk(KERN_TRACE "---------------------------------\n");
10016
10017                 cnt++;
10018
10019                 trace_iterator_reset(&iter);
10020                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10021
10022                 if (trace_find_next_entry_inc(&iter) != NULL) {
10023                         int ret;
10024
10025                         ret = print_trace_line(&iter);
10026                         if (ret != TRACE_TYPE_NO_CONSUME)
10027                                 trace_consume(&iter);
10028                 }
10029                 touch_nmi_watchdog();
10030
10031                 trace_printk_seq(&iter.seq);
10032         }
10033
10034         if (!cnt)
10035                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10036         else
10037                 printk(KERN_TRACE "---------------------------------\n");
10038
10039  out_enable:
10040         tr->trace_flags |= old_userobj;
10041
10042         for_each_tracing_cpu(cpu) {
10043                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10044         }
10045         atomic_dec(&dump_running);
10046         local_irq_restore(flags);
10047 }
10048 EXPORT_SYMBOL_GPL(ftrace_dump);
10049
10050 #define WRITE_BUFSIZE  4096
10051
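/**
 * trace_parse_run_command - parse and run newline separated commands
 * @file: file the write came from (not used here)
 * @buffer: user space buffer holding the commands
 * @count: number of bytes available in @buffer
 * @ppos: file position (not used here)
 * @createfn: callback invoked once for each parsed command
 *
 * Copies the user buffer in chunks of up to WRITE_BUFSIZE - 1 bytes,
 * splits the input on newlines, strips '#' comments, and calls
 * @createfn for each resulting command. A single line longer than
 * WRITE_BUFSIZE - 2 characters is rejected with -EINVAL.
 *
 * Returns the number of bytes consumed on success, or a negative errno.
 */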
10052 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10053                                 size_t count, loff_t *ppos,
10054                                 int (*createfn)(const char *))
10055 {
10056         char *kbuf, *buf, *tmp;
10057         int ret = 0;
10058         size_t done = 0;
10059         size_t size;
10060
10061         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10062         if (!kbuf)
10063                 return -ENOMEM;
10064
10065         while (done < count) {
10066                 size = count - done;
10067
10068                 if (size >= WRITE_BUFSIZE)
10069                         size = WRITE_BUFSIZE - 1;
10070
10071                 if (copy_from_user(kbuf, buffer + done, size)) {
10072                         ret = -EFAULT;
10073                         goto out;
10074                 }
10075                 kbuf[size] = '\0';
10076                 buf = kbuf;
10077                 do {
10078                         tmp = strchr(buf, '\n');
10079                         if (tmp) {
10080                                 *tmp = '\0';
10081                                 size = tmp - buf + 1;
10082                         } else {
10083                                 size = strlen(buf);
10084                                 if (done + size < count) {
10085                                         if (buf != kbuf)
10086                                                 break;
10087                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10088                                         pr_warn("Line length is too long: Should be less than %d\n",
10089                                                 WRITE_BUFSIZE - 2);
10090                                         ret = -EINVAL;
10091                                         goto out;
10092                                 }
10093                         }
10094                         done += size;
10095
10096                         /* Remove comments */
10097                         tmp = strchr(buf, '#');
10098
10099                         if (tmp)
10100                                 *tmp = '\0';
10101
10102                         ret = createfn(buf);
10103                         if (ret)
10104                                 goto out;
10105                         buf += size;
10106
10107                 } while (done < count);
10108         }
10109         ret = done;
10110
10111 out:
10112         kfree(kbuf);
10113
10114         return ret;
10115 }
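/*
 * Example (sketch) of wiring trace_parse_run_command() into a tracefs
 * file's ->write() handler. sample_create_cmd() and sample_cmd_write()
 * are hypothetical names used only for illustration.
 */
#if 0
static int sample_create_cmd(const char *raw_command)
{
        /* Called once per newline/comment stripped command */
        pr_info("sample command: %s\n", raw_command);
        return 0;
}

static ssize_t sample_cmd_write(struct file *file, const char __user *buffer,
                                size_t count, loff_t *ppos)
{
        return trace_parse_run_command(file, buffer, count, ppos,
                                       sample_create_cmd);
}
#endif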
10116
10117 __init static int tracer_alloc_buffers(void)
10118 {
10119         int ring_buf_size;
10120         int ret = -ENOMEM;
10121
10122
10123         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10124                 pr_warn("Tracing disabled due to lockdown\n");
10125                 return -EPERM;
10126         }
10127
10128         /*
10129          * Make sure we don't accidentally add more trace options
10130          * than we have bits for.
10131          */
10132         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10133
10134         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10135                 goto out;
10136
10137         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10138                 goto out_free_buffer_mask;
10139
10140         /* Only allocate trace_printk buffers if a trace_printk exists */
10141         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10142                 /* Must be called before global_trace.buffer is allocated */
10143                 trace_printk_init_buffers();
10144
10145         /* To save memory, keep the ring buffer size to its minimum */
10146         if (ring_buffer_expanded)
10147                 ring_buf_size = trace_buf_size;
10148         else
10149                 ring_buf_size = 1;
10150
10151         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10152         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10153
10154         raw_spin_lock_init(&global_trace.start_lock);
10155
10156         /*
10157          * The prepare callback allocates some memory for the ring buffer. We
10158          * don't free the buffer if the CPU goes down. If we were to free
10159          * the buffer, then the user would lose any trace that was in the
10160          * buffer. The memory will be removed once the "instance" is removed.
10161          */
10162         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10163                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10164                                       NULL);
10165         if (ret < 0)
10166                 goto out_free_cpumask;
10167         /* Used for event triggers */
10168         ret = -ENOMEM;
10169         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10170         if (!temp_buffer)
10171                 goto out_rm_hp_state;
10172
10173         if (trace_create_savedcmd() < 0)
10174                 goto out_free_temp_buffer;
10175
10176         /* TODO: make the number of buffers hot pluggable with CPUS */
10177         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10178                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10179                 goto out_free_savedcmd;
10180         }
10181
10182         if (global_trace.buffer_disabled)
10183                 tracing_off();
10184
10185         if (trace_boot_clock) {
10186                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10187                 if (ret < 0)
10188                         pr_warn("Trace clock %s not defined, going back to default\n",
10189                                 trace_boot_clock);
10190         }
10191
10192         /*
10193          * register_tracer() might reference current_trace, so it
10194          * needs to be set before we register anything. This is
10195          * just a bootstrap of current_trace anyway.
10196          */
10197         global_trace.current_trace = &nop_trace;
10198
10199         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10200
10201         ftrace_init_global_array_ops(&global_trace);
10202
10203         init_trace_flags_index(&global_trace);
10204
10205         register_tracer(&nop_trace);
10206
10207         /* Function tracing may start here (via kernel command line) */
10208         init_function_trace();
10209
10210         /* All seems OK, enable tracing */
10211         tracing_disabled = 0;
10212
10213         atomic_notifier_chain_register(&panic_notifier_list,
10214                                        &trace_panic_notifier);
10215
10216         register_die_notifier(&trace_die_notifier);
10217
10218         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10219
10220         INIT_LIST_HEAD(&global_trace.systems);
10221         INIT_LIST_HEAD(&global_trace.events);
10222         INIT_LIST_HEAD(&global_trace.hist_vars);
10223         INIT_LIST_HEAD(&global_trace.err_log);
10224         list_add(&global_trace.list, &ftrace_trace_arrays);
10225
10226         apply_trace_boot_options();
10227
10228         register_snapshot_cmd();
10229
10230         test_can_verify();
10231
10232         return 0;
10233
10234 out_free_savedcmd:
10235         free_saved_cmdlines_buffer(savedcmd);
10236 out_free_temp_buffer:
10237         ring_buffer_free(temp_buffer);
10238 out_rm_hp_state:
10239         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10240 out_free_cpumask:
10241         free_cpumask_var(global_trace.tracing_cpumask);
10242 out_free_buffer_mask:
10243         free_cpumask_var(tracing_buffer_mask);
10244 out:
10245         return ret;
10246 }
10247
10248 void __init ftrace_boot_snapshot(void)
10249 {
10250         if (snapshot_at_boot) {
10251                 tracing_snapshot();
10252                 internal_trace_puts("** Boot snapshot taken **\n");
10253         }
10254 }
10255
10256 void __init early_trace_init(void)
10257 {
10258         if (tracepoint_printk) {
10259                 tracepoint_print_iter =
10260                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10261                 if (MEM_FAIL(!tracepoint_print_iter,
10262                              "Failed to allocate trace iterator\n"))
10263                         tracepoint_printk = 0;
10264                 else
10265                         static_key_enable(&tracepoint_printk_key.key);
10266         }
10267         tracer_alloc_buffers();
10268 }
10269
10270 void __init trace_init(void)
10271 {
10272         trace_event_init();
10273 }
10274
10275 __init static void clear_boot_tracer(void)
10276 {
10277         /*
10278          * The default bootup tracer name is stored in an init section
10279          * buffer. This function is called from a late initcall. If the
10280          * boot tracer was never registered, clear the name out to prevent
10281          * a later registration from accessing the buffer that is
10282          * about to be freed.
10283          */
10284         if (!default_bootup_tracer)
10285                 return;
10286
10287         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10288                default_bootup_tracer);
10289         default_bootup_tracer = NULL;
10290 }
10291
10292 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10293 __init static void tracing_set_default_clock(void)
10294 {
10295         /* sched_clock_stable() is determined in late_initcall */
10296         if (!trace_boot_clock && !sched_clock_stable()) {
10297                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10298                         pr_warn("Can not set tracing clock due to lockdown\n");
10299                         return;
10300                 }
10301
10302                 printk(KERN_WARNING
10303                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10304                        "If you want to keep using the local clock, then add:\n"
10305                        "  \"trace_clock=local\"\n"
10306                        "on the kernel command line\n");
10307                 tracing_set_clock(&global_trace, "global");
10308         }
10309 }
10310 #else
10311 static inline void tracing_set_default_clock(void) { }
10312 #endif
10313
10314 __init static int late_trace_init(void)
10315 {
10316         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10317                 static_key_disable(&tracepoint_printk_key.key);
10318                 tracepoint_printk = 0;
10319         }
10320
10321         tracing_set_default_clock();
10322         clear_boot_tracer();
10323         return 0;
10324 }
10325
10326 late_initcall_sync(late_trace_init);