kernel/trace/trace.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will peek into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could
67  * occur at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 and is set back to zero only if the
114  * initialization of the tracer is successful. That is the only
115  * place that clears it.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops.
133  * Set it to 1 to dump the buffers of all CPUs.
134  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static int __init set_cmdline_ftrace(char *str)
191 {
192         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
193         default_bootup_tracer = bootup_tracer_buf;
194         /* We are using ftrace early, expand it */
195         ring_buffer_expanded = true;
196         return 1;
197 }
198 __setup("ftrace=", set_cmdline_ftrace);
199
200 static int __init set_ftrace_dump_on_oops(char *str)
201 {
202         if (*str++ != '=' || !*str || !strcmp("1", str)) {
203                 ftrace_dump_on_oops = DUMP_ALL;
204                 return 1;
205         }
206
207         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
208                 ftrace_dump_on_oops = DUMP_ORIG;
209                 return 1;
210         }
211
212         return 0;
213 }
214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
215
216 static int __init stop_trace_on_warning(char *str)
217 {
218         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
219                 __disable_trace_on_warning = 1;
220         return 1;
221 }
222 __setup("traceoff_on_warning", stop_trace_on_warning);
223
224 static int __init boot_alloc_snapshot(char *str)
225 {
226         allocate_snapshot = true;
227         /* We also need the main ring buffer expanded */
228         ring_buffer_expanded = true;
229         return 1;
230 }
231 __setup("alloc_snapshot", boot_alloc_snapshot);
232
233
234 static int __init boot_snapshot(char *str)
235 {
236         snapshot_at_boot = true;
237         boot_alloc_snapshot(str);
238         return 1;
239 }
240 __setup("ftrace_boot_snapshot", boot_snapshot);
241
242
243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
244
245 static int __init set_trace_boot_options(char *str)
246 {
247         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
248         return 1;
249 }
250 __setup("trace_options=", set_trace_boot_options);
251
252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
253 static char *trace_boot_clock __initdata;
254
255 static int __init set_trace_boot_clock(char *str)
256 {
257         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
258         trace_boot_clock = trace_boot_clock_buf;
259         return 1;
260 }
261 __setup("trace_clock=", set_trace_boot_clock);
262
263 static int __init set_tracepoint_printk(char *str)
264 {
265         /* Ignore the "tp_printk_stop_on_boot" param */
266         if (*str == '_')
267                 return 0;
268
269         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
270                 tracepoint_printk = 1;
271         return 1;
272 }
273 __setup("tp_printk", set_tracepoint_printk);
274
275 static int __init set_tracepoint_printk_stop(char *str)
276 {
277         tracepoint_printk_stop_on_boot = true;
278         return 1;
279 }
280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
281
282 unsigned long long ns2usecs(u64 nsec)
283 {
284         nsec += 500;
285         do_div(nsec, 1000);
286         return nsec;
287 }
288
289 static void
290 trace_process_export(struct trace_export *export,
291                struct ring_buffer_event *event, int flag)
292 {
293         struct trace_entry *entry;
294         unsigned int size = 0;
295
296         if (export->flags & flag) {
297                 entry = ring_buffer_event_data(event);
298                 size = ring_buffer_event_length(event);
299                 export->write(export, entry, size);
300         }
301 }
302
303 static DEFINE_MUTEX(ftrace_export_lock);
304
305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
306
307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
310
311 static inline void ftrace_exports_enable(struct trace_export *export)
312 {
313         if (export->flags & TRACE_EXPORT_FUNCTION)
314                 static_branch_inc(&trace_function_exports_enabled);
315
316         if (export->flags & TRACE_EXPORT_EVENT)
317                 static_branch_inc(&trace_event_exports_enabled);
318
319         if (export->flags & TRACE_EXPORT_MARKER)
320                 static_branch_inc(&trace_marker_exports_enabled);
321 }
322
323 static inline void ftrace_exports_disable(struct trace_export *export)
324 {
325         if (export->flags & TRACE_EXPORT_FUNCTION)
326                 static_branch_dec(&trace_function_exports_enabled);
327
328         if (export->flags & TRACE_EXPORT_EVENT)
329                 static_branch_dec(&trace_event_exports_enabled);
330
331         if (export->flags & TRACE_EXPORT_MARKER)
332                 static_branch_dec(&trace_marker_exports_enabled);
333 }
334
335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
336 {
337         struct trace_export *export;
338
339         preempt_disable_notrace();
340
341         export = rcu_dereference_raw_check(ftrace_exports_list);
342         while (export) {
343                 trace_process_export(export, event, flag);
344                 export = rcu_dereference_raw_check(export->next);
345         }
346
347         preempt_enable_notrace();
348 }
349
350 static inline void
351 add_trace_export(struct trace_export **list, struct trace_export *export)
352 {
353         rcu_assign_pointer(export->next, *list);
354         /*
355          * We are entering export into the list but another
356          * CPU might be walking that list. We need to make sure
357          * the export->next pointer is valid before another CPU sees
358          * the export pointer included into the list.
359          */
360         rcu_assign_pointer(*list, export);
361 }
362
363 static inline int
364 rm_trace_export(struct trace_export **list, struct trace_export *export)
365 {
366         struct trace_export **p;
367
368         for (p = list; *p != NULL; p = &(*p)->next)
369                 if (*p == export)
370                         break;
371
372         if (*p != export)
373                 return -1;
374
375         rcu_assign_pointer(*p, (*p)->next);
376
377         return 0;
378 }
379
380 static inline void
381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383         ftrace_exports_enable(export);
384
385         add_trace_export(list, export);
386 }
387
388 static inline int
389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
390 {
391         int ret;
392
393         ret = rm_trace_export(list, export);
394         ftrace_exports_disable(export);
395
396         return ret;
397 }
398
399 int register_ftrace_export(struct trace_export *export)
400 {
401         if (WARN_ON_ONCE(!export->write))
402                 return -1;
403
404         mutex_lock(&ftrace_export_lock);
405
406         add_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return 0;
411 }
412 EXPORT_SYMBOL_GPL(register_ftrace_export);
413
414 int unregister_ftrace_export(struct trace_export *export)
415 {
416         int ret;
417
418         mutex_lock(&ftrace_export_lock);
419
420         ret = rm_ftrace_export(&ftrace_exports_list, export);
421
422         mutex_unlock(&ftrace_export_lock);
423
424         return ret;
425 }
426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
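
/*
 * Illustrative sketch (not part of the original file): a minimal
 * trace_export consumer built on the API above. Only struct
 * trace_export, TRACE_EXPORT_FUNCTION and register_ftrace_export()
 * come from <linux/trace.h>; the names prefixed with "example_" are
 * assumptions made up for this sketch.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int len)
{
	/* forward the raw trace entry (e.g. to a device or a log) */
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_FUNCTION,
};

static int __init example_export_init(void)
{
	/* returns 0 on success, -1 if ->write is missing */
	return register_ftrace_export(&example_export);
}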
427
428 /* trace_flags holds trace_options default values */
429 #define TRACE_DEFAULT_FLAGS                                             \
430         (FUNCTION_DEFAULT_FLAGS |                                       \
431          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
432          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
433          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
434          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
435          TRACE_ITER_HASH_PTR)
436
437 /* trace_options that are only supported by global_trace */
438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
439                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
440
441 /* trace_flags that are default zero for instances */
442 #define ZEROED_TRACE_FLAGS \
443         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
444
445 /*
446  * The global_trace is the descriptor that holds the top-level tracing
447  * buffers for the live tracing.
448  */
449 static struct trace_array global_trace = {
450         .trace_flags = TRACE_DEFAULT_FLAGS,
451 };
452
453 LIST_HEAD(ftrace_trace_arrays);
454
455 int trace_array_get(struct trace_array *this_tr)
456 {
457         struct trace_array *tr;
458         int ret = -ENODEV;
459
460         mutex_lock(&trace_types_lock);
461         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
462                 if (tr == this_tr) {
463                         tr->ref++;
464                         ret = 0;
465                         break;
466                 }
467         }
468         mutex_unlock(&trace_types_lock);
469
470         return ret;
471 }
472
473 static void __trace_array_put(struct trace_array *this_tr)
474 {
475         WARN_ON(!this_tr->ref);
476         this_tr->ref--;
477 }
478
479 /**
480  * trace_array_put - Decrement the reference counter for this trace array.
481  * @this_tr : pointer to the trace array
482  *
483  * NOTE: Use this when we no longer need the trace array returned by
484  * trace_array_get_by_name(). This ensures the trace array can be later
485  * destroyed.
486  *
487  */
488 void trace_array_put(struct trace_array *this_tr)
489 {
490         if (!this_tr)
491                 return;
492
493         mutex_lock(&trace_types_lock);
494         __trace_array_put(this_tr);
495         mutex_unlock(&trace_types_lock);
496 }
497 EXPORT_SYMBOL_GPL(trace_array_put);
498
499 int tracing_check_open_get_tr(struct trace_array *tr)
500 {
501         int ret;
502
503         ret = security_locked_down(LOCKDOWN_TRACEFS);
504         if (ret)
505                 return ret;
506
507         if (tracing_disabled)
508                 return -ENODEV;
509
510         if (tr && trace_array_get(tr) < 0)
511                 return -ENODEV;
512
513         return 0;
514 }
515
516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
517                               struct trace_buffer *buffer,
518                               struct ring_buffer_event *event)
519 {
520         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
521             !filter_match_preds(call->filter, rec)) {
522                 __trace_event_discard_commit(buffer, event);
523                 return 1;
524         }
525
526         return 0;
527 }
528
529 /**
530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
531  * @filtered_pids: The list of pids to check
532  * @search_pid: The PID to find in @filtered_pids
533  *
534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
535  */
536 bool
537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
538 {
539         return trace_pid_list_is_set(filtered_pids, search_pid);
540 }
541
542 /**
543  * trace_ignore_this_task - should a task be ignored for tracing
544  * @filtered_pids: The list of pids to check
545  * @filtered_no_pids: The list of pids not to be traced
546  * @task: The task that should be ignored if not filtered
547  *
548  * Checks if @task should be traced or not from @filtered_pids.
549  * Returns true if @task should *NOT* be traced.
550  * Returns false if @task should be traced.
551  */
552 bool
553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
554                        struct trace_pid_list *filtered_no_pids,
555                        struct task_struct *task)
556 {
557         /*
558          * If filtered_no_pids is not empty, and the task's pid is listed
559          * in filtered_no_pids, then return true.
560          * Otherwise, if filtered_pids is empty, that means we can
561          * trace all tasks. If it has content, then only trace pids
562          * within filtered_pids.
563          */
564
565         return (filtered_pids &&
566                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
567                 (filtered_no_pids &&
568                  trace_find_filtered_pid(filtered_no_pids, task->pid));
569 }
570
571 /**
572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
573  * @pid_list: The list to modify
574  * @self: The current task for fork or NULL for exit
575  * @task: The task to add or remove
576  *
577  * If adding a task, if @self is defined, the task is only added if @self
578  * is also included in @pid_list. This happens on fork and tasks should
579  * only be added when the parent is listed. If @self is NULL, then the
580  * @task pid will be removed from the list, which would happen on exit
581  * of a task.
582  */
583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
584                                   struct task_struct *self,
585                                   struct task_struct *task)
586 {
587         if (!pid_list)
588                 return;
589
590         /* For forks, we only add if the forking task is listed */
591         if (self) {
592                 if (!trace_find_filtered_pid(pid_list, self->pid))
593                         return;
594         }
595
596         /* "self" is set for forks, and NULL for exits */
597         if (self)
598                 trace_pid_list_set(pid_list, task->pid);
599         else
600                 trace_pid_list_clear(pid_list, task->pid);
601 }
602
603 /**
604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
605  * @pid_list: The pid list to show
606  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
607  * @pos: The position of the file
608  *
609  * This is used by the seq_file "next" operation to iterate the pids
610  * listed in a trace_pid_list structure.
611  *
612  * Returns the pid+1 as we want to display pid of zero, but NULL would
613  * stop the iteration.
614  */
615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
616 {
617         long pid = (unsigned long)v;
618         unsigned int next;
619
620         (*pos)++;
621
622         /* pid already is +1 of the actual previous bit */
623         if (trace_pid_list_next(pid_list, pid, &next) < 0)
624                 return NULL;
625
626         pid = next;
627
628         /* Return pid + 1 to allow zero to be represented */
629         return (void *)(pid + 1);
630 }
631
632 /**
633  * trace_pid_start - Used for seq_file to start reading pid lists
634  * @pid_list: The pid list to show
635  * @pos: The position of the file
636  *
637  * This is used by seq_file "start" operation to start the iteration
638  * of listing pids.
639  *
640  * Returns the pid+1 as we want to display pid of zero, but NULL would
641  * stop the iteration.
642  */
643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
644 {
645         unsigned long pid;
646         unsigned int first;
647         loff_t l = 0;
648
649         if (trace_pid_list_first(pid_list, &first) < 0)
650                 return NULL;
651
652         pid = first;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
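
/*
 * Illustrative sketch (not part of the original file): wiring the
 * trace_pid_start()/trace_pid_next()/trace_pid_show() helpers above
 * into seq_file operations. Where the pid_list comes from
 * (m->private here) and the "example_" names are assumptions.
 */
static void *example_pid_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumed source */

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(m->private, v, pos);
}

static void example_pid_seq_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_sops = {
	.start	= example_pid_seq_start,
	.next	= example_pid_seq_next,
	.stop	= example_pid_seq_stop,
	/* trace_pid_show() already matches the ->show() signature */
	.show	= trace_pid_show,
};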
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * Always create a new array: the write is an all-or-nothing
698          * operation. When the user adds new pids, build them up in the
699          * new array; if the operation fails, the current list is left
700          * unmodified.
701          */
702         pid_list = trace_pid_list_alloc();
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         if (filtered_pids) {
709                 /* copy the current bits to the new max */
710                 ret = trace_pid_list_first(filtered_pids, &pid);
711                 while (!ret) {
712                         trace_pid_list_set(pid_list, pid);
713                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
714                         nr_pids++;
715                 }
716         }
717
718         ret = 0;
719         while (cnt > 0) {
720
721                 pos = 0;
722
723                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
724                 if (ret < 0)
725                         break;
726
727                 read += ret;
728                 ubuf += ret;
729                 cnt -= ret;
730
731                 if (!trace_parser_loaded(&parser))
732                         break;
733
734                 ret = -EINVAL;
735                 if (kstrtoul(parser.buffer, 0, &val))
736                         break;
737
738                 pid = (pid_t)val;
739
740                 if (trace_pid_list_set(pid_list, pid) < 0) {
741                         ret = -1;
742                         break;
743                 }
744                 nr_pids++;
745
746                 trace_parser_clear(&parser);
747                 ret = 0;
748         }
749         trace_parser_put(&parser);
750
751         if (ret < 0) {
752                 trace_pid_list_free(pid_list);
753                 return ret;
754         }
755
756         if (!nr_pids) {
757                 /* Cleared the list of pids */
758                 trace_pid_list_free(pid_list);
759                 pid_list = NULL;
760         }
761
762         *new_pid_list = pid_list;
763
764         return read;
765 }
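
/*
 * Illustrative sketch (not part of the original file): how a tracefs
 * write handler could feed user input to trace_pid_write(). Real
 * callers hold the appropriate locks and publish the new list with
 * rcu_assign_pointer(); the "example_" name and the double-pointer
 * argument are assumptions for this sketch.
 */
static ssize_t example_set_pid_filter(struct trace_pid_list **filtered_pids,
				      const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *new_list = NULL;
	int ret;

	ret = trace_pid_write(*filtered_pids, &new_list, ubuf, cnt);
	if (ret < 0)
		return ret;

	/* the old list must be freed by the caller once readers are done */
	*filtered_pids = new_list;
	return ret;
}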
766
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769         u64 ts;
770
771         /* Early boot up does not have a buffer yet */
772         if (!buf->buffer)
773                 return trace_clock_local();
774
775         ts = ring_buffer_time_stamp(buf->buffer);
776         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778         return ts;
779 }
780
781 u64 ftrace_now(int cpu)
782 {
783         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785
786 /**
787  * tracing_is_enabled - Show if global_trace has been enabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797         /*
798          * For quick access (irqsoff uses this in fast path), just
799          * return the mirror variable of the state of the ring buffer.
800          * It's a little racy, but we don't really care.
801          */
802         smp_rmb();
803         return !global_trace.buffer_disabled;
804 }
805
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to the low value of 16384.
812  * If a dump on oops happens, not having to wait for all that
813  * output will be much appreciated. In any case, this is
814  * configurable at both boot time and run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer            *trace_types __read_mostly;
822
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes
836  * to consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be
839  *      rewritten by the events producer.
840  *   B) the page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different per-cpu
844  * ring buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854 static inline void trace_access_lock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 /* gain it for accessing the whole ring buffer. */
858                 down_write(&all_cpu_access_lock);
859         } else {
860                 /* gain it for accessing a cpu ring buffer. */
861
862                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863                 down_read(&all_cpu_access_lock);
864
865                 /* Secondly block other access to this @cpu ring buffer. */
866                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867         }
868 }
869
870 static inline void trace_access_unlock(int cpu)
871 {
872         if (cpu == RING_BUFFER_ALL_CPUS) {
873                 up_write(&all_cpu_access_lock);
874         } else {
875                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876                 up_read(&all_cpu_access_lock);
877         }
878 }
879
880 static inline void trace_access_lock_init(void)
881 {
882         int cpu;
883
884         for_each_possible_cpu(cpu)
885                 mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887
888 #else
889
890 static DEFINE_MUTEX(access_lock);
891
892 static inline void trace_access_lock(int cpu)
893 {
894         (void)cpu;
895         mutex_lock(&access_lock);
896 }
897
898 static inline void trace_access_unlock(int cpu)
899 {
900         (void)cpu;
901         mutex_unlock(&access_lock);
902 }
903
904 static inline void trace_access_lock_init(void)
905 {
906 }
907
908 #endif
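
/*
 * Illustrative sketch (not part of the original file): the intended
 * usage of the trace_access_lock()/trace_access_unlock() primitives
 * above around a consuming read. The "example_" function name is an
 * assumption; ring_buffer_consume() is the regular ring buffer API.
 */
static void example_consume_cpu(struct trace_array *tr, int cpu)
{
	struct ring_buffer_event *event;

	trace_access_lock(cpu);
	event = ring_buffer_consume(tr->array_buffer.buffer, cpu, NULL, NULL);
	if (event) {
		/* event data is only safe to use while the lock is held */
	}
	trace_access_unlock(cpu);
}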
909
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912                                  unsigned int trace_ctx,
913                                  int skip, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915                                       struct trace_buffer *buffer,
916                                       unsigned int trace_ctx,
917                                       int skip, struct pt_regs *regs);
918
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921                                         unsigned int trace_ctx,
922                                         int skip, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926                                       struct trace_buffer *buffer,
927                                       unsigned int trace_ctx,
928                                       int skip, struct pt_regs *regs)
929 {
930 }
931
932 #endif
933
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936                   int type, unsigned int trace_ctx)
937 {
938         struct trace_entry *ent = ring_buffer_event_data(event);
939
940         tracing_generic_entry_update(ent, type, trace_ctx);
941 }
942
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945                           int type,
946                           unsigned long len,
947                           unsigned int trace_ctx)
948 {
949         struct ring_buffer_event *event;
950
951         event = ring_buffer_lock_reserve(buffer, len);
952         if (event != NULL)
953                 trace_event_setup(event, type, trace_ctx);
954
955         return event;
956 }
957
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960         if (tr->array_buffer.buffer)
961                 ring_buffer_record_on(tr->array_buffer.buffer);
962         /*
963          * This flag is looked at when buffers haven't been allocated
964          * yet, or by some tracers (like irqsoff) that just want to
965          * know if the ring buffer has been disabled, but can handle
966          * races where it gets disabled while we still do a record.
967          * As the check is in the fast path of the tracers, it is more
968          * important to be fast than accurate.
969          */
970         tr->buffer_disabled = 0;
971         /* Make the flag seen by readers */
972         smp_wmb();
973 }
974
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983         tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986
987
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991         __this_cpu_write(trace_taskinfo_save, true);
992
993         /* If this is the temp buffer, we need to commit fully */
994         if (this_cpu_read(trace_buffered_event) == event) {
995                 /* Length is in event->array[0] */
996                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997                 /* Release the temp buffer */
998                 this_cpu_dec(trace_buffered_event_cnt);
999                 /* ring_buffer_unlock_commit() enables preemption */
1000                 preempt_enable_notrace();
1001         } else
1002                 ring_buffer_unlock_commit(buffer, event);
1003 }
1004
1005 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1006                        const char *str, int size)
1007 {
1008         struct ring_buffer_event *event;
1009         struct trace_buffer *buffer;
1010         struct print_entry *entry;
1011         unsigned int trace_ctx;
1012         int alloc;
1013
1014         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1015                 return 0;
1016
1017         if (unlikely(tracing_selftest_running || tracing_disabled))
1018                 return 0;
1019
1020         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1021
1022         trace_ctx = tracing_gen_ctx();
1023         buffer = tr->array_buffer.buffer;
1024         ring_buffer_nest_start(buffer);
1025         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1026                                             trace_ctx);
1027         if (!event) {
1028                 size = 0;
1029                 goto out;
1030         }
1031
1032         entry = ring_buffer_event_data(event);
1033         entry->ip = ip;
1034
1035         memcpy(&entry->buf, str, size);
1036
1037         /* Add a newline if necessary */
1038         if (entry->buf[size - 1] != '\n') {
1039                 entry->buf[size] = '\n';
1040                 entry->buf[size + 1] = '\0';
1041         } else
1042                 entry->buf[size] = '\0';
1043
1044         __buffer_unlock_commit(buffer, event);
1045         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1046  out:
1047         ring_buffer_nest_end(buffer);
1048         return size;
1049 }
1050 EXPORT_SYMBOL_GPL(__trace_array_puts);
1051
1052 /**
1053  * __trace_puts - write a constant string into the trace buffer.
1054  * @ip:    The address of the caller
1055  * @str:   The constant string to write
1056  * @size:  The size of the string.
1057  */
1058 int __trace_puts(unsigned long ip, const char *str, int size)
1059 {
1060         return __trace_array_puts(&global_trace, ip, str, size);
1061 }
1062 EXPORT_SYMBOL_GPL(__trace_puts);
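
/*
 * Illustrative sketch (not part of the original file): dropping a
 * literal marker into the top-level trace buffer via the helper
 * above. Callers normally use the trace_puts() macro from trace.h,
 * which picks __trace_puts() or __trace_bputs(); the "example_"
 * function name is an assumption.
 */
static void example_mark_slow_path(void)
{
	__trace_puts(_THIS_IP_, "hit the slow path\n", 18);
}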
1063
1064 /**
1065  * __trace_bputs - write the pointer to a constant string into trace buffer
1066  * @ip:    The address of the caller
1067  * @str:   The constant string whose pointer is written into the buffer
1068  */
1069 int __trace_bputs(unsigned long ip, const char *str)
1070 {
1071         struct ring_buffer_event *event;
1072         struct trace_buffer *buffer;
1073         struct bputs_entry *entry;
1074         unsigned int trace_ctx;
1075         int size = sizeof(struct bputs_entry);
1076         int ret = 0;
1077
1078         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1079                 return 0;
1080
1081         if (unlikely(tracing_selftest_running || tracing_disabled))
1082                 return 0;
1083
1084         trace_ctx = tracing_gen_ctx();
1085         buffer = global_trace.array_buffer.buffer;
1086
1087         ring_buffer_nest_start(buffer);
1088         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1089                                             trace_ctx);
1090         if (!event)
1091                 goto out;
1092
1093         entry = ring_buffer_event_data(event);
1094         entry->ip                       = ip;
1095         entry->str                      = str;
1096
1097         __buffer_unlock_commit(buffer, event);
1098         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1099
1100         ret = 1;
1101  out:
1102         ring_buffer_nest_end(buffer);
1103         return ret;
1104 }
1105 EXPORT_SYMBOL_GPL(__trace_bputs);
1106
1107 #ifdef CONFIG_TRACER_SNAPSHOT
1108 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1109                                            void *cond_data)
1110 {
1111         struct tracer *tracer = tr->current_trace;
1112         unsigned long flags;
1113
1114         if (in_nmi()) {
1115                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1116                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1117                 return;
1118         }
1119
1120         if (!tr->allocated_snapshot) {
1121                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1122                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1123                 tracer_tracing_off(tr);
1124                 return;
1125         }
1126
1127         /* Note, snapshot can not be used when the tracer uses it */
1128         if (tracer->use_max_tr) {
1129                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1130                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1131                 return;
1132         }
1133
1134         local_irq_save(flags);
1135         update_max_tr(tr, current, smp_processor_id(), cond_data);
1136         local_irq_restore(flags);
1137 }
1138
1139 void tracing_snapshot_instance(struct trace_array *tr)
1140 {
1141         tracing_snapshot_instance_cond(tr, NULL);
1142 }
1143
1144 /**
1145  * tracing_snapshot - take a snapshot of the current buffer.
1146  *
1147  * This causes a swap between the snapshot buffer and the current live
1148  * tracing buffer. You can use this to take snapshots of the live
1149  * trace when some condition is triggered, but continue to trace.
1150  *
1151  * Note, make sure to allocate the snapshot beforehand, either with
1152  * tracing_snapshot_alloc(), or manually with:
1153  * echo 1 > /sys/kernel/debug/tracing/snapshot
1154  *
1155  * If the snapshot buffer is not allocated, this will stop tracing,
1156  * basically making a permanent snapshot.
1157  */
1158 void tracing_snapshot(void)
1159 {
1160         struct trace_array *tr = &global_trace;
1161
1162         tracing_snapshot_instance(tr);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot);
1165
1166 /**
1167  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1168  * @tr:         The tracing instance to snapshot
1169  * @cond_data:  The data to be tested conditionally, and possibly saved
1170  *
1171  * This is the same as tracing_snapshot() except that the snapshot is
1172  * conditional - the snapshot will only happen if the
1173  * cond_snapshot.update() implementation receiving the cond_data
1174  * returns true, which means that the trace array's cond_snapshot
1175  * update() operation used the cond_data to determine whether the
1176  * snapshot should be taken, and if it was, presumably saved it along
1177  * with the snapshot.
1178  */
1179 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1180 {
1181         tracing_snapshot_instance_cond(tr, cond_data);
1182 }
1183 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1184
1185 /**
1186  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1187  * @tr:         The tracing instance
1188  *
1189  * When the user enables a conditional snapshot using
1190  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1191  * with the snapshot.  This accessor is used to retrieve it.
1192  *
1193  * Should not be called from cond_snapshot.update(), since it takes
1194  * the tr->max_lock lock, which the code calling
1195  * cond_snapshot.update() has already done.
1196  *
1197  * Returns the cond_data associated with the trace array's snapshot.
1198  */
1199 void *tracing_cond_snapshot_data(struct trace_array *tr)
1200 {
1201         void *cond_data = NULL;
1202
1203         local_irq_disable();
1204         arch_spin_lock(&tr->max_lock);
1205
1206         if (tr->cond_snapshot)
1207                 cond_data = tr->cond_snapshot->cond_data;
1208
1209         arch_spin_unlock(&tr->max_lock);
1210         local_irq_enable();
1211
1212         return cond_data;
1213 }
1214 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1215
1216 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1217                                         struct array_buffer *size_buf, int cpu_id);
1218 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1219
1220 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1221 {
1222         int ret;
1223
1224         if (!tr->allocated_snapshot) {
1225
1226                 /* allocate spare buffer */
1227                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1228                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1229                 if (ret < 0)
1230                         return ret;
1231
1232                 tr->allocated_snapshot = true;
1233         }
1234
1235         return 0;
1236 }
1237
1238 static void free_snapshot(struct trace_array *tr)
1239 {
1240         /*
1241          * We don't free the ring buffer; instead, we resize it because
1242          * the max_tr ring buffer has some state (e.g. ring->clock) and
1243          * we want to preserve it.
1244          */
1245         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1246         set_buffer_entries(&tr->max_buffer, 1);
1247         tracing_reset_online_cpus(&tr->max_buffer);
1248         tr->allocated_snapshot = false;
1249 }
1250
1251 /**
1252  * tracing_alloc_snapshot - allocate snapshot buffer.
1253  *
1254  * This only allocates the snapshot buffer if it isn't already
1255  * allocated - it doesn't also take a snapshot.
1256  *
1257  * This is meant to be used in cases where the snapshot buffer needs
1258  * to be set up for events that can't sleep but need to be able to
1259  * trigger a snapshot.
1260  */
1261 int tracing_alloc_snapshot(void)
1262 {
1263         struct trace_array *tr = &global_trace;
1264         int ret;
1265
1266         ret = tracing_alloc_snapshot_instance(tr);
1267         WARN_ON(ret < 0);
1268
1269         return ret;
1270 }
1271 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1272
1273 /**
1274  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1275  *
1276  * This is similar to tracing_snapshot(), but it will allocate the
1277  * snapshot buffer if it isn't already allocated. Use this only
1278  * where it is safe to sleep, as the allocation may sleep.
1279  *
1280  * This causes a swap between the snapshot buffer and the current live
1281  * tracing buffer. You can use this to take snapshots of the live
1282  * trace when some condition is triggered, but continue to trace.
1283  */
1284 void tracing_snapshot_alloc(void)
1285 {
1286         int ret;
1287
1288         ret = tracing_alloc_snapshot();
1289         if (ret < 0)
1290                 return;
1291
1292         tracing_snapshot();
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
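
/*
 * Illustrative sketch (not part of the original file): the usual split
 * between allocating the snapshot buffer from sleepable context and
 * taking the snapshot later from a context that cannot sleep. The
 * "example_" names are assumptions.
 */
static int __init example_snapshot_setup(void)
{
	/* may sleep: allocate the spare (snapshot) buffer up front */
	return tracing_alloc_snapshot();
}

static void example_interesting_event(void)
{
	/* later, even from a context that cannot sleep: just swap buffers */
	tracing_snapshot();
}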
1295
1296 /**
1297  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1298  * @tr:         The tracing instance
1299  * @cond_data:  User data to associate with the snapshot
1300  * @update:     Implementation of the cond_snapshot update function
1301  *
1302  * Check whether the conditional snapshot for the given instance has
1303  * already been enabled, or if the current tracer is already using a
1304  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1305  * save the cond_data and update function inside.
1306  *
1307  * Returns 0 if successful, error otherwise.
1308  */
1309 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1310                                  cond_update_fn_t update)
1311 {
1312         struct cond_snapshot *cond_snapshot;
1313         int ret = 0;
1314
1315         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1316         if (!cond_snapshot)
1317                 return -ENOMEM;
1318
1319         cond_snapshot->cond_data = cond_data;
1320         cond_snapshot->update = update;
1321
1322         mutex_lock(&trace_types_lock);
1323
1324         ret = tracing_alloc_snapshot_instance(tr);
1325         if (ret)
1326                 goto fail_unlock;
1327
1328         if (tr->current_trace->use_max_tr) {
1329                 ret = -EBUSY;
1330                 goto fail_unlock;
1331         }
1332
1333         /*
1334          * The cond_snapshot can only change to NULL without the
1335          * trace_types_lock. We don't care if we race with it going
1336          * to NULL, but we want to make sure that it's not set to
1337          * something other than NULL when we get here, which we can
1338          * do safely with only holding the trace_types_lock and not
1339          * having to take the max_lock.
1340          */
1341         if (tr->cond_snapshot) {
1342                 ret = -EBUSY;
1343                 goto fail_unlock;
1344         }
1345
1346         local_irq_disable();
1347         arch_spin_lock(&tr->max_lock);
1348         tr->cond_snapshot = cond_snapshot;
1349         arch_spin_unlock(&tr->max_lock);
1350         local_irq_enable();
1351
1352         mutex_unlock(&trace_types_lock);
1353
1354         return ret;
1355
1356  fail_unlock:
1357         mutex_unlock(&trace_types_lock);
1358         kfree(cond_snapshot);
1359         return ret;
1360 }
1361 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
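
/*
 * Illustrative sketch (not part of the original file): enabling a
 * conditional snapshot with the API above. The update callback and
 * the threshold structure are assumptions; only cond_update_fn_t,
 * tracing_snapshot_cond_enable() and tracing_snapshot_cond() come
 * from the tracing code.
 */
struct example_cond {
	u64 threshold;
};

static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	struct example_cond *cond = cond_data;

	/* return true to let the snapshot (buffer swap) happen */
	return cond && cond->threshold != 0;
}

static int example_cond_setup(struct trace_array *tr, struct example_cond *cond)
{
	int ret;

	ret = tracing_snapshot_cond_enable(tr, cond, example_cond_update);
	if (ret)
		return ret;

	/* later, at the interesting point, pass the data to be tested */
	tracing_snapshot_cond(tr, cond);
	return 0;
}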
1362
1363 /**
1364  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1365  * @tr:         The tracing instance
1366  *
1367  * Check whether the conditional snapshot for the given instance is
1368  * enabled; if so, free the cond_snapshot associated with it,
1369  * otherwise return -EINVAL.
1370  *
1371  * Returns 0 if successful, error otherwise.
1372  */
1373 int tracing_snapshot_cond_disable(struct trace_array *tr)
1374 {
1375         int ret = 0;
1376
1377         local_irq_disable();
1378         arch_spin_lock(&tr->max_lock);
1379
1380         if (!tr->cond_snapshot)
1381                 ret = -EINVAL;
1382         else {
1383                 kfree(tr->cond_snapshot);
1384                 tr->cond_snapshot = NULL;
1385         }
1386
1387         arch_spin_unlock(&tr->max_lock);
1388         local_irq_enable();
1389
1390         return ret;
1391 }
1392 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1393 #else
1394 void tracing_snapshot(void)
1395 {
1396         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1397 }
1398 EXPORT_SYMBOL_GPL(tracing_snapshot);
1399 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1400 {
1401         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1402 }
1403 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1404 int tracing_alloc_snapshot(void)
1405 {
1406         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1407         return -ENODEV;
1408 }
1409 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1410 void tracing_snapshot_alloc(void)
1411 {
1412         /* Give warning */
1413         tracing_snapshot();
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1416 void *tracing_cond_snapshot_data(struct trace_array *tr)
1417 {
1418         return NULL;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1421 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1422 {
1423         return -ENODEV;
1424 }
1425 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428         return false;
1429 }
1430 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1431 #define free_snapshot(tr)       do { } while (0)
1432 #endif /* CONFIG_TRACER_SNAPSHOT */
1433
1434 void tracer_tracing_off(struct trace_array *tr)
1435 {
1436         if (tr->array_buffer.buffer)
1437                 ring_buffer_record_off(tr->array_buffer.buffer);
1438         /*
1439          * This flag is looked at when buffers haven't been allocated
1440          * yet, or by some tracers (like irqsoff) that just want to
1441          * know if the ring buffer has been disabled, but can handle
1442          * races where it gets disabled while we still do a record.
1443          * As the check is in the fast path of the tracers, it is more
1444          * important to be fast than accurate.
1445          */
1446         tr->buffer_disabled = 1;
1447         /* Make the flag seen by readers */
1448         smp_wmb();
1449 }
1450
1451 /**
1452  * tracing_off - turn off tracing buffers
1453  *
1454  * This function stops the tracing buffers from recording data.
1455  * It does not disable any overhead the tracers themselves may
1456  * be causing. This function simply causes all recording to
1457  * the ring buffers to fail.
1458  */
1459 void tracing_off(void)
1460 {
1461         tracer_tracing_off(&global_trace);
1462 }
1463 EXPORT_SYMBOL_GPL(tracing_off);
1464
1465 void disable_trace_on_warning(void)
1466 {
1467         if (__disable_trace_on_warning) {
1468                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1469                         "Disabling tracing due to warning\n");
1470                 tracing_off();
1471         }
1472 }
1473
1474 /**
1475  * tracer_tracing_is_on - show real state of ring buffer enabled
1476  * @tr : the trace array to know if ring buffer is enabled
1477  *
1478  * Shows real state of the ring buffer if it is enabled or not.
1479  */
1480 bool tracer_tracing_is_on(struct trace_array *tr)
1481 {
1482         if (tr->array_buffer.buffer)
1483                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1484         return !tr->buffer_disabled;
1485 }
1486
1487 /**
1488  * tracing_is_on - show state of ring buffers enabled
1489  */
1490 int tracing_is_on(void)
1491 {
1492         return tracer_tracing_is_on(&global_trace);
1493 }
1494 EXPORT_SYMBOL_GPL(tracing_is_on);
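
/*
 * Illustrative sketch (not part of the original file): bracketing a
 * suspect code path with tracing_on()/tracing_off() so the ring
 * buffer keeps only what happened in between. The "example_" name is
 * an assumption.
 */
static void example_capture_window(void)
{
	tracing_on();
	/* ... run the code of interest ... */
	tracing_off();
}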
1495
1496 static int __init set_buf_size(char *str)
1497 {
1498         unsigned long buf_size;
1499
1500         if (!str)
1501                 return 0;
1502         buf_size = memparse(str, &str);
1503         /*
1504          * nr_entries can not be zero and the startup
1505          * tests require some buffer space. Therefore
1506          * ensure we have at least 4096 bytes of buffer.
1507          */
1508         trace_buf_size = max(4096UL, buf_size);
1509         return 1;
1510 }
1511 __setup("trace_buf_size=", set_buf_size);
1512
1513 static int __init set_tracing_thresh(char *str)
1514 {
1515         unsigned long threshold;
1516         int ret;
1517
1518         if (!str)
1519                 return 0;
1520         ret = kstrtoul(str, 0, &threshold);
1521         if (ret < 0)
1522                 return 0;
1523         tracing_thresh = threshold * 1000;
1524         return 1;
1525 }
1526 __setup("tracing_thresh=", set_tracing_thresh);
1527
1528 unsigned long nsecs_to_usecs(unsigned long nsecs)
1529 {
1530         return nsecs / 1000;
1531 }
1532
1533 /*
1534  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1535  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1536  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1537  * of strings in the order that the evals (enum) were defined.
1538  */
1539 #undef C
1540 #define C(a, b) b
1541
1542 /* These must match the bit positions in trace_iterator_flags */
1543 static const char *trace_options[] = {
1544         TRACE_FLAGS
1545         NULL
1546 };
1547
1548 static struct {
1549         u64 (*func)(void);
1550         const char *name;
1551         int in_ns;              /* is this clock in nanoseconds? */
1552 } trace_clocks[] = {
1553         { trace_clock_local,            "local",        1 },
1554         { trace_clock_global,           "global",       1 },
1555         { trace_clock_counter,          "counter",      0 },
1556         { trace_clock_jiffies,          "uptime",       0 },
1557         { trace_clock,                  "perf",         1 },
1558         { ktime_get_mono_fast_ns,       "mono",         1 },
1559         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1560         { ktime_get_boot_fast_ns,       "boot",         1 },
1561         { ktime_get_tai_fast_ns,        "tai",          1 },
1562         ARCH_TRACE_CLOCKS
1563 };
1564
1565 bool trace_clock_in_ns(struct trace_array *tr)
1566 {
1567         if (trace_clocks[tr->clock_id].in_ns)
1568                 return true;
1569
1570         return false;
1571 }
1572
1573 /*
1574  * trace_parser_get_init - gets the buffer for trace parser
1575  */
1576 int trace_parser_get_init(struct trace_parser *parser, int size)
1577 {
1578         memset(parser, 0, sizeof(*parser));
1579
1580         parser->buffer = kmalloc(size, GFP_KERNEL);
1581         if (!parser->buffer)
1582                 return 1;
1583
1584         parser->size = size;
1585         return 0;
1586 }
1587
1588 /*
1589  * trace_parser_put - frees the buffer for trace parser
1590  */
1591 void trace_parser_put(struct trace_parser *parser)
1592 {
1593         kfree(parser->buffer);
1594         parser->buffer = NULL;
1595 }
1596
1597 /*
1598  * trace_get_user - reads the user input string separated by space
1599  * (matched by isspace(ch))
1600  *
1601  * For each string found the 'struct trace_parser' is updated,
1602  * and the function returns.
1603  *
1604  * Returns number of bytes read.
1605  *
1606  * See kernel/trace/trace.h for 'struct trace_parser' details.
1607  */
1608 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1609         size_t cnt, loff_t *ppos)
1610 {
1611         char ch;
1612         size_t read = 0;
1613         ssize_t ret;
1614
1615         if (!*ppos)
1616                 trace_parser_clear(parser);
1617
1618         ret = get_user(ch, ubuf++);
1619         if (ret)
1620                 goto out;
1621
1622         read++;
1623         cnt--;
1624
1625         /*
1626          * The parser is not finished with the last write,
1627          * continue reading the user input without skipping spaces.
1628          */
1629         if (!parser->cont) {
1630                 /* skip white space */
1631                 while (cnt && isspace(ch)) {
1632                         ret = get_user(ch, ubuf++);
1633                         if (ret)
1634                                 goto out;
1635                         read++;
1636                         cnt--;
1637                 }
1638
1639                 parser->idx = 0;
1640
1641                 /* only spaces were written */
1642                 if (isspace(ch) || !ch) {
1643                         *ppos += read;
1644                         ret = read;
1645                         goto out;
1646                 }
1647         }
1648
1649         /* read the non-space input */
1650         while (cnt && !isspace(ch) && ch) {
1651                 if (parser->idx < parser->size - 1)
1652                         parser->buffer[parser->idx++] = ch;
1653                 else {
1654                         ret = -EINVAL;
1655                         goto out;
1656                 }
1657                 ret = get_user(ch, ubuf++);
1658                 if (ret)
1659                         goto out;
1660                 read++;
1661                 cnt--;
1662         }
1663
1664         /* We either got complete input or we have to wait for another call. */
1665         if (isspace(ch) || !ch) {
1666                 parser->buffer[parser->idx] = 0;
1667                 parser->cont = false;
1668         } else if (parser->idx < parser->size - 1) {
1669                 parser->cont = true;
1670                 parser->buffer[parser->idx++] = ch;
1671                 /* Make sure the parsed string always terminates with '\0'. */
1672                 parser->buffer[parser->idx] = 0;
1673         } else {
1674                 ret = -EINVAL;
1675                 goto out;
1676         }
1677
1678         *ppos += read;
1679         ret = read;
1680
1681 out:
1682         return ret;
1683 }
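
/*
 * A minimal usage sketch for the parser above, assuming a hypothetical
 * tracefs write handler that accepts space-separated names
 * (example_write() and handle_one_name() are illustrative only):
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, NAME_MAX + 1))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read > 0 && trace_parser_loaded(&parser))
 *			handle_one_name(parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */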
1684
1685 /* TODO add a seq_buf_to_buffer() */
1686 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1687 {
1688         int len;
1689
1690         if (trace_seq_used(s) <= s->seq.readpos)
1691                 return -EBUSY;
1692
1693         len = trace_seq_used(s) - s->seq.readpos;
1694         if (cnt > len)
1695                 cnt = len;
1696         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1697
1698         s->seq.readpos += cnt;
1699         return cnt;
1700 }
1701
1702 unsigned long __read_mostly     tracing_thresh;
1703
1704 #ifdef CONFIG_TRACER_MAX_TRACE
1705 static const struct file_operations tracing_max_lat_fops;
1706
1707 #ifdef LATENCY_FS_NOTIFY
1708
1709 static struct workqueue_struct *fsnotify_wq;
1710
1711 static void latency_fsnotify_workfn(struct work_struct *work)
1712 {
1713         struct trace_array *tr = container_of(work, struct trace_array,
1714                                               fsnotify_work);
1715         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1716 }
1717
1718 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1719 {
1720         struct trace_array *tr = container_of(iwork, struct trace_array,
1721                                               fsnotify_irqwork);
1722         queue_work(fsnotify_wq, &tr->fsnotify_work);
1723 }
1724
1725 static void trace_create_maxlat_file(struct trace_array *tr,
1726                                      struct dentry *d_tracer)
1727 {
1728         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1729         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1730         tr->d_max_latency = trace_create_file("tracing_max_latency",
1731                                               TRACE_MODE_WRITE,
1732                                               d_tracer, &tr->max_latency,
1733                                               &tracing_max_lat_fops);
1734 }
1735
1736 __init static int latency_fsnotify_init(void)
1737 {
1738         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1739                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1740         if (!fsnotify_wq) {
1741                 pr_err("Unable to allocate tr_max_lat_wq\n");
1742                 return -ENOMEM;
1743         }
1744         return 0;
1745 }
1746
1747 late_initcall_sync(latency_fsnotify_init);
1748
1749 void latency_fsnotify(struct trace_array *tr)
1750 {
1751         if (!fsnotify_wq)
1752                 return;
1753         /*
1754          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1755          * possible that we are called from __schedule() or do_idle(), which
1756          * could cause a deadlock.
1757          */
1758         irq_work_queue(&tr->fsnotify_irqwork);
1759 }
1760
1761 #else /* !LATENCY_FS_NOTIFY */
1762
1763 #define trace_create_maxlat_file(tr, d_tracer)                          \
1764         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1765                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1766
1767 #endif
1768
1769 /*
1770  * Copy the new maximum trace into the separate maximum-trace
1771  * structure. (this way the maximum trace is permanently saved,
1772  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1773  */
1774 static void
1775 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1776 {
1777         struct array_buffer *trace_buf = &tr->array_buffer;
1778         struct array_buffer *max_buf = &tr->max_buffer;
1779         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1780         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1781
1782         max_buf->cpu = cpu;
1783         max_buf->time_start = data->preempt_timestamp;
1784
1785         max_data->saved_latency = tr->max_latency;
1786         max_data->critical_start = data->critical_start;
1787         max_data->critical_end = data->critical_end;
1788
1789         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1790         max_data->pid = tsk->pid;
1791         /*
1792          * If tsk == current, then use current_uid(), as that does not use
1793          * RCU. The irq tracer can be called out of RCU scope.
1794          */
1795         if (tsk == current)
1796                 max_data->uid = current_uid();
1797         else
1798                 max_data->uid = task_uid(tsk);
1799
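        /* Recover the nice value: static_prio == MAX_RT_PRIO + 20 + nice. */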
1800         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1801         max_data->policy = tsk->policy;
1802         max_data->rt_priority = tsk->rt_priority;
1803
1804         /* record this task's comm */
1805         tracing_record_cmdline(tsk);
1806         latency_fsnotify(tr);
1807 }
1808
1809 /**
1810  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1811  * @tr: trace array to snapshot
1812  * @tsk: the task with the latency
1813  * @cpu: The cpu that initiated the trace.
1814  * @cond_data: User data associated with a conditional snapshot
1815  *
1816  * Flip the buffers between the @tr and the max_tr and record information
1817  * about which task was the cause of this latency.
1818  */
1819 void
1820 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1821               void *cond_data)
1822 {
1823         if (tr->stop_count)
1824                 return;
1825
1826         WARN_ON_ONCE(!irqs_disabled());
1827
1828         if (!tr->allocated_snapshot) {
1829                 /* Only the nop tracer should hit this when disabling */
1830                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1831                 return;
1832         }
1833
1834         arch_spin_lock(&tr->max_lock);
1835
1836         /* Inherit the recordable setting from array_buffer */
1837         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1838                 ring_buffer_record_on(tr->max_buffer.buffer);
1839         else
1840                 ring_buffer_record_off(tr->max_buffer.buffer);
1841
1842 #ifdef CONFIG_TRACER_SNAPSHOT
1843         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1844                 arch_spin_unlock(&tr->max_lock);
1845                 return;
1846         }
1847 #endif
1848         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1849
1850         __update_max_tr(tr, tsk, cpu);
1851
1852         arch_spin_unlock(&tr->max_lock);
1853 }
1854
1855 /**
1856  * update_max_tr_single - only copy one trace over, and reset the rest
1857  * @tr: trace array to snapshot
1858  * @tsk: task with the latency
1859  * @cpu: the cpu of the buffer to copy.
1860  *
1861  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1862  */
1863 void
1864 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1865 {
1866         int ret;
1867
1868         if (tr->stop_count)
1869                 return;
1870
1871         WARN_ON_ONCE(!irqs_disabled());
1872         if (!tr->allocated_snapshot) {
1873                 /* Only the nop tracer should hit this when disabling */
1874                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875                 return;
1876         }
1877
1878         arch_spin_lock(&tr->max_lock);
1879
1880         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1881
1882         if (ret == -EBUSY) {
1883                 /*
1884                  * We failed to swap the buffer due to a commit taking
1885                  * place on this CPU. We fail to record, but we reset
1886                  * the max trace buffer (no one writes directly to it)
1887                  * and flag that it failed.
1888                  */
1889                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1890                         "Failed to swap buffers due to commit in progress\n");
1891         }
1892
1893         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1894
1895         __update_max_tr(tr, tsk, cpu);
1896         arch_spin_unlock(&tr->max_lock);
1897 }
1898
1899 #endif /* CONFIG_TRACER_MAX_TRACE */
1900
1901 static int wait_on_pipe(struct trace_iterator *iter, int full)
1902 {
1903         /* Iterators are static; they should be either filled or empty */
1904         if (trace_buffer_iter(iter, iter->cpu_file))
1905                 return 0;
1906
1907         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1908                                 full);
1909 }
1910
1911 #ifdef CONFIG_FTRACE_STARTUP_TEST
1912 static bool selftests_can_run;
1913
1914 struct trace_selftests {
1915         struct list_head                list;
1916         struct tracer                   *type;
1917 };
1918
1919 static LIST_HEAD(postponed_selftests);
1920
1921 static int save_selftest(struct tracer *type)
1922 {
1923         struct trace_selftests *selftest;
1924
1925         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1926         if (!selftest)
1927                 return -ENOMEM;
1928
1929         selftest->type = type;
1930         list_add(&selftest->list, &postponed_selftests);
1931         return 0;
1932 }
1933
1934 static int run_tracer_selftest(struct tracer *type)
1935 {
1936         struct trace_array *tr = &global_trace;
1937         struct tracer *saved_tracer = tr->current_trace;
1938         int ret;
1939
1940         if (!type->selftest || tracing_selftest_disabled)
1941                 return 0;
1942
1943         /*
1944          * If a tracer registers early in boot up (before scheduling is
1945          * initialized and such), then do not run its selftests yet.
1946          * Instead, run it a little later in the boot process.
1947          */
1948         if (!selftests_can_run)
1949                 return save_selftest(type);
1950
1951         if (!tracing_is_on()) {
1952                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1953                         type->name);
1954                 return 0;
1955         }
1956
1957         /*
1958          * Run a selftest on this tracer.
1959          * Here we reset the trace buffer, and set the current
1960          * tracer to be this tracer. The tracer can then run some
1961          * internal tracing to verify that everything is in order.
1962          * If we fail, we do not register this tracer.
1963          */
1964         tracing_reset_online_cpus(&tr->array_buffer);
1965
1966         tr->current_trace = type;
1967
1968 #ifdef CONFIG_TRACER_MAX_TRACE
1969         if (type->use_max_tr) {
1970                 /* If we expanded the buffers, make sure the max is expanded too */
1971                 if (ring_buffer_expanded)
1972                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1973                                            RING_BUFFER_ALL_CPUS);
1974                 tr->allocated_snapshot = true;
1975         }
1976 #endif
1977
1978         /* the test is responsible for initializing and enabling */
1979         pr_info("Testing tracer %s: ", type->name);
1980         ret = type->selftest(type, tr);
1981         /* the test is responsible for resetting too */
1982         tr->current_trace = saved_tracer;
1983         if (ret) {
1984                 printk(KERN_CONT "FAILED!\n");
1985                 /* Add the warning after printing 'FAILED' */
1986                 WARN_ON(1);
1987                 return -1;
1988         }
1989         /* Only reset on passing, to avoid touching corrupted buffers */
1990         tracing_reset_online_cpus(&tr->array_buffer);
1991
1992 #ifdef CONFIG_TRACER_MAX_TRACE
1993         if (type->use_max_tr) {
1994                 tr->allocated_snapshot = false;
1995
1996                 /* Shrink the max buffer again */
1997                 if (ring_buffer_expanded)
1998                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1999                                            RING_BUFFER_ALL_CPUS);
2000         }
2001 #endif
2002
2003         printk(KERN_CONT "PASSED\n");
2004         return 0;
2005 }
2006
2007 static __init int init_trace_selftests(void)
2008 {
2009         struct trace_selftests *p, *n;
2010         struct tracer *t, **last;
2011         int ret;
2012
2013         selftests_can_run = true;
2014
2015         mutex_lock(&trace_types_lock);
2016
2017         if (list_empty(&postponed_selftests))
2018                 goto out;
2019
2020         pr_info("Running postponed tracer tests:\n");
2021
2022         tracing_selftest_running = true;
2023         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2024                 /* This loop can take minutes when sanitizers are enabled, so
2025                  * let's make sure we allow RCU processing.
2026                  */
2027                 cond_resched();
2028                 ret = run_tracer_selftest(p->type);
2029                 /* If the test fails, then warn and remove from available_tracers */
2030                 if (ret < 0) {
2031                         WARN(1, "tracer: %s failed selftest, disabling\n",
2032                              p->type->name);
2033                         last = &trace_types;
2034                         for (t = trace_types; t; t = t->next) {
2035                                 if (t == p->type) {
2036                                         *last = t->next;
2037                                         break;
2038                                 }
2039                                 last = &t->next;
2040                         }
2041                 }
2042                 list_del(&p->list);
2043                 kfree(p);
2044         }
2045         tracing_selftest_running = false;
2046
2047  out:
2048         mutex_unlock(&trace_types_lock);
2049
2050         return 0;
2051 }
2052 core_initcall(init_trace_selftests);
2053 #else
2054 static inline int run_tracer_selftest(struct tracer *type)
2055 {
2056         return 0;
2057 }
2058 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2059
2060 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2061
2062 static void __init apply_trace_boot_options(void);
2063
2064 /**
2065  * register_tracer - register a tracer with the ftrace system.
2066  * @type: the plugin for the tracer
2067  *
2068  * Register a new plugin tracer.
2069  */
2070 int __init register_tracer(struct tracer *type)
2071 {
2072         struct tracer *t;
2073         int ret = 0;
2074
2075         if (!type->name) {
2076                 pr_info("Tracer must have a name\n");
2077                 return -1;
2078         }
2079
2080         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2081                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2082                 return -1;
2083         }
2084
2085         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2086                 pr_warn("Can not register tracer %s due to lockdown\n",
2087                            type->name);
2088                 return -EPERM;
2089         }
2090
2091         mutex_lock(&trace_types_lock);
2092
2093         tracing_selftest_running = true;
2094
2095         for (t = trace_types; t; t = t->next) {
2096                 if (strcmp(type->name, t->name) == 0) {
2097                         /* already found */
2098                         pr_info("Tracer %s already registered\n",
2099                                 type->name);
2100                         ret = -1;
2101                         goto out;
2102                 }
2103         }
2104
2105         if (!type->set_flag)
2106                 type->set_flag = &dummy_set_flag;
2107         if (!type->flags) {
2108                 /* Allocate a dummy tracer_flags */
2109                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2110                 if (!type->flags) {
2111                         ret = -ENOMEM;
2112                         goto out;
2113                 }
2114                 type->flags->val = 0;
2115                 type->flags->opts = dummy_tracer_opt;
2116         } else
2117                 if (!type->flags->opts)
2118                         type->flags->opts = dummy_tracer_opt;
2119
2120         /* store the tracer for __set_tracer_option */
2121         type->flags->trace = type;
2122
2123         ret = run_tracer_selftest(type);
2124         if (ret < 0)
2125                 goto out;
2126
2127         type->next = trace_types;
2128         trace_types = type;
2129         add_tracer_options(&global_trace, type);
2130
2131  out:
2132         tracing_selftest_running = false;
2133         mutex_unlock(&trace_types_lock);
2134
2135         if (ret || !default_bootup_tracer)
2136                 goto out_unlock;
2137
2138         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2139                 goto out_unlock;
2140
2141         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2142         /* Do we want this tracer to start on bootup? */
2143         tracing_set_tracer(&global_trace, type->name);
2144         default_bootup_tracer = NULL;
2145
2146         apply_trace_boot_options();
2147
2148         /* Disable other selftests, since running this tracer will break them. */
2149         disable_tracing_selftest("running a tracer");
2150
2151  out_unlock:
2152         return ret;
2153 }
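
/*
 * A minimal registration sketch, assuming a hypothetical tracer plugin
 * ("example" and example_tracer_init are illustrative names; real tracers
 * usually also provide reset/start/stop callbacks and a selftest):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */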
2154
2155 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2156 {
2157         struct trace_buffer *buffer = buf->buffer;
2158
2159         if (!buffer)
2160                 return;
2161
2162         ring_buffer_record_disable(buffer);
2163
2164         /* Make sure all commits have finished */
2165         synchronize_rcu();
2166         ring_buffer_reset_cpu(buffer, cpu);
2167
2168         ring_buffer_record_enable(buffer);
2169 }
2170
2171 void tracing_reset_online_cpus(struct array_buffer *buf)
2172 {
2173         struct trace_buffer *buffer = buf->buffer;
2174
2175         if (!buffer)
2176                 return;
2177
2178         ring_buffer_record_disable(buffer);
2179
2180         /* Make sure all commits have finished */
2181         synchronize_rcu();
2182
2183         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2184
2185         ring_buffer_reset_online_cpus(buffer);
2186
2187         ring_buffer_record_enable(buffer);
2188 }
2189
2190 /* Must have trace_types_lock held */
2191 void tracing_reset_all_online_cpus_unlocked(void)
2192 {
2193         struct trace_array *tr;
2194
2195         lockdep_assert_held(&trace_types_lock);
2196
2197         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2198                 if (!tr->clear_trace)
2199                         continue;
2200                 tr->clear_trace = false;
2201                 tracing_reset_online_cpus(&tr->array_buffer);
2202 #ifdef CONFIG_TRACER_MAX_TRACE
2203                 tracing_reset_online_cpus(&tr->max_buffer);
2204 #endif
2205         }
2206 }
2207
2208 void tracing_reset_all_online_cpus(void)
2209 {
2210         mutex_lock(&trace_types_lock);
2211         tracing_reset_all_online_cpus_unlocked();
2212         mutex_unlock(&trace_types_lock);
2213 }
2214
2215 /*
2216  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2217  * is the tgid last observed corresponding to pid=i.
2218  */
2219 static int *tgid_map;
2220
2221 /* The maximum valid index into tgid_map. */
2222 static size_t tgid_map_max;
2223
2224 #define SAVED_CMDLINES_DEFAULT 128
2225 #define NO_CMDLINE_MAP UINT_MAX
2226 /*
2227  * Preemption must be disabled before acquiring trace_cmdline_lock.
2228  * The various trace_arrays' max_lock must be acquired in a context
2229  * where interrupt is disabled.
2230  */
2231 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2232 struct saved_cmdlines_buffer {
2233         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2234         unsigned *map_cmdline_to_pid;
2235         unsigned cmdline_num;
2236         int cmdline_idx;
2237         char *saved_cmdlines;
2238 };
2239 static struct saved_cmdlines_buffer *savedcmd;
2240
2241 static inline char *get_saved_cmdlines(int idx)
2242 {
2243         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2244 }
2245
2246 static inline void set_cmdline(int idx, const char *cmdline)
2247 {
2248         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2249 }
2250
2251 static int allocate_cmdlines_buffer(unsigned int val,
2252                                     struct saved_cmdlines_buffer *s)
2253 {
2254         s->map_cmdline_to_pid = kmalloc_array(val,
2255                                               sizeof(*s->map_cmdline_to_pid),
2256                                               GFP_KERNEL);
2257         if (!s->map_cmdline_to_pid)
2258                 return -ENOMEM;
2259
2260         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2261         if (!s->saved_cmdlines) {
2262                 kfree(s->map_cmdline_to_pid);
2263                 return -ENOMEM;
2264         }
2265
2266         s->cmdline_idx = 0;
2267         s->cmdline_num = val;
2268         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2269                sizeof(s->map_pid_to_cmdline));
2270         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2271                val * sizeof(*s->map_cmdline_to_pid));
2272
2273         return 0;
2274 }
2275
2276 static int trace_create_savedcmd(void)
2277 {
2278         int ret;
2279
2280         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2281         if (!savedcmd)
2282                 return -ENOMEM;
2283
2284         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2285         if (ret < 0) {
2286                 kfree(savedcmd);
2287                 savedcmd = NULL;
2288                 return -ENOMEM;
2289         }
2290
2291         return 0;
2292 }
2293
2294 int is_tracing_stopped(void)
2295 {
2296         return global_trace.stop_count;
2297 }
2298
2299 /**
2300  * tracing_start - quick start of the tracer
2301  *
2302  * If tracing is enabled but was stopped by tracing_stop,
2303  * this will start the tracer back up.
2304  */
2305 void tracing_start(void)
2306 {
2307         struct trace_buffer *buffer;
2308         unsigned long flags;
2309
2310         if (tracing_disabled)
2311                 return;
2312
2313         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2314         if (--global_trace.stop_count) {
2315                 if (global_trace.stop_count < 0) {
2316                         /* Someone screwed up their debugging */
2317                         WARN_ON_ONCE(1);
2318                         global_trace.stop_count = 0;
2319                 }
2320                 goto out;
2321         }
2322
2323         /* Prevent the buffers from switching */
2324         arch_spin_lock(&global_trace.max_lock);
2325
2326         buffer = global_trace.array_buffer.buffer;
2327         if (buffer)
2328                 ring_buffer_record_enable(buffer);
2329
2330 #ifdef CONFIG_TRACER_MAX_TRACE
2331         buffer = global_trace.max_buffer.buffer;
2332         if (buffer)
2333                 ring_buffer_record_enable(buffer);
2334 #endif
2335
2336         arch_spin_unlock(&global_trace.max_lock);
2337
2338  out:
2339         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2340 }
2341
2342 static void tracing_start_tr(struct trace_array *tr)
2343 {
2344         struct trace_buffer *buffer;
2345         unsigned long flags;
2346
2347         if (tracing_disabled)
2348                 return;
2349
2350         /* If global, we need to also start the max tracer */
2351         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2352                 return tracing_start();
2353
2354         raw_spin_lock_irqsave(&tr->start_lock, flags);
2355
2356         if (--tr->stop_count) {
2357                 if (tr->stop_count < 0) {
2358                         /* Someone screwed up their debugging */
2359                         WARN_ON_ONCE(1);
2360                         tr->stop_count = 0;
2361                 }
2362                 goto out;
2363         }
2364
2365         buffer = tr->array_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_enable(buffer);
2368
2369  out:
2370         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2371 }
2372
2373 /**
2374  * tracing_stop - quick stop of the tracer
2375  *
2376  * Lightweight way to stop tracing. Use in conjunction with
2377  * tracing_start.
2378  */
2379 void tracing_stop(void)
2380 {
2381         struct trace_buffer *buffer;
2382         unsigned long flags;
2383
2384         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2385         if (global_trace.stop_count++)
2386                 goto out;
2387
2388         /* Prevent the buffers from switching */
2389         arch_spin_lock(&global_trace.max_lock);
2390
2391         buffer = global_trace.array_buffer.buffer;
2392         if (buffer)
2393                 ring_buffer_record_disable(buffer);
2394
2395 #ifdef CONFIG_TRACER_MAX_TRACE
2396         buffer = global_trace.max_buffer.buffer;
2397         if (buffer)
2398                 ring_buffer_record_disable(buffer);
2399 #endif
2400
2401         arch_spin_unlock(&global_trace.max_lock);
2402
2403  out:
2404         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2405 }
2406
2407 static void tracing_stop_tr(struct trace_array *tr)
2408 {
2409         struct trace_buffer *buffer;
2410         unsigned long flags;
2411
2412         /* If global, we need to also stop the max tracer */
2413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2414                 return tracing_stop();
2415
2416         raw_spin_lock_irqsave(&tr->start_lock, flags);
2417         if (tr->stop_count++)
2418                 goto out;
2419
2420         buffer = tr->array_buffer.buffer;
2421         if (buffer)
2422                 ring_buffer_record_disable(buffer);
2423
2424  out:
2425         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2426 }
2427
2428 static int trace_save_cmdline(struct task_struct *tsk)
2429 {
2430         unsigned tpid, idx;
2431
2432         /* treat recording of the idle task as a success */
2433         if (!tsk->pid)
2434                 return 1;
2435
2436         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2437
2438         /*
2439          * It's not the end of the world if we don't get
2440          * the lock, but we also don't want to spin
2441          * nor do we want to disable interrupts,
2442          * so if we miss here, then better luck next time.
2443          *
2444          * This is called from within the scheduler and wakeup paths, so
2445          * interrupts had better be disabled and the run queue lock held.
2446          */
2447         lockdep_assert_preemption_disabled();
2448         if (!arch_spin_trylock(&trace_cmdline_lock))
2449                 return 0;
2450
2451         idx = savedcmd->map_pid_to_cmdline[tpid];
2452         if (idx == NO_CMDLINE_MAP) {
2453                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2454
2455                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2456                 savedcmd->cmdline_idx = idx;
2457         }
2458
2459         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2460         set_cmdline(idx, tsk->comm);
2461
2462         arch_spin_unlock(&trace_cmdline_lock);
2463
2464         return 1;
2465 }
2466
2467 static void __trace_find_cmdline(int pid, char comm[])
2468 {
2469         unsigned map;
2470         int tpid;
2471
2472         if (!pid) {
2473                 strcpy(comm, "<idle>");
2474                 return;
2475         }
2476
2477         if (WARN_ON_ONCE(pid < 0)) {
2478                 strcpy(comm, "<XXX>");
2479                 return;
2480         }
2481
2482         tpid = pid & (PID_MAX_DEFAULT - 1);
2483         map = savedcmd->map_pid_to_cmdline[tpid];
2484         if (map != NO_CMDLINE_MAP) {
2485                 tpid = savedcmd->map_cmdline_to_pid[map];
2486                 if (tpid == pid) {
2487                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2488                         return;
2489                 }
2490         }
2491         strcpy(comm, "<...>");
2492 }
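
/*
 * A worked example of the pid/comm mapping above, assuming the default
 * PID_MAX_DEFAULT of 32768: pid 70000 masks down to tpid 4464
 * (70000 & 32767), the same slot as pid 4464 itself. Whichever task saved
 * its comm last owns the slot, and map_cmdline_to_pid[] remembers that
 * full pid, so a lookup for the other pid sees the mismatch and falls back
 * to "<...>" instead of reporting a foreign comm.
 */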
2493
2494 void trace_find_cmdline(int pid, char comm[])
2495 {
2496         preempt_disable();
2497         arch_spin_lock(&trace_cmdline_lock);
2498
2499         __trace_find_cmdline(pid, comm);
2500
2501         arch_spin_unlock(&trace_cmdline_lock);
2502         preempt_enable();
2503 }
2504
2505 static int *trace_find_tgid_ptr(int pid)
2506 {
2507         /*
2508          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2509          * if we observe a non-NULL tgid_map then we also observe the correct
2510          * tgid_map_max.
2511          */
2512         int *map = smp_load_acquire(&tgid_map);
2513
2514         if (unlikely(!map || pid > tgid_map_max))
2515                 return NULL;
2516
2517         return &map[pid];
2518 }
2519
2520 int trace_find_tgid(int pid)
2521 {
2522         int *ptr = trace_find_tgid_ptr(pid);
2523
2524         return ptr ? *ptr : 0;
2525 }
2526
2527 static int trace_save_tgid(struct task_struct *tsk)
2528 {
2529         int *ptr;
2530
2531         /* treat recording of the idle task as a success */
2532         if (!tsk->pid)
2533                 return 1;
2534
2535         ptr = trace_find_tgid_ptr(tsk->pid);
2536         if (!ptr)
2537                 return 0;
2538
2539         *ptr = tsk->tgid;
2540         return 1;
2541 }
2542
2543 static bool tracing_record_taskinfo_skip(int flags)
2544 {
2545         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2546                 return true;
2547         if (!__this_cpu_read(trace_taskinfo_save))
2548                 return true;
2549         return false;
2550 }
2551
2552 /**
2553  * tracing_record_taskinfo - record the task info of a task
2554  *
2555  * @task:  task to record
2556  * @flags: TRACE_RECORD_CMDLINE for recording comm
2557  *         TRACE_RECORD_TGID for recording tgid
2558  */
2559 void tracing_record_taskinfo(struct task_struct *task, int flags)
2560 {
2561         bool done;
2562
2563         if (tracing_record_taskinfo_skip(flags))
2564                 return;
2565
2566         /*
2567          * Record as much task information as possible. If some fail, continue
2568          * to try to record the others.
2569          */
2570         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2571         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2572
2573         /* If recording any information failed, retry soon. */
2574         if (!done)
2575                 return;
2576
2577         __this_cpu_write(trace_taskinfo_save, false);
2578 }
2579
2580 /**
2581  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2582  *
2583  * @prev: previous task during sched_switch
2584  * @next: next task during sched_switch
2585  * @flags: TRACE_RECORD_CMDLINE for recording comm
2586  *         TRACE_RECORD_TGID for recording tgid
2587  */
2588 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2589                                           struct task_struct *next, int flags)
2590 {
2591         bool done;
2592
2593         if (tracing_record_taskinfo_skip(flags))
2594                 return;
2595
2596         /*
2597          * Record as much task information as possible. If some fail, continue
2598          * to try to record the others.
2599          */
2600         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2601         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2602         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2603         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2604
2605         /* If recording any information failed, retry soon. */
2606         if (!done)
2607                 return;
2608
2609         __this_cpu_write(trace_taskinfo_save, false);
2610 }
2611
2612 /* Helpers to record a specific task information */
2613 void tracing_record_cmdline(struct task_struct *task)
2614 {
2615         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2616 }
2617
2618 void tracing_record_tgid(struct task_struct *task)
2619 {
2620         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2621 }
2622
2623 /*
2624  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2625  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2626  * simplifies those functions and keeps them in sync.
2627  */
2628 enum print_line_t trace_handle_return(struct trace_seq *s)
2629 {
2630         return trace_seq_has_overflowed(s) ?
2631                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2632 }
2633 EXPORT_SYMBOL_GPL(trace_handle_return);
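
/*
 * A minimal sketch of how an event's output callback is expected to use
 * the helper above (trace_print_example() is an illustrative name only):
 *
 *	static enum print_line_t trace_print_example(struct trace_iterator *iter,
 *						     int flags,
 *						     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */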
2634
2635 static unsigned short migration_disable_value(void)
2636 {
2637 #if defined(CONFIG_SMP)
2638         return current->migration_disabled;
2639 #else
2640         return 0;
2641 #endif
2642 }
2643
2644 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2645 {
2646         unsigned int trace_flags = irqs_status;
2647         unsigned int pc;
2648
2649         pc = preempt_count();
2650
2651         if (pc & NMI_MASK)
2652                 trace_flags |= TRACE_FLAG_NMI;
2653         if (pc & HARDIRQ_MASK)
2654                 trace_flags |= TRACE_FLAG_HARDIRQ;
2655         if (in_serving_softirq())
2656                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2657         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2658                 trace_flags |= TRACE_FLAG_BH_OFF;
2659
2660         if (tif_need_resched())
2661                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2662         if (test_preempt_need_resched())
2663                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2664         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2665                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2666 }
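
/*
 * Layout of the packed word returned above (taken directly from the
 * expression in this function):
 *
 *	bits  0- 3: preempt_count() & 0xff, clamped to 15
 *	bits  4- 7: migration-disable depth, clamped to 15
 *	bits  8-15: unused
 *	bits 16+  : the TRACE_FLAG_* bits (irqs-off status, NMI, hardirq,
 *		    softirq, BH-off, need-resched, preempt-need-resched)
 */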
2667
2668 struct ring_buffer_event *
2669 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2670                           int type,
2671                           unsigned long len,
2672                           unsigned int trace_ctx)
2673 {
2674         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2675 }
2676
2677 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2678 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2679 static int trace_buffered_event_ref;
2680
2681 /**
2682  * trace_buffered_event_enable - enable buffering events
2683  *
2684  * When events are being filtered, it is quicker to use a temporary
2685  * buffer to write the event data into if there's a likely chance
2686  * that it will not be committed. Discarding an event from the ring
2687  * buffer is not as fast as committing it, and is much slower than
2688  * copying the data out of a temporary buffer on a commit.
2689  *
2690  * When an event is to be filtered, allocate per cpu buffers to
2691  * write the event data into, and if the event is filtered and discarded
2692  * it is simply dropped, otherwise, the entire data is to be committed
2693  * in one shot.
2694  */
2695 void trace_buffered_event_enable(void)
2696 {
2697         struct ring_buffer_event *event;
2698         struct page *page;
2699         int cpu;
2700
2701         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2702
2703         if (trace_buffered_event_ref++)
2704                 return;
2705
2706         for_each_tracing_cpu(cpu) {
2707                 page = alloc_pages_node(cpu_to_node(cpu),
2708                                         GFP_KERNEL | __GFP_NORETRY, 0);
2709                 if (!page)
2710                         goto failed;
2711
2712                 event = page_address(page);
2713                 memset(event, 0, sizeof(*event));
2714
2715                 per_cpu(trace_buffered_event, cpu) = event;
2716
2717                 preempt_disable();
2718                 if (cpu == smp_processor_id() &&
2719                     __this_cpu_read(trace_buffered_event) !=
2720                     per_cpu(trace_buffered_event, cpu))
2721                         WARN_ON_ONCE(1);
2722                 preempt_enable();
2723         }
2724
2725         return;
2726  failed:
2727         trace_buffered_event_disable();
2728 }
2729
2730 static void enable_trace_buffered_event(void *data)
2731 {
2732         /* Probably not needed, but do it anyway */
2733         smp_rmb();
2734         this_cpu_dec(trace_buffered_event_cnt);
2735 }
2736
2737 static void disable_trace_buffered_event(void *data)
2738 {
2739         this_cpu_inc(trace_buffered_event_cnt);
2740 }
2741
2742 /**
2743  * trace_buffered_event_disable - disable buffering events
2744  *
2745  * When a filter is removed, it is faster to not use the buffered
2746  * events, and to commit directly into the ring buffer. Free up
2747  * the temp buffers when there are no more users. This requires
2748  * special synchronization with current events.
2749  */
2750 void trace_buffered_event_disable(void)
2751 {
2752         int cpu;
2753
2754         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2755
2756         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2757                 return;
2758
2759         if (--trace_buffered_event_ref)
2760                 return;
2761
2762         preempt_disable();
2763         /* For each CPU, set the buffer as used. */
2764         smp_call_function_many(tracing_buffer_mask,
2765                                disable_trace_buffered_event, NULL, 1);
2766         preempt_enable();
2767
2768         /* Wait for all current users to finish */
2769         synchronize_rcu();
2770
2771         for_each_tracing_cpu(cpu) {
2772                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2773                 per_cpu(trace_buffered_event, cpu) = NULL;
2774         }
2775         /*
2776          * Make sure trace_buffered_event is NULL before clearing
2777          * trace_buffered_event_cnt.
2778          */
2779         smp_wmb();
2780
2781         preempt_disable();
2782         /* Do the work on each cpu */
2783         smp_call_function_many(tracing_buffer_mask,
2784                                enable_trace_buffered_event, NULL, 1);
2785         preempt_enable();
2786 }
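
/*
 * A minimal pairing sketch for the enable/disable helpers above; both are
 * expected to run with event_mutex held, typically from the event filter
 * code when a filter is attached to or detached from an event file (the
 * call sites here are the assumption, the helpers themselves only check
 * the mutex):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// filter being attached
 *	...
 *	trace_buffered_event_disable();		// filter being removed
 *	mutex_unlock(&event_mutex);
 */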
2787
2788 static struct trace_buffer *temp_buffer;
2789
2790 struct ring_buffer_event *
2791 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2792                           struct trace_event_file *trace_file,
2793                           int type, unsigned long len,
2794                           unsigned int trace_ctx)
2795 {
2796         struct ring_buffer_event *entry;
2797         struct trace_array *tr = trace_file->tr;
2798         int val;
2799
2800         *current_rb = tr->array_buffer.buffer;
2801
2802         if (!tr->no_filter_buffering_ref &&
2803             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2804                 preempt_disable_notrace();
2805                 /*
2806                  * Filtering is on, so try to use the per cpu buffer first.
2807                  * This buffer will simulate a ring_buffer_event,
2808                  * where the type_len is zero and the array[0] will
2809                  * hold the full length.
2810                  * (see include/linux/ring_buffer.h for details on
2811                  *  how the ring_buffer_event is structured).
2812                  *
2813                  * Using a temp buffer during filtering and copying it
2814                  * on a matched filter is quicker than writing directly
2815                  * into the ring buffer and then discarding it when
2816                  * it doesn't match. That is because the discard
2817                  * requires several atomic operations to get right.
2818                  * Copying on a match and doing nothing on a failed match
2819                  * is still quicker than writing directly and then having
2820                  * to discard out of the ring buffer on a failed match.
2821                  */
2822                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2823                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2824
2825                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2826
2827                         /*
2828                          * Preemption is disabled, but interrupts and NMIs
2829                          * can still come in now. If that happens after
2830                          * the above increment, then it will have to go
2831                          * back to the old method of allocating the event
2832                          * on the ring buffer, and if the filter fails, it
2833                          * will have to call ring_buffer_discard_commit()
2834                          * to remove it.
2835                          *
2836                          * Need to also check the unlikely case that the
2837                          * length is bigger than the temp buffer size.
2838                          * If that happens, then the reserve is pretty much
2839                          * guaranteed to fail, as the ring buffer currently
2840                          * only allows events less than a page. But that may
2841                          * change in the future, so let the ring buffer reserve
2842                          * handle the failure in that case.
2843                          */
2844                         if (val == 1 && likely(len <= max_len)) {
2845                                 trace_event_setup(entry, type, trace_ctx);
2846                                 entry->array[0] = len;
2847                                 /* Return with preemption disabled */
2848                                 return entry;
2849                         }
2850                         this_cpu_dec(trace_buffered_event_cnt);
2851                 }
2852                 /* __trace_buffer_lock_reserve() disables preemption */
2853                 preempt_enable_notrace();
2854         }
2855
2856         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2857                                             trace_ctx);
2858         /*
2859          * If tracing is off, but we have triggers enabled,
2860          * we still need to look at the event data. Use the temp_buffer
2861          * to store the trace event for the trigger to use. It's recursion
2862          * safe and will not be recorded anywhere.
2863          */
2864         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2865                 *current_rb = temp_buffer;
2866                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2867                                                     trace_ctx);
2868         }
2869         return entry;
2870 }
2871 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
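
/*
 * Note on the simulated event handed out from the per-cpu temp buffer
 * above: its type_len stays zero and array[0] holds the payload length, so
 * ring_buffer_event_data() resolves to &event->array[1]; on a filter match
 * the commit path copies array[0] bytes from there into the real ring
 * buffer instead of committing in place.
 */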
2872
2873 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2874 static DEFINE_MUTEX(tracepoint_printk_mutex);
2875
2876 static void output_printk(struct trace_event_buffer *fbuffer)
2877 {
2878         struct trace_event_call *event_call;
2879         struct trace_event_file *file;
2880         struct trace_event *event;
2881         unsigned long flags;
2882         struct trace_iterator *iter = tracepoint_print_iter;
2883
2884         /* We should never get here if iter is NULL */
2885         if (WARN_ON_ONCE(!iter))
2886                 return;
2887
2888         event_call = fbuffer->trace_file->event_call;
2889         if (!event_call || !event_call->event.funcs ||
2890             !event_call->event.funcs->trace)
2891                 return;
2892
2893         file = fbuffer->trace_file;
2894         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2895             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2896              !filter_match_preds(file->filter, fbuffer->entry)))
2897                 return;
2898
2899         event = &fbuffer->trace_file->event_call->event;
2900
2901         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2902         trace_seq_init(&iter->seq);
2903         iter->ent = fbuffer->entry;
2904         event_call->event.funcs->trace(iter, 0, event);
2905         trace_seq_putc(&iter->seq, 0);
2906         printk("%s", iter->seq.buffer);
2907
2908         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2909 }
2910
2911 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2912                              void *buffer, size_t *lenp,
2913                              loff_t *ppos)
2914 {
2915         int save_tracepoint_printk;
2916         int ret;
2917
2918         mutex_lock(&tracepoint_printk_mutex);
2919         save_tracepoint_printk = tracepoint_printk;
2920
2921         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2922
2923         /*
2924          * This will force exiting early, as tracepoint_printk
2925          * is always zero when tracepoint_print_iter is not allocated.
2926          */
2927         if (!tracepoint_print_iter)
2928                 tracepoint_printk = 0;
2929
2930         if (save_tracepoint_printk == tracepoint_printk)
2931                 goto out;
2932
2933         if (tracepoint_printk)
2934                 static_key_enable(&tracepoint_printk_key.key);
2935         else
2936                 static_key_disable(&tracepoint_printk_key.key);
2937
2938  out:
2939         mutex_unlock(&tracepoint_printk_mutex);
2940
2941         return ret;
2942 }
2943
2944 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2945 {
2946         enum event_trigger_type tt = ETT_NONE;
2947         struct trace_event_file *file = fbuffer->trace_file;
2948
2949         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2950                         fbuffer->entry, &tt))
2951                 goto discard;
2952
2953         if (static_key_false(&tracepoint_printk_key.key))
2954                 output_printk(fbuffer);
2955
2956         if (static_branch_unlikely(&trace_event_exports_enabled))
2957                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2958
2959         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2960                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2961
2962 discard:
2963         if (tt)
2964                 event_triggers_post_call(file, tt);
2965
2966 }
2967 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2968
2969 /*
2970  * Skip 3:
2971  *
2972  *   trace_buffer_unlock_commit_regs()
2973  *   trace_event_buffer_commit()
2974  *   trace_event_raw_event_xxx()
2975  */
2976 # define STACK_SKIP 3
2977
2978 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2979                                      struct trace_buffer *buffer,
2980                                      struct ring_buffer_event *event,
2981                                      unsigned int trace_ctx,
2982                                      struct pt_regs *regs)
2983 {
2984         __buffer_unlock_commit(buffer, event);
2985
2986         /*
2987          * If regs is not set, then skip the necessary functions.
2988          * Note, we can still get here via blktrace, wakeup tracer
2989          * and mmiotrace, but that's ok if they lose a function or
2990          * two. They are not that meaningful.
2991          */
2992         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2993         ftrace_trace_userstack(tr, buffer, trace_ctx);
2994 }
2995
2996 /*
2997  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2998  */
2999 void
3000 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3001                                    struct ring_buffer_event *event)
3002 {
3003         __buffer_unlock_commit(buffer, event);
3004 }
3005
3006 void
3007 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3008                parent_ip, unsigned int trace_ctx)
3009 {
3010         struct trace_event_call *call = &event_function;
3011         struct trace_buffer *buffer = tr->array_buffer.buffer;
3012         struct ring_buffer_event *event;
3013         struct ftrace_entry *entry;
3014
3015         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3016                                             trace_ctx);
3017         if (!event)
3018                 return;
3019         entry   = ring_buffer_event_data(event);
3020         entry->ip                       = ip;
3021         entry->parent_ip                = parent_ip;
3022
3023         if (!call_filter_check_discard(call, entry, buffer, event)) {
3024                 if (static_branch_unlikely(&trace_function_exports_enabled))
3025                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3026                 __buffer_unlock_commit(buffer, event);
3027         }
3028 }
3029
3030 #ifdef CONFIG_STACKTRACE
3031
3032 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3033 #define FTRACE_KSTACK_NESTING   4
3034
3035 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3036
3037 struct ftrace_stack {
3038         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3039 };
3040
3041
3042 struct ftrace_stacks {
3043         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3044 };
3045
3046 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3047 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3048
3049 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3050                                  unsigned int trace_ctx,
3051                                  int skip, struct pt_regs *regs)
3052 {
3053         struct trace_event_call *call = &event_kernel_stack;
3054         struct ring_buffer_event *event;
3055         unsigned int size, nr_entries;
3056         struct ftrace_stack *fstack;
3057         struct stack_entry *entry;
3058         int stackidx;
3059
3060         /*
3061          * Add one, for this function and the call to stack_trace_save().
3062          * If regs is set, then these functions will not be in the way.
3063          */
3064 #ifndef CONFIG_UNWINDER_ORC
3065         if (!regs)
3066                 skip++;
3067 #endif
3068
3069         preempt_disable_notrace();
3070
3071         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3072
3073         /* This should never happen. If it does, yell once and skip */
3074         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3075                 goto out;
3076
3077         /*
3078          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3079          * interrupt will either see the value pre increment or post
3080          * increment. If the interrupt happens pre increment it will have
3081          * restored the counter when it returns.  We just need a barrier to
3082          * keep gcc from moving things around.
3083          */
3084         barrier();
3085
3086         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3087         size = ARRAY_SIZE(fstack->calls);
3088
3089         if (regs) {
3090                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3091                                                    size, skip);
3092         } else {
3093                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3094         }
3095
3096         size = nr_entries * sizeof(unsigned long);
3097         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3098                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3099                                     trace_ctx);
3100         if (!event)
3101                 goto out;
3102         entry = ring_buffer_event_data(event);
3103
3104         memcpy(&entry->caller, fstack->calls, size);
3105         entry->size = nr_entries;
3106
3107         if (!call_filter_check_discard(call, entry, buffer, event))
3108                 __buffer_unlock_commit(buffer, event);
3109
3110  out:
3111         /* Again, don't let gcc optimize things here */
3112         barrier();
3113         __this_cpu_dec(ftrace_stack_reserve);
3114         preempt_enable_notrace();
3115
3116 }
3117
3118 static inline void ftrace_trace_stack(struct trace_array *tr,
3119                                       struct trace_buffer *buffer,
3120                                       unsigned int trace_ctx,
3121                                       int skip, struct pt_regs *regs)
3122 {
3123         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3124                 return;
3125
3126         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3127 }
3128
3129 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3130                    int skip)
3131 {
3132         struct trace_buffer *buffer = tr->array_buffer.buffer;
3133
3134         if (rcu_is_watching()) {
3135                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3136                 return;
3137         }
3138
3139         /*
3140          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3141          * but if the above rcu_is_watching() failed, then the NMI
3142          * triggered someplace critical, and ct_irq_enter() should
3143          * not be called from NMI.
3144          */
3145         if (unlikely(in_nmi()))
3146                 return;
3147
3148         ct_irq_enter_irqson();
3149         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3150         ct_irq_exit_irqson();
3151 }
3152
3153 /**
3154  * trace_dump_stack - record a stack back trace in the trace buffer
3155  * @skip: Number of functions to skip (helper handlers)
3156  */
3157 void trace_dump_stack(int skip)
3158 {
3159         if (tracing_disabled || tracing_selftest_running)
3160                 return;
3161
3162 #ifndef CONFIG_UNWINDER_ORC
3163         /* Skip 1 to skip this function. */
3164         skip++;
3165 #endif
3166         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3167                              tracing_gen_ctx(), skip, NULL);
3168 }
3169 EXPORT_SYMBOL_GPL(trace_dump_stack);
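
/*
 * A minimal usage sketch for the export above: a debugging site can call
 *
 *	trace_dump_stack(0);
 *
 * to record the kernel stack leading up to that point into the trace
 * buffer, where it shows up in the tracefs "trace" output; a positive
 * @skip drops that many innermost callers from the recorded trace.
 */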
3170
3171 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3172 static DEFINE_PER_CPU(int, user_stack_count);
3173
3174 static void
3175 ftrace_trace_userstack(struct trace_array *tr,
3176                        struct trace_buffer *buffer, unsigned int trace_ctx)
3177 {
3178         struct trace_event_call *call = &event_user_stack;
3179         struct ring_buffer_event *event;
3180         struct userstack_entry *entry;
3181
3182         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3183                 return;
3184
3185         /*
3186          * NMIs cannot handle page faults, even with fixups.
3187          * Saving the user stack can (and often does) fault.
3188          */
3189         if (unlikely(in_nmi()))
3190                 return;
3191
3192         /*
3193          * prevent recursion, since the user stack tracing may
3194          * trigger other kernel events.
3195          */
3196         preempt_disable();
3197         if (__this_cpu_read(user_stack_count))
3198                 goto out;
3199
3200         __this_cpu_inc(user_stack_count);
3201
3202         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3203                                             sizeof(*entry), trace_ctx);
3204         if (!event)
3205                 goto out_drop_count;
3206         entry   = ring_buffer_event_data(event);
3207
3208         entry->tgid             = current->tgid;
3209         memset(&entry->caller, 0, sizeof(entry->caller));
3210
3211         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3212         if (!call_filter_check_discard(call, entry, buffer, event))
3213                 __buffer_unlock_commit(buffer, event);
3214
3215  out_drop_count:
3216         __this_cpu_dec(user_stack_count);
3217  out:
3218         preempt_enable();
3219 }
3220 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3221 static void ftrace_trace_userstack(struct trace_array *tr,
3222                                    struct trace_buffer *buffer,
3223                                    unsigned int trace_ctx)
3224 {
3225 }
3226 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3227
3228 #endif /* CONFIG_STACKTRACE */
3229
3230 static inline void
3231 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3232                           unsigned long long delta)
3233 {
3234         entry->bottom_delta_ts = delta & U32_MAX;
3235         entry->top_delta_ts = (delta >> 32);
3236 }
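/*
 * Illustration only: the 64-bit delta is split into the two 32-bit fields
 * above, so a consumer could reconstruct it (assuming this layout) with:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */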
3237
3238 void trace_last_func_repeats(struct trace_array *tr,
3239                              struct trace_func_repeats *last_info,
3240                              unsigned int trace_ctx)
3241 {
3242         struct trace_buffer *buffer = tr->array_buffer.buffer;
3243         struct func_repeats_entry *entry;
3244         struct ring_buffer_event *event;
3245         u64 delta;
3246
3247         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3248                                             sizeof(*entry), trace_ctx);
3249         if (!event)
3250                 return;
3251
3252         delta = ring_buffer_event_time_stamp(buffer, event) -
3253                 last_info->ts_last_call;
3254
3255         entry = ring_buffer_event_data(event);
3256         entry->ip = last_info->ip;
3257         entry->parent_ip = last_info->parent_ip;
3258         entry->count = last_info->count;
3259         func_repeats_set_delta_ts(entry, delta);
3260
3261         __buffer_unlock_commit(buffer, event);
3262 }
3263
3264 /* created for use with alloc_percpu */
3265 struct trace_buffer_struct {
3266         int nesting;
3267         char buffer[4][TRACE_BUF_SIZE];
3268 };
3269
3270 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3271
3272 /*
3273  * This allows for lockless recording.  If we're nested too deeply, then
3274  * this returns NULL.
3275  */
3276 static char *get_trace_buf(void)
3277 {
3278         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3279
3280         if (!trace_percpu_buffer || buffer->nesting >= 4)
3281                 return NULL;
3282
3283         buffer->nesting++;
3284
3285         /* Interrupts must see nesting incremented before we use the buffer */
3286         barrier();
3287         return &buffer->buffer[buffer->nesting - 1][0];
3288 }
3289
3290 static void put_trace_buf(void)
3291 {
3292         /* Don't let the decrement of nesting leak before this */
3293         barrier();
3294         this_cpu_dec(trace_percpu_buffer->nesting);
3295 }
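/*
 * Usage sketch of the pair above (this is the pattern that trace_vbprintk()
 * and __trace_array_vprintk() below follow); the error handling here is
 * illustrative only:
 *
 *	char *tbuf = get_trace_buf();
 *	if (!tbuf)
 *		return 0;	// nested too deep, or buffer not allocated
 *	// ... format up to TRACE_BUF_SIZE bytes into tbuf ...
 *	put_trace_buf();
 */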
3296
3297 static int alloc_percpu_trace_buffer(void)
3298 {
3299         struct trace_buffer_struct __percpu *buffers;
3300
3301         if (trace_percpu_buffer)
3302                 return 0;
3303
3304         buffers = alloc_percpu(struct trace_buffer_struct);
3305         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3306                 return -ENOMEM;
3307
3308         trace_percpu_buffer = buffers;
3309         return 0;
3310 }
3311
3312 static int buffers_allocated;
3313
3314 void trace_printk_init_buffers(void)
3315 {
3316         if (buffers_allocated)
3317                 return;
3318
3319         if (alloc_percpu_trace_buffer())
3320                 return;
3321
3322         /* trace_printk() is for debug use only. Don't use it in production. */
3323
3324         pr_warn("\n");
3325         pr_warn("**********************************************************\n");
3326         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3327         pr_warn("**                                                      **\n");
3328         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3329         pr_warn("**                                                      **\n");
3330         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3331         pr_warn("** unsafe for production use.                           **\n");
3332         pr_warn("**                                                      **\n");
3333         pr_warn("** If you see this message and you are not debugging    **\n");
3334         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3335         pr_warn("**                                                      **\n");
3336         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3337         pr_warn("**********************************************************\n");
3338
3339         /* Expand the buffers to set size */
3340         tracing_update_buffers();
3341
3342         buffers_allocated = 1;
3343
3344         /*
3345          * trace_printk_init_buffers() can be called by modules.
3346          * If that happens, then we need to start cmdline recording
3347          * directly here. If the global_trace.array_buffer.buffer is
3348          * already allocated, then this was called by module code.
3349          */
3350         if (global_trace.array_buffer.buffer)
3351                 tracing_start_cmdline_record();
3352 }
3353 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3354
3355 void trace_printk_start_comm(void)
3356 {
3357         /* Start tracing comms if trace printk is set */
3358         if (!buffers_allocated)
3359                 return;
3360         tracing_start_cmdline_record();
3361 }
3362
3363 static void trace_printk_start_stop_comm(int enabled)
3364 {
3365         if (!buffers_allocated)
3366                 return;
3367
3368         if (enabled)
3369                 tracing_start_cmdline_record();
3370         else
3371                 tracing_stop_cmdline_record();
3372 }
3373
3374 /**
3375  * trace_vbprintk - write binary msg to tracing buffer
3376  * @ip:    The address of the caller
3377  * @fmt:   The string format to write to the buffer
3378  * @args:  Arguments for @fmt
3379  */
3380 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3381 {
3382         struct trace_event_call *call = &event_bprint;
3383         struct ring_buffer_event *event;
3384         struct trace_buffer *buffer;
3385         struct trace_array *tr = &global_trace;
3386         struct bprint_entry *entry;
3387         unsigned int trace_ctx;
3388         char *tbuffer;
3389         int len = 0, size;
3390
3391         if (unlikely(tracing_selftest_running || tracing_disabled))
3392                 return 0;
3393
3394         /* Don't pollute graph traces with trace_vprintk internals */
3395         pause_graph_tracing();
3396
3397         trace_ctx = tracing_gen_ctx();
3398         preempt_disable_notrace();
3399
3400         tbuffer = get_trace_buf();
3401         if (!tbuffer) {
3402                 len = 0;
3403                 goto out_nobuffer;
3404         }
3405
3406         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3407
3408         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3409                 goto out_put;
3410
3411         size = sizeof(*entry) + sizeof(u32) * len;
3412         buffer = tr->array_buffer.buffer;
3413         ring_buffer_nest_start(buffer);
3414         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3415                                             trace_ctx);
3416         if (!event)
3417                 goto out;
3418         entry = ring_buffer_event_data(event);
3419         entry->ip                       = ip;
3420         entry->fmt                      = fmt;
3421
3422         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3423         if (!call_filter_check_discard(call, entry, buffer, event)) {
3424                 __buffer_unlock_commit(buffer, event);
3425                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3426         }
3427
3428 out:
3429         ring_buffer_nest_end(buffer);
3430 out_put:
3431         put_trace_buf();
3432
3433 out_nobuffer:
3434         preempt_enable_notrace();
3435         unpause_graph_tracing();
3436
3437         return len;
3438 }
3439 EXPORT_SYMBOL_GPL(trace_vbprintk);
3440
3441 __printf(3, 0)
3442 static int
3443 __trace_array_vprintk(struct trace_buffer *buffer,
3444                       unsigned long ip, const char *fmt, va_list args)
3445 {
3446         struct trace_event_call *call = &event_print;
3447         struct ring_buffer_event *event;
3448         int len = 0, size;
3449         struct print_entry *entry;
3450         unsigned int trace_ctx;
3451         char *tbuffer;
3452
3453         if (tracing_disabled || tracing_selftest_running)
3454                 return 0;
3455
3456         /* Don't pollute graph traces with trace_vprintk internals */
3457         pause_graph_tracing();
3458
3459         trace_ctx = tracing_gen_ctx();
3460         preempt_disable_notrace();
3461
3462
3463         tbuffer = get_trace_buf();
3464         if (!tbuffer) {
3465                 len = 0;
3466                 goto out_nobuffer;
3467         }
3468
3469         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3470
3471         size = sizeof(*entry) + len + 1;
3472         ring_buffer_nest_start(buffer);
3473         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3474                                             trace_ctx);
3475         if (!event)
3476                 goto out;
3477         entry = ring_buffer_event_data(event);
3478         entry->ip = ip;
3479
3480         memcpy(&entry->buf, tbuffer, len + 1);
3481         if (!call_filter_check_discard(call, entry, buffer, event)) {
3482                 __buffer_unlock_commit(buffer, event);
3483                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3484         }
3485
3486 out:
3487         ring_buffer_nest_end(buffer);
3488         put_trace_buf();
3489
3490 out_nobuffer:
3491         preempt_enable_notrace();
3492         unpause_graph_tracing();
3493
3494         return len;
3495 }
3496
3497 __printf(3, 0)
3498 int trace_array_vprintk(struct trace_array *tr,
3499                         unsigned long ip, const char *fmt, va_list args)
3500 {
3501         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3502 }
3503
3504 /**
3505  * trace_array_printk - Print a message to a specific instance
3506  * @tr: The instance trace_array descriptor
3507  * @ip: The instruction pointer that this is called from.
3508  * @fmt: The format to print (printf format)
3509  *
3510  * If a subsystem sets up its own instance, they have the right to
3511  * printk strings into their tracing instance buffer using this
3512  * function. Note, this function will not write into the top level
3513  * buffer (use trace_printk() for that), as writing into the top level
3514  * buffer should only have events that can be individually disabled.
3515  * trace_printk() is only used for debugging a kernel, and should not
3516  * be ever incorporated in normal use.
3517  *
3518  * trace_array_printk() can be used, as it will not add noise to the
3519  * top level tracing buffer.
3520  *
3521  * Note, trace_array_init_printk() must be called on @tr before this
3522  * can be used.
3523  */
3524 __printf(3, 0)
3525 int trace_array_printk(struct trace_array *tr,
3526                        unsigned long ip, const char *fmt, ...)
3527 {
3528         int ret;
3529         va_list ap;
3530
3531         if (!tr)
3532                 return -ENOENT;
3533
3534         /* This is only allowed for created instances */
3535         if (tr == &global_trace)
3536                 return 0;
3537
3538         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3539                 return 0;
3540
3541         va_start(ap, fmt);
3542         ret = trace_array_vprintk(tr, ip, fmt, ap);
3543         va_end(ap);
3544         return ret;
3545 }
3546 EXPORT_SYMBOL_GPL(trace_array_printk);
3547
3548 /**
3549  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3550  * @tr: The trace array to initialize the buffers for
3551  *
3552  * As trace_array_printk() only writes into instances, they are OK to
3553  * have in the kernel (unlike trace_printk()). This needs to be called
3554  * before trace_array_printk() can be used on a trace_array.
3555  */
3556 int trace_array_init_printk(struct trace_array *tr)
3557 {
3558         if (!tr)
3559                 return -ENOENT;
3560
3561         /* This is only allowed for created instances */
3562         if (tr == &global_trace)
3563                 return -EINVAL;
3564
3565         return alloc_percpu_trace_buffer();
3566 }
3567 EXPORT_SYMBOL_GPL(trace_array_init_printk);
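/*
 * A usage sketch for trace_array_init_printk() and trace_array_printk()
 * above; the instance name "my_instance" is purely illustrative, and a real
 * caller would also drop the reference with trace_array_put() when done:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 */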
3568
3569 __printf(3, 4)
3570 int trace_array_printk_buf(struct trace_buffer *buffer,
3571                            unsigned long ip, const char *fmt, ...)
3572 {
3573         int ret;
3574         va_list ap;
3575
3576         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3577                 return 0;
3578
3579         va_start(ap, fmt);
3580         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3581         va_end(ap);
3582         return ret;
3583 }
3584
3585 __printf(2, 0)
3586 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3587 {
3588         return trace_array_vprintk(&global_trace, ip, fmt, args);
3589 }
3590 EXPORT_SYMBOL_GPL(trace_vprintk);
3591
3592 static void trace_iterator_increment(struct trace_iterator *iter)
3593 {
3594         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3595
3596         iter->idx++;
3597         if (buf_iter)
3598                 ring_buffer_iter_advance(buf_iter);
3599 }
3600
3601 static struct trace_entry *
3602 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3603                 unsigned long *lost_events)
3604 {
3605         struct ring_buffer_event *event;
3606         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3607
3608         if (buf_iter) {
3609                 event = ring_buffer_iter_peek(buf_iter, ts);
3610                 if (lost_events)
3611                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3612                                 (unsigned long)-1 : 0;
3613         } else {
3614                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3615                                          lost_events);
3616         }
3617
3618         if (event) {
3619                 iter->ent_size = ring_buffer_event_length(event);
3620                 return ring_buffer_event_data(event);
3621         }
3622         iter->ent_size = 0;
3623         return NULL;
3624 }
3625
3626 static struct trace_entry *
3627 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3628                   unsigned long *missing_events, u64 *ent_ts)
3629 {
3630         struct trace_buffer *buffer = iter->array_buffer->buffer;
3631         struct trace_entry *ent, *next = NULL;
3632         unsigned long lost_events = 0, next_lost = 0;
3633         int cpu_file = iter->cpu_file;
3634         u64 next_ts = 0, ts;
3635         int next_cpu = -1;
3636         int next_size = 0;
3637         int cpu;
3638
3639         /*
3640          * If we are in a per_cpu trace file, don't bother iterating over
3641          * all CPUs; peek directly at that one.
3642          */
3643         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3644                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3645                         return NULL;
3646                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3647                 if (ent_cpu)
3648                         *ent_cpu = cpu_file;
3649
3650                 return ent;
3651         }
3652
3653         for_each_tracing_cpu(cpu) {
3654
3655                 if (ring_buffer_empty_cpu(buffer, cpu))
3656                         continue;
3657
3658                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3659
3660                 /*
3661                  * Pick the entry with the smallest timestamp:
3662                  */
3663                 if (ent && (!next || ts < next_ts)) {
3664                         next = ent;
3665                         next_cpu = cpu;
3666                         next_ts = ts;
3667                         next_lost = lost_events;
3668                         next_size = iter->ent_size;
3669                 }
3670         }
3671
3672         iter->ent_size = next_size;
3673
3674         if (ent_cpu)
3675                 *ent_cpu = next_cpu;
3676
3677         if (ent_ts)
3678                 *ent_ts = next_ts;
3679
3680         if (missing_events)
3681                 *missing_events = next_lost;
3682
3683         return next;
3684 }
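/*
 * Illustration: if CPU 0's next entry carries ts = 2000 and CPU 1's carries
 * ts = 1500, the loop above returns CPU 1's entry (next_cpu = 1,
 * next_ts = 1500). This is how the per-CPU buffers are merged into a single
 * stream ordered by timestamp.
 */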
3685
3686 #define STATIC_FMT_BUF_SIZE     128
3687 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3688
3689 static char *trace_iter_expand_format(struct trace_iterator *iter)
3690 {
3691         char *tmp;
3692
3693         /*
3694          * iter->tr is NULL when used with tp_printk, which means this
3695          * can get called where it is not safe to call krealloc().
3696          */
3697         if (!iter->tr || iter->fmt == static_fmt_buf)
3698                 return NULL;
3699
3700         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3701                        GFP_KERNEL);
3702         if (tmp) {
3703                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3704                 iter->fmt = tmp;
3705         }
3706
3707         return tmp;
3708 }
3709
3710 /* Returns true if the string is safe to dereference from an event */
3711 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3712                            bool star, int len)
3713 {
3714         unsigned long addr = (unsigned long)str;
3715         struct trace_event *trace_event;
3716         struct trace_event_call *event;
3717
3718         /* Ignore strings with no length */
3719         if (star && !len)
3720                 return true;
3721
3722         /* OK if part of the event data */
3723         if ((addr >= (unsigned long)iter->ent) &&
3724             (addr < (unsigned long)iter->ent + iter->ent_size))
3725                 return true;
3726
3727         /* OK if part of the temp seq buffer */
3728         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3729             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3730                 return true;
3731
3732         /* Core rodata can not be freed */
3733         if (is_kernel_rodata(addr))
3734                 return true;
3735
3736         if (trace_is_tracepoint_string(str))
3737                 return true;
3738
3739         /*
3740          * Now this could be a module event, referencing core module
3741          * data, which is OK.
3742          */
3743         if (!iter->ent)
3744                 return false;
3745
3746         trace_event = ftrace_find_event(iter->ent->type);
3747         if (!trace_event)
3748                 return false;
3749
3750         event = container_of(trace_event, struct trace_event_call, event);
3751         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3752                 return false;
3753
3754         /* Would rather have rodata, but this will suffice */
3755         if (within_module_core(addr, event->module))
3756                 return true;
3757
3758         return false;
3759 }
3760
3761 static const char *show_buffer(struct trace_seq *s)
3762 {
3763         struct seq_buf *seq = &s->seq;
3764
3765         seq_buf_terminate(seq);
3766
3767         return seq->buffer;
3768 }
3769
3770 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3771
3772 static int test_can_verify_check(const char *fmt, ...)
3773 {
3774         char buf[16];
3775         va_list ap;
3776         int ret;
3777
3778         /*
3779          * The verifier depends on vsnprintf() modifying the va_list passed
3780          * to it, i.e. the va_list is effectively passed by reference. Some
3781          * architectures (like x86_32) pass it by value, which means that
3782          * vsnprintf() does not modify the caller's va_list, and the
3783          * verifier would then need to understand every value that
3784          * vsnprintf() can consume. If the va_list is passed by value,
3785          * the verifier is disabled.
3786          */
3787         va_start(ap, fmt);
3788         vsnprintf(buf, 16, "%d", ap);
3789         ret = va_arg(ap, int);
3790         va_end(ap);
3791
3792         return ret;
3793 }
3794
3795 static void test_can_verify(void)
3796 {
3797         if (!test_can_verify_check("%d %d", 0, 1)) {
3798                 pr_info("trace event string verifier disabled\n");
3799                 static_branch_inc(&trace_no_verify);
3800         }
3801 }
3802
3803 /**
3804  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3805  * @iter: The iterator that holds the seq buffer and the event being printed
3806  * @fmt: The format used to print the event
3807  * @ap: The va_list holding the data to print from @fmt.
3808  *
3809  * This writes the data into the @iter->seq buffer using the data from
3810  * @fmt and @ap. If the format has a %s, then the source of the string
3811  * is examined to make sure it is safe to print, otherwise it will
3812  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3813  * pointer.
3814  */
3815 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3816                          va_list ap)
3817 {
3818         const char *p = fmt;
3819         const char *str;
3820         int i, j;
3821
3822         if (WARN_ON_ONCE(!fmt))
3823                 return;
3824
3825         if (static_branch_unlikely(&trace_no_verify))
3826                 goto print;
3827
3828         /* Don't bother checking when doing a ftrace_dump() */
3829         if (iter->fmt == static_fmt_buf)
3830                 goto print;
3831
3832         while (*p) {
3833                 bool star = false;
3834                 int len = 0;
3835
3836                 j = 0;
3837
3838                 /* We only care about %s and variants */
3839                 for (i = 0; p[i]; i++) {
3840                         if (i + 1 >= iter->fmt_size) {
3841                                 /*
3842                                  * If we can't expand the copy buffer,
3843                                  * just print it.
3844                                  */
3845                                 if (!trace_iter_expand_format(iter))
3846                                         goto print;
3847                         }
3848
3849                         if (p[i] == '\\' && p[i+1]) {
3850                                 i++;
3851                                 continue;
3852                         }
3853                         if (p[i] == '%') {
3854                                 /* Need to test cases like %08.*s */
3855                                 for (j = 1; p[i+j]; j++) {
3856                                         if (isdigit(p[i+j]) ||
3857                                             p[i+j] == '.')
3858                                                 continue;
3859                                         if (p[i+j] == '*') {
3860                                                 star = true;
3861                                                 continue;
3862                                         }
3863                                         break;
3864                                 }
3865                                 if (p[i+j] == 's')
3866                                         break;
3867                                 star = false;
3868                         }
3869                         j = 0;
3870                 }
3871                 /* If no %s found then just print normally */
3872                 if (!p[i])
3873                         break;
3874
3875                 /* Copy up to the %s, and print that */
3876                 strncpy(iter->fmt, p, i);
3877                 iter->fmt[i] = '\0';
3878                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3879
3880                 /*
3881                  * If iter->seq is full, the above call no longer guarantees
3882                  * that ap is in sync with fmt processing, and further calls
3883                  * to va_arg() can return wrong positional arguments.
3884                  *
3885                  * Ensure that ap is no longer used in this case.
3886                  */
3887                 if (iter->seq.full) {
3888                         p = "";
3889                         break;
3890                 }
3891
3892                 if (star)
3893                         len = va_arg(ap, int);
3894
3895                 /* The ap now points to the string data of the %s */
3896                 str = va_arg(ap, const char *);
3897
3898                 /*
3899                  * If you hit this warning, it is likely that the
3900                  * trace event in question used %s on a string that
3901                  * was saved at the time of the event, but may not be
3902                  * around when the trace is read. Use __string(),
3903                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3904                  * instead. See samples/trace_events/trace-events-sample.h
3905                  * for reference.
3906                  */
3907                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3908                               "fmt: '%s' current_buffer: '%s'",
3909                               fmt, show_buffer(&iter->seq))) {
3910                         int ret;
3911
3912                         /* Try to safely read the string */
3913                         if (star) {
3914                                 if (len + 1 > iter->fmt_size)
3915                                         len = iter->fmt_size - 1;
3916                                 if (len < 0)
3917                                         len = 0;
3918                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3919                                 iter->fmt[len] = 0;
3920                                 star = false;
3921                         } else {
3922                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3923                                                                   iter->fmt_size);
3924                         }
3925                         if (ret < 0)
3926                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3927                         else
3928                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3929                                                  str, iter->fmt);
3930                         str = "[UNSAFE-MEMORY]";
3931                         strcpy(iter->fmt, "%s");
3932                 } else {
3933                         strncpy(iter->fmt, p + i, j + 1);
3934                         iter->fmt[j+1] = '\0';
3935                 }
3936                 if (star)
3937                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3938                 else
3939                         trace_seq_printf(&iter->seq, iter->fmt, str);
3940
3941                 p += i + j + 1;
3942         }
3943  print:
3944         if (*p)
3945                 trace_seq_vprintf(&iter->seq, p, ap);
3946 }
3947
3948 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3949 {
3950         const char *p, *new_fmt;
3951         char *q;
3952
3953         if (WARN_ON_ONCE(!fmt))
3954                 return fmt;
3955
3956         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3957                 return fmt;
3958
3959         p = fmt;
3960         new_fmt = q = iter->fmt;
3961         while (*p) {
3962                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3963                         if (!trace_iter_expand_format(iter))
3964                                 return fmt;
3965
3966                         q += iter->fmt - new_fmt;
3967                         new_fmt = iter->fmt;
3968                 }
3969
3970                 *q++ = *p++;
3971
3972                 /* Replace %p with %px */
3973                 if (p[-1] == '%') {
3974                         if (p[0] == '%') {
3975                                 *q++ = *p++;
3976                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3977                                 *q++ = *p++;
3978                                 *q++ = 'x';
3979                         }
3980                 }
3981         }
3982         *q = '\0';
3983
3984         return new_fmt;
3985 }
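/*
 * Example of the rewrite above (the format string is illustrative): with
 * TRACE_ITER_HASH_PTR cleared, "comm=%s ptr=%p" becomes "comm=%s ptr=%px",
 * while "%%p" and extended specifiers such as "%pS" are left untouched.
 */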
3986
3987 #define STATIC_TEMP_BUF_SIZE    128
3988 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3989
3990 /* Find the next real entry, without updating the iterator itself */
3991 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3992                                           int *ent_cpu, u64 *ent_ts)
3993 {
3994         /* __find_next_entry will reset ent_size */
3995         int ent_size = iter->ent_size;
3996         struct trace_entry *entry;
3997
3998         /*
3999          * If called from ftrace_dump(), then the iter->temp buffer
4000          * will be the static_temp_buf and not created from kmalloc.
4001          * If the entry size is greater than the buffer, we cannot save
4002          * it. Just return NULL in that case. This is only used to add
4003          * markers when two consecutive events' time stamps have a large
4004          * delta. See trace_print_lat_context().
4005          */
4006         if (iter->temp == static_temp_buf &&
4007             STATIC_TEMP_BUF_SIZE < ent_size)
4008                 return NULL;
4009
4010         /*
4011          * __find_next_entry() may call peek_next_entry(), which may call
4012          * ring_buffer_peek(), and that may make the contents of iter->ent
4013          * undefined. Need to copy iter->ent now.
4014          */
4015         if (iter->ent && iter->ent != iter->temp) {
4016                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4017                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4018                         void *temp;
4019                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4020                         if (!temp)
4021                                 return NULL;
4022                         kfree(iter->temp);
4023                         iter->temp = temp;
4024                         iter->temp_size = iter->ent_size;
4025                 }
4026                 memcpy(iter->temp, iter->ent, iter->ent_size);
4027                 iter->ent = iter->temp;
4028         }
4029         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4030         /* Put back the original ent_size */
4031         iter->ent_size = ent_size;
4032
4033         return entry;
4034 }
4035
4036 /* Find the next real entry, and increment the iterator to the next entry */
4037 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4038 {
4039         iter->ent = __find_next_entry(iter, &iter->cpu,
4040                                       &iter->lost_events, &iter->ts);
4041
4042         if (iter->ent)
4043                 trace_iterator_increment(iter);
4044
4045         return iter->ent ? iter : NULL;
4046 }
4047
4048 static void trace_consume(struct trace_iterator *iter)
4049 {
4050         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4051                             &iter->lost_events);
4052 }
4053
4054 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4055 {
4056         struct trace_iterator *iter = m->private;
4057         int i = (int)*pos;
4058         void *ent;
4059
4060         WARN_ON_ONCE(iter->leftover);
4061
4062         (*pos)++;
4063
4064         /* can't go backwards */
4065         if (iter->idx > i)
4066                 return NULL;
4067
4068         if (iter->idx < 0)
4069                 ent = trace_find_next_entry_inc(iter);
4070         else
4071                 ent = iter;
4072
4073         while (ent && iter->idx < i)
4074                 ent = trace_find_next_entry_inc(iter);
4075
4076         iter->pos = *pos;
4077
4078         return ent;
4079 }
4080
4081 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4082 {
4083         struct ring_buffer_iter *buf_iter;
4084         unsigned long entries = 0;
4085         u64 ts;
4086
4087         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4088
4089         buf_iter = trace_buffer_iter(iter, cpu);
4090         if (!buf_iter)
4091                 return;
4092
4093         ring_buffer_iter_reset(buf_iter);
4094
4095         /*
4096          * With the max latency tracers, it is possible that a reset
4097          * never took place on a CPU. This is evident when a timestamp
4098          * is before the start of the buffer.
4099          */
4100         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4101                 if (ts >= iter->array_buffer->time_start)
4102                         break;
4103                 entries++;
4104                 ring_buffer_iter_advance(buf_iter);
4105         }
4106
4107         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4108 }
4109
4110 /*
4111  * The current tracer is copied to avoid taking a global lock
4112  * all around.
4113  */
4114 static void *s_start(struct seq_file *m, loff_t *pos)
4115 {
4116         struct trace_iterator *iter = m->private;
4117         struct trace_array *tr = iter->tr;
4118         int cpu_file = iter->cpu_file;
4119         void *p = NULL;
4120         loff_t l = 0;
4121         int cpu;
4122
4123         /*
4124          * Copy the tracer to avoid using a global lock all around.
4125          * iter->trace is a copy of current_trace; the name pointer can
4126          * be compared instead of doing a strcmp(), as iter->trace->name
4127          * will point to the same string as current_trace->name.
4128          */
4129         mutex_lock(&trace_types_lock);
4130         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4131                 *iter->trace = *tr->current_trace;
4132         mutex_unlock(&trace_types_lock);
4133
4134 #ifdef CONFIG_TRACER_MAX_TRACE
4135         if (iter->snapshot && iter->trace->use_max_tr)
4136                 return ERR_PTR(-EBUSY);
4137 #endif
4138
4139         if (*pos != iter->pos) {
4140                 iter->ent = NULL;
4141                 iter->cpu = 0;
4142                 iter->idx = -1;
4143
4144                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4145                         for_each_tracing_cpu(cpu)
4146                                 tracing_iter_reset(iter, cpu);
4147                 } else
4148                         tracing_iter_reset(iter, cpu_file);
4149
4150                 iter->leftover = 0;
4151                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4152                         ;
4153
4154         } else {
4155                 /*
4156                  * If we overflowed the seq_file before, then we want
4157                  * to just reuse the trace_seq buffer again.
4158                  */
4159                 if (iter->leftover)
4160                         p = iter;
4161                 else {
4162                         l = *pos - 1;
4163                         p = s_next(m, p, &l);
4164                 }
4165         }
4166
4167         trace_event_read_lock();
4168         trace_access_lock(cpu_file);
4169         return p;
4170 }
4171
4172 static void s_stop(struct seq_file *m, void *p)
4173 {
4174         struct trace_iterator *iter = m->private;
4175
4176 #ifdef CONFIG_TRACER_MAX_TRACE
4177         if (iter->snapshot && iter->trace->use_max_tr)
4178                 return;
4179 #endif
4180
4181         trace_access_unlock(iter->cpu_file);
4182         trace_event_read_unlock();
4183 }
4184
4185 static void
4186 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4187                       unsigned long *entries, int cpu)
4188 {
4189         unsigned long count;
4190
4191         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4192         /*
4193          * If this buffer has skipped entries, then we hold all
4194          * entries for the trace and we need to ignore the
4195          * ones before the time stamp.
4196          */
4197         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4198                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4199                 /* total is the same as the entries */
4200                 *total = count;
4201         } else
4202                 *total = count +
4203                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4204         *entries = count;
4205 }
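/*
 * Illustrative numbers for the helper above: with 1000 entries still present
 * on the CPU and an overrun of 200 overwritten events, *entries is 1000 and
 * *total is 1200. If the CPU has skipped_entries, those are subtracted and
 * total equals entries, as handled above.
 */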
4206
4207 static void
4208 get_total_entries(struct array_buffer *buf,
4209                   unsigned long *total, unsigned long *entries)
4210 {
4211         unsigned long t, e;
4212         int cpu;
4213
4214         *total = 0;
4215         *entries = 0;
4216
4217         for_each_tracing_cpu(cpu) {
4218                 get_total_entries_cpu(buf, &t, &e, cpu);
4219                 *total += t;
4220                 *entries += e;
4221         }
4222 }
4223
4224 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4225 {
4226         unsigned long total, entries;
4227
4228         if (!tr)
4229                 tr = &global_trace;
4230
4231         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4232
4233         return entries;
4234 }
4235
4236 unsigned long trace_total_entries(struct trace_array *tr)
4237 {
4238         unsigned long total, entries;
4239
4240         if (!tr)
4241                 tr = &global_trace;
4242
4243         get_total_entries(&tr->array_buffer, &total, &entries);
4244
4245         return entries;
4246 }
4247
4248 static void print_lat_help_header(struct seq_file *m)
4249 {
4250         seq_puts(m, "#                    _------=> CPU#            \n"
4251                     "#                   / _-----=> irqs-off/BH-disabled\n"
4252                     "#                  | / _----=> need-resched    \n"
4253                     "#                  || / _---=> hardirq/softirq \n"
4254                     "#                  ||| / _--=> preempt-depth   \n"
4255                     "#                  |||| / _-=> migrate-disable \n"
4256                     "#                  ||||| /     delay           \n"
4257                     "#  cmd     pid     |||||| time  |   caller     \n"
4258                     "#     \\   /        ||||||  \\    |    /       \n");
4259 }
4260
4261 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4262 {
4263         unsigned long total;
4264         unsigned long entries;
4265
4266         get_total_entries(buf, &total, &entries);
4267         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4268                    entries, total, num_online_cpus());
4269         seq_puts(m, "#\n");
4270 }
4271
4272 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4273                                    unsigned int flags)
4274 {
4275         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4276
4277         print_event_info(buf, m);
4278
4279         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4280         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4281 }
4282
4283 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4284                                        unsigned int flags)
4285 {
4286         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4287         static const char space[] = "            ";
4288         int prec = tgid ? 12 : 2;
4289
4290         print_event_info(buf, m);
4291
4292         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4293         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4294         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4295         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4296         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4297         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4298         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4299         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4300 }
4301
4302 void
4303 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4304 {
4305         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4306         struct array_buffer *buf = iter->array_buffer;
4307         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4308         struct tracer *type = iter->trace;
4309         unsigned long entries;
4310         unsigned long total;
4311         const char *name = type->name;
4312
4313         get_total_entries(buf, &total, &entries);
4314
4315         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4316                    name, UTS_RELEASE);
4317         seq_puts(m, "# -----------------------------------"
4318                  "---------------------------------\n");
4319         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4320                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4321                    nsecs_to_usecs(data->saved_latency),
4322                    entries,
4323                    total,
4324                    buf->cpu,
4325                    preempt_model_none()      ? "server" :
4326                    preempt_model_voluntary() ? "desktop" :
4327                    preempt_model_full()      ? "preempt" :
4328                    preempt_model_rt()        ? "preempt_rt" :
4329                    "unknown",
4330                    /* These are reserved for later use */
4331                    0, 0, 0, 0);
4332 #ifdef CONFIG_SMP
4333         seq_printf(m, " #P:%d)\n", num_online_cpus());
4334 #else
4335         seq_puts(m, ")\n");
4336 #endif
4337         seq_puts(m, "#    -----------------\n");
4338         seq_printf(m, "#    | task: %.16s-%d "
4339                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4340                    data->comm, data->pid,
4341                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4342                    data->policy, data->rt_priority);
4343         seq_puts(m, "#    -----------------\n");
4344
4345         if (data->critical_start) {
4346                 seq_puts(m, "#  => started at: ");
4347                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4348                 trace_print_seq(m, &iter->seq);
4349                 seq_puts(m, "\n#  => ended at:   ");
4350                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4351                 trace_print_seq(m, &iter->seq);
4352                 seq_puts(m, "\n#\n");
4353         }
4354
4355         seq_puts(m, "#\n");
4356 }
4357
4358 static void test_cpu_buff_start(struct trace_iterator *iter)
4359 {
4360         struct trace_seq *s = &iter->seq;
4361         struct trace_array *tr = iter->tr;
4362
4363         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4364                 return;
4365
4366         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4367                 return;
4368
4369         if (cpumask_available(iter->started) &&
4370             cpumask_test_cpu(iter->cpu, iter->started))
4371                 return;
4372
4373         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4374                 return;
4375
4376         if (cpumask_available(iter->started))
4377                 cpumask_set_cpu(iter->cpu, iter->started);
4378
4379         /* Don't print started cpu buffer for the first entry of the trace */
4380         if (iter->idx > 1)
4381                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4382                                 iter->cpu);
4383 }
4384
4385 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4386 {
4387         struct trace_array *tr = iter->tr;
4388         struct trace_seq *s = &iter->seq;
4389         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4390         struct trace_entry *entry;
4391         struct trace_event *event;
4392
4393         entry = iter->ent;
4394
4395         test_cpu_buff_start(iter);
4396
4397         event = ftrace_find_event(entry->type);
4398
4399         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4400                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4401                         trace_print_lat_context(iter);
4402                 else
4403                         trace_print_context(iter);
4404         }
4405
4406         if (trace_seq_has_overflowed(s))
4407                 return TRACE_TYPE_PARTIAL_LINE;
4408
4409         if (event)
4410                 return event->funcs->trace(iter, sym_flags, event);
4411
4412         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4413
4414         return trace_handle_return(s);
4415 }
4416
4417 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4418 {
4419         struct trace_array *tr = iter->tr;
4420         struct trace_seq *s = &iter->seq;
4421         struct trace_entry *entry;
4422         struct trace_event *event;
4423
4424         entry = iter->ent;
4425
4426         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4427                 trace_seq_printf(s, "%d %d %llu ",
4428                                  entry->pid, iter->cpu, iter->ts);
4429
4430         if (trace_seq_has_overflowed(s))
4431                 return TRACE_TYPE_PARTIAL_LINE;
4432
4433         event = ftrace_find_event(entry->type);
4434         if (event)
4435                 return event->funcs->raw(iter, 0, event);
4436
4437         trace_seq_printf(s, "%d ?\n", entry->type);
4438
4439         return trace_handle_return(s);
4440 }
4441
4442 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4443 {
4444         struct trace_array *tr = iter->tr;
4445         struct trace_seq *s = &iter->seq;
4446         unsigned char newline = '\n';
4447         struct trace_entry *entry;
4448         struct trace_event *event;
4449
4450         entry = iter->ent;
4451
4452         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4453                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4454                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4455                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4456                 if (trace_seq_has_overflowed(s))
4457                         return TRACE_TYPE_PARTIAL_LINE;
4458         }
4459
4460         event = ftrace_find_event(entry->type);
4461         if (event) {
4462                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4463                 if (ret != TRACE_TYPE_HANDLED)
4464                         return ret;
4465         }
4466
4467         SEQ_PUT_FIELD(s, newline);
4468
4469         return trace_handle_return(s);
4470 }
4471
4472 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4473 {
4474         struct trace_array *tr = iter->tr;
4475         struct trace_seq *s = &iter->seq;
4476         struct trace_entry *entry;
4477         struct trace_event *event;
4478
4479         entry = iter->ent;
4480
4481         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4482                 SEQ_PUT_FIELD(s, entry->pid);
4483                 SEQ_PUT_FIELD(s, iter->cpu);
4484                 SEQ_PUT_FIELD(s, iter->ts);
4485                 if (trace_seq_has_overflowed(s))
4486                         return TRACE_TYPE_PARTIAL_LINE;
4487         }
4488
4489         event = ftrace_find_event(entry->type);
4490         return event ? event->funcs->binary(iter, 0, event) :
4491                 TRACE_TYPE_HANDLED;
4492 }
4493
4494 int trace_empty(struct trace_iterator *iter)
4495 {
4496         struct ring_buffer_iter *buf_iter;
4497         int cpu;
4498
4499         /* If we are looking at one CPU buffer, only check that one */
4500         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4501                 cpu = iter->cpu_file;
4502                 buf_iter = trace_buffer_iter(iter, cpu);
4503                 if (buf_iter) {
4504                         if (!ring_buffer_iter_empty(buf_iter))
4505                                 return 0;
4506                 } else {
4507                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4508                                 return 0;
4509                 }
4510                 return 1;
4511         }
4512
4513         for_each_tracing_cpu(cpu) {
4514                 buf_iter = trace_buffer_iter(iter, cpu);
4515                 if (buf_iter) {
4516                         if (!ring_buffer_iter_empty(buf_iter))
4517                                 return 0;
4518                 } else {
4519                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4520                                 return 0;
4521                 }
4522         }
4523
4524         return 1;
4525 }
4526
4527 /*  Called with trace_event_read_lock() held. */
4528 enum print_line_t print_trace_line(struct trace_iterator *iter)
4529 {
4530         struct trace_array *tr = iter->tr;
4531         unsigned long trace_flags = tr->trace_flags;
4532         enum print_line_t ret;
4533
4534         if (iter->lost_events) {
4535                 if (iter->lost_events == (unsigned long)-1)
4536                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4537                                          iter->cpu);
4538                 else
4539                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4540                                          iter->cpu, iter->lost_events);
4541                 if (trace_seq_has_overflowed(&iter->seq))
4542                         return TRACE_TYPE_PARTIAL_LINE;
4543         }
4544
4545         if (iter->trace && iter->trace->print_line) {
4546                 ret = iter->trace->print_line(iter);
4547                 if (ret != TRACE_TYPE_UNHANDLED)
4548                         return ret;
4549         }
4550
4551         if (iter->ent->type == TRACE_BPUTS &&
4552                         trace_flags & TRACE_ITER_PRINTK &&
4553                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4554                 return trace_print_bputs_msg_only(iter);
4555
4556         if (iter->ent->type == TRACE_BPRINT &&
4557                         trace_flags & TRACE_ITER_PRINTK &&
4558                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4559                 return trace_print_bprintk_msg_only(iter);
4560
4561         if (iter->ent->type == TRACE_PRINT &&
4562                         trace_flags & TRACE_ITER_PRINTK &&
4563                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4564                 return trace_print_printk_msg_only(iter);
4565
4566         if (trace_flags & TRACE_ITER_BIN)
4567                 return print_bin_fmt(iter);
4568
4569         if (trace_flags & TRACE_ITER_HEX)
4570                 return print_hex_fmt(iter);
4571
4572         if (trace_flags & TRACE_ITER_RAW)
4573                 return print_raw_fmt(iter);
4574
4575         return print_trace_fmt(iter);
4576 }
4577
4578 void trace_latency_header(struct seq_file *m)
4579 {
4580         struct trace_iterator *iter = m->private;
4581         struct trace_array *tr = iter->tr;
4582
4583         /* print nothing if the buffers are empty */
4584         if (trace_empty(iter))
4585                 return;
4586
4587         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4588                 print_trace_header(m, iter);
4589
4590         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4591                 print_lat_help_header(m);
4592 }
4593
4594 void trace_default_header(struct seq_file *m)
4595 {
4596         struct trace_iterator *iter = m->private;
4597         struct trace_array *tr = iter->tr;
4598         unsigned long trace_flags = tr->trace_flags;
4599
4600         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4601                 return;
4602
4603         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4604                 /* print nothing if the buffers are empty */
4605                 if (trace_empty(iter))
4606                         return;
4607                 print_trace_header(m, iter);
4608                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4609                         print_lat_help_header(m);
4610         } else {
4611                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4612                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4613                                 print_func_help_header_irq(iter->array_buffer,
4614                                                            m, trace_flags);
4615                         else
4616                                 print_func_help_header(iter->array_buffer, m,
4617                                                        trace_flags);
4618                 }
4619         }
4620 }
4621
4622 static void test_ftrace_alive(struct seq_file *m)
4623 {
4624         if (!ftrace_is_dead())
4625                 return;
4626         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4627                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4628 }
4629
4630 #ifdef CONFIG_TRACER_MAX_TRACE
4631 static void show_snapshot_main_help(struct seq_file *m)
4632 {
4633         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4634                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4635                     "#                      Takes a snapshot of the main buffer.\n"
4636                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4637                     "#                      (Doesn't have to be '2'; works with any number that\n"
4638                     "#                       is not a '0' or '1')\n");
4639 }
4640
4641 static void show_snapshot_percpu_help(struct seq_file *m)
4642 {
4643         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4644 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4645         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4646                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4647 #else
4648         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4649                     "#                     Must use main snapshot file to allocate.\n");
4650 #endif
4651         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4652                     "#                      (Doesn't have to be '2'; works with any number that\n"
4653                     "#                       is not a '0' or '1')\n");
4654 }
4655
4656 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4657 {
4658         if (iter->tr->allocated_snapshot)
4659                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4660         else
4661                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4662
4663         seq_puts(m, "# Snapshot commands:\n");
4664         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4665                 show_snapshot_main_help(m);
4666         else
4667                 show_snapshot_percpu_help(m);
4668 }
4669 #else
4670 /* Should never be called */
4671 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4672 #endif
4673
4674 static int s_show(struct seq_file *m, void *v)
4675 {
4676         struct trace_iterator *iter = v;
4677         int ret;
4678
4679         if (iter->ent == NULL) {
4680                 if (iter->tr) {
4681                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4682                         seq_puts(m, "#\n");
4683                         test_ftrace_alive(m);
4684                 }
4685                 if (iter->snapshot && trace_empty(iter))
4686                         print_snapshot_help(m, iter);
4687                 else if (iter->trace && iter->trace->print_header)
4688                         iter->trace->print_header(m);
4689                 else
4690                         trace_default_header(m);
4691
4692         } else if (iter->leftover) {
4693                 /*
4694                  * If we filled the seq_file buffer earlier, we
4695                  * want to just show it now.
4696                  */
4697                 ret = trace_print_seq(m, &iter->seq);
4698
4699                 /* ret should this time be zero, but you never know */
4700                 iter->leftover = ret;
4701
4702         } else {
4703                 print_trace_line(iter);
4704                 ret = trace_print_seq(m, &iter->seq);
4705                 /*
4706                  * If we overflow the seq_file buffer, then it will
4707                  * ask us for this data again at start up.
4708                  * Use that instead.
4709                  *  ret is 0 if seq_file write succeeded.
4710                  *        -1 otherwise.
4711                  */
4712                 iter->leftover = ret;
4713         }
4714
4715         return 0;
4716 }
4717
4718 /*
4719  * Should be used after trace_array_get(), trace_types_lock
4720  * ensures that i_cdev was already initialized.
4721  */
4722 static inline int tracing_get_cpu(struct inode *inode)
4723 {
4724         if (inode->i_cdev) /* See trace_create_cpu_file() */
4725                 return (long)inode->i_cdev - 1;
4726         return RING_BUFFER_ALL_CPUS;
4727 }
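/*
 * Editor's note: a standalone sketch (not kernel code) of the encoding that
 * tracing_get_cpu() above undoes: per-CPU files stash "cpu + 1" in a cookie so
 * that a NULL/zero cookie can still mean "all CPUs".  The helper names here
 * are hypothetical and only illustrate the convention.
 */
#if 0
#include <assert.h>
#include <stddef.h>

#define ALL_CPUS        (-1)

static void *encode_cpu(int cpu)        /* what the create side would store */
{
        return (void *)(long)(cpu + 1);
}

static int decode_cpu(void *cookie)     /* what the open side reads back */
{
        if (cookie)
                return (int)(long)cookie - 1;
        return ALL_CPUS;
}

int main(void)
{
        assert(decode_cpu(encode_cpu(3)) == 3);
        assert(decode_cpu(NULL) == ALL_CPUS);   /* unset cookie means every CPU */
        return 0;
}
#endif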
4728
4729 static const struct seq_operations tracer_seq_ops = {
4730         .start          = s_start,
4731         .next           = s_next,
4732         .stop           = s_stop,
4733         .show           = s_show,
4734 };
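/*
 * Editor's note: a standalone sketch (not kernel code) of the iterator
 * protocol behind tracer_seq_ops above.  seq_file repeatedly calls
 * start -> show/next ... -> stop; this mock driver loop shows the call order
 * that the s_start/s_next/s_show/s_stop callbacks have to cope with.
 */
#if 0
#include <stdio.h>

struct mock_seq_ops {
        void *(*start)(long *pos);
        void *(*next)(void *v, long *pos);
        void (*stop)(void *v);
        int (*show)(void *v);
};

static char *items[] = { "one", "two", "three" };

static void *m_start(long *pos)
{
        return (*pos < 3) ? (void *)items[*pos] : NULL;
}

static void *m_next(void *v, long *pos)
{
        (*pos)++;
        return (*pos < 3) ? (void *)items[*pos] : NULL;
}

static void m_stop(void *v) { }

static int m_show(void *v)
{
        printf("%s\n", (char *)v);
        return 0;
}

int main(void)
{
        struct mock_seq_ops ops = { m_start, m_next, m_stop, m_show };
        long pos = 0;
        void *v;

        for (v = ops.start(&pos); v; v = ops.next(v, &pos))
                ops.show(v);            /* show each record the iterator yields */
        ops.stop(v);
        return 0;
}
#endif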
4735
4736 static struct trace_iterator *
4737 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4738 {
4739         struct trace_array *tr = inode->i_private;
4740         struct trace_iterator *iter;
4741         int cpu;
4742
4743         if (tracing_disabled)
4744                 return ERR_PTR(-ENODEV);
4745
4746         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4747         if (!iter)
4748                 return ERR_PTR(-ENOMEM);
4749
4750         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4751                                     GFP_KERNEL);
4752         if (!iter->buffer_iter)
4753                 goto release;
4754
4755         /*
4756          * trace_find_next_entry() may need to save off iter->ent.
4757          * It will place it into the iter->temp buffer. As most
4758          * events are less than 128 bytes, allocate a buffer of that size.
4759          * If one is greater, then trace_find_next_entry() will
4760          * allocate a new buffer to adjust for the bigger iter->ent.
4761          * It's not critical if it fails to get allocated here.
4762          */
4763         iter->temp = kmalloc(128, GFP_KERNEL);
4764         if (iter->temp)
4765                 iter->temp_size = 128;
4766
4767         /*
4768          * trace_event_printf() may need to modify given format
4769          * string to replace %p with %px so that it shows real address
4770          * instead of hash value. However, that is only for the event
4771          * tracing; other tracers may not need it. Defer the allocation
4772          * until it is needed.
4773          */
4774         iter->fmt = NULL;
4775         iter->fmt_size = 0;
4776
4777         /*
4778          * We make a copy of the current tracer to avoid concurrent
4779          * changes on it while we are reading.
4780          */
4781         mutex_lock(&trace_types_lock);
4782         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4783         if (!iter->trace)
4784                 goto fail;
4785
4786         *iter->trace = *tr->current_trace;
4787
4788         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4789                 goto fail;
4790
4791         iter->tr = tr;
4792
4793 #ifdef CONFIG_TRACER_MAX_TRACE
4794         /* Currently only the top directory has a snapshot */
4795         if (tr->current_trace->print_max || snapshot)
4796                 iter->array_buffer = &tr->max_buffer;
4797         else
4798 #endif
4799                 iter->array_buffer = &tr->array_buffer;
4800         iter->snapshot = snapshot;
4801         iter->pos = -1;
4802         iter->cpu_file = tracing_get_cpu(inode);
4803         mutex_init(&iter->mutex);
4804
4805         /* Notify the tracer early; before we stop tracing. */
4806         if (iter->trace->open)
4807                 iter->trace->open(iter);
4808
4809         /* Annotate start of buffers if we had overruns */
4810         if (ring_buffer_overruns(iter->array_buffer->buffer))
4811                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4812
4813         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4814         if (trace_clocks[tr->clock_id].in_ns)
4815                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4816
4817         /*
4818          * If pause-on-trace is enabled, then stop the trace while
4819          * dumping, unless this is the "snapshot" file
4820          */
4821         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4822                 tracing_stop_tr(tr);
4823
4824         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4825                 for_each_tracing_cpu(cpu) {
4826                         iter->buffer_iter[cpu] =
4827                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4828                                                          cpu, GFP_KERNEL);
4829                 }
4830                 ring_buffer_read_prepare_sync();
4831                 for_each_tracing_cpu(cpu) {
4832                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4833                         tracing_iter_reset(iter, cpu);
4834                 }
4835         } else {
4836                 cpu = iter->cpu_file;
4837                 iter->buffer_iter[cpu] =
4838                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4839                                                  cpu, GFP_KERNEL);
4840                 ring_buffer_read_prepare_sync();
4841                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4842                 tracing_iter_reset(iter, cpu);
4843         }
4844
4845         mutex_unlock(&trace_types_lock);
4846
4847         return iter;
4848
4849  fail:
4850         mutex_unlock(&trace_types_lock);
4851         kfree(iter->trace);
4852         kfree(iter->temp);
4853         kfree(iter->buffer_iter);
4854 release:
4855         seq_release_private(inode, file);
4856         return ERR_PTR(-ENOMEM);
4857 }
4858
4859 int tracing_open_generic(struct inode *inode, struct file *filp)
4860 {
4861         int ret;
4862
4863         ret = tracing_check_open_get_tr(NULL);
4864         if (ret)
4865                 return ret;
4866
4867         filp->private_data = inode->i_private;
4868         return 0;
4869 }
4870
4871 bool tracing_is_disabled(void)
4872 {
4873         return (tracing_disabled) ? true : false;
4874 }
4875
4876 /*
4877  * Open and update trace_array ref count.
4878  * Must have the current trace_array passed to it.
4879  */
4880 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4881 {
4882         struct trace_array *tr = inode->i_private;
4883         int ret;
4884
4885         ret = tracing_check_open_get_tr(tr);
4886         if (ret)
4887                 return ret;
4888
4889         filp->private_data = inode->i_private;
4890
4891         return 0;
4892 }
4893
4894 static int tracing_mark_open(struct inode *inode, struct file *filp)
4895 {
4896         stream_open(inode, filp);
4897         return tracing_open_generic_tr(inode, filp);
4898 }
4899
4900 static int tracing_release(struct inode *inode, struct file *file)
4901 {
4902         struct trace_array *tr = inode->i_private;
4903         struct seq_file *m = file->private_data;
4904         struct trace_iterator *iter;
4905         int cpu;
4906
4907         if (!(file->f_mode & FMODE_READ)) {
4908                 trace_array_put(tr);
4909                 return 0;
4910         }
4911
4912         /* Writes do not use seq_file */
4913         iter = m->private;
4914         mutex_lock(&trace_types_lock);
4915
4916         for_each_tracing_cpu(cpu) {
4917                 if (iter->buffer_iter[cpu])
4918                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4919         }
4920
4921         if (iter->trace && iter->trace->close)
4922                 iter->trace->close(iter);
4923
4924         if (!iter->snapshot && tr->stop_count)
4925                 /* reenable tracing if it was previously enabled */
4926                 tracing_start_tr(tr);
4927
4928         __trace_array_put(tr);
4929
4930         mutex_unlock(&trace_types_lock);
4931
4932         mutex_destroy(&iter->mutex);
4933         free_cpumask_var(iter->started);
4934         kfree(iter->fmt);
4935         kfree(iter->temp);
4936         kfree(iter->trace);
4937         kfree(iter->buffer_iter);
4938         seq_release_private(inode, file);
4939
4940         return 0;
4941 }
4942
4943 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4944 {
4945         struct trace_array *tr = inode->i_private;
4946
4947         trace_array_put(tr);
4948         return 0;
4949 }
4950
4951 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4952 {
4953         struct trace_array *tr = inode->i_private;
4954
4955         trace_array_put(tr);
4956
4957         return single_release(inode, file);
4958 }
4959
4960 static int tracing_open(struct inode *inode, struct file *file)
4961 {
4962         struct trace_array *tr = inode->i_private;
4963         struct trace_iterator *iter;
4964         int ret;
4965
4966         ret = tracing_check_open_get_tr(tr);
4967         if (ret)
4968                 return ret;
4969
4970         /* If this file was open for write, then erase contents */
4971         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4972                 int cpu = tracing_get_cpu(inode);
4973                 struct array_buffer *trace_buf = &tr->array_buffer;
4974
4975 #ifdef CONFIG_TRACER_MAX_TRACE
4976                 if (tr->current_trace->print_max)
4977                         trace_buf = &tr->max_buffer;
4978 #endif
4979
4980                 if (cpu == RING_BUFFER_ALL_CPUS)
4981                         tracing_reset_online_cpus(trace_buf);
4982                 else
4983                         tracing_reset_cpu(trace_buf, cpu);
4984         }
4985
4986         if (file->f_mode & FMODE_READ) {
4987                 iter = __tracing_open(inode, file, false);
4988                 if (IS_ERR(iter))
4989                         ret = PTR_ERR(iter);
4990                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4991                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4992         }
4993
4994         if (ret < 0)
4995                 trace_array_put(tr);
4996
4997         return ret;
4998 }
4999
5000 /*
5001  * Some tracers are not suitable for instance buffers.
5002  * A tracer is always available for the global array (toplevel)
5003  * or if it explicitly states that it is.
5004  */
5005 static bool
5006 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5007 {
5008         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5009 }
5010
5011 /* Find the next tracer that this trace array may use */
5012 static struct tracer *
5013 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5014 {
5015         while (t && !trace_ok_for_array(t, tr))
5016                 t = t->next;
5017
5018         return t;
5019 }
5020
5021 static void *
5022 t_next(struct seq_file *m, void *v, loff_t *pos)
5023 {
5024         struct trace_array *tr = m->private;
5025         struct tracer *t = v;
5026
5027         (*pos)++;
5028
5029         if (t)
5030                 t = get_tracer_for_array(tr, t->next);
5031
5032         return t;
5033 }
5034
5035 static void *t_start(struct seq_file *m, loff_t *pos)
5036 {
5037         struct trace_array *tr = m->private;
5038         struct tracer *t;
5039         loff_t l = 0;
5040
5041         mutex_lock(&trace_types_lock);
5042
5043         t = get_tracer_for_array(tr, trace_types);
5044         for (; t && l < *pos; t = t_next(m, t, &l))
5045                         ;
5046
5047         return t;
5048 }
5049
5050 static void t_stop(struct seq_file *m, void *p)
5051 {
5052         mutex_unlock(&trace_types_lock);
5053 }
5054
5055 static int t_show(struct seq_file *m, void *v)
5056 {
5057         struct tracer *t = v;
5058
5059         if (!t)
5060                 return 0;
5061
5062         seq_puts(m, t->name);
5063         if (t->next)
5064                 seq_putc(m, ' ');
5065         else
5066                 seq_putc(m, '\n');
5067
5068         return 0;
5069 }
5070
5071 static const struct seq_operations show_traces_seq_ops = {
5072         .start          = t_start,
5073         .next           = t_next,
5074         .stop           = t_stop,
5075         .show           = t_show,
5076 };
5077
5078 static int show_traces_open(struct inode *inode, struct file *file)
5079 {
5080         struct trace_array *tr = inode->i_private;
5081         struct seq_file *m;
5082         int ret;
5083
5084         ret = tracing_check_open_get_tr(tr);
5085         if (ret)
5086                 return ret;
5087
5088         ret = seq_open(file, &show_traces_seq_ops);
5089         if (ret) {
5090                 trace_array_put(tr);
5091                 return ret;
5092         }
5093
5094         m = file->private_data;
5095         m->private = tr;
5096
5097         return 0;
5098 }
5099
5100 static int show_traces_release(struct inode *inode, struct file *file)
5101 {
5102         struct trace_array *tr = inode->i_private;
5103
5104         trace_array_put(tr);
5105         return seq_release(inode, file);
5106 }
5107
5108 static ssize_t
5109 tracing_write_stub(struct file *filp, const char __user *ubuf,
5110                    size_t count, loff_t *ppos)
5111 {
5112         return count;
5113 }
5114
5115 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5116 {
5117         int ret;
5118
5119         if (file->f_mode & FMODE_READ)
5120                 ret = seq_lseek(file, offset, whence);
5121         else
5122                 file->f_pos = ret = 0;
5123
5124         return ret;
5125 }
5126
5127 static const struct file_operations tracing_fops = {
5128         .open           = tracing_open,
5129         .read           = seq_read,
5130         .read_iter      = seq_read_iter,
5131         .splice_read    = generic_file_splice_read,
5132         .write          = tracing_write_stub,
5133         .llseek         = tracing_lseek,
5134         .release        = tracing_release,
5135 };
5136
5137 static const struct file_operations show_traces_fops = {
5138         .open           = show_traces_open,
5139         .read           = seq_read,
5140         .llseek         = seq_lseek,
5141         .release        = show_traces_release,
5142 };
5143
5144 static ssize_t
5145 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5146                      size_t count, loff_t *ppos)
5147 {
5148         struct trace_array *tr = file_inode(filp)->i_private;
5149         char *mask_str;
5150         int len;
5151
5152         len = snprintf(NULL, 0, "%*pb\n",
5153                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5154         mask_str = kmalloc(len, GFP_KERNEL);
5155         if (!mask_str)
5156                 return -ENOMEM;
5157
5158         len = snprintf(mask_str, len, "%*pb\n",
5159                        cpumask_pr_args(tr->tracing_cpumask));
5160         if (len >= count) {
5161                 count = -EINVAL;
5162                 goto out_err;
5163         }
5164         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5165
5166 out_err:
5167         kfree(mask_str);
5168
5169         return count;
5170 }
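/*
 * Editor's note: a standalone sketch (not kernel code) of the sizing idiom
 * used in tracing_cpumask_read() above: snprintf(NULL, 0, ...) returns the
 * number of characters the formatted string would need, so the buffer can be
 * allocated exactly (+1 for the terminating NUL) before formatting for real.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        int value = 12345;
        int len = snprintf(NULL, 0, "value=%d\n", value) + 1;  /* measure */
        char *buf = malloc(len);

        if (!buf)
                return 1;
        snprintf(buf, len, "value=%d\n", value);               /* format */
        fputs(buf, stdout);
        free(buf);
        return 0;
}
#endif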
5171
5172 int tracing_set_cpumask(struct trace_array *tr,
5173                         cpumask_var_t tracing_cpumask_new)
5174 {
5175         int cpu;
5176
5177         if (!tr)
5178                 return -EINVAL;
5179
5180         local_irq_disable();
5181         arch_spin_lock(&tr->max_lock);
5182         for_each_tracing_cpu(cpu) {
5183                 /*
5184                  * Increase/decrease the disabled counter if we are
5185                  * about to flip a bit in the cpumask:
5186                  */
5187                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5188                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5189                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5190                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5191                 }
5192                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5193                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5194                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5195                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5196                 }
5197         }
5198         arch_spin_unlock(&tr->max_lock);
5199         local_irq_enable();
5200
5201         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5202
5203         return 0;
5204 }
5205
5206 static ssize_t
5207 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5208                       size_t count, loff_t *ppos)
5209 {
5210         struct trace_array *tr = file_inode(filp)->i_private;
5211         cpumask_var_t tracing_cpumask_new;
5212         int err;
5213
5214         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5215                 return -ENOMEM;
5216
5217         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5218         if (err)
5219                 goto err_free;
5220
5221         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5222         if (err)
5223                 goto err_free;
5224
5225         free_cpumask_var(tracing_cpumask_new);
5226
5227         return count;
5228
5229 err_free:
5230         free_cpumask_var(tracing_cpumask_new);
5231
5232         return err;
5233 }
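/*
 * Editor's note: minimal userspace sketch (not part of trace.c) of driving the
 * tracing_cpumask file handled above.  The mask is written in the
 * comma-separated hex format that cpumask_parse_user() expects; the tracefs
 * path is an assumption (adjust to wherever tracefs is mounted).
 */
#if 0
#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/kernel/tracing/tracing_cpumask", "w");   /* assumed path */

        if (!f) {
                perror("tracing_cpumask");
                return 1;
        }
        fputs("3\n", f);        /* hex mask 0x3: restrict tracing to CPUs 0 and 1 */
        return fclose(f) ? 1 : 0;
}
#endif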
5234
5235 static const struct file_operations tracing_cpumask_fops = {
5236         .open           = tracing_open_generic_tr,
5237         .read           = tracing_cpumask_read,
5238         .write          = tracing_cpumask_write,
5239         .release        = tracing_release_generic_tr,
5240         .llseek         = generic_file_llseek,
5241 };
5242
5243 static int tracing_trace_options_show(struct seq_file *m, void *v)
5244 {
5245         struct tracer_opt *trace_opts;
5246         struct trace_array *tr = m->private;
5247         u32 tracer_flags;
5248         int i;
5249
5250         mutex_lock(&trace_types_lock);
5251         tracer_flags = tr->current_trace->flags->val;
5252         trace_opts = tr->current_trace->flags->opts;
5253
5254         for (i = 0; trace_options[i]; i++) {
5255                 if (tr->trace_flags & (1 << i))
5256                         seq_printf(m, "%s\n", trace_options[i]);
5257                 else
5258                         seq_printf(m, "no%s\n", trace_options[i]);
5259         }
5260
5261         for (i = 0; trace_opts[i].name; i++) {
5262                 if (tracer_flags & trace_opts[i].bit)
5263                         seq_printf(m, "%s\n", trace_opts[i].name);
5264                 else
5265                         seq_printf(m, "no%s\n", trace_opts[i].name);
5266         }
5267         mutex_unlock(&trace_types_lock);
5268
5269         return 0;
5270 }
5271
5272 static int __set_tracer_option(struct trace_array *tr,
5273                                struct tracer_flags *tracer_flags,
5274                                struct tracer_opt *opts, int neg)
5275 {
5276         struct tracer *trace = tracer_flags->trace;
5277         int ret;
5278
5279         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5280         if (ret)
5281                 return ret;
5282
5283         if (neg)
5284                 tracer_flags->val &= ~opts->bit;
5285         else
5286                 tracer_flags->val |= opts->bit;
5287         return 0;
5288 }
5289
5290 /* Try to assign a tracer specific option */
5291 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5292 {
5293         struct tracer *trace = tr->current_trace;
5294         struct tracer_flags *tracer_flags = trace->flags;
5295         struct tracer_opt *opts = NULL;
5296         int i;
5297
5298         for (i = 0; tracer_flags->opts[i].name; i++) {
5299                 opts = &tracer_flags->opts[i];
5300
5301                 if (strcmp(cmp, opts->name) == 0)
5302                         return __set_tracer_option(tr, trace->flags, opts, neg);
5303         }
5304
5305         return -EINVAL;
5306 }
5307
5308 /* Some tracers require overwrite to stay enabled */
5309 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5310 {
5311         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5312                 return -1;
5313
5314         return 0;
5315 }
5316
5317 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5318 {
5319         int *map;
5320
5321         if ((mask == TRACE_ITER_RECORD_TGID) ||
5322             (mask == TRACE_ITER_RECORD_CMD))
5323                 lockdep_assert_held(&event_mutex);
5324
5325         /* do nothing if flag is already set */
5326         if (!!(tr->trace_flags & mask) == !!enabled)
5327                 return 0;
5328
5329         /* Give the tracer a chance to approve the change */
5330         if (tr->current_trace->flag_changed)
5331                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5332                         return -EINVAL;
5333
5334         if (enabled)
5335                 tr->trace_flags |= mask;
5336         else
5337                 tr->trace_flags &= ~mask;
5338
5339         if (mask == TRACE_ITER_RECORD_CMD)
5340                 trace_event_enable_cmd_record(enabled);
5341
5342         if (mask == TRACE_ITER_RECORD_TGID) {
5343                 if (!tgid_map) {
5344                         tgid_map_max = pid_max;
5345                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5346                                        GFP_KERNEL);
5347
5348                         /*
5349                          * Pairs with smp_load_acquire() in
5350                          * trace_find_tgid_ptr() to ensure that if it observes
5351                          * the tgid_map we just allocated then it also observes
5352                          * the corresponding tgid_map_max value.
5353                          */
5354                         smp_store_release(&tgid_map, map);
5355                 }
5356                 if (!tgid_map) {
5357                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5358                         return -ENOMEM;
5359                 }
5360
5361                 trace_event_enable_tgid_record(enabled);
5362         }
5363
5364         if (mask == TRACE_ITER_EVENT_FORK)
5365                 trace_event_follow_fork(tr, enabled);
5366
5367         if (mask == TRACE_ITER_FUNC_FORK)
5368                 ftrace_pid_follow_fork(tr, enabled);
5369
5370         if (mask == TRACE_ITER_OVERWRITE) {
5371                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5372 #ifdef CONFIG_TRACER_MAX_TRACE
5373                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5374 #endif
5375         }
5376
5377         if (mask == TRACE_ITER_PRINTK) {
5378                 trace_printk_start_stop_comm(enabled);
5379                 trace_printk_control(enabled);
5380         }
5381
5382         return 0;
5383 }
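/*
 * Editor's note: a standalone C11 sketch (not kernel code) of the publish
 * pattern used for tgid_map above: the writer fills in the companion size
 * first and then publishes the pointer with release semantics, so a reader
 * that observes the pointer with acquire semantics also observes the size.
 * The kernel uses smp_store_release()/smp_load_acquire(); stdatomic is the
 * userspace stand-in here, and all names are illustrative.
 */
#if 0
#include <stdatomic.h>
#include <stdlib.h>

static int map_max;             /* plain data, published indirectly */
static _Atomic(int *) map;      /* the published pointer */

static void publish(int entries)
{
        int *p = calloc(entries + 1, sizeof(*p));

        if (!p)
                return;
        map_max = entries;                                      /* 1: fill in companion data */
        atomic_store_explicit(&map, p, memory_order_release);   /* 2: publish the pointer */
}

static int *lookup(int idx)
{
        int *p = atomic_load_explicit(&map, memory_order_acquire);

        if (!p || idx > map_max)        /* seeing p guarantees map_max is visible */
                return NULL;
        return &p[idx];
}

int main(void)
{
        publish(1024);
        return lookup(10) ? 0 : 1;
}
#endif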
5384
5385 int trace_set_options(struct trace_array *tr, char *option)
5386 {
5387         char *cmp;
5388         int neg = 0;
5389         int ret;
5390         size_t orig_len = strlen(option);
5391         int len;
5392
5393         cmp = strstrip(option);
5394
5395         len = str_has_prefix(cmp, "no");
5396         if (len)
5397                 neg = 1;
5398
5399         cmp += len;
5400
5401         mutex_lock(&event_mutex);
5402         mutex_lock(&trace_types_lock);
5403
5404         ret = match_string(trace_options, -1, cmp);
5405         /* If no option could be set, test the specific tracer options */
5406         if (ret < 0)
5407                 ret = set_tracer_option(tr, cmp, neg);
5408         else
5409                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5410
5411         mutex_unlock(&trace_types_lock);
5412         mutex_unlock(&event_mutex);
5413
5414         /*
5415          * If the first trailing whitespace is replaced with '\0' by strstrip,
5416          * turn it back into a space.
5417          */
5418         if (orig_len > strlen(option))
5419                 option[strlen(option)] = ' ';
5420
5421         return ret;
5422 }
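/*
 * Editor's note: minimal userspace sketch (not part of trace.c) of the option
 * syntax trace_set_options() parses: a plain option name sets a flag and the
 * "no" prefix clears it.  The tracefs path and the chosen option ("overwrite")
 * are assumptions made for illustration.
 */
#if 0
#include <stdio.h>

static int set_option(const char *opt)
{
        FILE *f = fopen("/sys/kernel/tracing/trace_options", "w");     /* assumed path */

        if (!f)
                return -1;
        fprintf(f, "%s\n", opt);
        return fclose(f);
}

int main(void)
{
        set_option("nooverwrite");      /* stop overwriting old events when the buffer fills */
        set_option("overwrite");        /* restore the default overwrite behaviour */
        return 0;
}
#endif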
5423
5424 static void __init apply_trace_boot_options(void)
5425 {
5426         char *buf = trace_boot_options_buf;
5427         char *option;
5428
5429         while (true) {
5430                 option = strsep(&buf, ",");
5431
5432                 if (!option)
5433                         break;
5434
5435                 if (*option)
5436                         trace_set_options(&global_trace, option);
5437
5438                 /* Put back the comma to allow this to be called again */
5439                 if (buf)
5440                         *(buf - 1) = ',';
5441         }
5442 }
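/*
 * Editor's note: standalone sketch (not kernel code) of the parsing trick used
 * in apply_trace_boot_options() above: strsep() replaces each ',' with '\0' as
 * it walks the string, and writing the ',' back afterwards keeps the original
 * buffer intact so it can be parsed again later.
 */
#if 0
#include <stdio.h>
#include <string.h>

int main(void)
{
        char options[] = "sym-offset,noprint-parent,irq-info";
        char *buf = options;
        char *option;

        while ((option = strsep(&buf, ",")) != NULL) {
                if (*option)
                        printf("option: %s\n", option);
                if (buf)
                        *(buf - 1) = ',';       /* undo the '\0' strsep wrote */
        }
        printf("buffer restored: %s\n", options);
        return 0;
}
#endif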
5443
5444 static ssize_t
5445 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5446                         size_t cnt, loff_t *ppos)
5447 {
5448         struct seq_file *m = filp->private_data;
5449         struct trace_array *tr = m->private;
5450         char buf[64];
5451         int ret;
5452
5453         if (cnt >= sizeof(buf))
5454                 return -EINVAL;
5455
5456         if (copy_from_user(buf, ubuf, cnt))
5457                 return -EFAULT;
5458
5459         buf[cnt] = 0;
5460
5461         ret = trace_set_options(tr, buf);
5462         if (ret < 0)
5463                 return ret;
5464
5465         *ppos += cnt;
5466
5467         return cnt;
5468 }
5469
5470 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5471 {
5472         struct trace_array *tr = inode->i_private;
5473         int ret;
5474
5475         ret = tracing_check_open_get_tr(tr);
5476         if (ret)
5477                 return ret;
5478
5479         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5480         if (ret < 0)
5481                 trace_array_put(tr);
5482
5483         return ret;
5484 }
5485
5486 static const struct file_operations tracing_iter_fops = {
5487         .open           = tracing_trace_options_open,
5488         .read           = seq_read,
5489         .llseek         = seq_lseek,
5490         .release        = tracing_single_release_tr,
5491         .write          = tracing_trace_options_write,
5492 };
5493
5494 static const char readme_msg[] =
5495         "tracing mini-HOWTO:\n\n"
5496         "# echo 0 > tracing_on : quick way to disable tracing\n"
5497         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5498         " Important files:\n"
5499         "  trace\t\t\t- The static contents of the buffer\n"
5500         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5501         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5502         "  current_tracer\t- function and latency tracers\n"
5503         "  available_tracers\t- list of configured tracers for current_tracer\n"
5504         "  error_log\t- error log for failed commands (that support it)\n"
5505         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5506         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5507         "  trace_clock\t\t- change the clock used to order events\n"
5508         "       local:   Per cpu clock but may not be synced across CPUs\n"
5509         "      global:   Synced across CPUs but slows tracing down.\n"
5510         "     counter:   Not a clock, but just an increment\n"
5511         "      uptime:   Jiffy counter from time of boot\n"
5512         "        perf:   Same clock that perf events use\n"
5513 #ifdef CONFIG_X86_64
5514         "     x86-tsc:   TSC cycle counter\n"
5515 #endif
5516         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5517         "       delta:   Delta difference against a buffer-wide timestamp\n"
5518         "    absolute:   Absolute (standalone) timestamp\n"
5519         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5520         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5521         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5522         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5523         "\t\t\t  Remove sub-buffer with rmdir\n"
5524         "  trace_options\t\t- Set format or modify how tracing happens\n"
5525         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5526         "\t\t\t  option name\n"
5527         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5528 #ifdef CONFIG_DYNAMIC_FTRACE
5529         "\n  available_filter_functions - list of functions that can be filtered on\n"
5530         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5531         "\t\t\t  functions\n"
5532         "\t     accepts: func_full_name or glob-matching-pattern\n"
5533         "\t     modules: Can select a group via module\n"
5534         "\t      Format: :mod:<module-name>\n"
5535         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5536         "\t    triggers: a command to perform when function is hit\n"
5537         "\t      Format: <function>:<trigger>[:count]\n"
5538         "\t     trigger: traceon, traceoff\n"
5539         "\t\t      enable_event:<system>:<event>\n"
5540         "\t\t      disable_event:<system>:<event>\n"
5541 #ifdef CONFIG_STACKTRACE
5542         "\t\t      stacktrace\n"
5543 #endif
5544 #ifdef CONFIG_TRACER_SNAPSHOT
5545         "\t\t      snapshot\n"
5546 #endif
5547         "\t\t      dump\n"
5548         "\t\t      cpudump\n"
5549         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5550         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5551         "\t     The first one will disable tracing every time do_fault is hit\n"
5552         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5553         "\t       The first time do_trap is hit and it disables tracing, the\n"
5554         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5555         "\t       the counter will not decrement. It only decrements when the\n"
5556         "\t       trigger did work\n"
5557         "\t     To remove trigger without count:\n"
5558         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5559         "\t     To remove trigger with a count:\n"
5560         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5561         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5562         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5563         "\t    modules: Can select a group via module command :mod:\n"
5564         "\t    Does not accept triggers\n"
5565 #endif /* CONFIG_DYNAMIC_FTRACE */
5566 #ifdef CONFIG_FUNCTION_TRACER
5567         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5568         "\t\t    (function)\n"
5569         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5570         "\t\t    (function)\n"
5571 #endif
5572 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5573         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5574         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5575         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5576 #endif
5577 #ifdef CONFIG_TRACER_SNAPSHOT
5578         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5579         "\t\t\t  snapshot buffer. Read the contents for more\n"
5580         "\t\t\t  information\n"
5581 #endif
5582 #ifdef CONFIG_STACK_TRACER
5583         "  stack_trace\t\t- Shows the max stack trace when active\n"
5584         "  stack_max_size\t- Shows current max stack size that was traced\n"
5585         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5586         "\t\t\t  new trace)\n"
5587 #ifdef CONFIG_DYNAMIC_FTRACE
5588         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5589         "\t\t\t  traces\n"
5590 #endif
5591 #endif /* CONFIG_STACK_TRACER */
5592 #ifdef CONFIG_DYNAMIC_EVENTS
5593         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5594         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5595 #endif
5596 #ifdef CONFIG_KPROBE_EVENTS
5597         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5598         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5599 #endif
5600 #ifdef CONFIG_UPROBE_EVENTS
5601         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5602         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5603 #endif
5604 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5605         "\t  accepts: event-definitions (one definition per line)\n"
5606         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5607         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5608 #ifdef CONFIG_HIST_TRIGGERS
5609         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5610 #endif
5611         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5612         "\t           -:[<group>/][<event>]\n"
5613 #ifdef CONFIG_KPROBE_EVENTS
5614         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5615   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5616 #endif
5617 #ifdef CONFIG_UPROBE_EVENTS
5618   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5619 #endif
5620         "\t     args: <name>=fetcharg[:type]\n"
5621         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5622 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5623         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5624 #else
5625         "\t           $stack<index>, $stack, $retval, $comm,\n"
5626 #endif
5627         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5628         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5629         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5630         "\t           <type>\\[<array-size>\\]\n"
5631 #ifdef CONFIG_HIST_TRIGGERS
5632         "\t    field: <stype> <name>;\n"
5633         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5634         "\t           [unsigned] char/int/long\n"
5635 #endif
5636         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5637         "\t            of the <attached-group>/<attached-event>.\n"
5638 #endif
5639         "  events/\t\t- Directory containing all trace event subsystems:\n"
5640         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5641         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5642         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5643         "\t\t\t  events\n"
5644         "      filter\t\t- If set, only events passing filter are traced\n"
5645         "  events/<system>/<event>/\t- Directory containing control files for\n"
5646         "\t\t\t  <event>:\n"
5647         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5648         "      filter\t\t- If set, only events passing filter are traced\n"
5649         "      trigger\t\t- If set, a command to perform when event is hit\n"
5650         "\t    Format: <trigger>[:count][if <filter>]\n"
5651         "\t   trigger: traceon, traceoff\n"
5652         "\t            enable_event:<system>:<event>\n"
5653         "\t            disable_event:<system>:<event>\n"
5654 #ifdef CONFIG_HIST_TRIGGERS
5655         "\t            enable_hist:<system>:<event>\n"
5656         "\t            disable_hist:<system>:<event>\n"
5657 #endif
5658 #ifdef CONFIG_STACKTRACE
5659         "\t\t    stacktrace\n"
5660 #endif
5661 #ifdef CONFIG_TRACER_SNAPSHOT
5662         "\t\t    snapshot\n"
5663 #endif
5664 #ifdef CONFIG_HIST_TRIGGERS
5665         "\t\t    hist (see below)\n"
5666 #endif
5667         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5668         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5669         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5670         "\t                  events/block/block_unplug/trigger\n"
5671         "\t   The first disables tracing every time block_unplug is hit.\n"
5672         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5673         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5674         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5675         "\t   Like function triggers, the counter is only decremented if it\n"
5676         "\t    enabled or disabled tracing.\n"
5677         "\t   To remove a trigger without a count:\n"
5678         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5679         "\t   To remove a trigger with a count:\n"
5680         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5681         "\t   Filters can be ignored when removing a trigger.\n"
5682 #ifdef CONFIG_HIST_TRIGGERS
5683         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5684         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5685         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5686         "\t            [:values=<field1[,field2,...]>]\n"
5687         "\t            [:sort=<field1[,field2,...]>]\n"
5688         "\t            [:size=#entries]\n"
5689         "\t            [:pause][:continue][:clear]\n"
5690         "\t            [:name=histname1]\n"
5691         "\t            [:<handler>.<action>]\n"
5692         "\t            [if <filter>]\n\n"
5693         "\t    Note, special fields can be used as well:\n"
5694         "\t            common_timestamp - to record current timestamp\n"
5695         "\t            common_cpu - to record the CPU the event happened on\n"
5696         "\n"
5697         "\t    A hist trigger variable can be:\n"
5698         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5699         "\t        - a reference to another variable e.g. y=$x,\n"
5700         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5701         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5702         "\n"
5703         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5704         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5705         "\t    variable reference, field or numeric literal.\n"
5706         "\n"
5707         "\t    When a matching event is hit, an entry is added to a hash\n"
5708         "\t    table using the key(s) and value(s) named, and the value of a\n"
5709         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5710         "\t    correspond to fields in the event's format description.  Keys\n"
5711         "\t    can be any field, or the special string 'stacktrace'.\n"
5712         "\t    Compound keys consisting of up to two fields can be specified\n"
5713         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5714         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5715         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5716         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5717         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5718         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5719         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5720         "\t    its histogram data will be shared with other triggers of the\n"
5721         "\t    same name, and trigger hits will update this common data.\n\n"
5722         "\t    Reading the 'hist' file for the event will dump the hash\n"
5723         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5724         "\t    triggers attached to an event, there will be a table for each\n"
5725         "\t    trigger in the output.  The table displayed for a named\n"
5726         "\t    trigger will be the same as any other instance having the\n"
5727         "\t    same name.  The default format used to display a given field\n"
5728         "\t    can be modified by appending any of the following modifiers\n"
5729         "\t    to the field name, as applicable:\n\n"
5730         "\t            .hex        display a number as a hex value\n"
5731         "\t            .sym        display an address as a symbol\n"
5732         "\t            .sym-offset display an address as a symbol and offset\n"
5733         "\t            .execname   display a common_pid as a program name\n"
5734         "\t            .syscall    display a syscall id as a syscall name\n"
5735         "\t            .log2       display log2 value rather than raw number\n"
5736         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5737         "\t            .usecs      display a common_timestamp in microseconds\n"
5738         "\t            .percent    display a number as a percentage value\n"
5739         "\t            .graph      display a bar-graph of a value\n\n"
5740         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5741         "\t    trigger or to start a hist trigger but not log any events\n"
5742         "\t    until told to do so.  'continue' can be used to start or\n"
5743         "\t    restart a paused hist trigger.\n\n"
5744         "\t    The 'clear' parameter will clear the contents of a running\n"
5745         "\t    hist trigger and leave its current paused/active state\n"
5746         "\t    unchanged.\n\n"
5747         "\t    The enable_hist and disable_hist triggers can be used to\n"
5748         "\t    have one event conditionally start and stop another event's\n"
5749         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5750         "\t    the enable_event and disable_event triggers.\n\n"
5751         "\t    Hist trigger handlers and actions are executed whenever a\n"
5752         "\t    histogram entry is added or updated.  They take the form:\n\n"
5753         "\t        <handler>.<action>\n\n"
5754         "\t    The available handlers are:\n\n"
5755         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5756         "\t        onmax(var)               - invoke if var exceeds current max\n"
5757         "\t        onchange(var)            - invoke action if var changes\n\n"
5758         "\t    The available actions are:\n\n"
5759         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5760         "\t        save(field,...)                      - save current event fields\n"
5761 #ifdef CONFIG_TRACER_SNAPSHOT
5762         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5763 #endif
5764 #ifdef CONFIG_SYNTH_EVENTS
5765         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5766         "\t  Write into this file to define/undefine new synthetic events.\n"
5767         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5768 #endif
5769 #endif
5770 ;
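/*
 * Editor's note: minimal userspace sketch (not part of trace.c) of the
 * trace_marker workflow described in the mini-HOWTO above: enable tracing,
 * drop a marker into the ring buffer, then stop so the annotation can be read
 * back through the "trace" file.  The tracefs mount point is an assumption.
 */
#if 0
#include <stdio.h>

#define TRACEFS "/sys/kernel/tracing"   /* assumed mount point */

static void write_file(const char *name, const char *val)
{
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), TRACEFS "/%s", name);
        f = fopen(path, "w");
        if (!f)
                return;
        fputs(val, f);
        fclose(f);
}

int main(void)
{
        write_file("tracing_on", "1");                  /* quick way to enable tracing */
        write_file("trace_marker", "hello from userspace\n");
        write_file("tracing_on", "0");                  /* stop so the buffer holds still */
        /* the annotation now shows up when reading TRACEFS "/trace" */
        return 0;
}
#endif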
5771
5772 static ssize_t
5773 tracing_readme_read(struct file *filp, char __user *ubuf,
5774                        size_t cnt, loff_t *ppos)
5775 {
5776         return simple_read_from_buffer(ubuf, cnt, ppos,
5777                                         readme_msg, strlen(readme_msg));
5778 }
5779
5780 static const struct file_operations tracing_readme_fops = {
5781         .open           = tracing_open_generic,
5782         .read           = tracing_readme_read,
5783         .llseek         = generic_file_llseek,
5784 };
5785
5786 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5787 {
5788         int pid = ++(*pos);
5789
5790         return trace_find_tgid_ptr(pid);
5791 }
5792
5793 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5794 {
5795         int pid = *pos;
5796
5797         return trace_find_tgid_ptr(pid);
5798 }
5799
5800 static void saved_tgids_stop(struct seq_file *m, void *v)
5801 {
5802 }
5803
5804 static int saved_tgids_show(struct seq_file *m, void *v)
5805 {
5806         int *entry = (int *)v;
5807         int pid = entry - tgid_map;
5808         int tgid = *entry;
5809
5810         if (tgid == 0)
5811                 return SEQ_SKIP;
5812
5813         seq_printf(m, "%d %d\n", pid, tgid);
5814         return 0;
5815 }
5816
5817 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5818         .start          = saved_tgids_start,
5819         .stop           = saved_tgids_stop,
5820         .next           = saved_tgids_next,
5821         .show           = saved_tgids_show,
5822 };
5823
5824 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5825 {
5826         int ret;
5827
5828         ret = tracing_check_open_get_tr(NULL);
5829         if (ret)
5830                 return ret;
5831
5832         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5833 }
5834
5835
5836 static const struct file_operations tracing_saved_tgids_fops = {
5837         .open           = tracing_saved_tgids_open,
5838         .read           = seq_read,
5839         .llseek         = seq_lseek,
5840         .release        = seq_release,
5841 };
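/*
 * Editor's note: userspace sketch (not part of trace.c) pairing the
 * record-tgid option with the saved_tgids file implemented above: once the
 * option is set, each line read back has the form "<pid> <tgid>".  The tracefs
 * paths are assumptions.
 */
#if 0
#include <stdio.h>

int main(void)
{
        FILE *opt = fopen("/sys/kernel/tracing/trace_options", "w");
        FILE *map;
        char line[64];

        if (opt) {
                fputs("record-tgid\n", opt);    /* start recording pid -> tgid */
                fclose(opt);
        }
        map = fopen("/sys/kernel/tracing/saved_tgids", "r");
        if (!map)
                return 1;
        while (fgets(line, sizeof(line), map))
                fputs(line, stdout);            /* "<pid> <tgid>" per entry */
        fclose(map);
        return 0;
}
#endif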
5842
5843 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5844 {
5845         unsigned int *ptr = v;
5846
5847         if (*pos || m->count)
5848                 ptr++;
5849
5850         (*pos)++;
5851
5852         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5853              ptr++) {
5854                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5855                         continue;
5856
5857                 return ptr;
5858         }
5859
5860         return NULL;
5861 }
5862
5863 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5864 {
5865         void *v;
5866         loff_t l = 0;
5867
5868         preempt_disable();
5869         arch_spin_lock(&trace_cmdline_lock);
5870
5871         v = &savedcmd->map_cmdline_to_pid[0];
5872         while (l <= *pos) {
5873                 v = saved_cmdlines_next(m, v, &l);
5874                 if (!v)
5875                         return NULL;
5876         }
5877
5878         return v;
5879 }
5880
5881 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5882 {
5883         arch_spin_unlock(&trace_cmdline_lock);
5884         preempt_enable();
5885 }
5886
5887 static int saved_cmdlines_show(struct seq_file *m, void *v)
5888 {
5889         char buf[TASK_COMM_LEN];
5890         unsigned int *pid = v;
5891
5892         __trace_find_cmdline(*pid, buf);
5893         seq_printf(m, "%d %s\n", *pid, buf);
5894         return 0;
5895 }
5896
5897 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5898         .start          = saved_cmdlines_start,
5899         .next           = saved_cmdlines_next,
5900         .stop           = saved_cmdlines_stop,
5901         .show           = saved_cmdlines_show,
5902 };
5903
5904 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5905 {
5906         int ret;
5907
5908         ret = tracing_check_open_get_tr(NULL);
5909         if (ret)
5910                 return ret;
5911
5912         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5913 }
5914
5915 static const struct file_operations tracing_saved_cmdlines_fops = {
5916         .open           = tracing_saved_cmdlines_open,
5917         .read           = seq_read,
5918         .llseek         = seq_lseek,
5919         .release        = seq_release,
5920 };
5921
5922 static ssize_t
5923 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5924                                  size_t cnt, loff_t *ppos)
5925 {
5926         char buf[64];
5927         int r;
5928
5929         preempt_disable();
5930         arch_spin_lock(&trace_cmdline_lock);
5931         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5932         arch_spin_unlock(&trace_cmdline_lock);
5933         preempt_enable();
5934
5935         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5936 }
5937
5938 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5939 {
5940         kfree(s->saved_cmdlines);
5941         kfree(s->map_cmdline_to_pid);
5942         kfree(s);
5943 }
5944
5945 static int tracing_resize_saved_cmdlines(unsigned int val)
5946 {
5947         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5948
5949         s = kmalloc(sizeof(*s), GFP_KERNEL);
5950         if (!s)
5951                 return -ENOMEM;
5952
5953         if (allocate_cmdlines_buffer(val, s) < 0) {
5954                 kfree(s);
5955                 return -ENOMEM;
5956         }
5957
5958         preempt_disable();
5959         arch_spin_lock(&trace_cmdline_lock);
5960         savedcmd_temp = savedcmd;
5961         savedcmd = s;
5962         arch_spin_unlock(&trace_cmdline_lock);
5963         preempt_enable();
5964         free_saved_cmdlines_buffer(savedcmd_temp);
5965
5966         return 0;
5967 }
5968
5969 static ssize_t
5970 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5971                                   size_t cnt, loff_t *ppos)
5972 {
5973         unsigned long val;
5974         int ret;
5975
5976         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5977         if (ret)
5978                 return ret;
5979
5980         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5981         if (!val || val > PID_MAX_DEFAULT)
5982                 return -EINVAL;
5983
5984         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5985         if (ret < 0)
5986                 return ret;
5987
5988         *ppos += cnt;
5989
5990         return cnt;
5991 }
5992
5993 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5994         .open           = tracing_open_generic,
5995         .read           = tracing_saved_cmdlines_size_read,
5996         .write          = tracing_saved_cmdlines_size_write,
5997 };
5998
5999 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6000 static union trace_eval_map_item *
6001 update_eval_map(union trace_eval_map_item *ptr)
6002 {
6003         if (!ptr->map.eval_string) {
6004                 if (ptr->tail.next) {
6005                         ptr = ptr->tail.next;
6006                         /* Set ptr to the next real item (skip head) */
6007                         ptr++;
6008                 } else
6009                         return NULL;
6010         }
6011         return ptr;
6012 }
6013
6014 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6015 {
6016         union trace_eval_map_item *ptr = v;
6017
6018         /*
6019          * Paranoid! If ptr points to end, we don't want to increment past it.
6020          * This really should never happen.
6021          */
6022         (*pos)++;
6023         ptr = update_eval_map(ptr);
6024         if (WARN_ON_ONCE(!ptr))
6025                 return NULL;
6026
6027         ptr++;
6028         ptr = update_eval_map(ptr);
6029
6030         return ptr;
6031 }
6032
6033 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6034 {
6035         union trace_eval_map_item *v;
6036         loff_t l = 0;
6037
6038         mutex_lock(&trace_eval_mutex);
6039
6040         v = trace_eval_maps;
6041         if (v)
6042                 v++;
6043
6044         while (v && l < *pos) {
6045                 v = eval_map_next(m, v, &l);
6046         }
6047
6048         return v;
6049 }
6050
6051 static void eval_map_stop(struct seq_file *m, void *v)
6052 {
6053         mutex_unlock(&trace_eval_mutex);
6054 }
6055
6056 static int eval_map_show(struct seq_file *m, void *v)
6057 {
6058         union trace_eval_map_item *ptr = v;
6059
6060         seq_printf(m, "%s %ld (%s)\n",
6061                    ptr->map.eval_string, ptr->map.eval_value,
6062                    ptr->map.system);
6063
6064         return 0;
6065 }
6066
6067 static const struct seq_operations tracing_eval_map_seq_ops = {
6068         .start          = eval_map_start,
6069         .next           = eval_map_next,
6070         .stop           = eval_map_stop,
6071         .show           = eval_map_show,
6072 };
6073
6074 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6075 {
6076         int ret;
6077
6078         ret = tracing_check_open_get_tr(NULL);
6079         if (ret)
6080                 return ret;
6081
6082         return seq_open(filp, &tracing_eval_map_seq_ops);
6083 }
6084
6085 static const struct file_operations tracing_eval_map_fops = {
6086         .open           = tracing_eval_map_open,
6087         .read           = seq_read,
6088         .llseek         = seq_lseek,
6089         .release        = seq_release,
6090 };
6091
6092 static inline union trace_eval_map_item *
6093 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6094 {
6095         /* Return tail of array given the head */
6096         return ptr + ptr->head.length + 1;
6097 }
6098
6099 static void
6100 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6101                            int len)
6102 {
6103         struct trace_eval_map **stop;
6104         struct trace_eval_map **map;
6105         union trace_eval_map_item *map_array;
6106         union trace_eval_map_item *ptr;
6107
6108         stop = start + len;
6109
6110         /*
6111          * The trace_eval_maps contains the map plus a head and tail item,
6112          * where the head holds the module and length of array, and the
6113          * tail holds a pointer to the next list.
6114          */
6115         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6116         if (!map_array) {
6117                 pr_warn("Unable to allocate trace eval mapping\n");
6118                 return;
6119         }
6120
6121         mutex_lock(&trace_eval_mutex);
6122
6123         if (!trace_eval_maps)
6124                 trace_eval_maps = map_array;
6125         else {
6126                 ptr = trace_eval_maps;
6127                 for (;;) {
6128                         ptr = trace_eval_jmp_to_tail(ptr);
6129                         if (!ptr->tail.next)
6130                                 break;
6131                         ptr = ptr->tail.next;
6132
6133                 }
6134                 ptr->tail.next = map_array;
6135         }
6136         map_array->head.mod = mod;
6137         map_array->head.length = len;
6138         map_array++;
6139
6140         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6141                 map_array->map = **map;
6142                 map_array++;
6143         }
6144         memset(map_array, 0, sizeof(*map_array));
6145
6146         mutex_unlock(&trace_eval_mutex);
6147 }
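/*
 * Editor's note: standalone sketch (not kernel code) of the array layout built
 * by trace_insert_eval_map_file() above: one head element (module + length),
 * then the map entries, then one zeroed tail element whose "next" pointer
 * chains to the following array.  The union here is a simplified mock of
 * union trace_eval_map_item and the values are made up.
 */
#if 0
#include <stdio.h>
#include <stddef.h>

union item {
        struct { const char *str; long val; } map;      /* a real entry */
        struct { int length; } head;                    /* first element of an array */
        struct { union item *next; } tail;              /* last element of an array */
};

static void walk(union item *arr)
{
        while (arr) {
                int len = arr->head.length;
                int i;

                for (i = 1; i <= len; i++)              /* entries follow the head */
                        printf("%s = %ld\n", arr[i].map.str, arr[i].map.val);
                arr = arr[len + 1].tail.next;           /* tail chains to the next array */
        }
}

int main(void)
{
        static union item block[4];

        block[0].head.length = 2;
        block[1].map.str = "GFP_KERNEL";  block[1].map.val = 1;
        block[2].map.str = "GFP_ATOMIC";  block[2].map.val = 2;
        block[3].tail.next = NULL;
        walk(block);
        return 0;
}
#endif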
6148
6149 static void trace_create_eval_file(struct dentry *d_tracer)
6150 {
6151         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6152                           NULL, &tracing_eval_map_fops);
6153 }
6154
6155 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6156 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6157 static inline void trace_insert_eval_map_file(struct module *mod,
6158                               struct trace_eval_map **start, int len) { }
6159 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6160
6161 static void trace_insert_eval_map(struct module *mod,
6162                                   struct trace_eval_map **start, int len)
6163 {
6164         struct trace_eval_map **map;
6165
6166         if (len <= 0)
6167                 return;
6168
6169         map = start;
6170
6171         trace_event_eval_update(map, len);
6172
6173         trace_insert_eval_map_file(mod, start, len);
6174 }
6175
6176 static ssize_t
6177 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6178                        size_t cnt, loff_t *ppos)
6179 {
6180         struct trace_array *tr = filp->private_data;
6181         char buf[MAX_TRACER_SIZE+2];
6182         int r;
6183
6184         mutex_lock(&trace_types_lock);
6185         r = sprintf(buf, "%s\n", tr->current_trace->name);
6186         mutex_unlock(&trace_types_lock);
6187
6188         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6189 }
6190
6191 int tracer_init(struct tracer *t, struct trace_array *tr)
6192 {
6193         tracing_reset_online_cpus(&tr->array_buffer);
6194         return t->init(tr);
6195 }
6196
6197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6198 {
6199         int cpu;
6200
6201         for_each_tracing_cpu(cpu)
6202                 per_cpu_ptr(buf->data, cpu)->entries = val;
6203 }
6204
6205 #ifdef CONFIG_TRACER_MAX_TRACE
6206 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6208                                         struct array_buffer *size_buf, int cpu_id)
6209 {
6210         int cpu, ret = 0;
6211
6212         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6213                 for_each_tracing_cpu(cpu) {
6214                         ret = ring_buffer_resize(trace_buf->buffer,
6215                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6216                         if (ret < 0)
6217                                 break;
6218                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6219                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6220                 }
6221         } else {
6222                 ret = ring_buffer_resize(trace_buf->buffer,
6223                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6224                 if (ret == 0)
6225                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6226                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6227         }
6228
6229         return ret;
6230 }
6231 #endif /* CONFIG_TRACER_MAX_TRACE */
6232
6233 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6234                                         unsigned long size, int cpu)
6235 {
6236         int ret;
6237
6238         /*
6239          * If kernel or user changes the size of the ring buffer
6240          * we use the size that was given, and we can forget about
6241          * expanding it later.
6242          */
6243         ring_buffer_expanded = true;
6244
6245         /* May be called before buffers are initialized */
6246         if (!tr->array_buffer.buffer)
6247                 return 0;
6248
6249         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6250         if (ret < 0)
6251                 return ret;
6252
6253 #ifdef CONFIG_TRACER_MAX_TRACE
6254         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6255             !tr->current_trace->use_max_tr)
6256                 goto out;
6257
6258         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6259         if (ret < 0) {
6260                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6261                                                      &tr->array_buffer, cpu);
6262                 if (r < 0) {
6263                         /*
6264                          * AARGH! We are left with a differently
6265                          * sized max buffer!
6266                          * The max buffer is our "snapshot" buffer.
6267                          * When a tracer needs a snapshot (one of the
6268                          * latency tracers), it swaps the max buffer
6269                          * with the saved snapshot. We succeeded in
6270                          * updating the size of the main buffer, but failed
6271                          * to update the size of the max buffer. And when we
6272                          * tried to reset the main buffer to its original size,
6273                          * we failed there too. This is very unlikely to
6274                          * happen, but if it does, warn and disable all
6275                          * tracing.
6276                          */
6277                         WARN_ON(1);
6278                         tracing_disabled = 1;
6279                 }
6280                 return ret;
6281         }
6282
6283         if (cpu == RING_BUFFER_ALL_CPUS)
6284                 set_buffer_entries(&tr->max_buffer, size);
6285         else
6286                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6287
6288  out:
6289 #endif /* CONFIG_TRACER_MAX_TRACE */
6290
6291         if (cpu == RING_BUFFER_ALL_CPUS)
6292                 set_buffer_entries(&tr->array_buffer, size);
6293         else
6294                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6295
6296         return ret;
6297 }
6298
6299 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6300                                   unsigned long size, int cpu_id)
6301 {
6302         int ret;
6303
6304         mutex_lock(&trace_types_lock);
6305
6306         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6307                 /* make sure this cpu is enabled in the mask */
6308                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6309                         ret = -EINVAL;
6310                         goto out;
6311                 }
6312         }
6313
6314         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6315         if (ret < 0)
6316                 ret = -ENOMEM;
6317
6318 out:
6319         mutex_unlock(&trace_types_lock);
6320
6321         return ret;
6322 }
6323
6324
6325 /**
6326  * tracing_update_buffers - used by tracing facility to expand ring buffers
6327  *
6328  * To save memory when tracing is configured in but never used, the
6329  * ring buffers are initially set to a minimum size. Once a user starts
6330  * to use the tracing facility, they need to grow to their default
6331  * size.
6332  *
6333  * This function is to be called when a tracer is about to be used.
6334  */
6335 int tracing_update_buffers(void)
6336 {
6337         int ret = 0;
6338
6339         mutex_lock(&trace_types_lock);
6340         if (!ring_buffer_expanded)
6341                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6342                                                 RING_BUFFER_ALL_CPUS);
6343         mutex_unlock(&trace_types_lock);
6344
6345         return ret;
6346 }
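
/*
 * Typical caller pattern (illustrative sketch, mirroring how this is used
 * elsewhere in this file): expand the buffers before enabling a tracer or
 * event, and bail out if the allocation fails.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */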
6347
6348 struct trace_option_dentry;
6349
6350 static void
6351 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6352
6353 /*
6354  * Used to clear out the tracer before deletion of an instance.
6355  * Must have trace_types_lock held.
6356  */
6357 static void tracing_set_nop(struct trace_array *tr)
6358 {
6359         if (tr->current_trace == &nop_trace)
6360                 return;
6361
6362         tr->current_trace->enabled--;
6363
6364         if (tr->current_trace->reset)
6365                 tr->current_trace->reset(tr);
6366
6367         tr->current_trace = &nop_trace;
6368 }
6369
6370 static bool tracer_options_updated;
6371
6372 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6373 {
6374         /* Only enable if the directory has been created already. */
6375         if (!tr->dir)
6376                 return;
6377
6378         /* Only create trace option files after update_tracer_options() finishes */
6379         if (!tracer_options_updated)
6380                 return;
6381
6382         create_trace_option_files(tr, t);
6383 }
6384
6385 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6386 {
6387         struct tracer *t;
6388 #ifdef CONFIG_TRACER_MAX_TRACE
6389         bool had_max_tr;
6390 #endif
6391         int ret = 0;
6392
6393         mutex_lock(&trace_types_lock);
6394
6395         if (!ring_buffer_expanded) {
6396                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6397                                                 RING_BUFFER_ALL_CPUS);
6398                 if (ret < 0)
6399                         goto out;
6400                 ret = 0;
6401         }
6402
6403         for (t = trace_types; t; t = t->next) {
6404                 if (strcmp(t->name, buf) == 0)
6405                         break;
6406         }
6407         if (!t) {
6408                 ret = -EINVAL;
6409                 goto out;
6410         }
6411         if (t == tr->current_trace)
6412                 goto out;
6413
6414 #ifdef CONFIG_TRACER_SNAPSHOT
6415         if (t->use_max_tr) {
6416                 local_irq_disable();
6417                 arch_spin_lock(&tr->max_lock);
6418                 if (tr->cond_snapshot)
6419                         ret = -EBUSY;
6420                 arch_spin_unlock(&tr->max_lock);
6421                 local_irq_enable();
6422                 if (ret)
6423                         goto out;
6424         }
6425 #endif
6426         /* Some tracers won't work on kernel command line */
6427         if (system_state < SYSTEM_RUNNING && t->noboot) {
6428                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6429                         t->name);
6430                 goto out;
6431         }
6432
6433         /* Some tracers are only allowed for the top level buffer */
6434         if (!trace_ok_for_array(t, tr)) {
6435                 ret = -EINVAL;
6436                 goto out;
6437         }
6438
6439         /* If trace pipe files are being read, we can't change the tracer */
6440         if (tr->trace_ref) {
6441                 ret = -EBUSY;
6442                 goto out;
6443         }
6444
6445         trace_branch_disable();
6446
6447         tr->current_trace->enabled--;
6448
6449         if (tr->current_trace->reset)
6450                 tr->current_trace->reset(tr);
6451
6452 #ifdef CONFIG_TRACER_MAX_TRACE
6453         had_max_tr = tr->current_trace->use_max_tr;
6454
6455         /* Current trace needs to be nop_trace before synchronize_rcu */
6456         tr->current_trace = &nop_trace;
6457
6458         if (had_max_tr && !t->use_max_tr) {
6459                 /*
6460                  * We need to make sure that update_max_tr() sees that
6461                  * current_trace changed to nop_trace to keep it from
6462                  * swapping the buffers after we resize it.
6463                  * update_max_tr() is called with interrupts disabled,
6464                  * so a synchronize_rcu() is sufficient.
6465                  */
6466                 synchronize_rcu();
6467                 free_snapshot(tr);
6468         }
6469
6470         if (t->use_max_tr && !tr->allocated_snapshot) {
6471                 ret = tracing_alloc_snapshot_instance(tr);
6472                 if (ret < 0)
6473                         goto out;
6474         }
6475 #else
6476         tr->current_trace = &nop_trace;
6477 #endif
6478
6479         if (t->init) {
6480                 ret = tracer_init(t, tr);
6481                 if (ret)
6482                         goto out;
6483         }
6484
6485         tr->current_trace = t;
6486         tr->current_trace->enabled++;
6487         trace_branch_enable(tr);
6488  out:
6489         mutex_unlock(&trace_types_lock);
6490
6491         return ret;
6492 }
6493
6494 static ssize_t
6495 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6496                         size_t cnt, loff_t *ppos)
6497 {
6498         struct trace_array *tr = filp->private_data;
6499         char buf[MAX_TRACER_SIZE+1];
6500         char *name;
6501         size_t ret;
6502         int err;
6503
6504         ret = cnt;
6505
6506         if (cnt > MAX_TRACER_SIZE)
6507                 cnt = MAX_TRACER_SIZE;
6508
6509         if (copy_from_user(buf, ubuf, cnt))
6510                 return -EFAULT;
6511
6512         buf[cnt] = 0;
6513
6514         name = strim(buf);
6515
6516         err = tracing_set_tracer(tr, name);
6517         if (err)
6518                 return err;
6519
6520         *ppos += ret;
6521
6522         return ret;
6523 }
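
/*
 * Example (illustrative): the two handlers above back the "current_tracer"
 * file in tracefs, so switching tracers from user space looks like:
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 *
 * The available names are listed in "available_tracers"; which ones exist
 * depends on the kernel configuration.
 */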
6524
6525 static ssize_t
6526 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6527                    size_t cnt, loff_t *ppos)
6528 {
6529         char buf[64];
6530         int r;
6531
6532         r = snprintf(buf, sizeof(buf), "%ld\n",
6533                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6534         if (r > sizeof(buf))
6535                 r = sizeof(buf);
6536         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6537 }
6538
6539 static ssize_t
6540 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6541                     size_t cnt, loff_t *ppos)
6542 {
6543         unsigned long val;
6544         int ret;
6545
6546         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6547         if (ret)
6548                 return ret;
6549
6550         *ptr = val * 1000;
6551
6552         return cnt;
6553 }
6554
6555 static ssize_t
6556 tracing_thresh_read(struct file *filp, char __user *ubuf,
6557                     size_t cnt, loff_t *ppos)
6558 {
6559         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6560 }
6561
6562 static ssize_t
6563 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6564                      size_t cnt, loff_t *ppos)
6565 {
6566         struct trace_array *tr = filp->private_data;
6567         int ret;
6568
6569         mutex_lock(&trace_types_lock);
6570         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6571         if (ret < 0)
6572                 goto out;
6573
6574         if (tr->current_trace->update_thresh) {
6575                 ret = tr->current_trace->update_thresh(tr);
6576                 if (ret < 0)
6577                         goto out;
6578         }
6579
6580         ret = cnt;
6581 out:
6582         mutex_unlock(&trace_types_lock);
6583
6584         return ret;
6585 }
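
/*
 * Note on units (added for clarity): tracing_nsecs_write() takes the user
 * value in microseconds and stores it in nanoseconds (val * 1000), while
 * tracing_nsecs_read() converts back with nsecs_to_usecs(). So, for
 * example:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets tracing_thresh to 100 usecs (100000 ns internally).
 */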
6586
6587 #ifdef CONFIG_TRACER_MAX_TRACE
6588
6589 static ssize_t
6590 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6591                      size_t cnt, loff_t *ppos)
6592 {
6593         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6594 }
6595
6596 static ssize_t
6597 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6598                       size_t cnt, loff_t *ppos)
6599 {
6600         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6601 }
6602
6603 #endif
6604
6605 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6606 {
6607         struct trace_array *tr = inode->i_private;
6608         struct trace_iterator *iter;
6609         int ret;
6610
6611         ret = tracing_check_open_get_tr(tr);
6612         if (ret)
6613                 return ret;
6614
6615         mutex_lock(&trace_types_lock);
6616
6617         /* create a buffer to store the information to pass to userspace */
6618         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6619         if (!iter) {
6620                 ret = -ENOMEM;
6621                 __trace_array_put(tr);
6622                 goto out;
6623         }
6624
6625         trace_seq_init(&iter->seq);
6626         iter->trace = tr->current_trace;
6627
6628         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6629                 ret = -ENOMEM;
6630                 goto fail;
6631         }
6632
6633         /* trace_pipe does not show the start of the buffer */
6634         cpumask_setall(iter->started);
6635
6636         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6637                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6638
6639         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6640         if (trace_clocks[tr->clock_id].in_ns)
6641                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6642
6643         iter->tr = tr;
6644         iter->array_buffer = &tr->array_buffer;
6645         iter->cpu_file = tracing_get_cpu(inode);
6646         mutex_init(&iter->mutex);
6647         filp->private_data = iter;
6648
6649         if (iter->trace->pipe_open)
6650                 iter->trace->pipe_open(iter);
6651
6652         nonseekable_open(inode, filp);
6653
6654         tr->trace_ref++;
6655 out:
6656         mutex_unlock(&trace_types_lock);
6657         return ret;
6658
6659 fail:
6660         kfree(iter);
6661         __trace_array_put(tr);
6662         mutex_unlock(&trace_types_lock);
6663         return ret;
6664 }
6665
6666 static int tracing_release_pipe(struct inode *inode, struct file *file)
6667 {
6668         struct trace_iterator *iter = file->private_data;
6669         struct trace_array *tr = inode->i_private;
6670
6671         mutex_lock(&trace_types_lock);
6672
6673         tr->trace_ref--;
6674
6675         if (iter->trace->pipe_close)
6676                 iter->trace->pipe_close(iter);
6677
6678         mutex_unlock(&trace_types_lock);
6679
6680         free_cpumask_var(iter->started);
6681         kfree(iter->fmt);
6682         mutex_destroy(&iter->mutex);
6683         kfree(iter);
6684
6685         trace_array_put(tr);
6686
6687         return 0;
6688 }
6689
6690 static __poll_t
6691 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6692 {
6693         struct trace_array *tr = iter->tr;
6694
6695         /* Iterators are static, they should be filled or empty */
6696         if (trace_buffer_iter(iter, iter->cpu_file))
6697                 return EPOLLIN | EPOLLRDNORM;
6698
6699         if (tr->trace_flags & TRACE_ITER_BLOCK)
6700                 /*
6701                  * Always select as readable when in blocking mode
6702                  */
6703                 return EPOLLIN | EPOLLRDNORM;
6704         else
6705                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6706                                              filp, poll_table, iter->tr->buffer_percent);
6707 }
6708
6709 static __poll_t
6710 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6711 {
6712         struct trace_iterator *iter = filp->private_data;
6713
6714         return trace_poll(iter, filp, poll_table);
6715 }
6716
6717 /* Must be called with iter->mutex held. */
6718 static int tracing_wait_pipe(struct file *filp)
6719 {
6720         struct trace_iterator *iter = filp->private_data;
6721         int ret;
6722
6723         while (trace_empty(iter)) {
6724
6725                 if ((filp->f_flags & O_NONBLOCK)) {
6726                         return -EAGAIN;
6727                 }
6728
6729                 /*
6730                  * We keep blocking while the buffer is empty, even if tracing
6731                  * is disabled, as long as we have never read anything. This
6732                  * allows a user to cat this file and then enable tracing. But
6733                  * after we have read something, we give an EOF when tracing
6734                  * is disabled again.
6735                  *
6736                  * iter->pos will be 0 if we haven't read anything.
6737                  */
6738                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6739                         break;
6740
6741                 mutex_unlock(&iter->mutex);
6742
6743                 ret = wait_on_pipe(iter, 0);
6744
6745                 mutex_lock(&iter->mutex);
6746
6747                 if (ret)
6748                         return ret;
6749         }
6750
6751         return 1;
6752 }
6753
6754 /*
6755  * Consumer reader.
6756  */
6757 static ssize_t
6758 tracing_read_pipe(struct file *filp, char __user *ubuf,
6759                   size_t cnt, loff_t *ppos)
6760 {
6761         struct trace_iterator *iter = filp->private_data;
6762         ssize_t sret;
6763
6764         /*
6765          * Avoid more than one consumer on a single file descriptor.
6766          * This is just a matter of trace coherency; the ring buffer itself
6767          * is protected.
6768          */
6769         mutex_lock(&iter->mutex);
6770
6771         /* return any leftover data */
6772         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6773         if (sret != -EBUSY)
6774                 goto out;
6775
6776         trace_seq_init(&iter->seq);
6777
6778         if (iter->trace->read) {
6779                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6780                 if (sret)
6781                         goto out;
6782         }
6783
6784 waitagain:
6785         sret = tracing_wait_pipe(filp);
6786         if (sret <= 0)
6787                 goto out;
6788
6789         /* stop when tracing is finished */
6790         if (trace_empty(iter)) {
6791                 sret = 0;
6792                 goto out;
6793         }
6794
6795         if (cnt >= PAGE_SIZE)
6796                 cnt = PAGE_SIZE - 1;
6797
6798         /* reset all but tr, trace, and overruns */
6799         trace_iterator_reset(iter);
6800         cpumask_clear(iter->started);
6801         trace_seq_init(&iter->seq);
6802
6803         trace_event_read_lock();
6804         trace_access_lock(iter->cpu_file);
6805         while (trace_find_next_entry_inc(iter) != NULL) {
6806                 enum print_line_t ret;
6807                 int save_len = iter->seq.seq.len;
6808
6809                 ret = print_trace_line(iter);
6810                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6811                         /*
6812                          * If one print_trace_line() fills the entire trace_seq in one shot,
6813                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6814                          * In that case, we need to consume the event; otherwise, the loop
6815                          * will peek the same event next time, resulting in an infinite loop.
6816                          */
6817                         if (save_len == 0) {
6818                                 iter->seq.full = 0;
6819                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6820                                 trace_consume(iter);
6821                                 break;
6822                         }
6823
6824                         /* In other cases, don't print partial lines */
6825                         iter->seq.seq.len = save_len;
6826                         break;
6827                 }
6828                 if (ret != TRACE_TYPE_NO_CONSUME)
6829                         trace_consume(iter);
6830
6831                 if (trace_seq_used(&iter->seq) >= cnt)
6832                         break;
6833
6834                 /*
6835                  * The full flag being set means we reached the trace_seq buffer
6836                  * size and should have left via the partial-line condition above.
6837                  * One of the trace_seq_* functions is not being used properly.
6838                  */
6839                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6840                           iter->ent->type);
6841         }
6842         trace_access_unlock(iter->cpu_file);
6843         trace_event_read_unlock();
6844
6845         /* Now copy what we have to the user */
6846         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6847         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6848                 trace_seq_init(&iter->seq);
6849
6850         /*
6851          * If there was nothing to send to user, in spite of consuming trace
6852          * entries, go back to wait for more entries.
6853          */
6854         if (sret == -EBUSY)
6855                 goto waitagain;
6856
6857 out:
6858         mutex_unlock(&iter->mutex);
6859
6860         return sret;
6861 }
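
/*
 * Usage note (illustrative): this is the handler behind the "trace_pipe"
 * file. Unlike "trace", reads here consume the entries (trace_consume()
 * above), so:
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 *
 * blocks waiting for new data and removes whatever it prints from the
 * ring buffer.
 */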
6862
6863 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6864                                      unsigned int idx)
6865 {
6866         __free_page(spd->pages[idx]);
6867 }
6868
6869 static size_t
6870 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6871 {
6872         size_t count;
6873         int save_len;
6874         int ret;
6875
6876         /* Seq buffer is page-sized, exactly what we need. */
6877         for (;;) {
6878                 save_len = iter->seq.seq.len;
6879                 ret = print_trace_line(iter);
6880
6881                 if (trace_seq_has_overflowed(&iter->seq)) {
6882                         iter->seq.seq.len = save_len;
6883                         break;
6884                 }
6885
6886                 /*
6887                  * This should not be hit, because it should only
6888                  * be set if the iter->seq overflowed. But check it
6889                  * anyway to be safe.
6890                  */
6891                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6892                         iter->seq.seq.len = save_len;
6893                         break;
6894                 }
6895
6896                 count = trace_seq_used(&iter->seq) - save_len;
6897                 if (rem < count) {
6898                         rem = 0;
6899                         iter->seq.seq.len = save_len;
6900                         break;
6901                 }
6902
6903                 if (ret != TRACE_TYPE_NO_CONSUME)
6904                         trace_consume(iter);
6905                 rem -= count;
6906                 if (!trace_find_next_entry_inc(iter))   {
6907                         rem = 0;
6908                         iter->ent = NULL;
6909                         break;
6910                 }
6911         }
6912
6913         return rem;
6914 }
6915
6916 static ssize_t tracing_splice_read_pipe(struct file *filp,
6917                                         loff_t *ppos,
6918                                         struct pipe_inode_info *pipe,
6919                                         size_t len,
6920                                         unsigned int flags)
6921 {
6922         struct page *pages_def[PIPE_DEF_BUFFERS];
6923         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6924         struct trace_iterator *iter = filp->private_data;
6925         struct splice_pipe_desc spd = {
6926                 .pages          = pages_def,
6927                 .partial        = partial_def,
6928                 .nr_pages       = 0, /* This gets updated below. */
6929                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6930                 .ops            = &default_pipe_buf_ops,
6931                 .spd_release    = tracing_spd_release_pipe,
6932         };
6933         ssize_t ret;
6934         size_t rem;
6935         unsigned int i;
6936
6937         if (splice_grow_spd(pipe, &spd))
6938                 return -ENOMEM;
6939
6940         mutex_lock(&iter->mutex);
6941
6942         if (iter->trace->splice_read) {
6943                 ret = iter->trace->splice_read(iter, filp,
6944                                                ppos, pipe, len, flags);
6945                 if (ret)
6946                         goto out_err;
6947         }
6948
6949         ret = tracing_wait_pipe(filp);
6950         if (ret <= 0)
6951                 goto out_err;
6952
6953         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6954                 ret = -EFAULT;
6955                 goto out_err;
6956         }
6957
6958         trace_event_read_lock();
6959         trace_access_lock(iter->cpu_file);
6960
6961         /* Fill as many pages as possible. */
6962         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6963                 spd.pages[i] = alloc_page(GFP_KERNEL);
6964                 if (!spd.pages[i])
6965                         break;
6966
6967                 rem = tracing_fill_pipe_page(rem, iter);
6968
6969                 /* Copy the data into the page, so we can start over. */
6970                 ret = trace_seq_to_buffer(&iter->seq,
6971                                           page_address(spd.pages[i]),
6972                                           trace_seq_used(&iter->seq));
6973                 if (ret < 0) {
6974                         __free_page(spd.pages[i]);
6975                         break;
6976                 }
6977                 spd.partial[i].offset = 0;
6978                 spd.partial[i].len = trace_seq_used(&iter->seq);
6979
6980                 trace_seq_init(&iter->seq);
6981         }
6982
6983         trace_access_unlock(iter->cpu_file);
6984         trace_event_read_unlock();
6985         mutex_unlock(&iter->mutex);
6986
6987         spd.nr_pages = i;
6988
6989         if (i)
6990                 ret = splice_to_pipe(pipe, &spd);
6991         else
6992                 ret = 0;
6993 out:
6994         splice_shrink_spd(&spd);
6995         return ret;
6996
6997 out_err:
6998         mutex_unlock(&iter->mutex);
6999         goto out;
7000 }
7001
7002 static ssize_t
7003 tracing_entries_read(struct file *filp, char __user *ubuf,
7004                      size_t cnt, loff_t *ppos)
7005 {
7006         struct inode *inode = file_inode(filp);
7007         struct trace_array *tr = inode->i_private;
7008         int cpu = tracing_get_cpu(inode);
7009         char buf[64];
7010         int r = 0;
7011         ssize_t ret;
7012
7013         mutex_lock(&trace_types_lock);
7014
7015         if (cpu == RING_BUFFER_ALL_CPUS) {
7016                 int cpu, buf_size_same;
7017                 unsigned long size;
7018
7019                 size = 0;
7020                 buf_size_same = 1;
7021                 /* check if all cpu sizes are same */
7022                 for_each_tracing_cpu(cpu) {
7023                         /* fill in the size from first enabled cpu */
7024                         if (size == 0)
7025                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7026                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7027                                 buf_size_same = 0;
7028                                 break;
7029                         }
7030                 }
7031
7032                 if (buf_size_same) {
7033                         if (!ring_buffer_expanded)
7034                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7035                                             size >> 10,
7036                                             trace_buf_size >> 10);
7037                         else
7038                                 r = sprintf(buf, "%lu\n", size >> 10);
7039                 } else
7040                         r = sprintf(buf, "X\n");
7041         } else
7042                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7043
7044         mutex_unlock(&trace_types_lock);
7045
7046         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7047         return ret;
7048 }
7049
7050 static ssize_t
7051 tracing_entries_write(struct file *filp, const char __user *ubuf,
7052                       size_t cnt, loff_t *ppos)
7053 {
7054         struct inode *inode = file_inode(filp);
7055         struct trace_array *tr = inode->i_private;
7056         unsigned long val;
7057         int ret;
7058
7059         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7060         if (ret)
7061                 return ret;
7062
7063         /* must have at least 1 entry */
7064         if (!val)
7065                 return -EINVAL;
7066
7067         /* value is in KB */
7068         val <<= 10;
7069         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7070         if (ret < 0)
7071                 return ret;
7072
7073         *ppos += cnt;
7074
7075         return cnt;
7076 }
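
/*
 * Example (illustrative): these handlers back the per-instance
 * "buffer_size_kb" files. The value is in kilobytes per CPU (val <<= 10
 * above), so:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * resizes each CPU's ring buffer to roughly 4 MB.
 */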
7077
7078 static ssize_t
7079 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7080                                 size_t cnt, loff_t *ppos)
7081 {
7082         struct trace_array *tr = filp->private_data;
7083         char buf[64];
7084         int r, cpu;
7085         unsigned long size = 0, expanded_size = 0;
7086
7087         mutex_lock(&trace_types_lock);
7088         for_each_tracing_cpu(cpu) {
7089                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7090                 if (!ring_buffer_expanded)
7091                         expanded_size += trace_buf_size >> 10;
7092         }
7093         if (ring_buffer_expanded)
7094                 r = sprintf(buf, "%lu\n", size);
7095         else
7096                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7097         mutex_unlock(&trace_types_lock);
7098
7099         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7100 }
7101
7102 static ssize_t
7103 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7104                           size_t cnt, loff_t *ppos)
7105 {
7106         /*
7107          * There is no need to read what the user has written; this function
7108          * exists just to make sure that "echo" to this file does not error out.
7109          */
7110
7111         *ppos += cnt;
7112
7113         return cnt;
7114 }
7115
7116 static int
7117 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7118 {
7119         struct trace_array *tr = inode->i_private;
7120
7121         /* disable tracing? */
7122         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7123                 tracer_tracing_off(tr);
7124         /* resize the ring buffer to 0 */
7125         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7126
7127         trace_array_put(tr);
7128
7129         return 0;
7130 }
7131
7132 static ssize_t
7133 tracing_mark_write(struct file *filp, const char __user *ubuf,
7134                                         size_t cnt, loff_t *fpos)
7135 {
7136         struct trace_array *tr = filp->private_data;
7137         struct ring_buffer_event *event;
7138         enum event_trigger_type tt = ETT_NONE;
7139         struct trace_buffer *buffer;
7140         struct print_entry *entry;
7141         ssize_t written;
7142         int size;
7143         int len;
7144
7145 /* Used in tracing_mark_raw_write() as well */
7146 #define FAULTED_STR "<faulted>"
7147 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7148
7149         if (tracing_disabled)
7150                 return -EINVAL;
7151
7152         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7153                 return -EINVAL;
7154
7155         if (cnt > TRACE_BUF_SIZE)
7156                 cnt = TRACE_BUF_SIZE;
7157
7158         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7159
7160         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7161
7162         /* If less than "<faulted>", then make sure we can still add that */
7163         if (cnt < FAULTED_SIZE)
7164                 size += FAULTED_SIZE - cnt;
7165
7166         buffer = tr->array_buffer.buffer;
7167         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7168                                             tracing_gen_ctx());
7169         if (unlikely(!event))
7170                 /* Ring buffer disabled, return as if not open for write */
7171                 return -EBADF;
7172
7173         entry = ring_buffer_event_data(event);
7174         entry->ip = _THIS_IP_;
7175
7176         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7177         if (len) {
7178                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7179                 cnt = FAULTED_SIZE;
7180                 written = -EFAULT;
7181         } else
7182                 written = cnt;
7183
7184         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7185                 /* do not add \n before testing triggers, but add \0 */
7186                 entry->buf[cnt] = '\0';
7187                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7188         }
7189
7190         if (entry->buf[cnt - 1] != '\n') {
7191                 entry->buf[cnt] = '\n';
7192                 entry->buf[cnt + 1] = '\0';
7193         } else
7194                 entry->buf[cnt] = '\0';
7195
7196         if (static_branch_unlikely(&trace_marker_exports_enabled))
7197                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7198         __buffer_unlock_commit(buffer, event);
7199
7200         if (tt)
7201                 event_triggers_post_call(tr->trace_marker_file, tt);
7202
7203         return written;
7204 }
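
/*
 * Example (illustrative): tracing_mark_write() backs the "trace_marker"
 * file, so user space can inject annotations into the trace:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace output as a print event, truncated to
 * TRACE_BUF_SIZE if it is longer than that.
 */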
7205
7206 /* Limit it for now to 3K (including tag) */
7207 #define RAW_DATA_MAX_SIZE (1024*3)
7208
7209 static ssize_t
7210 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7211                                         size_t cnt, loff_t *fpos)
7212 {
7213         struct trace_array *tr = filp->private_data;
7214         struct ring_buffer_event *event;
7215         struct trace_buffer *buffer;
7216         struct raw_data_entry *entry;
7217         ssize_t written;
7218         int size;
7219         int len;
7220
7221 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7222
7223         if (tracing_disabled)
7224                 return -EINVAL;
7225
7226         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7227                 return -EINVAL;
7228
7229         /* The marker must at least have a tag id */
7230         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7231                 return -EINVAL;
7232
7233         if (cnt > TRACE_BUF_SIZE)
7234                 cnt = TRACE_BUF_SIZE;
7235
7236         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7237
7238         size = sizeof(*entry) + cnt;
7239         if (cnt < FAULT_SIZE_ID)
7240                 size += FAULT_SIZE_ID - cnt;
7241
7242         buffer = tr->array_buffer.buffer;
7243         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7244                                             tracing_gen_ctx());
7245         if (!event)
7246                 /* Ring buffer disabled, return as if not open for write */
7247                 return -EBADF;
7248
7249         entry = ring_buffer_event_data(event);
7250
7251         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7252         if (len) {
7253                 entry->id = -1;
7254                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7255                 written = -EFAULT;
7256         } else
7257                 written = cnt;
7258
7259         __buffer_unlock_commit(buffer, event);
7260
7261         return written;
7262 }
7263
7264 static int tracing_clock_show(struct seq_file *m, void *v)
7265 {
7266         struct trace_array *tr = m->private;
7267         int i;
7268
7269         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7270                 seq_printf(m,
7271                         "%s%s%s%s", i ? " " : "",
7272                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7273                         i == tr->clock_id ? "]" : "");
7274         seq_putc(m, '\n');
7275
7276         return 0;
7277 }
7278
7279 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7280 {
7281         int i;
7282
7283         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7284                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7285                         break;
7286         }
7287         if (i == ARRAY_SIZE(trace_clocks))
7288                 return -EINVAL;
7289
7290         mutex_lock(&trace_types_lock);
7291
7292         tr->clock_id = i;
7293
7294         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7295
7296         /*
7297          * The new clock may not be consistent with the previous clock.
7298          * Reset the buffer so that it doesn't have incomparable timestamps.
7299          */
7300         tracing_reset_online_cpus(&tr->array_buffer);
7301
7302 #ifdef CONFIG_TRACER_MAX_TRACE
7303         if (tr->max_buffer.buffer)
7304                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7305         tracing_reset_online_cpus(&tr->max_buffer);
7306 #endif
7307
7308         mutex_unlock(&trace_types_lock);
7309
7310         return 0;
7311 }
7312
7313 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7314                                    size_t cnt, loff_t *fpos)
7315 {
7316         struct seq_file *m = filp->private_data;
7317         struct trace_array *tr = m->private;
7318         char buf[64];
7319         const char *clockstr;
7320         int ret;
7321
7322         if (cnt >= sizeof(buf))
7323                 return -EINVAL;
7324
7325         if (copy_from_user(buf, ubuf, cnt))
7326                 return -EFAULT;
7327
7328         buf[cnt] = 0;
7329
7330         clockstr = strstrip(buf);
7331
7332         ret = tracing_set_clock(tr, clockstr);
7333         if (ret)
7334                 return ret;
7335
7336         *fpos += cnt;
7337
7338         return cnt;
7339 }
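
/*
 * Example (illustrative): tracing_clock_show()/tracing_clock_write() back
 * the "trace_clock" file. Reading it lists the clocks with the current one
 * in brackets, and writing a name switches clocks (and resets the buffers,
 * as noted above):
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter ...
 *	# echo global > /sys/kernel/tracing/trace_clock
 */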
7340
7341 static int tracing_clock_open(struct inode *inode, struct file *file)
7342 {
7343         struct trace_array *tr = inode->i_private;
7344         int ret;
7345
7346         ret = tracing_check_open_get_tr(tr);
7347         if (ret)
7348                 return ret;
7349
7350         ret = single_open(file, tracing_clock_show, inode->i_private);
7351         if (ret < 0)
7352                 trace_array_put(tr);
7353
7354         return ret;
7355 }
7356
7357 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7358 {
7359         struct trace_array *tr = m->private;
7360
7361         mutex_lock(&trace_types_lock);
7362
7363         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7364                 seq_puts(m, "delta [absolute]\n");
7365         else
7366                 seq_puts(m, "[delta] absolute\n");
7367
7368         mutex_unlock(&trace_types_lock);
7369
7370         return 0;
7371 }
7372
7373 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7374 {
7375         struct trace_array *tr = inode->i_private;
7376         int ret;
7377
7378         ret = tracing_check_open_get_tr(tr);
7379         if (ret)
7380                 return ret;
7381
7382         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7383         if (ret < 0)
7384                 trace_array_put(tr);
7385
7386         return ret;
7387 }
7388
7389 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7390 {
7391         if (rbe == this_cpu_read(trace_buffered_event))
7392                 return ring_buffer_time_stamp(buffer);
7393
7394         return ring_buffer_event_time_stamp(buffer, rbe);
7395 }
7396
7397 /*
7398  * Set or disable using the per CPU trace_buffered_event when possible.
7399  */
7400 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7401 {
7402         int ret = 0;
7403
7404         mutex_lock(&trace_types_lock);
7405
7406         if (set && tr->no_filter_buffering_ref++)
7407                 goto out;
7408
7409         if (!set) {
7410                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7411                         ret = -EINVAL;
7412                         goto out;
7413                 }
7414
7415                 --tr->no_filter_buffering_ref;
7416         }
7417  out:
7418         mutex_unlock(&trace_types_lock);
7419
7420         return ret;
7421 }
7422
7423 struct ftrace_buffer_info {
7424         struct trace_iterator   iter;
7425         void                    *spare;
7426         unsigned int            spare_cpu;
7427         unsigned int            read;
7428 };
7429
7430 #ifdef CONFIG_TRACER_SNAPSHOT
7431 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7432 {
7433         struct trace_array *tr = inode->i_private;
7434         struct trace_iterator *iter;
7435         struct seq_file *m;
7436         int ret;
7437
7438         ret = tracing_check_open_get_tr(tr);
7439         if (ret)
7440                 return ret;
7441
7442         if (file->f_mode & FMODE_READ) {
7443                 iter = __tracing_open(inode, file, true);
7444                 if (IS_ERR(iter))
7445                         ret = PTR_ERR(iter);
7446         } else {
7447                 /* Writes still need the seq_file to hold the private data */
7448                 ret = -ENOMEM;
7449                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7450                 if (!m)
7451                         goto out;
7452                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7453                 if (!iter) {
7454                         kfree(m);
7455                         goto out;
7456                 }
7457                 ret = 0;
7458
7459                 iter->tr = tr;
7460                 iter->array_buffer = &tr->max_buffer;
7461                 iter->cpu_file = tracing_get_cpu(inode);
7462                 m->private = iter;
7463                 file->private_data = m;
7464         }
7465 out:
7466         if (ret < 0)
7467                 trace_array_put(tr);
7468
7469         return ret;
7470 }
7471
7472 static ssize_t
7473 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7474                        loff_t *ppos)
7475 {
7476         struct seq_file *m = filp->private_data;
7477         struct trace_iterator *iter = m->private;
7478         struct trace_array *tr = iter->tr;
7479         unsigned long val;
7480         int ret;
7481
7482         ret = tracing_update_buffers();
7483         if (ret < 0)
7484                 return ret;
7485
7486         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7487         if (ret)
7488                 return ret;
7489
7490         mutex_lock(&trace_types_lock);
7491
7492         if (tr->current_trace->use_max_tr) {
7493                 ret = -EBUSY;
7494                 goto out;
7495         }
7496
7497         local_irq_disable();
7498         arch_spin_lock(&tr->max_lock);
7499         if (tr->cond_snapshot)
7500                 ret = -EBUSY;
7501         arch_spin_unlock(&tr->max_lock);
7502         local_irq_enable();
7503         if (ret)
7504                 goto out;
7505
7506         switch (val) {
7507         case 0:
7508                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7509                         ret = -EINVAL;
7510                         break;
7511                 }
7512                 if (tr->allocated_snapshot)
7513                         free_snapshot(tr);
7514                 break;
7515         case 1:
7516 /* Only allow per-cpu swap if the ring buffer supports it */
7517 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7518                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7519                         ret = -EINVAL;
7520                         break;
7521                 }
7522 #endif
7523                 if (tr->allocated_snapshot)
7524                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7525                                         &tr->array_buffer, iter->cpu_file);
7526                 else
7527                         ret = tracing_alloc_snapshot_instance(tr);
7528                 if (ret < 0)
7529                         break;
7530                 local_irq_disable();
7531                 /* Now, we're going to swap */
7532                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7533                         update_max_tr(tr, current, smp_processor_id(), NULL);
7534                 else
7535                         update_max_tr_single(tr, current, iter->cpu_file);
7536                 local_irq_enable();
7537                 break;
7538         default:
7539                 if (tr->allocated_snapshot) {
7540                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7541                                 tracing_reset_online_cpus(&tr->max_buffer);
7542                         else
7543                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7544                 }
7545                 break;
7546         }
7547
7548         if (ret >= 0) {
7549                 *ppos += cnt;
7550                 ret = cnt;
7551         }
7552 out:
7553         mutex_unlock(&trace_types_lock);
7554         return ret;
7555 }
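
/*
 * Summary of the "snapshot" file semantics implemented above (illustrative):
 *
 *	# echo 0 > snapshot	frees the snapshot buffer (if allocated)
 *	# echo 1 > snapshot	allocates it if needed and swaps it with the
 *				live buffer (takes a snapshot)
 *	# echo 2 > snapshot	(or any other value) clears the snapshot
 *				buffer without freeing or swapping
 */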
7556
7557 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7558 {
7559         struct seq_file *m = file->private_data;
7560         int ret;
7561
7562         ret = tracing_release(inode, file);
7563
7564         if (file->f_mode & FMODE_READ)
7565                 return ret;
7566
7567         /* If write only, the seq_file is just a stub */
7568         if (m)
7569                 kfree(m->private);
7570         kfree(m);
7571
7572         return 0;
7573 }
7574
7575 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7576 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7577                                     size_t count, loff_t *ppos);
7578 static int tracing_buffers_release(struct inode *inode, struct file *file);
7579 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7580                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7581
7582 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7583 {
7584         struct ftrace_buffer_info *info;
7585         int ret;
7586
7587         /* The following checks for tracefs lockdown */
7588         ret = tracing_buffers_open(inode, filp);
7589         if (ret < 0)
7590                 return ret;
7591
7592         info = filp->private_data;
7593
7594         if (info->iter.trace->use_max_tr) {
7595                 tracing_buffers_release(inode, filp);
7596                 return -EBUSY;
7597         }
7598
7599         info->iter.snapshot = true;
7600         info->iter.array_buffer = &info->iter.tr->max_buffer;
7601
7602         return ret;
7603 }
7604
7605 #endif /* CONFIG_TRACER_SNAPSHOT */
7606
7607
7608 static const struct file_operations tracing_thresh_fops = {
7609         .open           = tracing_open_generic,
7610         .read           = tracing_thresh_read,
7611         .write          = tracing_thresh_write,
7612         .llseek         = generic_file_llseek,
7613 };
7614
7615 #ifdef CONFIG_TRACER_MAX_TRACE
7616 static const struct file_operations tracing_max_lat_fops = {
7617         .open           = tracing_open_generic,
7618         .read           = tracing_max_lat_read,
7619         .write          = tracing_max_lat_write,
7620         .llseek         = generic_file_llseek,
7621 };
7622 #endif
7623
7624 static const struct file_operations set_tracer_fops = {
7625         .open           = tracing_open_generic,
7626         .read           = tracing_set_trace_read,
7627         .write          = tracing_set_trace_write,
7628         .llseek         = generic_file_llseek,
7629 };
7630
7631 static const struct file_operations tracing_pipe_fops = {
7632         .open           = tracing_open_pipe,
7633         .poll           = tracing_poll_pipe,
7634         .read           = tracing_read_pipe,
7635         .splice_read    = tracing_splice_read_pipe,
7636         .release        = tracing_release_pipe,
7637         .llseek         = no_llseek,
7638 };
7639
7640 static const struct file_operations tracing_entries_fops = {
7641         .open           = tracing_open_generic_tr,
7642         .read           = tracing_entries_read,
7643         .write          = tracing_entries_write,
7644         .llseek         = generic_file_llseek,
7645         .release        = tracing_release_generic_tr,
7646 };
7647
7648 static const struct file_operations tracing_total_entries_fops = {
7649         .open           = tracing_open_generic_tr,
7650         .read           = tracing_total_entries_read,
7651         .llseek         = generic_file_llseek,
7652         .release        = tracing_release_generic_tr,
7653 };
7654
7655 static const struct file_operations tracing_free_buffer_fops = {
7656         .open           = tracing_open_generic_tr,
7657         .write          = tracing_free_buffer_write,
7658         .release        = tracing_free_buffer_release,
7659 };
7660
7661 static const struct file_operations tracing_mark_fops = {
7662         .open           = tracing_mark_open,
7663         .write          = tracing_mark_write,
7664         .release        = tracing_release_generic_tr,
7665 };
7666
7667 static const struct file_operations tracing_mark_raw_fops = {
7668         .open           = tracing_mark_open,
7669         .write          = tracing_mark_raw_write,
7670         .release        = tracing_release_generic_tr,
7671 };
7672
7673 static const struct file_operations trace_clock_fops = {
7674         .open           = tracing_clock_open,
7675         .read           = seq_read,
7676         .llseek         = seq_lseek,
7677         .release        = tracing_single_release_tr,
7678         .write          = tracing_clock_write,
7679 };
7680
7681 static const struct file_operations trace_time_stamp_mode_fops = {
7682         .open           = tracing_time_stamp_mode_open,
7683         .read           = seq_read,
7684         .llseek         = seq_lseek,
7685         .release        = tracing_single_release_tr,
7686 };
7687
7688 #ifdef CONFIG_TRACER_SNAPSHOT
7689 static const struct file_operations snapshot_fops = {
7690         .open           = tracing_snapshot_open,
7691         .read           = seq_read,
7692         .write          = tracing_snapshot_write,
7693         .llseek         = tracing_lseek,
7694         .release        = tracing_snapshot_release,
7695 };
7696
7697 static const struct file_operations snapshot_raw_fops = {
7698         .open           = snapshot_raw_open,
7699         .read           = tracing_buffers_read,
7700         .release        = tracing_buffers_release,
7701         .splice_read    = tracing_buffers_splice_read,
7702         .llseek         = no_llseek,
7703 };
7704
7705 #endif /* CONFIG_TRACER_SNAPSHOT */
7706
7707 /*
7708  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7709  * @filp: The active open file structure
7710  * @ubuf: The user-space provided buffer to read the value from
7711  * @cnt: The maximum number of bytes to read
7712  * @ppos: The current "file" position
7713  *
7714  * This function implements the write interface for a struct trace_min_max_param.
7715  * The filp->private_data must point to a trace_min_max_param structure that
7716  * defines where to write the value, the min and the max acceptable values,
7717  * and a lock to protect the write.
7718  */
7719 static ssize_t
7720 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7721 {
7722         struct trace_min_max_param *param = filp->private_data;
7723         u64 val;
7724         int err;
7725
7726         if (!param)
7727                 return -EFAULT;
7728
7729         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7730         if (err)
7731                 return err;
7732
7733         if (param->lock)
7734                 mutex_lock(param->lock);
7735
7736         if (param->min && val < *param->min)
7737                 err = -EINVAL;
7738
7739         if (param->max && val > *param->max)
7740                 err = -EINVAL;
7741
7742         if (!err)
7743                 *param->val = val;
7744
7745         if (param->lock)
7746                 mutex_unlock(param->lock);
7747
7748         if (err)
7749                 return err;
7750
7751         return cnt;
7752 }
7753
7754 /*
7755  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7756  * @filp: The active open file structure
7757  * @ubuf: The userspace provided buffer to read value into
7758  * @cnt: The maximum number of bytes to read
7759  * @ppos: The current "file" position
7760  *
7761  * This function implements the read interface for a struct trace_min_max_param.
7762  * The filp->private_data must point to a trace_min_max_param struct with valid
7763  * data.
7764  */
7765 static ssize_t
7766 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7767 {
7768         struct trace_min_max_param *param = filp->private_data;
7769         char buf[U64_STR_SIZE];
7770         int len;
7771         u64 val;
7772
7773         if (!param)
7774                 return -EFAULT;
7775
7776         val = *param->val;
7777
7778         if (cnt > sizeof(buf))
7779                 cnt = sizeof(buf);
7780
7781         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7782
7783         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7784 }
7785
7786 const struct file_operations trace_min_max_fops = {
7787         .open           = tracing_open_generic,
7788         .read           = trace_min_max_read,
7789         .write          = trace_min_max_write,
7790 };
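
/*
 * Hypothetical usage sketch (the names below are made up for illustration;
 * only the struct fields and trace_min_max_fops come from this file, and
 * "parent" stands in for some tracefs dentry): expose a u64 tunable clamped
 * to [1, 100].
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_param", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */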
7791
7792 #define TRACING_LOG_ERRS_MAX    8
7793 #define TRACING_LOG_LOC_MAX     128
7794
7795 #define CMD_PREFIX "  Command: "
7796
7797 struct err_info {
7798         const char      **errs; /* ptr to loc-specific array of err strings */
7799         u8              type;   /* index into errs -> specific err string */
7800         u16             pos;    /* caret position */
7801         u64             ts;
7802 };
7803
7804 struct tracing_log_err {
7805         struct list_head        list;
7806         struct err_info         info;
7807         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7808         char                    *cmd;                     /* what caused err */
7809 };
7810
7811 static DEFINE_MUTEX(tracing_err_log_lock);
7812
7813 static struct tracing_log_err *alloc_tracing_log_err(int len)
7814 {
7815         struct tracing_log_err *err;
7816
7817         err = kzalloc(sizeof(*err), GFP_KERNEL);
7818         if (!err)
7819                 return ERR_PTR(-ENOMEM);
7820
7821         err->cmd = kzalloc(len, GFP_KERNEL);
7822         if (!err->cmd) {
7823                 kfree(err);
7824                 return ERR_PTR(-ENOMEM);
7825         }
7826
7827         return err;
7828 }
7829
7830 static void free_tracing_log_err(struct tracing_log_err *err)
7831 {
7832         kfree(err->cmd);
7833         kfree(err);
7834 }
7835
7836 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7837                                                    int len)
7838 {
7839         struct tracing_log_err *err;
7840         char *cmd;
7841
7842         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7843                 err = alloc_tracing_log_err(len);
7844                 if (PTR_ERR(err) != -ENOMEM)
7845                         tr->n_err_log_entries++;
7846
7847                 return err;
7848         }
7849         cmd = kzalloc(len, GFP_KERNEL);
7850         if (!cmd)
7851                 return ERR_PTR(-ENOMEM);
7852         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7853         kfree(err->cmd);
7854         err->cmd = cmd;
7855         list_del(&err->list);
7856
7857         return err;
7858 }
7859
7860 /**
7861  * err_pos - find the position of a string within a command for error careting
7862  * @cmd: The tracing command that caused the error
7863  * @str: The string to position the caret at within @cmd
7864  *
7865  * Finds the position of the first occurrence of @str within @cmd.  The
7866  * return value can be passed to tracing_log_err() for caret placement
7867  * within @cmd.
7868  *
7869  * Returns the index within @cmd of the first occurrence of @str or 0
7870  * if @str was not found.
7871  */
7872 unsigned int err_pos(char *cmd, const char *str)
7873 {
7874         char *found;
7875
7876         if (WARN_ON(!strlen(cmd)))
7877                 return 0;
7878
7879         found = strstr(cmd, str);
7880         if (found)
7881                 return found - cmd;
7882
7883         return 0;
7884 }
7885
7886 /**
7887  * tracing_log_err - write an error to the tracing error log
7888  * @tr: The associated trace array for the error (NULL for top level array)
7889  * @loc: A string describing where the error occurred
7890  * @cmd: The tracing command that caused the error
7891  * @errs: The array of loc-specific static error strings
7892  * @type: The index into errs[], which produces the specific static err string
7893  * @pos: The position the caret should be placed in the cmd
7894  *
7895  * Writes an error into tracing/error_log of the form:
7896  *
7897  * <loc>: error: <text>
7898  *   Command: <cmd>
7899  *              ^
7900  *
7901  * tracing/error_log is a small log file containing the last
7902  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7903  * unless there has been a tracing error, and the error log can be
7904  * cleared and have its memory freed by writing the empty string in
7905  * cleared, and its memory freed, by writing the empty string to it in
7906  * truncation mode, i.e. echo > tracing/error_log.
7907  * NOTE: the @errs array along with the @type param are used to
7908  * produce a static error string - this string is not copied and saved
7909  * when the error is logged - only a pointer to it is saved.  See
7910  * existing callers for examples of how static strings are typically
7911  * defined for use with tracing_log_err().
7912  */
7913 void tracing_log_err(struct trace_array *tr,
7914                      const char *loc, const char *cmd,
7915                      const char **errs, u8 type, u16 pos)
7916 {
7917         struct tracing_log_err *err;
7918         int len = 0;
7919
7920         if (!tr)
7921                 tr = &global_trace;
7922
7923         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7924
7925         mutex_lock(&tracing_err_log_lock);
7926         err = get_tracing_log_err(tr, len);
7927         if (PTR_ERR(err) == -ENOMEM) {
7928                 mutex_unlock(&tracing_err_log_lock);
7929                 return;
7930         }
7931
7932         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7933         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7934
7935         err->info.errs = errs;
7936         err->info.type = type;
7937         err->info.pos = pos;
7938         err->info.ts = local_clock();
7939
7940         list_add_tail(&err->list, &tr->err_log);
7941         mutex_unlock(&tracing_err_log_lock);
7942 }
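
/*
 * Illustrative sketch, not part of the original file: callers typically keep
 * a static, location-specific array of error strings and combine
 * tracing_log_err() with err_pos() for caret placement; @type indexes into
 * that array.  The names below (my_parse_errs, bad_token, ...) are
 * hypothetical:
 *
 *	static const char *my_parse_errs[] = { "Missing field", "Invalid value" };
 *
 *	tracing_log_err(tr, "my_subsys: parse", cmd, my_parse_errs,
 *			1, err_pos(cmd, bad_token));
 */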
7943
7944 static void clear_tracing_err_log(struct trace_array *tr)
7945 {
7946         struct tracing_log_err *err, *next;
7947
7948         mutex_lock(&tracing_err_log_lock);
7949         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7950                 list_del(&err->list);
7951                 free_tracing_log_err(err);
7952         }
7953
7954         tr->n_err_log_entries = 0;
7955         mutex_unlock(&tracing_err_log_lock);
7956 }
7957
7958 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7959 {
7960         struct trace_array *tr = m->private;
7961
7962         mutex_lock(&tracing_err_log_lock);
7963
7964         return seq_list_start(&tr->err_log, *pos);
7965 }
7966
7967 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7968 {
7969         struct trace_array *tr = m->private;
7970
7971         return seq_list_next(v, &tr->err_log, pos);
7972 }
7973
7974 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7975 {
7976         mutex_unlock(&tracing_err_log_lock);
7977 }
7978
7979 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7980 {
7981         u16 i;
7982
7983         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7984                 seq_putc(m, ' ');
7985         for (i = 0; i < pos; i++)
7986                 seq_putc(m, ' ');
7987         seq_puts(m, "^\n");
7988 }
7989
7990 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7991 {
7992         struct tracing_log_err *err = v;
7993
7994         if (err) {
7995                 const char *err_text = err->info.errs[err->info.type];
7996                 u64 sec = err->info.ts;
7997                 u32 nsec;
7998
7999                 nsec = do_div(sec, NSEC_PER_SEC);
8000                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8001                            err->loc, err_text);
8002                 seq_printf(m, "%s", err->cmd);
8003                 tracing_err_log_show_pos(m, err->info.pos);
8004         }
8005
8006         return 0;
8007 }
8008
8009 static const struct seq_operations tracing_err_log_seq_ops = {
8010         .start  = tracing_err_log_seq_start,
8011         .next   = tracing_err_log_seq_next,
8012         .stop   = tracing_err_log_seq_stop,
8013         .show   = tracing_err_log_seq_show
8014 };
8015
8016 static int tracing_err_log_open(struct inode *inode, struct file *file)
8017 {
8018         struct trace_array *tr = inode->i_private;
8019         int ret = 0;
8020
8021         ret = tracing_check_open_get_tr(tr);
8022         if (ret)
8023                 return ret;
8024
8025         /* If this file was opened for write, then erase contents */
8026         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8027                 clear_tracing_err_log(tr);
8028
8029         if (file->f_mode & FMODE_READ) {
8030                 ret = seq_open(file, &tracing_err_log_seq_ops);
8031                 if (!ret) {
8032                         struct seq_file *m = file->private_data;
8033                         m->private = tr;
8034                 } else {
8035                         trace_array_put(tr);
8036                 }
8037         }
8038         return ret;
8039 }
8040
8041 static ssize_t tracing_err_log_write(struct file *file,
8042                                      const char __user *buffer,
8043                                      size_t count, loff_t *ppos)
8044 {
8045         return count;
8046 }
8047
8048 static int tracing_err_log_release(struct inode *inode, struct file *file)
8049 {
8050         struct trace_array *tr = inode->i_private;
8051
8052         trace_array_put(tr);
8053
8054         if (file->f_mode & FMODE_READ)
8055                 seq_release(inode, file);
8056
8057         return 0;
8058 }
8059
8060 static const struct file_operations tracing_err_log_fops = {
8061         .open           = tracing_err_log_open,
8062         .write          = tracing_err_log_write,
8063         .read           = seq_read,
8064         .llseek         = seq_lseek,
8065         .release        = tracing_err_log_release,
8066 };
8067
8068 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8069 {
8070         struct trace_array *tr = inode->i_private;
8071         struct ftrace_buffer_info *info;
8072         int ret;
8073
8074         ret = tracing_check_open_get_tr(tr);
8075         if (ret)
8076                 return ret;
8077
8078         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8079         if (!info) {
8080                 trace_array_put(tr);
8081                 return -ENOMEM;
8082         }
8083
8084         mutex_lock(&trace_types_lock);
8085
8086         info->iter.tr           = tr;
8087         info->iter.cpu_file     = tracing_get_cpu(inode);
8088         info->iter.trace        = tr->current_trace;
8089         info->iter.array_buffer = &tr->array_buffer;
8090         info->spare             = NULL;
8091         /* Force reading ring buffer for first read */
8092         info->read              = (unsigned int)-1;
8093
8094         filp->private_data = info;
8095
8096         tr->trace_ref++;
8097
8098         mutex_unlock(&trace_types_lock);
8099
8100         ret = nonseekable_open(inode, filp);
8101         if (ret < 0)
8102                 trace_array_put(tr);
8103
8104         return ret;
8105 }
8106
8107 static __poll_t
8108 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8109 {
8110         struct ftrace_buffer_info *info = filp->private_data;
8111         struct trace_iterator *iter = &info->iter;
8112
8113         return trace_poll(iter, filp, poll_table);
8114 }
8115
8116 static ssize_t
8117 tracing_buffers_read(struct file *filp, char __user *ubuf,
8118                      size_t count, loff_t *ppos)
8119 {
8120         struct ftrace_buffer_info *info = filp->private_data;
8121         struct trace_iterator *iter = &info->iter;
8122         ssize_t ret = 0;
8123         ssize_t size;
8124
8125         if (!count)
8126                 return 0;
8127
8128 #ifdef CONFIG_TRACER_MAX_TRACE
8129         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8130                 return -EBUSY;
8131 #endif
8132
8133         if (!info->spare) {
8134                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8135                                                           iter->cpu_file);
8136                 if (IS_ERR(info->spare)) {
8137                         ret = PTR_ERR(info->spare);
8138                         info->spare = NULL;
8139                 } else {
8140                         info->spare_cpu = iter->cpu_file;
8141                 }
8142         }
8143         if (!info->spare)
8144                 return ret;
8145
8146         /* Do we have previous read data to read? */
8147         if (info->read < PAGE_SIZE)
8148                 goto read;
8149
8150  again:
8151         trace_access_lock(iter->cpu_file);
8152         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8153                                     &info->spare,
8154                                     count,
8155                                     iter->cpu_file, 0);
8156         trace_access_unlock(iter->cpu_file);
8157
8158         if (ret < 0) {
8159                 if (trace_empty(iter)) {
8160                         if ((filp->f_flags & O_NONBLOCK))
8161                                 return -EAGAIN;
8162
8163                         ret = wait_on_pipe(iter, 0);
8164                         if (ret)
8165                                 return ret;
8166
8167                         goto again;
8168                 }
8169                 return 0;
8170         }
8171
8172         info->read = 0;
8173  read:
8174         size = PAGE_SIZE - info->read;
8175         if (size > count)
8176                 size = count;
8177
8178         ret = copy_to_user(ubuf, info->spare + info->read, size);
8179         if (ret == size)
8180                 return -EFAULT;
8181
8182         size -= ret;
8183
8184         *ppos += size;
8185         info->read += size;
8186
8187         return size;
8188 }
8189
8190 static int tracing_buffers_release(struct inode *inode, struct file *file)
8191 {
8192         struct ftrace_buffer_info *info = file->private_data;
8193         struct trace_iterator *iter = &info->iter;
8194
8195         mutex_lock(&trace_types_lock);
8196
8197         iter->tr->trace_ref--;
8198
8199         __trace_array_put(iter->tr);
8200
8201         iter->wait_index++;
8202         /* Make sure the waiters see the new wait_index */
8203         smp_wmb();
8204
8205         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8206
8207         if (info->spare)
8208                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8209                                            info->spare_cpu, info->spare);
8210         kvfree(info);
8211
8212         mutex_unlock(&trace_types_lock);
8213
8214         return 0;
8215 }
8216
8217 struct buffer_ref {
8218         struct trace_buffer     *buffer;
8219         void                    *page;
8220         int                     cpu;
8221         refcount_t              refcount;
8222 };
8223
8224 static void buffer_ref_release(struct buffer_ref *ref)
8225 {
8226         if (!refcount_dec_and_test(&ref->refcount))
8227                 return;
8228         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8229         kfree(ref);
8230 }
8231
8232 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8233                                     struct pipe_buffer *buf)
8234 {
8235         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8236
8237         buffer_ref_release(ref);
8238         buf->private = 0;
8239 }
8240
8241 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8242                                 struct pipe_buffer *buf)
8243 {
8244         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8245
8246         if (refcount_read(&ref->refcount) > INT_MAX/2)
8247                 return false;
8248
8249         refcount_inc(&ref->refcount);
8250         return true;
8251 }
8252
8253 /* Pipe buffer operations for a buffer. */
8254 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8255         .release                = buffer_pipe_buf_release,
8256         .get                    = buffer_pipe_buf_get,
8257 };
8258
8259 /*
8260  * Callback from splice_to_pipe(), if we need to release some pages
8261  * at the end of the spd in case we errored out while filling the pipe.
8262  */
8263 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8264 {
8265         struct buffer_ref *ref =
8266                 (struct buffer_ref *)spd->partial[i].private;
8267
8268         buffer_ref_release(ref);
8269         spd->partial[i].private = 0;
8270 }
8271
8272 static ssize_t
8273 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8274                             struct pipe_inode_info *pipe, size_t len,
8275                             unsigned int flags)
8276 {
8277         struct ftrace_buffer_info *info = file->private_data;
8278         struct trace_iterator *iter = &info->iter;
8279         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8280         struct page *pages_def[PIPE_DEF_BUFFERS];
8281         struct splice_pipe_desc spd = {
8282                 .pages          = pages_def,
8283                 .partial        = partial_def,
8284                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8285                 .ops            = &buffer_pipe_buf_ops,
8286                 .spd_release    = buffer_spd_release,
8287         };
8288         struct buffer_ref *ref;
8289         int entries, i;
8290         ssize_t ret = 0;
8291
8292 #ifdef CONFIG_TRACER_MAX_TRACE
8293         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8294                 return -EBUSY;
8295 #endif
8296
8297         if (*ppos & (PAGE_SIZE - 1))
8298                 return -EINVAL;
8299
8300         if (len & (PAGE_SIZE - 1)) {
8301                 if (len < PAGE_SIZE)
8302                         return -EINVAL;
8303                 len &= PAGE_MASK;
8304         }
8305
8306         if (splice_grow_spd(pipe, &spd))
8307                 return -ENOMEM;
8308
8309  again:
8310         trace_access_lock(iter->cpu_file);
8311         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8312
8313         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8314                 struct page *page;
8315                 int r;
8316
8317                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8318                 if (!ref) {
8319                         ret = -ENOMEM;
8320                         break;
8321                 }
8322
8323                 refcount_set(&ref->refcount, 1);
8324                 ref->buffer = iter->array_buffer->buffer;
8325                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8326                 if (IS_ERR(ref->page)) {
8327                         ret = PTR_ERR(ref->page);
8328                         ref->page = NULL;
8329                         kfree(ref);
8330                         break;
8331                 }
8332                 ref->cpu = iter->cpu_file;
8333
8334                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8335                                           len, iter->cpu_file, 1);
8336                 if (r < 0) {
8337                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8338                                                    ref->page);
8339                         kfree(ref);
8340                         break;
8341                 }
8342
8343                 page = virt_to_page(ref->page);
8344
8345                 spd.pages[i] = page;
8346                 spd.partial[i].len = PAGE_SIZE;
8347                 spd.partial[i].offset = 0;
8348                 spd.partial[i].private = (unsigned long)ref;
8349                 spd.nr_pages++;
8350                 *ppos += PAGE_SIZE;
8351
8352                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8353         }
8354
8355         trace_access_unlock(iter->cpu_file);
8356         spd.nr_pages = i;
8357
8358         /* did we read anything? */
8359         if (!spd.nr_pages) {
8360                 long wait_index;
8361
8362                 if (ret)
8363                         goto out;
8364
8365                 ret = -EAGAIN;
8366                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8367                         goto out;
8368
8369                 wait_index = READ_ONCE(iter->wait_index);
8370
8371                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8372                 if (ret)
8373                         goto out;
8374
8375                 /* No need to wait after waking up when tracing is off */
8376                 if (!tracer_tracing_is_on(iter->tr))
8377                         goto out;
8378
8379                 /* Make sure we see the new wait_index */
8380                 smp_rmb();
8381                 if (wait_index != iter->wait_index)
8382                         goto out;
8383
8384                 goto again;
8385         }
8386
8387         ret = splice_to_pipe(pipe, &spd);
8388 out:
8389         splice_shrink_spd(&spd);
8390
8391         return ret;
8392 }
8393
8394 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8395 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8396 {
8397         struct ftrace_buffer_info *info = file->private_data;
8398         struct trace_iterator *iter = &info->iter;
8399
8400         if (cmd)
8401                 return -ENOIOCTLCMD;
8402
8403         mutex_lock(&trace_types_lock);
8404
8405         iter->wait_index++;
8406         /* Make sure the waiters see the new wait_index */
8407         smp_wmb();
8408
8409         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8410
8411         mutex_unlock(&trace_types_lock);
8412         return 0;
8413 }
8414
8415 static const struct file_operations tracing_buffers_fops = {
8416         .open           = tracing_buffers_open,
8417         .read           = tracing_buffers_read,
8418         .poll           = tracing_buffers_poll,
8419         .release        = tracing_buffers_release,
8420         .splice_read    = tracing_buffers_splice_read,
8421         .unlocked_ioctl = tracing_buffers_ioctl,
8422         .llseek         = no_llseek,
8423 };
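
/*
 * Note (added commentary, not part of the original file): these fops back the
 * per-cpu trace_pipe_raw files created in tracing_init_tracefs_percpu()
 * below.  User space consumes whole ring-buffer pages via read() or splice(),
 * and an ioctl(fd, 0, 0) merely wakes any blocked readers, as implemented in
 * tracing_buffers_ioctl() above.
 */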
8424
8425 static ssize_t
8426 tracing_stats_read(struct file *filp, char __user *ubuf,
8427                    size_t count, loff_t *ppos)
8428 {
8429         struct inode *inode = file_inode(filp);
8430         struct trace_array *tr = inode->i_private;
8431         struct array_buffer *trace_buf = &tr->array_buffer;
8432         int cpu = tracing_get_cpu(inode);
8433         struct trace_seq *s;
8434         unsigned long cnt;
8435         unsigned long long t;
8436         unsigned long usec_rem;
8437
8438         s = kmalloc(sizeof(*s), GFP_KERNEL);
8439         if (!s)
8440                 return -ENOMEM;
8441
8442         trace_seq_init(s);
8443
8444         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8445         trace_seq_printf(s, "entries: %ld\n", cnt);
8446
8447         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8448         trace_seq_printf(s, "overrun: %ld\n", cnt);
8449
8450         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8451         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8452
8453         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8454         trace_seq_printf(s, "bytes: %ld\n", cnt);
8455
8456         if (trace_clocks[tr->clock_id].in_ns) {
8457                 /* local or global for trace_clock */
8458                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8459                 usec_rem = do_div(t, USEC_PER_SEC);
8460                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8461                                                                 t, usec_rem);
8462
8463                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8464                 usec_rem = do_div(t, USEC_PER_SEC);
8465                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8466         } else {
8467                 /* counter or tsc mode for trace_clock */
8468                 trace_seq_printf(s, "oldest event ts: %llu\n",
8469                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8470
8471                 trace_seq_printf(s, "now ts: %llu\n",
8472                                 ring_buffer_time_stamp(trace_buf->buffer));
8473         }
8474
8475         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8476         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8477
8478         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8479         trace_seq_printf(s, "read events: %ld\n", cnt);
8480
8481         count = simple_read_from_buffer(ubuf, count, ppos,
8482                                         s->buffer, trace_seq_used(s));
8483
8484         kfree(s);
8485
8486         return count;
8487 }
8488
8489 static const struct file_operations tracing_stats_fops = {
8490         .open           = tracing_open_generic_tr,
8491         .read           = tracing_stats_read,
8492         .llseek         = generic_file_llseek,
8493         .release        = tracing_release_generic_tr,
8494 };
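
/*
 * Illustrative only, not from the original file: the output produced by
 * tracing_stats_read() for a per_cpu/cpuN/stats file looks roughly like the
 * following (all values made up):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53472
 *	oldest event ts:  1234.567890
 *	now ts:  1250.000123
 *	dropped events: 0
 *	read events: 128
 */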
8495
8496 #ifdef CONFIG_DYNAMIC_FTRACE
8497
8498 static ssize_t
8499 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8500                   size_t cnt, loff_t *ppos)
8501 {
8502         ssize_t ret;
8503         char *buf;
8504         int r;
8505
8506         /* 256 should be plenty to hold the amount needed */
8507         buf = kmalloc(256, GFP_KERNEL);
8508         if (!buf)
8509                 return -ENOMEM;
8510
8511         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8512                       ftrace_update_tot_cnt,
8513                       ftrace_number_of_pages,
8514                       ftrace_number_of_groups);
8515
8516         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8517         kfree(buf);
8518         return ret;
8519 }
8520
8521 static const struct file_operations tracing_dyn_info_fops = {
8522         .open           = tracing_open_generic,
8523         .read           = tracing_read_dyn_info,
8524         .llseek         = generic_file_llseek,
8525 };
8526 #endif /* CONFIG_DYNAMIC_FTRACE */
8527
8528 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8529 static void
8530 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8531                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8532                 void *data)
8533 {
8534         tracing_snapshot_instance(tr);
8535 }
8536
8537 static void
8538 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8539                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8540                       void *data)
8541 {
8542         struct ftrace_func_mapper *mapper = data;
8543         long *count = NULL;
8544
8545         if (mapper)
8546                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8547
8548         if (count) {
8549
8550                 if (*count <= 0)
8551                         return;
8552
8553                 (*count)--;
8554         }
8555
8556         tracing_snapshot_instance(tr);
8557 }
8558
8559 static int
8560 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8561                       struct ftrace_probe_ops *ops, void *data)
8562 {
8563         struct ftrace_func_mapper *mapper = data;
8564         long *count = NULL;
8565
8566         seq_printf(m, "%ps:", (void *)ip);
8567
8568         seq_puts(m, "snapshot");
8569
8570         if (mapper)
8571                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8572
8573         if (count)
8574                 seq_printf(m, ":count=%ld\n", *count);
8575         else
8576                 seq_puts(m, ":unlimited\n");
8577
8578         return 0;
8579 }
8580
8581 static int
8582 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8583                      unsigned long ip, void *init_data, void **data)
8584 {
8585         struct ftrace_func_mapper *mapper = *data;
8586
8587         if (!mapper) {
8588                 mapper = allocate_ftrace_func_mapper();
8589                 if (!mapper)
8590                         return -ENOMEM;
8591                 *data = mapper;
8592         }
8593
8594         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8595 }
8596
8597 static void
8598 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8599                      unsigned long ip, void *data)
8600 {
8601         struct ftrace_func_mapper *mapper = data;
8602
8603         if (!ip) {
8604                 if (!mapper)
8605                         return;
8606                 free_ftrace_func_mapper(mapper, NULL);
8607                 return;
8608         }
8609
8610         ftrace_func_mapper_remove_ip(mapper, ip);
8611 }
8612
8613 static struct ftrace_probe_ops snapshot_probe_ops = {
8614         .func                   = ftrace_snapshot,
8615         .print                  = ftrace_snapshot_print,
8616 };
8617
8618 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8619         .func                   = ftrace_count_snapshot,
8620         .print                  = ftrace_snapshot_print,
8621         .init                   = ftrace_snapshot_init,
8622         .free                   = ftrace_snapshot_free,
8623 };
8624
8625 static int
8626 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8627                                char *glob, char *cmd, char *param, int enable)
8628 {
8629         struct ftrace_probe_ops *ops;
8630         void *count = (void *)-1;
8631         char *number;
8632         int ret;
8633
8634         if (!tr)
8635                 return -ENODEV;
8636
8637         /* hash funcs only work with set_ftrace_filter */
8638         if (!enable)
8639                 return -EINVAL;
8640
8641         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8642
8643         if (glob[0] == '!')
8644                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8645
8646         if (!param)
8647                 goto out_reg;
8648
8649         number = strsep(&param, ":");
8650
8651         if (!strlen(number))
8652                 goto out_reg;
8653
8654         /*
8655          * We use the callback data field (which is a pointer)
8656          * as our counter.
8657          */
8658         ret = kstrtoul(number, 0, (unsigned long *)&count);
8659         if (ret)
8660                 return ret;
8661
8662  out_reg:
8663         ret = tracing_alloc_snapshot_instance(tr);
8664         if (ret < 0)
8665                 goto out;
8666
8667         ret = register_ftrace_function_probe(glob, tr, ops, count);
8668
8669  out:
8670         return ret < 0 ? ret : 0;
8671 }
8672
8673 static struct ftrace_func_command ftrace_snapshot_cmd = {
8674         .name                   = "snapshot",
8675         .func                   = ftrace_trace_snapshot_callback,
8676 };
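
/*
 * Illustrative only, not from the original file: once this command is
 * registered, writing e.g. "some_func:snapshot" or "some_func:snapshot:3" to
 * set_ftrace_filter arms a snapshot probe on that function (the optional
 * number limits how many snapshots are taken), and prefixing the glob with
 * '!' removes the probe again, as handled by ftrace_trace_snapshot_callback().
 */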
8677
8678 static __init int register_snapshot_cmd(void)
8679 {
8680         return register_ftrace_command(&ftrace_snapshot_cmd);
8681 }
8682 #else
8683 static inline __init int register_snapshot_cmd(void) { return 0; }
8684 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8685
8686 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8687 {
8688         if (WARN_ON(!tr->dir))
8689                 return ERR_PTR(-ENODEV);
8690
8691         /* Top directory uses NULL as the parent */
8692         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8693                 return NULL;
8694
8695         /* All sub buffers have a descriptor */
8696         return tr->dir;
8697 }
8698
8699 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8700 {
8701         struct dentry *d_tracer;
8702
8703         if (tr->percpu_dir)
8704                 return tr->percpu_dir;
8705
8706         d_tracer = tracing_get_dentry(tr);
8707         if (IS_ERR(d_tracer))
8708                 return NULL;
8709
8710         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8711
8712         MEM_FAIL(!tr->percpu_dir,
8713                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8714
8715         return tr->percpu_dir;
8716 }
8717
8718 static struct dentry *
8719 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8720                       void *data, long cpu, const struct file_operations *fops)
8721 {
8722         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8723
8724         if (ret) /* See tracing_get_cpu() */
8725                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8726         return ret;
8727 }
8728
8729 static void
8730 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8731 {
8732         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8733         struct dentry *d_cpu;
8734         char cpu_dir[30]; /* 30 characters should be more than enough */
8735
8736         if (!d_percpu)
8737                 return;
8738
8739         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8740         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8741         if (!d_cpu) {
8742                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8743                 return;
8744         }
8745
8746         /* per cpu trace_pipe */
8747         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8748                                 tr, cpu, &tracing_pipe_fops);
8749
8750         /* per cpu trace */
8751         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8752                                 tr, cpu, &tracing_fops);
8753
8754         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8755                                 tr, cpu, &tracing_buffers_fops);
8756
8757         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8758                                 tr, cpu, &tracing_stats_fops);
8759
8760         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8761                                 tr, cpu, &tracing_entries_fops);
8762
8763 #ifdef CONFIG_TRACER_SNAPSHOT
8764         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8765                                 tr, cpu, &snapshot_fops);
8766
8767         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8768                                 tr, cpu, &snapshot_raw_fops);
8769 #endif
8770 }
8771
8772 #ifdef CONFIG_FTRACE_SELFTEST
8773 /* Let selftest have access to static functions in this file */
8774 #include "trace_selftest.c"
8775 #endif
8776
8777 static ssize_t
8778 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8779                         loff_t *ppos)
8780 {
8781         struct trace_option_dentry *topt = filp->private_data;
8782         char *buf;
8783
8784         if (topt->flags->val & topt->opt->bit)
8785                 buf = "1\n";
8786         else
8787                 buf = "0\n";
8788
8789         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8790 }
8791
8792 static ssize_t
8793 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8794                          loff_t *ppos)
8795 {
8796         struct trace_option_dentry *topt = filp->private_data;
8797         unsigned long val;
8798         int ret;
8799
8800         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8801         if (ret)
8802                 return ret;
8803
8804         if (val != 0 && val != 1)
8805                 return -EINVAL;
8806
8807         if (!!(topt->flags->val & topt->opt->bit) != val) {
8808                 mutex_lock(&trace_types_lock);
8809                 ret = __set_tracer_option(topt->tr, topt->flags,
8810                                           topt->opt, !val);
8811                 mutex_unlock(&trace_types_lock);
8812                 if (ret)
8813                         return ret;
8814         }
8815
8816         *ppos += cnt;
8817
8818         return cnt;
8819 }
8820
8821
8822 static const struct file_operations trace_options_fops = {
8823         .open = tracing_open_generic,
8824         .read = trace_options_read,
8825         .write = trace_options_write,
8826         .llseek = generic_file_llseek,
8827 };
8828
8829 /*
8830  * In order to pass in both the trace_array descriptor as well as the index
8831  * to the flag that the trace option file represents, the trace_array
8832  * has a character array of trace_flags_index[], which holds the index
8833  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8834  * The address of this character array is passed to the flag option file
8835  * read/write callbacks.
8836  *
8837  * In order to extract both the index and the trace_array descriptor,
8838  * get_tr_index() uses the following algorithm.
8839  *
8840  *   idx = *ptr;
8841  *
8842  * The pointer passed in holds the address of one element of that index
8843  * array, and since index[i] == i, dereferencing it yields the index.
8844  *
8845  * Then, to get the trace_array descriptor, subtracting that index from
8846  * the pointer gives the start of the index array itself:
8847  *
8848  *   ptr - idx == &index[0]
8849  *
8850  * Then a simple container_of() from that pointer gets us to the
8851  * trace_array descriptor.
8852  */
8853 static void get_tr_index(void *data, struct trace_array **ptr,
8854                          unsigned int *pindex)
8855 {
8856         *pindex = *(unsigned char *)data;
8857
8858         *ptr = container_of(data - *pindex, struct trace_array,
8859                             trace_flags_index);
8860 }
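
/*
 * Worked example (added for illustration): if data == &tr->trace_flags_index[3],
 * then *pindex == 3 and data - 3 == &tr->trace_flags_index[0], so the
 * container_of() above recovers the enclosing trace_array.
 */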
8861
8862 static ssize_t
8863 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8864                         loff_t *ppos)
8865 {
8866         void *tr_index = filp->private_data;
8867         struct trace_array *tr;
8868         unsigned int index;
8869         char *buf;
8870
8871         get_tr_index(tr_index, &tr, &index);
8872
8873         if (tr->trace_flags & (1 << index))
8874                 buf = "1\n";
8875         else
8876                 buf = "0\n";
8877
8878         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8879 }
8880
8881 static ssize_t
8882 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8883                          loff_t *ppos)
8884 {
8885         void *tr_index = filp->private_data;
8886         struct trace_array *tr;
8887         unsigned int index;
8888         unsigned long val;
8889         int ret;
8890
8891         get_tr_index(tr_index, &tr, &index);
8892
8893         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8894         if (ret)
8895                 return ret;
8896
8897         if (val != 0 && val != 1)
8898                 return -EINVAL;
8899
8900         mutex_lock(&event_mutex);
8901         mutex_lock(&trace_types_lock);
8902         ret = set_tracer_flag(tr, 1 << index, val);
8903         mutex_unlock(&trace_types_lock);
8904         mutex_unlock(&event_mutex);
8905
8906         if (ret < 0)
8907                 return ret;
8908
8909         *ppos += cnt;
8910
8911         return cnt;
8912 }
8913
8914 static const struct file_operations trace_options_core_fops = {
8915         .open = tracing_open_generic,
8916         .read = trace_options_core_read,
8917         .write = trace_options_core_write,
8918         .llseek = generic_file_llseek,
8919 };
8920
8921 struct dentry *trace_create_file(const char *name,
8922                                  umode_t mode,
8923                                  struct dentry *parent,
8924                                  void *data,
8925                                  const struct file_operations *fops)
8926 {
8927         struct dentry *ret;
8928
8929         ret = tracefs_create_file(name, mode, parent, data, fops);
8930         if (!ret)
8931                 pr_warn("Could not create tracefs '%s' entry\n", name);
8932
8933         return ret;
8934 }
8935
8936
8937 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8938 {
8939         struct dentry *d_tracer;
8940
8941         if (tr->options)
8942                 return tr->options;
8943
8944         d_tracer = tracing_get_dentry(tr);
8945         if (IS_ERR(d_tracer))
8946                 return NULL;
8947
8948         tr->options = tracefs_create_dir("options", d_tracer);
8949         if (!tr->options) {
8950                 pr_warn("Could not create tracefs directory 'options'\n");
8951                 return NULL;
8952         }
8953
8954         return tr->options;
8955 }
8956
8957 static void
8958 create_trace_option_file(struct trace_array *tr,
8959                          struct trace_option_dentry *topt,
8960                          struct tracer_flags *flags,
8961                          struct tracer_opt *opt)
8962 {
8963         struct dentry *t_options;
8964
8965         t_options = trace_options_init_dentry(tr);
8966         if (!t_options)
8967                 return;
8968
8969         topt->flags = flags;
8970         topt->opt = opt;
8971         topt->tr = tr;
8972
8973         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8974                                         t_options, topt, &trace_options_fops);
8975
8976 }
8977
8978 static void
8979 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8980 {
8981         struct trace_option_dentry *topts;
8982         struct trace_options *tr_topts;
8983         struct tracer_flags *flags;
8984         struct tracer_opt *opts;
8985         int cnt;
8986         int i;
8987
8988         if (!tracer)
8989                 return;
8990
8991         flags = tracer->flags;
8992
8993         if (!flags || !flags->opts)
8994                 return;
8995
8996         /*
8997          * If this is an instance, only create flags for tracers
8998          * the instance may have.
8999          */
9000         if (!trace_ok_for_array(tracer, tr))
9001                 return;
9002
9003         for (i = 0; i < tr->nr_topts; i++) {
9004                 /* Make sure there are no duplicate flags. */
9005                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9006                         return;
9007         }
9008
9009         opts = flags->opts;
9010
9011         for (cnt = 0; opts[cnt].name; cnt++)
9012                 ;
9013
9014         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9015         if (!topts)
9016                 return;
9017
9018         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9019                             GFP_KERNEL);
9020         if (!tr_topts) {
9021                 kfree(topts);
9022                 return;
9023         }
9024
9025         tr->topts = tr_topts;
9026         tr->topts[tr->nr_topts].tracer = tracer;
9027         tr->topts[tr->nr_topts].topts = topts;
9028         tr->nr_topts++;
9029
9030         for (cnt = 0; opts[cnt].name; cnt++) {
9031                 create_trace_option_file(tr, &topts[cnt], flags,
9032                                          &opts[cnt]);
9033                 MEM_FAIL(topts[cnt].entry == NULL,
9034                           "Failed to create trace option: %s",
9035                           opts[cnt].name);
9036         }
9037 }
9038
9039 static struct dentry *
9040 create_trace_option_core_file(struct trace_array *tr,
9041                               const char *option, long index)
9042 {
9043         struct dentry *t_options;
9044
9045         t_options = trace_options_init_dentry(tr);
9046         if (!t_options)
9047                 return NULL;
9048
9049         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9050                                  (void *)&tr->trace_flags_index[index],
9051                                  &trace_options_core_fops);
9052 }
9053
9054 static void create_trace_options_dir(struct trace_array *tr)
9055 {
9056         struct dentry *t_options;
9057         bool top_level = tr == &global_trace;
9058         int i;
9059
9060         t_options = trace_options_init_dentry(tr);
9061         if (!t_options)
9062                 return;
9063
9064         for (i = 0; trace_options[i]; i++) {
9065                 if (top_level ||
9066                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9067                         create_trace_option_core_file(tr, trace_options[i], i);
9068         }
9069 }
9070
9071 static ssize_t
9072 rb_simple_read(struct file *filp, char __user *ubuf,
9073                size_t cnt, loff_t *ppos)
9074 {
9075         struct trace_array *tr = filp->private_data;
9076         char buf[64];
9077         int r;
9078
9079         r = tracer_tracing_is_on(tr);
9080         r = sprintf(buf, "%d\n", r);
9081
9082         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9083 }
9084
9085 static ssize_t
9086 rb_simple_write(struct file *filp, const char __user *ubuf,
9087                 size_t cnt, loff_t *ppos)
9088 {
9089         struct trace_array *tr = filp->private_data;
9090         struct trace_buffer *buffer = tr->array_buffer.buffer;
9091         unsigned long val;
9092         int ret;
9093
9094         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9095         if (ret)
9096                 return ret;
9097
9098         if (buffer) {
9099                 mutex_lock(&trace_types_lock);
9100                 if (!!val == tracer_tracing_is_on(tr)) {
9101                         val = 0; /* do nothing */
9102                 } else if (val) {
9103                         tracer_tracing_on(tr);
9104                         if (tr->current_trace->start)
9105                                 tr->current_trace->start(tr);
9106                 } else {
9107                         tracer_tracing_off(tr);
9108                         if (tr->current_trace->stop)
9109                                 tr->current_trace->stop(tr);
9110                         /* Wake up any waiters */
9111                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9112                 }
9113                 mutex_unlock(&trace_types_lock);
9114         }
9115
9116         (*ppos)++;
9117
9118         return cnt;
9119 }
9120
9121 static const struct file_operations rb_simple_fops = {
9122         .open           = tracing_open_generic_tr,
9123         .read           = rb_simple_read,
9124         .write          = rb_simple_write,
9125         .release        = tracing_release_generic_tr,
9126         .llseek         = default_llseek,
9127 };
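
/*
 * Note (added commentary, not part of the original file): rb_simple_fops
 * backs the per-instance "tracing_on" file; writing 1 turns recording into
 * the ring buffer back on and writing 0 turns it off and wakes any waiters,
 * as implemented in rb_simple_write() above.
 */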
9128
9129 static ssize_t
9130 buffer_percent_read(struct file *filp, char __user *ubuf,
9131                     size_t cnt, loff_t *ppos)
9132 {
9133         struct trace_array *tr = filp->private_data;
9134         char buf[64];
9135         int r;
9136
9137         r = tr->buffer_percent;
9138         r = sprintf(buf, "%d\n", r);
9139
9140         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9141 }
9142
9143 static ssize_t
9144 buffer_percent_write(struct file *filp, const char __user *ubuf,
9145                      size_t cnt, loff_t *ppos)
9146 {
9147         struct trace_array *tr = filp->private_data;
9148         unsigned long val;
9149         int ret;
9150
9151         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9152         if (ret)
9153                 return ret;
9154
9155         if (val > 100)
9156                 return -EINVAL;
9157
9158         tr->buffer_percent = val;
9159
9160         (*ppos)++;
9161
9162         return cnt;
9163 }
9164
9165 static const struct file_operations buffer_percent_fops = {
9166         .open           = tracing_open_generic_tr,
9167         .read           = buffer_percent_read,
9168         .write          = buffer_percent_write,
9169         .release        = tracing_release_generic_tr,
9170         .llseek         = default_llseek,
9171 };
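
/*
 * Note (added commentary, not part of the original file): the percentage
 * stored by buffer_percent_write() is consumed by
 * tracing_buffers_splice_read() above via wait_on_pipe(), i.e. blocked
 * readers of trace_pipe_raw are only woken once the ring buffer is at least
 * this full (0 meaning any data wakes them).
 */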
9172
9173 static struct dentry *trace_instance_dir;
9174
9175 static void
9176 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9177
9178 static int
9179 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9180 {
9181         enum ring_buffer_flags rb_flags;
9182
9183         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9184
9185         buf->tr = tr;
9186
9187         buf->buffer = ring_buffer_alloc(size, rb_flags);
9188         if (!buf->buffer)
9189                 return -ENOMEM;
9190
9191         buf->data = alloc_percpu(struct trace_array_cpu);
9192         if (!buf->data) {
9193                 ring_buffer_free(buf->buffer);
9194                 buf->buffer = NULL;
9195                 return -ENOMEM;
9196         }
9197
9198         /* Allocate the first page for all buffers */
9199         set_buffer_entries(&tr->array_buffer,
9200                            ring_buffer_size(tr->array_buffer.buffer, 0));
9201
9202         return 0;
9203 }
9204
9205 static void free_trace_buffer(struct array_buffer *buf)
9206 {
9207         if (buf->buffer) {
9208                 ring_buffer_free(buf->buffer);
9209                 buf->buffer = NULL;
9210                 free_percpu(buf->data);
9211                 buf->data = NULL;
9212         }
9213 }
9214
9215 static int allocate_trace_buffers(struct trace_array *tr, int size)
9216 {
9217         int ret;
9218
9219         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9220         if (ret)
9221                 return ret;
9222
9223 #ifdef CONFIG_TRACER_MAX_TRACE
9224         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9225                                     allocate_snapshot ? size : 1);
9226         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9227                 free_trace_buffer(&tr->array_buffer);
9228                 return -ENOMEM;
9229         }
9230         tr->allocated_snapshot = allocate_snapshot;
9231
9232         /*
9233          * Only the top level trace array gets its snapshot allocated
9234          * from the kernel command line.
9235          */
9236         allocate_snapshot = false;
9237 #endif
9238
9239         return 0;
9240 }
9241
9242 static void free_trace_buffers(struct trace_array *tr)
9243 {
9244         if (!tr)
9245                 return;
9246
9247         free_trace_buffer(&tr->array_buffer);
9248
9249 #ifdef CONFIG_TRACER_MAX_TRACE
9250         free_trace_buffer(&tr->max_buffer);
9251 #endif
9252 }
9253
9254 static void init_trace_flags_index(struct trace_array *tr)
9255 {
9256         int i;
9257
9258         /* Used by the trace options files */
9259         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9260                 tr->trace_flags_index[i] = i;
9261 }
9262
9263 static void __update_tracer_options(struct trace_array *tr)
9264 {
9265         struct tracer *t;
9266
9267         for (t = trace_types; t; t = t->next)
9268                 add_tracer_options(tr, t);
9269 }
9270
9271 static void update_tracer_options(struct trace_array *tr)
9272 {
9273         mutex_lock(&trace_types_lock);
9274         tracer_options_updated = true;
9275         __update_tracer_options(tr);
9276         mutex_unlock(&trace_types_lock);
9277 }
9278
9279 /* Must have trace_types_lock held */
9280 struct trace_array *trace_array_find(const char *instance)
9281 {
9282         struct trace_array *tr, *found = NULL;
9283
9284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9285                 if (tr->name && strcmp(tr->name, instance) == 0) {
9286                         found = tr;
9287                         break;
9288                 }
9289         }
9290
9291         return found;
9292 }
9293
9294 struct trace_array *trace_array_find_get(const char *instance)
9295 {
9296         struct trace_array *tr;
9297
9298         mutex_lock(&trace_types_lock);
9299         tr = trace_array_find(instance);
9300         if (tr)
9301                 tr->ref++;
9302         mutex_unlock(&trace_types_lock);
9303
9304         return tr;
9305 }
9306
9307 static int trace_array_create_dir(struct trace_array *tr)
9308 {
9309         int ret;
9310
9311         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9312         if (!tr->dir)
9313                 return -EINVAL;
9314
9315         ret = event_trace_add_tracer(tr->dir, tr);
9316         if (ret) {
9317                 tracefs_remove(tr->dir);
9318                 return ret;
9319         }
9320
9321         init_tracer_tracefs(tr, tr->dir);
9322         __update_tracer_options(tr);
9323
9324         return ret;
9325 }
9326
9327 static struct trace_array *trace_array_create(const char *name)
9328 {
9329         struct trace_array *tr;
9330         int ret;
9331
9332         ret = -ENOMEM;
9333         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9334         if (!tr)
9335                 return ERR_PTR(ret);
9336
9337         tr->name = kstrdup(name, GFP_KERNEL);
9338         if (!tr->name)
9339                 goto out_free_tr;
9340
9341         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9342                 goto out_free_tr;
9343
9344         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9345
9346         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9347
9348         raw_spin_lock_init(&tr->start_lock);
9349
9350         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9351
9352         tr->current_trace = &nop_trace;
9353
9354         INIT_LIST_HEAD(&tr->systems);
9355         INIT_LIST_HEAD(&tr->events);
9356         INIT_LIST_HEAD(&tr->hist_vars);
9357         INIT_LIST_HEAD(&tr->err_log);
9358
9359         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9360                 goto out_free_tr;
9361
9362         if (ftrace_allocate_ftrace_ops(tr) < 0)
9363                 goto out_free_tr;
9364
9365         ftrace_init_trace_array(tr);
9366
9367         init_trace_flags_index(tr);
9368
9369         if (trace_instance_dir) {
9370                 ret = trace_array_create_dir(tr);
9371                 if (ret)
9372                         goto out_free_tr;
9373         } else
9374                 __trace_early_add_events(tr);
9375
9376         list_add(&tr->list, &ftrace_trace_arrays);
9377
9378         tr->ref++;
9379
9380         return tr;
9381
9382  out_free_tr:
9383         ftrace_free_ftrace_ops(tr);
9384         free_trace_buffers(tr);
9385         free_cpumask_var(tr->tracing_cpumask);
9386         kfree(tr->name);
9387         kfree(tr);
9388
9389         return ERR_PTR(ret);
9390 }
9391
9392 static int instance_mkdir(const char *name)
9393 {
9394         struct trace_array *tr;
9395         int ret;
9396
9397         mutex_lock(&event_mutex);
9398         mutex_lock(&trace_types_lock);
9399
9400         ret = -EEXIST;
9401         if (trace_array_find(name))
9402                 goto out_unlock;
9403
9404         tr = trace_array_create(name);
9405
9406         ret = PTR_ERR_OR_ZERO(tr);
9407
9408 out_unlock:
9409         mutex_unlock(&trace_types_lock);
9410         mutex_unlock(&event_mutex);
9411         return ret;
9412 }
9413
9414 /**
9415  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9416  * @name: The name of the trace array to be looked up/created.
9417  *
9418  * Returns a pointer to the trace array with the given name, or NULL if
9419  * it cannot be created.
9420  *
9421  * NOTE: This function increments the reference counter associated with the
9422  * trace array returned. This makes sure it cannot be freed while in use.
9423  * Use trace_array_put() once the trace array is no longer needed.
9424  * If the trace_array is to be freed, trace_array_destroy() needs to
9425  * be called after the trace_array_put(), or simply let user space delete
9426  * it from the tracefs instances directory. But until the
9427  * trace_array_put() is called, user space cannot delete it.
9428  *
9429  */
9430 struct trace_array *trace_array_get_by_name(const char *name)
9431 {
9432         struct trace_array *tr;
9433
9434         mutex_lock(&event_mutex);
9435         mutex_lock(&trace_types_lock);
9436
9437         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9438                 if (tr->name && strcmp(tr->name, name) == 0)
9439                         goto out_unlock;
9440         }
9441
9442         tr = trace_array_create(name);
9443
9444         if (IS_ERR(tr))
9445                 tr = NULL;
9446 out_unlock:
9447         if (tr)
9448                 tr->ref++;
9449
9450         mutex_unlock(&trace_types_lock);
9451         mutex_unlock(&event_mutex);
9452         return tr;
9453 }
9454 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
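
/*
 * Illustrative sketch, not part of the original file: following the rules in
 * the comment above, a hypothetical in-kernel user pairs the lookup with
 * trace_array_put(), and only calls trace_array_destroy() (after the put)
 * when the instance really should go away:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		... use tr ...
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);
 *	}
 */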
9455
9456 static int __remove_instance(struct trace_array *tr)
9457 {
9458         int i;
9459
9460         /* Reference counter for a newly created trace array = 1. */
9461         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9462                 return -EBUSY;
9463
9464         list_del(&tr->list);
9465
9466         /* Disable all the flags that were enabled coming in */
9467         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9468                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9469                         set_tracer_flag(tr, 1 << i, 0);
9470         }
9471
9472         tracing_set_nop(tr);
9473         clear_ftrace_function_probes(tr);
9474         event_trace_del_tracer(tr);
9475         ftrace_clear_pids(tr);
9476         ftrace_destroy_function_files(tr);
9477         tracefs_remove(tr->dir);
9478         free_percpu(tr->last_func_repeats);
9479         free_trace_buffers(tr);
9480         clear_tracing_err_log(tr);
9481
9482         for (i = 0; i < tr->nr_topts; i++) {
9483                 kfree(tr->topts[i].topts);
9484         }
9485         kfree(tr->topts);
9486
9487         free_cpumask_var(tr->tracing_cpumask);
9488         kfree(tr->name);
9489         kfree(tr);
9490
9491         return 0;
9492 }
9493
9494 int trace_array_destroy(struct trace_array *this_tr)
9495 {
9496         struct trace_array *tr;
9497         int ret;
9498
9499         if (!this_tr)
9500                 return -EINVAL;
9501
9502         mutex_lock(&event_mutex);
9503         mutex_lock(&trace_types_lock);
9504
9505         ret = -ENODEV;
9506
9507         /* Make sure the trace array exists before destroying it. */
9508         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9509                 if (tr == this_tr) {
9510                         ret = __remove_instance(tr);
9511                         break;
9512                 }
9513         }
9514
9515         mutex_unlock(&trace_types_lock);
9516         mutex_unlock(&event_mutex);
9517
9518         return ret;
9519 }
9520 EXPORT_SYMBOL_GPL(trace_array_destroy);
9521
9522 static int instance_rmdir(const char *name)
9523 {
9524         struct trace_array *tr;
9525         int ret;
9526
9527         mutex_lock(&event_mutex);
9528         mutex_lock(&trace_types_lock);
9529
9530         ret = -ENODEV;
9531         tr = trace_array_find(name);
9532         if (tr)
9533                 ret = __remove_instance(tr);
9534
9535         mutex_unlock(&trace_types_lock);
9536         mutex_unlock(&event_mutex);
9537
9538         return ret;
9539 }
9540
9541 static __init void create_trace_instances(struct dentry *d_tracer)
9542 {
9543         struct trace_array *tr;
9544
9545         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9546                                                          instance_mkdir,
9547                                                          instance_rmdir);
9548         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9549                 return;
9550
9551         mutex_lock(&event_mutex);
9552         mutex_lock(&trace_types_lock);
9553
9554         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9555                 if (!tr->name)
9556                         continue;
9557                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9558                              "Failed to create instance directory\n"))
9559                         break;
9560         }
9561
9562         mutex_unlock(&trace_types_lock);
9563         mutex_unlock(&event_mutex);
9564 }
9565
9566 static void
9567 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9568 {
9569         struct trace_event_file *file;
9570         int cpu;
9571
9572         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9573                         tr, &show_traces_fops);
9574
9575         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9576                         tr, &set_tracer_fops);
9577
9578         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9579                           tr, &tracing_cpumask_fops);
9580
9581         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9582                           tr, &tracing_iter_fops);
9583
9584         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9585                           tr, &tracing_fops);
9586
9587         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9588                           tr, &tracing_pipe_fops);
9589
9590         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9591                           tr, &tracing_entries_fops);
9592
9593         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9594                           tr, &tracing_total_entries_fops);
9595
9596         trace_create_file("free_buffer", 0200, d_tracer,
9597                           tr, &tracing_free_buffer_fops);
9598
9599         trace_create_file("trace_marker", 0220, d_tracer,
9600                           tr, &tracing_mark_fops);
9601
9602         file = __find_event_file(tr, "ftrace", "print");
9603         if (file && file->dir)
9604                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9605                                   file, &event_trigger_fops);
9606         tr->trace_marker_file = file;
9607
9608         trace_create_file("trace_marker_raw", 0220, d_tracer,
9609                           tr, &tracing_mark_raw_fops);
9610
9611         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9612                           &trace_clock_fops);
9613
9614         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9615                           tr, &rb_simple_fops);
9616
9617         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9618                           &trace_time_stamp_mode_fops);
9619
9620         tr->buffer_percent = 50;
9621
9622         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9623                         tr, &buffer_percent_fops);
9624
9625         create_trace_options_dir(tr);
9626
9627 #ifdef CONFIG_TRACER_MAX_TRACE
9628         trace_create_maxlat_file(tr, d_tracer);
9629 #endif
9630
9631         if (ftrace_create_function_files(tr, d_tracer))
9632                 MEM_FAIL(1, "Could not allocate function filter files");
9633
9634 #ifdef CONFIG_TRACER_SNAPSHOT
9635         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9636                           tr, &snapshot_fops);
9637 #endif
9638
9639         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9640                           tr, &tracing_err_log_fops);
9641
9642         for_each_tracing_cpu(cpu)
9643                 tracing_init_tracefs_percpu(tr, cpu);
9644
9645         ftrace_init_tracefs(tr, d_tracer);
9646 }
9647
9648 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9649 {
9650         struct vfsmount *mnt;
9651         struct file_system_type *type;
9652
9653         /*
9654          * To maintain backward compatibility for tools that mount
9655          * debugfs to get to the tracing facility, tracefs is automatically
9656          * mounted to the debugfs/tracing directory.
9657          */
9658         type = get_fs_type("tracefs");
9659         if (!type)
9660                 return NULL;
9661         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9662         put_filesystem(type);
9663         if (IS_ERR(mnt))
9664                 return NULL;
9665         mntget(mnt);
9666
9667         return mnt;
9668 }
9669
9670 /**
9671  * tracing_init_dentry - initialize top level trace array
9672  *
9673  * This is called when creating files or directories in the tracing
9674  * directory. It is called via fs_initcall() by any of the boot up code
9675  * and returns 0 once the top level tracing directory is set up, or a negative errno.
9676  */
9677 int tracing_init_dentry(void)
9678 {
9679         struct trace_array *tr = &global_trace;
9680
9681         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9682                 pr_warn("Tracing disabled due to lockdown\n");
9683                 return -EPERM;
9684         }
9685
9686         /* The top level trace array uses NULL as parent */
9687         if (tr->dir)
9688                 return 0;
9689
9690         if (WARN_ON(!tracefs_initialized()))
9691                 return -ENODEV;
9692
9693         /*
9694          * As there may still be users that expect the tracing
9695          * files to exist in debugfs/tracing, we must automount
9696          * the tracefs file system there, so older tools still
9697          * work with the newer kernel.
9698          */
9699         tr->dir = debugfs_create_automount("tracing", NULL,
9700                                            trace_automount, NULL);
9701
9702         return 0;
9703 }
9704
9705 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9706 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9707
9708 static struct workqueue_struct *eval_map_wq __initdata;
9709 static struct work_struct eval_map_work __initdata;
9710 static struct work_struct tracerfs_init_work __initdata;
9711
9712 static void __init eval_map_work_func(struct work_struct *work)
9713 {
9714         int len;
9715
9716         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9717         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9718 }
9719
9720 static int __init trace_eval_init(void)
9721 {
9722         INIT_WORK(&eval_map_work, eval_map_work_func);
9723
9724         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9725         if (!eval_map_wq) {
9726                 pr_err("Unable to allocate eval_map_wq\n");
9727                 /* Fall back to doing the work synchronously */
9728                 eval_map_work_func(&eval_map_work);
9729                 return -ENOMEM;
9730         }
9731
9732         queue_work(eval_map_wq, &eval_map_work);
9733         return 0;
9734 }
9735
9736 subsys_initcall(trace_eval_init);
9737
9738 static int __init trace_eval_sync(void)
9739 {
9740         /* Make sure the eval map updates are finished */
9741         if (eval_map_wq)
9742                 destroy_workqueue(eval_map_wq);
9743         return 0;
9744 }
9745
9746 late_initcall_sync(trace_eval_sync);
9747
9748
9749 #ifdef CONFIG_MODULES
9750 static void trace_module_add_evals(struct module *mod)
9751 {
9752         if (!mod->num_trace_evals)
9753                 return;
9754
9755         /*
9756          * Modules with bad taint do not have events created, so do
9757          * not bother with their eval (enum) maps either.
9758          */
9759         if (trace_module_has_bad_taint(mod))
9760                 return;
9761
9762         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9763 }
9764
9765 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9766 static void trace_module_remove_evals(struct module *mod)
9767 {
9768         union trace_eval_map_item *map;
9769         union trace_eval_map_item **last = &trace_eval_maps;
9770
9771         if (!mod->num_trace_evals)
9772                 return;
9773
9774         mutex_lock(&trace_eval_mutex);
9775
9776         map = trace_eval_maps;
9777
9778         while (map) {
9779                 if (map->head.mod == mod)
9780                         break;
9781                 map = trace_eval_jmp_to_tail(map);
9782                 last = &map->tail.next;
9783                 map = map->tail.next;
9784         }
9785         if (!map)
9786                 goto out;
9787
9788         *last = trace_eval_jmp_to_tail(map)->tail.next;
9789         kfree(map);
9790  out:
9791         mutex_unlock(&trace_eval_mutex);
9792 }
9793 #else
9794 static inline void trace_module_remove_evals(struct module *mod) { }
9795 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9796
9797 static int trace_module_notify(struct notifier_block *self,
9798                                unsigned long val, void *data)
9799 {
9800         struct module *mod = data;
9801
9802         switch (val) {
9803         case MODULE_STATE_COMING:
9804                 trace_module_add_evals(mod);
9805                 break;
9806         case MODULE_STATE_GOING:
9807                 trace_module_remove_evals(mod);
9808                 break;
9809         }
9810
9811         return NOTIFY_OK;
9812 }
9813
9814 static struct notifier_block trace_module_nb = {
9815         .notifier_call = trace_module_notify,
9816         .priority = 0,
9817 };
9818 #endif /* CONFIG_MODULES */
9819
9820 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9821 {
9822
9823         event_trace_init();
9824
9825         init_tracer_tracefs(&global_trace, NULL);
9826         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9827
9828         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9829                         &global_trace, &tracing_thresh_fops);
9830
9831         trace_create_file("README", TRACE_MODE_READ, NULL,
9832                         NULL, &tracing_readme_fops);
9833
9834         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9835                         NULL, &tracing_saved_cmdlines_fops);
9836
9837         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9838                           NULL, &tracing_saved_cmdlines_size_fops);
9839
9840         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9841                         NULL, &tracing_saved_tgids_fops);
9842
9843         trace_create_eval_file(NULL);
9844
9845 #ifdef CONFIG_MODULES
9846         register_module_notifier(&trace_module_nb);
9847 #endif
9848
9849 #ifdef CONFIG_DYNAMIC_FTRACE
9850         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9851                         NULL, &tracing_dyn_info_fops);
9852 #endif
9853
9854         create_trace_instances(NULL);
9855
9856         update_tracer_options(&global_trace);
9857 }
9858
9859 static __init int tracer_init_tracefs(void)
9860 {
9861         int ret;
9862
9863         trace_access_lock_init();
9864
9865         ret = tracing_init_dentry();
9866         if (ret)
9867                 return 0;
9868
9869         if (eval_map_wq) {
9870                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9871                 queue_work(eval_map_wq, &tracerfs_init_work);
9872         } else {
9873                 tracer_init_tracefs_work_func(NULL);
9874         }
9875
9876         rv_init_interface();
9877
9878         return 0;
9879 }
9880
9881 fs_initcall(tracer_init_tracefs);
9882
9883 static int trace_panic_handler(struct notifier_block *this,
9884                                unsigned long event, void *unused)
9885 {
9886         if (ftrace_dump_on_oops)
9887                 ftrace_dump(ftrace_dump_on_oops);
9888         return NOTIFY_OK;
9889 }
9890
9891 static struct notifier_block trace_panic_notifier = {
9892         .notifier_call  = trace_panic_handler,
9893         .next           = NULL,
9894         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9895 };
9896
9897 static int trace_die_handler(struct notifier_block *self,
9898                              unsigned long val,
9899                              void *data)
9900 {
9901         switch (val) {
9902         case DIE_OOPS:
9903                 if (ftrace_dump_on_oops)
9904                         ftrace_dump(ftrace_dump_on_oops);
9905                 break;
9906         default:
9907                 break;
9908         }
9909         return NOTIFY_OK;
9910 }
9911
9912 static struct notifier_block trace_die_notifier = {
9913         .notifier_call = trace_die_handler,
9914         .priority = 200
9915 };
9916
9917 /*
9918  * printk is capped at 1024 characters; we really don't need it that big.
9919  * Nothing should be printing 1000 characters anyway.
9920  */
9921 #define TRACE_MAX_PRINT         1000
9922
9923 /*
9924  * Define KERN_TRACE here so that we have one place to modify
9925  * it if we decide to change what log level the ftrace dump
9926  * should be at.
9927  */
9928 #define KERN_TRACE              KERN_EMERG
9929
9930 void
9931 trace_printk_seq(struct trace_seq *s)
9932 {
9933         /* Probably should print a warning here. */
9934         if (s->seq.len >= TRACE_MAX_PRINT)
9935                 s->seq.len = TRACE_MAX_PRINT;
9936
9937         /*
9938          * More paranoid code. Although the buffer size is set to
9939          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9940          * an extra layer of protection.
9941          */
9942         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9943                 s->seq.len = s->seq.size - 1;
9944
9945         /* Should already be NUL-terminated, but we are paranoid. */
9946         s->buffer[s->seq.len] = 0;
9947
9948         printk(KERN_TRACE "%s", s->buffer);
9949
9950         trace_seq_init(s);
9951 }
9952
9953 void trace_init_global_iter(struct trace_iterator *iter)
9954 {
9955         iter->tr = &global_trace;
9956         iter->trace = iter->tr->current_trace;
9957         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9958         iter->array_buffer = &global_trace.array_buffer;
9959
9960         if (iter->trace && iter->trace->open)
9961                 iter->trace->open(iter);
9962
9963         /* Annotate start of buffers if we had overruns */
9964         if (ring_buffer_overruns(iter->array_buffer->buffer))
9965                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9966
9967         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9968         if (trace_clocks[iter->tr->clock_id].in_ns)
9969                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9970
9971         /* Cannot use kmalloc for iter.temp and iter.fmt */
9972         iter->temp = static_temp_buf;
9973         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9974         iter->fmt = static_fmt_buf;
9975         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9976 }
9977
9978 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9979 {
9980         /* use static because iter can be a bit big for the stack */
9981         static struct trace_iterator iter;
9982         static atomic_t dump_running;
9983         struct trace_array *tr = &global_trace;
9984         unsigned int old_userobj;
9985         unsigned long flags;
9986         int cnt = 0, cpu;
9987
9988         /* Only allow one dump user at a time. */
9989         if (atomic_inc_return(&dump_running) != 1) {
9990                 atomic_dec(&dump_running);
9991                 return;
9992         }
9993
9994         /*
9995          * Always turn off tracing when we dump.
9996          * We don't need to show trace output of what happens
9997          * between multiple crashes.
9998          *
9999          * If the user does a sysrq-z, then they can re-enable
10000          * tracing with echo 1 > tracing_on.
10001          */
10002         tracing_off();
10003
10004         local_irq_save(flags);
10005
10006         /* Simulate the iterator */
10007         trace_init_global_iter(&iter);
10008
10009         for_each_tracing_cpu(cpu) {
10010                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10011         }
10012
10013         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10014
10015         /* don't look at user memory in panic mode */
10016         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10017
10018         switch (oops_dump_mode) {
10019         case DUMP_ALL:
10020                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10021                 break;
10022         case DUMP_ORIG:
10023                 iter.cpu_file = raw_smp_processor_id();
10024                 break;
10025         case DUMP_NONE:
10026                 goto out_enable;
10027         default:
10028                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10029                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10030         }
10031
10032         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10033
10034         /* Did function tracer already get disabled? */
10035         if (ftrace_is_dead()) {
10036                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10037                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10038         }
10039
10040         /*
10041          * We need to stop all tracing on all CPUs to read
10042          * the next buffer. This is a bit expensive, but is
10043          * not done often. We print all that we can read,
10044          * and then release the locks again.
10045          */
10046
10047         while (!trace_empty(&iter)) {
10048
10049                 if (!cnt)
10050                         printk(KERN_TRACE "---------------------------------\n");
10051
10052                 cnt++;
10053
10054                 trace_iterator_reset(&iter);
10055                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10056
10057                 if (trace_find_next_entry_inc(&iter) != NULL) {
10058                         int ret;
10059
10060                         ret = print_trace_line(&iter);
10061                         if (ret != TRACE_TYPE_NO_CONSUME)
10062                                 trace_consume(&iter);
10063                 }
10064                 touch_nmi_watchdog();
10065
10066                 trace_printk_seq(&iter.seq);
10067         }
10068
10069         if (!cnt)
10070                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10071         else
10072                 printk(KERN_TRACE "---------------------------------\n");
10073
10074  out_enable:
10075         tr->trace_flags |= old_userobj;
10076
10077         for_each_tracing_cpu(cpu) {
10078                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10079         }
10080         atomic_dec(&dump_running);
10081         local_irq_restore(flags);
10082 }
10083 EXPORT_SYMBOL_GPL(ftrace_dump);
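/*
 * Illustrative sketch, not part of trace.c: a caller that wants the ftrace
 * ring buffer printed on a fatal error can simply invoke ftrace_dump().
 * The wrapper name sample_dump_on_fatal() is an assumption made for the
 * example.
 */
static void sample_dump_on_fatal(void)
{
	/* Dump every CPU's buffer; DUMP_ORIG would restrict it to the current CPU. */
	ftrace_dump(DUMP_ALL);
}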
10084
10085 #define WRITE_BUFSIZE  4096
10086
10087 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10088                                 size_t count, loff_t *ppos,
10089                                 int (*createfn)(const char *))
10090 {
10091         char *kbuf, *buf, *tmp;
10092         int ret = 0;
10093         size_t done = 0;
10094         size_t size;
10095
10096         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10097         if (!kbuf)
10098                 return -ENOMEM;
10099
10100         while (done < count) {
10101                 size = count - done;
10102
10103                 if (size >= WRITE_BUFSIZE)
10104                         size = WRITE_BUFSIZE - 1;
10105
10106                 if (copy_from_user(kbuf, buffer + done, size)) {
10107                         ret = -EFAULT;
10108                         goto out;
10109                 }
10110                 kbuf[size] = '\0';
10111                 buf = kbuf;
10112                 do {
10113                         tmp = strchr(buf, '\n');
10114                         if (tmp) {
10115                                 *tmp = '\0';
10116                                 size = tmp - buf + 1;
10117                         } else {
10118                                 size = strlen(buf);
10119                                 if (done + size < count) {
10120                                         if (buf != kbuf)
10121                                                 break;
10122                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10123                                         pr_warn("Line length is too long: Should be less than %d\n",
10124                                                 WRITE_BUFSIZE - 2);
10125                                         ret = -EINVAL;
10126                                         goto out;
10127                                 }
10128                         }
10129                         done += size;
10130
10131                         /* Remove comments */
10132                         tmp = strchr(buf, '#');
10133
10134                         if (tmp)
10135                                 *tmp = '\0';
10136
10137                         ret = createfn(buf);
10138                         if (ret)
10139                                 goto out;
10140                         buf += size;
10141
10142                 } while (done < count);
10143         }
10144         ret = done;
10145
10146 out:
10147         kfree(kbuf);
10148
10149         return ret;
10150 }
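/*
 * Illustrative sketch, not part of trace.c: wiring trace_parse_run_command()
 * into a tracefs write handler. The names sample_create_cmd() and
 * sample_write() are assumptions made for the example; the kernel's dynamic
 * event files (e.g. kprobe_events) are hooked up in a similar way.
 */
static int sample_create_cmd(const char *raw_command)
{
	/* Invoked once per newline-terminated line, with '#' comments stripped. */
	pr_info("sample command: %s\n", raw_command);
	return 0;
}

static ssize_t sample_write(struct file *filp, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(filp, buffer, count, ppos,
				       sample_create_cmd);
}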
10151
10152 __init static int tracer_alloc_buffers(void)
10153 {
10154         int ring_buf_size;
10155         int ret = -ENOMEM;
10156
10157
10158         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10159                 pr_warn("Tracing disabled due to lockdown\n");
10160                 return -EPERM;
10161         }
10162
10163         /*
10164          * Make sure we don't accidentally add more trace options
10165          * than we have bits for.
10166          */
10167         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10168
10169         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10170                 goto out;
10171
10172         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10173                 goto out_free_buffer_mask;
10174
10175         /* Only allocate trace_printk buffers if a trace_printk exists */
10176         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10177                 /* Must be called before global_trace.buffer is allocated */
10178                 trace_printk_init_buffers();
10179
10180         /* To save memory, keep the ring buffer size at its minimum */
10181         if (ring_buffer_expanded)
10182                 ring_buf_size = trace_buf_size;
10183         else
10184                 ring_buf_size = 1;
10185
10186         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10187         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10188
10189         raw_spin_lock_init(&global_trace.start_lock);
10190
10191         /*
10192          * The prepare callback allocates some memory for the ring buffer. We
10193          * don't free the buffer if the CPU goes down. If we were to free
10194          * the buffer, then the user would lose any trace that was in the
10195          * buffer. The memory will be removed once the "instance" is removed.
10196          */
10197         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10198                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10199                                       NULL);
10200         if (ret < 0)
10201                 goto out_free_cpumask;
10202         /* Used for event triggers */
10203         ret = -ENOMEM;
10204         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10205         if (!temp_buffer)
10206                 goto out_rm_hp_state;
10207
10208         if (trace_create_savedcmd() < 0)
10209                 goto out_free_temp_buffer;
10210
10211         /* TODO: make the number of buffers hot pluggable with CPUs */
10212         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10213                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10214                 goto out_free_savedcmd;
10215         }
10216
10217         if (global_trace.buffer_disabled)
10218                 tracing_off();
10219
10220         if (trace_boot_clock) {
10221                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10222                 if (ret < 0)
10223                         pr_warn("Trace clock %s not defined, going back to default\n",
10224                                 trace_boot_clock);
10225         }
10226
10227         /*
10228          * register_tracer() might reference current_trace, so it
10229          * needs to be set before we register anything. This is
10230          * just a bootstrap of current_trace anyway.
10231          */
10232         global_trace.current_trace = &nop_trace;
10233
10234         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10235
10236         ftrace_init_global_array_ops(&global_trace);
10237
10238         init_trace_flags_index(&global_trace);
10239
10240         register_tracer(&nop_trace);
10241
10242         /* Function tracing may start here (via kernel command line) */
10243         init_function_trace();
10244
10245         /* All seems OK, enable tracing */
10246         tracing_disabled = 0;
10247
10248         atomic_notifier_chain_register(&panic_notifier_list,
10249                                        &trace_panic_notifier);
10250
10251         register_die_notifier(&trace_die_notifier);
10252
10253         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10254
10255         INIT_LIST_HEAD(&global_trace.systems);
10256         INIT_LIST_HEAD(&global_trace.events);
10257         INIT_LIST_HEAD(&global_trace.hist_vars);
10258         INIT_LIST_HEAD(&global_trace.err_log);
10259         list_add(&global_trace.list, &ftrace_trace_arrays);
10260
10261         apply_trace_boot_options();
10262
10263         register_snapshot_cmd();
10264
10265         test_can_verify();
10266
10267         return 0;
10268
10269 out_free_savedcmd:
10270         free_saved_cmdlines_buffer(savedcmd);
10271 out_free_temp_buffer:
10272         ring_buffer_free(temp_buffer);
10273 out_rm_hp_state:
10274         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10275 out_free_cpumask:
10276         free_cpumask_var(global_trace.tracing_cpumask);
10277 out_free_buffer_mask:
10278         free_cpumask_var(tracing_buffer_mask);
10279 out:
10280         return ret;
10281 }
10282
10283 void __init ftrace_boot_snapshot(void)
10284 {
10285         if (snapshot_at_boot) {
10286                 tracing_snapshot();
10287                 internal_trace_puts("** Boot snapshot taken **\n");
10288         }
10289 }
10290
10291 void __init early_trace_init(void)
10292 {
10293         if (tracepoint_printk) {
10294                 tracepoint_print_iter =
10295                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10296                 if (MEM_FAIL(!tracepoint_print_iter,
10297                              "Failed to allocate trace iterator\n"))
10298                         tracepoint_printk = 0;
10299                 else
10300                         static_key_enable(&tracepoint_printk_key.key);
10301         }
10302         tracer_alloc_buffers();
10303
10304         init_events();
10305 }
10306
10307 void __init trace_init(void)
10308 {
10309         trace_event_init();
10310 }
10311
10312 __init static void clear_boot_tracer(void)
10313 {
10314         /*
10315          * The default bootup tracer name points into an init section
10316          * (the boot buffer). This function is called at late_initcall
10317          * time. If the boot tracer was never found, clear the name out
10318          * to prevent later registration from accessing a buffer that is
10319          * about to be freed.
10320          */
10321         if (!default_bootup_tracer)
10322                 return;
10323
10324         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10325                default_bootup_tracer);
10326         default_bootup_tracer = NULL;
10327 }
10328
10329 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10330 __init static void tracing_set_default_clock(void)
10331 {
10332         /* sched_clock_stable() is determined in late_initcall */
10333         if (!trace_boot_clock && !sched_clock_stable()) {
10334                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10335                         pr_warn("Can not set tracing clock due to lockdown\n");
10336                         return;
10337                 }
10338
10339                 printk(KERN_WARNING
10340                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10341                        "If you want to keep using the local clock, then add:\n"
10342                        "  \"trace_clock=local\"\n"
10343                        "on the kernel command line\n");
10344                 tracing_set_clock(&global_trace, "global");
10345         }
10346 }
10347 #else
10348 static inline void tracing_set_default_clock(void) { }
10349 #endif
10350
10351 __init static int late_trace_init(void)
10352 {
10353         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10354                 static_key_disable(&tracepoint_printk_key.key);
10355                 tracepoint_printk = 0;
10356         }
10357
10358         tracing_set_default_clock();
10359         clear_boot_tracer();
10360         return 0;
10361 }
10362
10363 late_initcall_sync(late_trace_init);