tracing: Add enabling of events to boot instances
[platform/kernel/linux-starfive.git] kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 /*
58  * On boot up, the ring buffer is set to the minimum size, so that
59  * we do not waste memory on systems that are not using tracing.
60  */
61 bool ring_buffer_expanded;
62
63 /*
64  * We need to change this state when a selftest is running.
65  * A selftest will peek into the ring buffer to count the
66  * entries inserted during the selftest, although concurrent
67  * insertions into the ring buffer, such as trace_printk(), could occur
68  * at the same time, giving false positive or negative results.
69  */
70 static bool __read_mostly tracing_selftest_running;
71
72 /*
73  * If boot-time tracing including tracers/events via kernel cmdline
74  * is running, we do not want to run SELFTEST.
75  */
76 bool __read_mostly tracing_selftest_disabled;
77
78 #ifdef CONFIG_FTRACE_STARTUP_TEST
79 void __init disable_tracing_selftest(const char *reason)
80 {
81         if (!tracing_selftest_disabled) {
82                 tracing_selftest_disabled = true;
83                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
84         }
85 }
86 #endif
87
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
93
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
96         { }
97 };
98
99 static int
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
101 {
102         return 0;
103 }
104
105 /*
106  * To prevent the comm cache from being overwritten when no
107  * tracing is active, only save the comm when a trace event
108  * occurs.
109  */
110 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
111
112 /*
113  * Kill all tracing for good (never come back).
114  * It is initialized to 1 but is set to zero if the initialization
115  * of the tracer is successful; that is the only place that sets
116  * it back to zero.
117  */
118 static int tracing_disabled = 1;
119
120 cpumask_var_t __read_mostly     tracing_buffer_mask;
121
122 /*
123  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
124  *
125  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126  * is set, then ftrace_dump is called. This will output the contents
127  * of the ftrace buffers to the console.  This is very useful for
128  * capturing traces that lead to crashes and outputting them to a
129  * serial console.
130  *
131  * It is off by default, but you can enable it either by specifying
132  * "ftrace_dump_on_oops" on the kernel command line, or by setting
133  * /proc/sys/kernel/ftrace_dump_on_oops.
134  * Set it to 1 to dump the buffers of all CPUs.
135  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
136  */
137
138 enum ftrace_dump_mode ftrace_dump_on_oops;
139
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149
150 union trace_eval_map_item;
151
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * from "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160
161 static DEFINE_MUTEX(trace_eval_mutex);
162
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
175
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183
184 #define MAX_TRACER_SIZE         100
185 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
186 static char *default_bootup_tracer;
187
188 static bool allocate_snapshot;
189 static bool snapshot_at_boot;
190
191 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_instance_index;
193
194 static int __init set_cmdline_ftrace(char *str)
195 {
196         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197         default_bootup_tracer = bootup_tracer_buf;
198         /* We are using ftrace early, expand it */
199         ring_buffer_expanded = true;
200         return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206         if (*str++ != '=' || !*str || !strcmp("1", str)) {
207                 ftrace_dump_on_oops = DUMP_ALL;
208                 return 1;
209         }
210
211         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212                 ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
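/*
 * Illustrative usage, based on the handler above (not part of the original
 * source): on the kernel command line,
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs (DUMP_ALL)
 *	ftrace_dump_on_oops=orig_cpu	dump only the buffer of the CPU that
 *					triggered the oops (DUMP_ORIG)
 */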
219
220 static int __init stop_trace_on_warning(char *str)
221 {
222         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223                 __disable_trace_on_warning = 1;
224         return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
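/*
 * Illustrative usage, based on the handler above: booting with
 * "traceoff_on_warning" stops tracing at the first WARN*(), while
 * "traceoff_on_warning=0" or "traceoff_on_warning=off" leaves it enabled.
 */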
227
228 static int __init boot_alloc_snapshot(char *str)
229 {
230         allocate_snapshot = true;
231         /* We also need the main ring buffer expanded */
232         ring_buffer_expanded = true;
233         return 1;
234 }
235 __setup("alloc_snapshot", boot_alloc_snapshot);
236
237
238 static int __init boot_snapshot(char *str)
239 {
240         snapshot_at_boot = true;
241         boot_alloc_snapshot(str);
242         return 1;
243 }
244 __setup("ftrace_boot_snapshot", boot_snapshot);
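/*
 * Illustrative usage, based on the two handlers above: "alloc_snapshot"
 * only allocates the snapshot buffer (and expands the main ring buffer)
 * at boot, while "ftrace_boot_snapshot" also sets snapshot_at_boot so
 * that a snapshot is taken once boot-time tracing has run.
 */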
245
246
247 static int __init boot_instance(char *str)
248 {
249         char *slot = boot_instance_info + boot_instance_index;
250         int left = sizeof(boot_instance_info) - boot_instance_index;
251         int ret;
252
253         if (strlen(str) >= left)
254                 return -1;
255
256         ret = snprintf(slot, left, "%s\t", str);
257         boot_instance_index += ret;
258
259         return 1;
260 }
261 __setup("trace_instance=", boot_instance);
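/*
 * A hedged example (the exact argument syntax is handled where
 * boot_instance_info is parsed later during boot, not in this handler,
 * which only accumulates the tab-separated arguments):
 *
 *	trace_instance=foo,sched:sched_switch,sched:sched_wakeup
 *
 * would create a boot-time instance "foo" and, per this change, enable
 * the listed events within it.
 */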
262
263
264 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
265
266 static int __init set_trace_boot_options(char *str)
267 {
268         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
269         return 1;
270 }
271 __setup("trace_options=", set_trace_boot_options);
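/*
 * Illustrative usage (option names are the strings listed in the
 * trace_options[] array further down in this file), e.g.:
 *
 *	trace_options=sym-addr,stacktrace
 */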
272
273 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
274 static char *trace_boot_clock __initdata;
275
276 static int __init set_trace_boot_clock(char *str)
277 {
278         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
279         trace_boot_clock = trace_boot_clock_buf;
280         return 1;
281 }
282 __setup("trace_clock=", set_trace_boot_clock);
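/*
 * Illustrative usage (clock names come from the trace_clocks[] array
 * further down in this file), e.g.:
 *
 *	trace_clock=global
 */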
283
284 static int __init set_tracepoint_printk(char *str)
285 {
286         /* Ignore the "tp_printk_stop_on_boot" param */
287         if (*str == '_')
288                 return 0;
289
290         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
291                 tracepoint_printk = 1;
292         return 1;
293 }
294 __setup("tp_printk", set_tracepoint_printk);
295
296 static int __init set_tracepoint_printk_stop(char *str)
297 {
298         tracepoint_printk_stop_on_boot = true;
299         return 1;
300 }
301 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
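/*
 * Illustrative usage, based on the handlers above: "tp_printk" causes
 * enabled trace events to also be piped to printk (see tracepoint_printk),
 * and "tp_printk_stop_on_boot" turns that piping off again once boot has
 * finished.
 */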
302
303 unsigned long long ns2usecs(u64 nsec)
304 {
305         nsec += 500;
306         do_div(nsec, 1000);
307         return nsec;
308 }
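/*
 * Worked example (not in the original source): ns2usecs() rounds to the
 * nearest microsecond, e.g. 1499 ns -> 1 us and 1500 ns -> 2 us, because
 * 500 is added before the divide by 1000.
 */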
309
310 static void
311 trace_process_export(struct trace_export *export,
312                struct ring_buffer_event *event, int flag)
313 {
314         struct trace_entry *entry;
315         unsigned int size = 0;
316
317         if (export->flags & flag) {
318                 entry = ring_buffer_event_data(event);
319                 size = ring_buffer_event_length(event);
320                 export->write(export, entry, size);
321         }
322 }
323
324 static DEFINE_MUTEX(ftrace_export_lock);
325
326 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
327
328 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
329 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
330 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
331
332 static inline void ftrace_exports_enable(struct trace_export *export)
333 {
334         if (export->flags & TRACE_EXPORT_FUNCTION)
335                 static_branch_inc(&trace_function_exports_enabled);
336
337         if (export->flags & TRACE_EXPORT_EVENT)
338                 static_branch_inc(&trace_event_exports_enabled);
339
340         if (export->flags & TRACE_EXPORT_MARKER)
341                 static_branch_inc(&trace_marker_exports_enabled);
342 }
343
344 static inline void ftrace_exports_disable(struct trace_export *export)
345 {
346         if (export->flags & TRACE_EXPORT_FUNCTION)
347                 static_branch_dec(&trace_function_exports_enabled);
348
349         if (export->flags & TRACE_EXPORT_EVENT)
350                 static_branch_dec(&trace_event_exports_enabled);
351
352         if (export->flags & TRACE_EXPORT_MARKER)
353                 static_branch_dec(&trace_marker_exports_enabled);
354 }
355
356 static void ftrace_exports(struct ring_buffer_event *event, int flag)
357 {
358         struct trace_export *export;
359
360         preempt_disable_notrace();
361
362         export = rcu_dereference_raw_check(ftrace_exports_list);
363         while (export) {
364                 trace_process_export(export, event, flag);
365                 export = rcu_dereference_raw_check(export->next);
366         }
367
368         preempt_enable_notrace();
369 }
370
371 static inline void
372 add_trace_export(struct trace_export **list, struct trace_export *export)
373 {
374         rcu_assign_pointer(export->next, *list);
375         /*
376          * We are entering export into the list but another
377          * CPU might be walking that list. We need to make sure
378          * the export->next pointer is valid before another CPU sees
379          * the export pointer included into the list.
380          */
381         rcu_assign_pointer(*list, export);
382 }
383
384 static inline int
385 rm_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387         struct trace_export **p;
388
389         for (p = list; *p != NULL; p = &(*p)->next)
390                 if (*p == export)
391                         break;
392
393         if (*p != export)
394                 return -1;
395
396         rcu_assign_pointer(*p, (*p)->next);
397
398         return 0;
399 }
400
401 static inline void
402 add_ftrace_export(struct trace_export **list, struct trace_export *export)
403 {
404         ftrace_exports_enable(export);
405
406         add_trace_export(list, export);
407 }
408
409 static inline int
410 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
411 {
412         int ret;
413
414         ret = rm_trace_export(list, export);
415         ftrace_exports_disable(export);
416
417         return ret;
418 }
419
420 int register_ftrace_export(struct trace_export *export)
421 {
422         if (WARN_ON_ONCE(!export->write))
423                 return -1;
424
425         mutex_lock(&ftrace_export_lock);
426
427         add_ftrace_export(&ftrace_exports_list, export);
428
429         mutex_unlock(&ftrace_export_lock);
430
431         return 0;
432 }
433 EXPORT_SYMBOL_GPL(register_ftrace_export);
434
435 int unregister_ftrace_export(struct trace_export *export)
436 {
437         int ret;
438
439         mutex_lock(&ftrace_export_lock);
440
441         ret = rm_ftrace_export(&ftrace_exports_list, export);
442
443         mutex_unlock(&ftrace_export_lock);
444
445         return ret;
446 }
447 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
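/*
 * A minimal sketch (assuming the struct trace_export layout declared in
 * <linux/trace.h>; the names here are hypothetical) of how a module might
 * use the export list managed above:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the raw entry bytes, e.g. to an STM device ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */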
448
449 /* trace_flags holds trace_options default values */
450 #define TRACE_DEFAULT_FLAGS                                             \
451         (FUNCTION_DEFAULT_FLAGS |                                       \
452          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
453          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
454          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
455          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
456          TRACE_ITER_HASH_PTR)
457
458 /* trace_options that are only supported by global_trace */
459 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
460                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
461
462 /* trace_flags that are default zero for instances */
463 #define ZEROED_TRACE_FLAGS \
464         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
465
466 /*
467  * The global_trace is the descriptor that holds the top-level tracing
468  * buffers for the live tracing.
469  */
470 static struct trace_array global_trace = {
471         .trace_flags = TRACE_DEFAULT_FLAGS,
472 };
473
474 LIST_HEAD(ftrace_trace_arrays);
475
476 int trace_array_get(struct trace_array *this_tr)
477 {
478         struct trace_array *tr;
479         int ret = -ENODEV;
480
481         mutex_lock(&trace_types_lock);
482         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
483                 if (tr == this_tr) {
484                         tr->ref++;
485                         ret = 0;
486                         break;
487                 }
488         }
489         mutex_unlock(&trace_types_lock);
490
491         return ret;
492 }
493
494 static void __trace_array_put(struct trace_array *this_tr)
495 {
496         WARN_ON(!this_tr->ref);
497         this_tr->ref--;
498 }
499
500 /**
501  * trace_array_put - Decrement the reference counter for this trace array.
502  * @this_tr: pointer to the trace array
503  *
504  * NOTE: Use this when we no longer need the trace array returned by
505  * trace_array_get_by_name(). This ensures the trace array can be later
506  * destroyed.
507  *
508  */
509 void trace_array_put(struct trace_array *this_tr)
510 {
511         if (!this_tr)
512                 return;
513
514         mutex_lock(&trace_types_lock);
515         __trace_array_put(this_tr);
516         mutex_unlock(&trace_types_lock);
517 }
518 EXPORT_SYMBOL_GPL(trace_array_put);
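/*
 * A minimal sketch of the expected pairing, per the kernel-doc above
 * ("my_instance" is a hypothetical name):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		...
 *		trace_array_put(tr);
 *	}
 */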
519
520 int tracing_check_open_get_tr(struct trace_array *tr)
521 {
522         int ret;
523
524         ret = security_locked_down(LOCKDOWN_TRACEFS);
525         if (ret)
526                 return ret;
527
528         if (tracing_disabled)
529                 return -ENODEV;
530
531         if (tr && trace_array_get(tr) < 0)
532                 return -ENODEV;
533
534         return 0;
535 }
536
537 int call_filter_check_discard(struct trace_event_call *call, void *rec,
538                               struct trace_buffer *buffer,
539                               struct ring_buffer_event *event)
540 {
541         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
542             !filter_match_preds(call->filter, rec)) {
543                 __trace_event_discard_commit(buffer, event);
544                 return 1;
545         }
546
547         return 0;
548 }
549
550 /**
551  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
552  * @filtered_pids: The list of pids to check
553  * @search_pid: The PID to find in @filtered_pids
554  *
555  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
556  */
557 bool
558 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
559 {
560         return trace_pid_list_is_set(filtered_pids, search_pid);
561 }
562
563 /**
564  * trace_ignore_this_task - should a task be ignored for tracing
565  * @filtered_pids: The list of pids to check
566  * @filtered_no_pids: The list of pids not to be traced
567  * @task: The task that should be ignored if not filtered
568  *
569  * Checks if @task should be traced or not from @filtered_pids.
570  * Returns true if @task should *NOT* be traced.
571  * Returns false if @task should be traced.
572  */
573 bool
574 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
575                        struct trace_pid_list *filtered_no_pids,
576                        struct task_struct *task)
577 {
578         /*
579          * If filtered_no_pids is not empty, and the task's pid is listed
580          * in filtered_no_pids, then return true.
581          * Otherwise, if filtered_pids is empty, that means we can
582          * trace all tasks. If it has content, then only trace pids
583          * within filtered_pids.
584          */
585
586         return (filtered_pids &&
587                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
588                 (filtered_no_pids &&
589                  trace_find_filtered_pid(filtered_no_pids, task->pid));
590 }
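/*
 * Worked example (not in the original source): with filtered_pids = {1, 2}
 * and filtered_no_pids = {3}, pid 1 is traced (returns false), pid 3 is
 * ignored because it is in filtered_no_pids, and pid 4 is ignored because
 * it is not in filtered_pids.
 */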
591
592 /**
593  * trace_filter_add_remove_task - Add or remove a task from a pid_list
594  * @pid_list: The list to modify
595  * @self: The current task for fork or NULL for exit
596  * @task: The task to add or remove
597  *
598  * If adding a task, if @self is defined, the task is only added if @self
599  * is also included in @pid_list. This happens on fork and tasks should
600  * only be added when the parent is listed. If @self is NULL, then the
601  * @task pid will be removed from the list, which would happen on exit
602  * of a task.
603  */
604 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
605                                   struct task_struct *self,
606                                   struct task_struct *task)
607 {
608         if (!pid_list)
609                 return;
610
611         /* For forks, we only add if the forking task is listed */
612         if (self) {
613                 if (!trace_find_filtered_pid(pid_list, self->pid))
614                         return;
615         }
616
617         /* "self" is set for forks, and NULL for exits */
618         if (self)
619                 trace_pid_list_set(pid_list, task->pid);
620         else
621                 trace_pid_list_clear(pid_list, task->pid);
622 }
623
624 /**
625  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
626  * @pid_list: The pid list to show
627  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
628  * @pos: The position of the file
629  *
630  * This is used by the seq_file "next" operation to iterate the pids
631  * listed in a trace_pid_list structure.
632  *
633  * Returns the pid+1 as we want to display pid of zero, but NULL would
634  * stop the iteration.
635  */
636 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
637 {
638         long pid = (unsigned long)v;
639         unsigned int next;
640
641         (*pos)++;
642
643         /* pid already is +1 of the actual previous bit */
644         if (trace_pid_list_next(pid_list, pid, &next) < 0)
645                 return NULL;
646
647         pid = next;
648
649         /* Return pid + 1 to allow zero to be represented */
650         return (void *)(pid + 1);
651 }
652
653 /**
654  * trace_pid_start - Used for seq_file to start reading pid lists
655  * @pid_list: The pid list to show
656  * @pos: The position of the file
657  *
658  * This is used by seq_file "start" operation to start the iteration
659  * of listing pids.
660  *
661  * Returns the pid+1 as we want to display pid of zero, but NULL would
662  * stop the iteration.
663  */
664 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
665 {
666         unsigned long pid;
667         unsigned int first;
668         loff_t l = 0;
669
670         if (trace_pid_list_first(pid_list, &first) < 0)
671                 return NULL;
672
673         pid = first;
674
675         /* Return pid + 1 so that zero can be the exit value */
676         for (pid++; pid && l < *pos;
677              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
678                 ;
679         return (void *)pid;
680 }
681
682 /**
683  * trace_pid_show - show the current pid in seq_file processing
684  * @m: The seq_file structure to write into
685  * @v: A void pointer of the pid (+1) value to display
686  *
687  * Can be directly used by seq_file operations to display the current
688  * pid value.
689  */
690 int trace_pid_show(struct seq_file *m, void *v)
691 {
692         unsigned long pid = (unsigned long)v - 1;
693
694         seq_printf(m, "%lu\n", pid);
695         return 0;
696 }
697
698 /* 128 should be much more than enough */
699 #define PID_BUF_SIZE            127
700
701 int trace_pid_write(struct trace_pid_list *filtered_pids,
702                     struct trace_pid_list **new_pid_list,
703                     const char __user *ubuf, size_t cnt)
704 {
705         struct trace_pid_list *pid_list;
706         struct trace_parser parser;
707         unsigned long val;
708         int nr_pids = 0;
709         ssize_t read = 0;
710         ssize_t ret;
711         loff_t pos;
712         pid_t pid;
713
714         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
715                 return -ENOMEM;
716
717         /*
718          * Always create a new pid list. The write is an all-or-nothing
719          * operation: a new list is built for the pids written by
720          * the user, and if the operation fails, the current list is
721          * left unmodified.
722          */
723         pid_list = trace_pid_list_alloc();
724         if (!pid_list) {
725                 trace_parser_put(&parser);
726                 return -ENOMEM;
727         }
728
729         if (filtered_pids) {
730                 /* copy the current bits to the new max */
731                 ret = trace_pid_list_first(filtered_pids, &pid);
732                 while (!ret) {
733                         trace_pid_list_set(pid_list, pid);
734                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
735                         nr_pids++;
736                 }
737         }
738
739         ret = 0;
740         while (cnt > 0) {
741
742                 pos = 0;
743
744                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
745                 if (ret < 0)
746                         break;
747
748                 read += ret;
749                 ubuf += ret;
750                 cnt -= ret;
751
752                 if (!trace_parser_loaded(&parser))
753                         break;
754
755                 ret = -EINVAL;
756                 if (kstrtoul(parser.buffer, 0, &val))
757                         break;
758
759                 pid = (pid_t)val;
760
761                 if (trace_pid_list_set(pid_list, pid) < 0) {
762                         ret = -1;
763                         break;
764                 }
765                 nr_pids++;
766
767                 trace_parser_clear(&parser);
768                 ret = 0;
769         }
770         trace_parser_put(&parser);
771
772         if (ret < 0) {
773                 trace_pid_list_free(pid_list);
774                 return ret;
775         }
776
777         if (!nr_pids) {
778                 /* Cleared the list of pids */
779                 trace_pid_list_free(pid_list);
780                 pid_list = NULL;
781         }
782
783         *new_pid_list = pid_list;
784
785         return read;
786 }
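/*
 * Illustrative usage (hypothetical pids): this helper backs tracefs pid
 * filter files such as set_event_pid, where e.g.
 *
 *	# echo 123 456 > set_event_pid
 *
 * results in a new list holding 123 and 456 on top of whatever the caller
 * passed in as @filtered_pids (those bits are copied in first, above). If
 * no pids end up in the list, the filter is dropped (the nr_pids == 0 case).
 */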
787
788 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
789 {
790         u64 ts;
791
792         /* Early boot up does not have a buffer yet */
793         if (!buf->buffer)
794                 return trace_clock_local();
795
796         ts = ring_buffer_time_stamp(buf->buffer);
797         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
798
799         return ts;
800 }
801
802 u64 ftrace_now(int cpu)
803 {
804         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
805 }
806
807 /**
808  * tracing_is_enabled - Show if global_trace has been enabled
809  *
810  * Shows if the global trace has been enabled or not. It uses the
811  * mirror flag "buffer_disabled" so it can be used in fast paths such
812  * as the irqsoff tracer, but it may be inaccurate due to races. If you
813  * need to know the accurate state, use tracing_is_on(), which is a little
814  * slower but accurate.
815  */
816 int tracing_is_enabled(void)
817 {
818         /*
819          * For quick access (irqsoff uses this in fast path), just
820          * return the mirror variable of the state of the ring buffer.
821          * It's a little racy, but we don't really care.
822          */
823         smp_rmb();
824         return !global_trace.buffer_disabled;
825 }
826
827 /*
828  * trace_buf_size is the size in bytes that is allocated
829  * for a buffer. Note, the number of bytes is always rounded
830  * to page size.
831  *
832  * This number is purposely set to a low number of 16384.
833  * If a dump on oops happens, it is much appreciated not to have
834  * to wait for all that output. Anyway, this is configurable at
835  * both boot time and run time.
836  */
837 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
838
839 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
840
841 /* trace_types holds a link list of available tracers. */
842 static struct tracer            *trace_types __read_mostly;
843
844 /*
845  * trace_types_lock is used to protect the trace_types list.
846  */
847 DEFINE_MUTEX(trace_types_lock);
848
849 /*
850  * serialize the access of the ring buffer
851  *
852  * The ring buffer serializes readers, but that is only low-level protection.
853  * The validity of the events (returned by ring_buffer_peek(), etc.)
854  * is not protected by the ring buffer.
855  *
856  * The content of events may become garbage if we allow another process to
857  * consume these events concurrently:
858  *   A) the page of the consumed events may become a normal page
859  *      (not a reader page) in the ring buffer, and this page will be
860  *      rewritten by the events producer.
861  *   B) the page of the consumed events may become a page for splice_read,
862  *      and this page will be returned to the system.
863  *
864  * These primitives allow multiple processes to access different CPU ring
865  * buffers concurrently.
866  *
867  * These primitives don't distinguish read-only and read-consume access.
868  * Multiple read-only accesses are also serialized.
869  */
870
871 #ifdef CONFIG_SMP
872 static DECLARE_RWSEM(all_cpu_access_lock);
873 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
874
875 static inline void trace_access_lock(int cpu)
876 {
877         if (cpu == RING_BUFFER_ALL_CPUS) {
878                 /* gain it for accessing the whole ring buffer. */
879                 down_write(&all_cpu_access_lock);
880         } else {
881                 /* gain it for accessing a cpu ring buffer. */
882
883                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
884                 down_read(&all_cpu_access_lock);
885
886                 /* Secondly block other access to this @cpu ring buffer. */
887                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
888         }
889 }
890
891 static inline void trace_access_unlock(int cpu)
892 {
893         if (cpu == RING_BUFFER_ALL_CPUS) {
894                 up_write(&all_cpu_access_lock);
895         } else {
896                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
897                 up_read(&all_cpu_access_lock);
898         }
899 }
900
901 static inline void trace_access_lock_init(void)
902 {
903         int cpu;
904
905         for_each_possible_cpu(cpu)
906                 mutex_init(&per_cpu(cpu_access_lock, cpu));
907 }
908
909 #else
910
911 static DEFINE_MUTEX(access_lock);
912
913 static inline void trace_access_lock(int cpu)
914 {
915         (void)cpu;
916         mutex_lock(&access_lock);
917 }
918
919 static inline void trace_access_unlock(int cpu)
920 {
921         (void)cpu;
922         mutex_unlock(&access_lock);
923 }
924
925 static inline void trace_access_lock_init(void)
926 {
927 }
928
929 #endif
930
931 #ifdef CONFIG_STACKTRACE
932 static void __ftrace_trace_stack(struct trace_buffer *buffer,
933                                  unsigned int trace_ctx,
934                                  int skip, struct pt_regs *regs);
935 static inline void ftrace_trace_stack(struct trace_array *tr,
936                                       struct trace_buffer *buffer,
937                                       unsigned int trace_ctx,
938                                       int skip, struct pt_regs *regs);
939
940 #else
941 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
942                                         unsigned int trace_ctx,
943                                         int skip, struct pt_regs *regs)
944 {
945 }
946 static inline void ftrace_trace_stack(struct trace_array *tr,
947                                       struct trace_buffer *buffer,
948                                       unsigned long trace_ctx,
949                                       int skip, struct pt_regs *regs)
950 {
951 }
952
953 #endif
954
955 static __always_inline void
956 trace_event_setup(struct ring_buffer_event *event,
957                   int type, unsigned int trace_ctx)
958 {
959         struct trace_entry *ent = ring_buffer_event_data(event);
960
961         tracing_generic_entry_update(ent, type, trace_ctx);
962 }
963
964 static __always_inline struct ring_buffer_event *
965 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
966                           int type,
967                           unsigned long len,
968                           unsigned int trace_ctx)
969 {
970         struct ring_buffer_event *event;
971
972         event = ring_buffer_lock_reserve(buffer, len);
973         if (event != NULL)
974                 trace_event_setup(event, type, trace_ctx);
975
976         return event;
977 }
978
979 void tracer_tracing_on(struct trace_array *tr)
980 {
981         if (tr->array_buffer.buffer)
982                 ring_buffer_record_on(tr->array_buffer.buffer);
983         /*
984          * This flag is looked at when buffers haven't been allocated
985          * yet, or by some tracers (like irqsoff), that just want to
986          * know if the ring buffer has been disabled, but it can handle
987          * races of where it gets disabled but we still do a record.
988          * As the check is in the fast path of the tracers, it is more
989          * important to be fast than accurate.
990          */
991         tr->buffer_disabled = 0;
992         /* Make the flag seen by readers */
993         smp_wmb();
994 }
995
996 /**
997  * tracing_on - enable tracing buffers
998  *
999  * This function enables tracing buffers that may have been
1000  * disabled with tracing_off.
1001  */
1002 void tracing_on(void)
1003 {
1004         tracer_tracing_on(&global_trace);
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_on);
1007
1008
1009 static __always_inline void
1010 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1011 {
1012         __this_cpu_write(trace_taskinfo_save, true);
1013
1014         /* If this is the temp buffer, we need to commit fully */
1015         if (this_cpu_read(trace_buffered_event) == event) {
1016                 /* Length is in event->array[0] */
1017                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1018                 /* Release the temp buffer */
1019                 this_cpu_dec(trace_buffered_event_cnt);
1020                 /* ring_buffer_unlock_commit() enables preemption */
1021                 preempt_enable_notrace();
1022         } else
1023                 ring_buffer_unlock_commit(buffer);
1024 }
1025
1026 /**
1027  * __trace_puts - write a constant string into the trace buffer.
1028  * @ip:    The address of the caller
1029  * @str:   The constant string to write
1030  * @size:  The size of the string.
1031  */
1032 int __trace_puts(unsigned long ip, const char *str, int size)
1033 {
1034         struct ring_buffer_event *event;
1035         struct trace_buffer *buffer;
1036         struct print_entry *entry;
1037         unsigned int trace_ctx;
1038         int alloc;
1039
1040         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1041                 return 0;
1042
1043         if (unlikely(tracing_selftest_running || tracing_disabled))
1044                 return 0;
1045
1046         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1047
1048         trace_ctx = tracing_gen_ctx();
1049         buffer = global_trace.array_buffer.buffer;
1050         ring_buffer_nest_start(buffer);
1051         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1052                                             trace_ctx);
1053         if (!event) {
1054                 size = 0;
1055                 goto out;
1056         }
1057
1058         entry = ring_buffer_event_data(event);
1059         entry->ip = ip;
1060
1061         memcpy(&entry->buf, str, size);
1062
1063         /* Add a newline if necessary */
1064         if (entry->buf[size - 1] != '\n') {
1065                 entry->buf[size] = '\n';
1066                 entry->buf[size + 1] = '\0';
1067         } else
1068                 entry->buf[size] = '\0';
1069
1070         __buffer_unlock_commit(buffer, event);
1071         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1072  out:
1073         ring_buffer_nest_end(buffer);
1074         return size;
1075 }
1076 EXPORT_SYMBOL_GPL(__trace_puts);
1077
1078 /**
1079  * __trace_bputs - write the pointer to a constant string into trace buffer
1080  * @ip:    The address of the caller
1081  * @str:   The constant string to write to the buffer to
1082  */
1083 int __trace_bputs(unsigned long ip, const char *str)
1084 {
1085         struct ring_buffer_event *event;
1086         struct trace_buffer *buffer;
1087         struct bputs_entry *entry;
1088         unsigned int trace_ctx;
1089         int size = sizeof(struct bputs_entry);
1090         int ret = 0;
1091
1092         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1093                 return 0;
1094
1095         if (unlikely(tracing_selftest_running || tracing_disabled))
1096                 return 0;
1097
1098         trace_ctx = tracing_gen_ctx();
1099         buffer = global_trace.array_buffer.buffer;
1100
1101         ring_buffer_nest_start(buffer);
1102         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1103                                             trace_ctx);
1104         if (!event)
1105                 goto out;
1106
1107         entry = ring_buffer_event_data(event);
1108         entry->ip                       = ip;
1109         entry->str                      = str;
1110
1111         __buffer_unlock_commit(buffer, event);
1112         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1113
1114         ret = 1;
1115  out:
1116         ring_buffer_nest_end(buffer);
1117         return ret;
1118 }
1119 EXPORT_SYMBOL_GPL(__trace_bputs);
1120
1121 #ifdef CONFIG_TRACER_SNAPSHOT
1122 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1123                                            void *cond_data)
1124 {
1125         struct tracer *tracer = tr->current_trace;
1126         unsigned long flags;
1127
1128         if (in_nmi()) {
1129                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1130                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1131                 return;
1132         }
1133
1134         if (!tr->allocated_snapshot) {
1135                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1136                 internal_trace_puts("*** stopping trace here!   ***\n");
1137                 tracing_off();
1138                 return;
1139         }
1140
1141         /* Note, the snapshot cannot be used when the tracer uses it */
1142         if (tracer->use_max_tr) {
1143                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1144                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1145                 return;
1146         }
1147
1148         local_irq_save(flags);
1149         update_max_tr(tr, current, smp_processor_id(), cond_data);
1150         local_irq_restore(flags);
1151 }
1152
1153 void tracing_snapshot_instance(struct trace_array *tr)
1154 {
1155         tracing_snapshot_instance_cond(tr, NULL);
1156 }
1157
1158 /**
1159  * tracing_snapshot - take a snapshot of the current buffer.
1160  *
1161  * This causes a swap between the snapshot buffer and the current live
1162  * tracing buffer. You can use this to take snapshots of the live
1163  * trace when some condition is triggered, but continue to trace.
1164  *
1165  * Note, make sure to allocate the snapshot with either
1166  * a tracing_snapshot_alloc(), or by doing it manually
1167  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1168  *
1169  * If the snapshot buffer is not allocated, it will stop tracing.
1170  * Basically making a permanent snapshot.
1171  */
1172 void tracing_snapshot(void)
1173 {
1174         struct trace_array *tr = &global_trace;
1175
1176         tracing_snapshot_instance(tr);
1177 }
1178 EXPORT_SYMBOL_GPL(tracing_snapshot);
1179
1180 /**
1181  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1182  * @tr:         The tracing instance to snapshot
1183  * @cond_data:  The data to be tested conditionally, and possibly saved
1184  *
1185  * This is the same as tracing_snapshot() except that the snapshot is
1186  * conditional - the snapshot will only happen if the
1187  * cond_snapshot.update() implementation receiving the cond_data
1188  * returns true, which means that the trace array's cond_snapshot
1189  * update() operation used the cond_data to determine whether the
1190  * snapshot should be taken, and if it was, presumably saved it along
1191  * with the snapshot.
1192  */
1193 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1194 {
1195         tracing_snapshot_instance_cond(tr, cond_data);
1196 }
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1198
1199 /**
1200  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1201  * @tr:         The tracing instance
1202  *
1203  * When the user enables a conditional snapshot using
1204  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1205  * with the snapshot.  This accessor is used to retrieve it.
1206  *
1207  * Should not be called from cond_snapshot.update(), since it takes
1208  * the tr->max_lock lock, which the code calling
1209  * cond_snapshot.update() has already done.
1210  *
1211  * Returns the cond_data associated with the trace array's snapshot.
1212  */
1213 void *tracing_cond_snapshot_data(struct trace_array *tr)
1214 {
1215         void *cond_data = NULL;
1216
1217         local_irq_disable();
1218         arch_spin_lock(&tr->max_lock);
1219
1220         if (tr->cond_snapshot)
1221                 cond_data = tr->cond_snapshot->cond_data;
1222
1223         arch_spin_unlock(&tr->max_lock);
1224         local_irq_enable();
1225
1226         return cond_data;
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1229
1230 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1231                                         struct array_buffer *size_buf, int cpu_id);
1232 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1233
1234 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1235 {
1236         int ret;
1237
1238         if (!tr->allocated_snapshot) {
1239
1240                 /* allocate spare buffer */
1241                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1242                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1243                 if (ret < 0)
1244                         return ret;
1245
1246                 tr->allocated_snapshot = true;
1247         }
1248
1249         return 0;
1250 }
1251
1252 static void free_snapshot(struct trace_array *tr)
1253 {
1254         /*
1255          * We don't free the ring buffer; instead, we resize it because
1256          * the max_tr ring buffer has some state (e.g. ring->clock) and
1257          * we want to preserve it.
1258          */
1259         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1260         set_buffer_entries(&tr->max_buffer, 1);
1261         tracing_reset_online_cpus(&tr->max_buffer);
1262         tr->allocated_snapshot = false;
1263 }
1264
1265 /**
1266  * tracing_alloc_snapshot - allocate snapshot buffer.
1267  *
1268  * This only allocates the snapshot buffer if it isn't already
1269  * allocated - it doesn't also take a snapshot.
1270  *
1271  * This is meant to be used in cases where the snapshot buffer needs
1272  * to be set up for events that can't sleep but need to be able to
1273  * trigger a snapshot.
1274  */
1275 int tracing_alloc_snapshot(void)
1276 {
1277         struct trace_array *tr = &global_trace;
1278         int ret;
1279
1280         ret = tracing_alloc_snapshot_instance(tr);
1281         WARN_ON(ret < 0);
1282
1283         return ret;
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1286
1287 /**
1288  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1289  *
1290  * This is similar to tracing_snapshot(), but it will allocate the
1291  * snapshot buffer if it isn't already allocated. Use this only
1292  * where it is safe to sleep, as the allocation may sleep.
1293  *
1294  * This causes a swap between the snapshot buffer and the current live
1295  * tracing buffer. You can use this to take snapshots of the live
1296  * trace when some condition is triggered, but continue to trace.
1297  */
1298 void tracing_snapshot_alloc(void)
1299 {
1300         int ret;
1301
1302         ret = tracing_alloc_snapshot();
1303         if (ret < 0)
1304                 return;
1305
1306         tracing_snapshot();
1307 }
1308 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1309
1310 /**
1311  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1312  * @tr:         The tracing instance
1313  * @cond_data:  User data to associate with the snapshot
1314  * @update:     Implementation of the cond_snapshot update function
1315  *
1316  * Check whether the conditional snapshot for the given instance has
1317  * already been enabled, or if the current tracer is already using a
1318  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1319  * save the cond_data and update function inside.
1320  *
1321  * Returns 0 if successful, error otherwise.
1322  */
1323 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1324                                  cond_update_fn_t update)
1325 {
1326         struct cond_snapshot *cond_snapshot;
1327         int ret = 0;
1328
1329         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1330         if (!cond_snapshot)
1331                 return -ENOMEM;
1332
1333         cond_snapshot->cond_data = cond_data;
1334         cond_snapshot->update = update;
1335
1336         mutex_lock(&trace_types_lock);
1337
1338         ret = tracing_alloc_snapshot_instance(tr);
1339         if (ret)
1340                 goto fail_unlock;
1341
1342         if (tr->current_trace->use_max_tr) {
1343                 ret = -EBUSY;
1344                 goto fail_unlock;
1345         }
1346
1347         /*
1348          * The cond_snapshot can only change to NULL without the
1349          * trace_types_lock. We don't care if we race with it going
1350          * to NULL, but we want to make sure that it's not set to
1351          * something other than NULL when we get here, which we can
1352          * do safely with only holding the trace_types_lock and not
1353          * having to take the max_lock.
1354          */
1355         if (tr->cond_snapshot) {
1356                 ret = -EBUSY;
1357                 goto fail_unlock;
1358         }
1359
1360         local_irq_disable();
1361         arch_spin_lock(&tr->max_lock);
1362         tr->cond_snapshot = cond_snapshot;
1363         arch_spin_unlock(&tr->max_lock);
1364         local_irq_enable();
1365
1366         mutex_unlock(&trace_types_lock);
1367
1368         return ret;
1369
1370  fail_unlock:
1371         mutex_unlock(&trace_types_lock);
1372         kfree(cond_snapshot);
1373         return ret;
1374 }
1375 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
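/*
 * A minimal sketch of the conditional-snapshot flow (hypothetical names;
 * cond_update_fn_t is the update callback type declared in trace.h):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return ...;	decide whether this snapshot should be taken
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */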
1376
1377 /**
1378  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1379  * @tr:         The tracing instance
1380  *
1381  * Check whether the conditional snapshot for the given instance is
1382  * enabled; if so, free the cond_snapshot associated with it,
1383  * otherwise return -EINVAL.
1384  *
1385  * Returns 0 if successful, error otherwise.
1386  */
1387 int tracing_snapshot_cond_disable(struct trace_array *tr)
1388 {
1389         int ret = 0;
1390
1391         local_irq_disable();
1392         arch_spin_lock(&tr->max_lock);
1393
1394         if (!tr->cond_snapshot)
1395                 ret = -EINVAL;
1396         else {
1397                 kfree(tr->cond_snapshot);
1398                 tr->cond_snapshot = NULL;
1399         }
1400
1401         arch_spin_unlock(&tr->max_lock);
1402         local_irq_enable();
1403
1404         return ret;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1407 #else
1408 void tracing_snapshot(void)
1409 {
1410         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot);
1413 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1414 {
1415         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1418 int tracing_alloc_snapshot(void)
1419 {
1420         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1421         return -ENODEV;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1424 void tracing_snapshot_alloc(void)
1425 {
1426         /* Give warning */
1427         tracing_snapshot();
1428 }
1429 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1430 void *tracing_cond_snapshot_data(struct trace_array *tr)
1431 {
1432         return NULL;
1433 }
1434 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1435 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1436 {
1437         return -ENODEV;
1438 }
1439 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1440 int tracing_snapshot_cond_disable(struct trace_array *tr)
1441 {
1442         return false;
1443 }
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1445 #define free_snapshot(tr)       do { } while (0)
1446 #endif /* CONFIG_TRACER_SNAPSHOT */
1447
1448 void tracer_tracing_off(struct trace_array *tr)
1449 {
1450         if (tr->array_buffer.buffer)
1451                 ring_buffer_record_off(tr->array_buffer.buffer);
1452         /*
1453          * This flag is looked at when buffers haven't been allocated
1454          * yet, or by some tracers (like irqsoff), that just want to
1455          * know if the ring buffer has been disabled, but it can handle
1456          * races of where it gets disabled but we still do a record.
1457          * As the check is in the fast path of the tracers, it is more
1458          * important to be fast than accurate.
1459          */
1460         tr->buffer_disabled = 1;
1461         /* Make the flag seen by readers */
1462         smp_wmb();
1463 }
1464
1465 /**
1466  * tracing_off - turn off tracing buffers
1467  *
1468  * This function stops the tracing buffers from recording data.
1469  * It does not disable any overhead the tracers themselves may
1470  * be causing. This function simply causes all recording to
1471  * the ring buffers to fail.
1472  */
1473 void tracing_off(void)
1474 {
1475         tracer_tracing_off(&global_trace);
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_off);
1478
1479 void disable_trace_on_warning(void)
1480 {
1481         if (__disable_trace_on_warning) {
1482                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1483                         "Disabling tracing due to warning\n");
1484                 tracing_off();
1485         }
1486 }
1487
1488 /**
1489  * tracer_tracing_is_on - show real state of ring buffer enabled
1490  * @tr : the trace array to know if ring buffer is enabled
1491  *
1492  * Shows real state of the ring buffer if it is enabled or not.
1493  */
1494 bool tracer_tracing_is_on(struct trace_array *tr)
1495 {
1496         if (tr->array_buffer.buffer)
1497                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1498         return !tr->buffer_disabled;
1499 }
1500
1501 /**
1502  * tracing_is_on - show state of ring buffers enabled
1503  */
1504 int tracing_is_on(void)
1505 {
1506         return tracer_tracing_is_on(&global_trace);
1507 }
1508 EXPORT_SYMBOL_GPL(tracing_is_on);
1509
1510 static int __init set_buf_size(char *str)
1511 {
1512         unsigned long buf_size;
1513
1514         if (!str)
1515                 return 0;
1516         buf_size = memparse(str, &str);
1517         /*
1518          * nr_entries cannot be zero and the startup
1519          * tests require some buffer space. Therefore
1520          * ensure we have at least 4096 bytes of buffer.
1521          */
1522         trace_buf_size = max(4096UL, buf_size);
1523         return 1;
1524 }
1525 __setup("trace_buf_size=", set_buf_size);
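/*
 * Illustrative usage, based on the handler above: memparse() accepts size
 * suffixes, so e.g. "trace_buf_size=1M" requests a one-megabyte buffer
 * (clamped to at least 4096 bytes).
 */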
1526
1527 static int __init set_tracing_thresh(char *str)
1528 {
1529         unsigned long threshold;
1530         int ret;
1531
1532         if (!str)
1533                 return 0;
1534         ret = kstrtoul(str, 0, &threshold);
1535         if (ret < 0)
1536                 return 0;
1537         tracing_thresh = threshold * 1000;
1538         return 1;
1539 }
1540 __setup("tracing_thresh=", set_tracing_thresh);
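/*
 * Illustrative usage, based on the handler above: "tracing_thresh=100"
 * sets the threshold to 100 microseconds (stored internally in
 * nanoseconds, hence the multiplication by 1000).
 */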
1541
1542 unsigned long nsecs_to_usecs(unsigned long nsecs)
1543 {
1544         return nsecs / 1000;
1545 }
1546
1547 /*
1548  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1549  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1550  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1551  * of strings in the order that the evals (enum) were defined.
1552  */
1553 #undef C
1554 #define C(a, b) b
1555
1556 /* These must match the bit positions in trace_iterator_flags */
1557 static const char *trace_options[] = {
1558         TRACE_FLAGS
1559         NULL
1560 };
1561
1562 static struct {
1563         u64 (*func)(void);
1564         const char *name;
1565         int in_ns;              /* is this clock in nanoseconds? */
1566 } trace_clocks[] = {
1567         { trace_clock_local,            "local",        1 },
1568         { trace_clock_global,           "global",       1 },
1569         { trace_clock_counter,          "counter",      0 },
1570         { trace_clock_jiffies,          "uptime",       0 },
1571         { trace_clock,                  "perf",         1 },
1572         { ktime_get_mono_fast_ns,       "mono",         1 },
1573         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1574         { ktime_get_boot_fast_ns,       "boot",         1 },
1575         { ktime_get_tai_fast_ns,        "tai",          1 },
1576         ARCH_TRACE_CLOCKS
1577 };
1578
1579 bool trace_clock_in_ns(struct trace_array *tr)
1580 {
1581         if (trace_clocks[tr->clock_id].in_ns)
1582                 return true;
1583
1584         return false;
1585 }
1586
1587 /*
1588  * trace_parser_get_init - gets the buffer for trace parser
1589  */
1590 int trace_parser_get_init(struct trace_parser *parser, int size)
1591 {
1592         memset(parser, 0, sizeof(*parser));
1593
1594         parser->buffer = kmalloc(size, GFP_KERNEL);
1595         if (!parser->buffer)
1596                 return 1;
1597
1598         parser->size = size;
1599         return 0;
1600 }
1601
1602 /*
1603  * trace_parser_put - frees the buffer for trace parser
1604  */
1605 void trace_parser_put(struct trace_parser *parser)
1606 {
1607         kfree(parser->buffer);
1608         parser->buffer = NULL;
1609 }
1610
1611 /*
1612  * trace_get_user - reads the user input string separated by  space
1613  * (matched by isspace(ch))
1614  *
1615  * For each string found the 'struct trace_parser' is updated,
1616  * and the function returns.
1617  *
1618  * Returns number of bytes read.
1619  *
1620  * See kernel/trace/trace.h for 'struct trace_parser' details.
1621  */
1622 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1623         size_t cnt, loff_t *ppos)
1624 {
1625         char ch;
1626         size_t read = 0;
1627         ssize_t ret;
1628
1629         if (!*ppos)
1630                 trace_parser_clear(parser);
1631
1632         ret = get_user(ch, ubuf++);
1633         if (ret)
1634                 goto out;
1635
1636         read++;
1637         cnt--;
1638
1639         /*
1640          * If the parser is not finished with the last write, continue
1641          * reading the user input without skipping spaces.
1642          */
1643         if (!parser->cont) {
1644                 /* skip white space */
1645                 while (cnt && isspace(ch)) {
1646                         ret = get_user(ch, ubuf++);
1647                         if (ret)
1648                                 goto out;
1649                         read++;
1650                         cnt--;
1651                 }
1652
1653                 parser->idx = 0;
1654
1655                 /* only spaces were written */
1656                 if (isspace(ch) || !ch) {
1657                         *ppos += read;
1658                         ret = read;
1659                         goto out;
1660                 }
1661         }
1662
1663         /* read the non-space input */
1664         while (cnt && !isspace(ch) && ch) {
1665                 if (parser->idx < parser->size - 1)
1666                         parser->buffer[parser->idx++] = ch;
1667                 else {
1668                         ret = -EINVAL;
1669                         goto out;
1670                 }
1671                 ret = get_user(ch, ubuf++);
1672                 if (ret)
1673                         goto out;
1674                 read++;
1675                 cnt--;
1676         }
1677
1678         /* Either the input is complete, or we must wait for another call to finish it. */
1679         if (isspace(ch) || !ch) {
1680                 parser->buffer[parser->idx] = 0;
1681                 parser->cont = false;
1682         } else if (parser->idx < parser->size - 1) {
1683                 parser->cont = true;
1684                 parser->buffer[parser->idx++] = ch;
1685                 /* Make sure the parsed string always terminates with '\0'. */
1686                 parser->buffer[parser->idx] = 0;
1687         } else {
1688                 ret = -EINVAL;
1689                 goto out;
1690         }
1691
1692         *ppos += read;
1693         ret = read;
1694
1695 out:
1696         return ret;
1697 }
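
/*
 * A sketch of the usual calling pattern for trace_get_user() from a
 * tracefs ->write() handler: one token is consumed per call and
 * parser.cont carries a partial token across writes. The handler name
 * and the process_token() consumer are hypothetical.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && parser.idx && !parser.cont)
 *			process_token(parser.buffer);	// hypothetical consumer
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */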
1698
1699 /* TODO add a seq_buf_to_buffer() */
1700 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1701 {
1702         int len;
1703
1704         if (trace_seq_used(s) <= s->seq.readpos)
1705                 return -EBUSY;
1706
1707         len = trace_seq_used(s) - s->seq.readpos;
1708         if (cnt > len)
1709                 cnt = len;
1710         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1711
1712         s->seq.readpos += cnt;
1713         return cnt;
1714 }
1715
1716 unsigned long __read_mostly     tracing_thresh;
1717
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 static const struct file_operations tracing_max_lat_fops;
1720
1721 #ifdef LATENCY_FS_NOTIFY
1722
1723 static struct workqueue_struct *fsnotify_wq;
1724
1725 static void latency_fsnotify_workfn(struct work_struct *work)
1726 {
1727         struct trace_array *tr = container_of(work, struct trace_array,
1728                                               fsnotify_work);
1729         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1730 }
1731
1732 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1733 {
1734         struct trace_array *tr = container_of(iwork, struct trace_array,
1735                                               fsnotify_irqwork);
1736         queue_work(fsnotify_wq, &tr->fsnotify_work);
1737 }
1738
1739 static void trace_create_maxlat_file(struct trace_array *tr,
1740                                      struct dentry *d_tracer)
1741 {
1742         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1743         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1744         tr->d_max_latency = trace_create_file("tracing_max_latency",
1745                                               TRACE_MODE_WRITE,
1746                                               d_tracer, &tr->max_latency,
1747                                               &tracing_max_lat_fops);
1748 }
1749
1750 __init static int latency_fsnotify_init(void)
1751 {
1752         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1753                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1754         if (!fsnotify_wq) {
1755                 pr_err("Unable to allocate tr_max_lat_wq\n");
1756                 return -ENOMEM;
1757         }
1758         return 0;
1759 }
1760
1761 late_initcall_sync(latency_fsnotify_init);
1762
1763 void latency_fsnotify(struct trace_array *tr)
1764 {
1765         if (!fsnotify_wq)
1766                 return;
1767         /*
1768          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1769          * possible that we are called from __schedule() or do_idle(), which
1770          * could cause a deadlock.
1771          */
1772         irq_work_queue(&tr->fsnotify_irqwork);
1773 }
1774
1775 #else /* !LATENCY_FS_NOTIFY */
1776
1777 #define trace_create_maxlat_file(tr, d_tracer)                          \
1778         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1779                           d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1780
1781 #endif
1782
1783 /*
1784  * Copy the new maximum trace into the separate maximum-trace
1785  * structure. (This way the maximum trace is permanently saved
1786  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1787  */
1788 static void
1789 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1790 {
1791         struct array_buffer *trace_buf = &tr->array_buffer;
1792         struct array_buffer *max_buf = &tr->max_buffer;
1793         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1794         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1795
1796         max_buf->cpu = cpu;
1797         max_buf->time_start = data->preempt_timestamp;
1798
1799         max_data->saved_latency = tr->max_latency;
1800         max_data->critical_start = data->critical_start;
1801         max_data->critical_end = data->critical_end;
1802
1803         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1804         max_data->pid = tsk->pid;
1805         /*
1806          * If tsk == current, then use current_uid(), as that does not use
1807          * RCU. The irq tracer can be called out of RCU scope.
1808          */
1809         if (tsk == current)
1810                 max_data->uid = current_uid();
1811         else
1812                 max_data->uid = task_uid(tsk);
1813
1814         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1815         max_data->policy = tsk->policy;
1816         max_data->rt_priority = tsk->rt_priority;
1817
1818         /* record this task's comm */
1819         tracing_record_cmdline(tsk);
1820         latency_fsnotify(tr);
1821 }
1822
1823 /**
1824  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1825  * @tr: trace array to snapshot
1826  * @tsk: the task with the latency
1827  * @cpu: The cpu that initiated the trace.
1828  * @cond_data: User data associated with a conditional snapshot
1829  *
1830  * Flip the buffers between the @tr and the max_tr and record information
1831  * about which task was the cause of this latency.
1832  */
1833 void
1834 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1835               void *cond_data)
1836 {
1837         if (tr->stop_count)
1838                 return;
1839
1840         WARN_ON_ONCE(!irqs_disabled());
1841
1842         if (!tr->allocated_snapshot) {
1843                 /* Only the nop tracer should hit this when disabling */
1844                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1845                 return;
1846         }
1847
1848         arch_spin_lock(&tr->max_lock);
1849
1850         /* Inherit the recordable setting from array_buffer */
1851         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1852                 ring_buffer_record_on(tr->max_buffer.buffer);
1853         else
1854                 ring_buffer_record_off(tr->max_buffer.buffer);
1855
1856 #ifdef CONFIG_TRACER_SNAPSHOT
1857         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1858                 arch_spin_unlock(&tr->max_lock);
1859                 return;
1860         }
1861 #endif
1862         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1863
1864         __update_max_tr(tr, tsk, cpu);
1865
1866         arch_spin_unlock(&tr->max_lock);
1867 }
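
/*
 * A rough sketch (not taken verbatim from any tracer) of how a latency
 * tracer is expected to use update_max_tr(): compare the measured delta
 * against tr->max_latency and snapshot only when a new maximum is seen,
 * with interrupts already disabled by the caller.
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */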
1868
1869 /**
1870  * update_max_tr_single - only copy one trace over, and reset the rest
1871  * @tr: trace array to snapshot
1872  * @tsk: task with the latency
1873  * @cpu: the cpu of the buffer to copy.
1874  *
1875  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1876  */
1877 void
1878 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1879 {
1880         int ret;
1881
1882         if (tr->stop_count)
1883                 return;
1884
1885         WARN_ON_ONCE(!irqs_disabled());
1886         if (!tr->allocated_snapshot) {
1887                 /* Only the nop tracer should hit this when disabling */
1888                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1889                 return;
1890         }
1891
1892         arch_spin_lock(&tr->max_lock);
1893
1894         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1895
1896         if (ret == -EBUSY) {
1897                 /*
1898                  * We failed to swap the buffer due to a commit taking
1899                  * place on this CPU. We fail to record, but we reset
1900                  * the max trace buffer (no one writes directly to it)
1901                  * and flag that it failed.
1902                  */
1903                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1904                         "Failed to swap buffers due to commit in progress\n");
1905         }
1906
1907         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1908
1909         __update_max_tr(tr, tsk, cpu);
1910         arch_spin_unlock(&tr->max_lock);
1911 }
1912
1913 #endif /* CONFIG_TRACER_MAX_TRACE */
1914
1915 static int wait_on_pipe(struct trace_iterator *iter, int full)
1916 {
1917         /* Iterators are static, they should be filled or empty */
1918         if (trace_buffer_iter(iter, iter->cpu_file))
1919                 return 0;
1920
1921         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1922                                 full);
1923 }
1924
1925 #ifdef CONFIG_FTRACE_STARTUP_TEST
1926 static bool selftests_can_run;
1927
1928 struct trace_selftests {
1929         struct list_head                list;
1930         struct tracer                   *type;
1931 };
1932
1933 static LIST_HEAD(postponed_selftests);
1934
1935 static int save_selftest(struct tracer *type)
1936 {
1937         struct trace_selftests *selftest;
1938
1939         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1940         if (!selftest)
1941                 return -ENOMEM;
1942
1943         selftest->type = type;
1944         list_add(&selftest->list, &postponed_selftests);
1945         return 0;
1946 }
1947
1948 static int run_tracer_selftest(struct tracer *type)
1949 {
1950         struct trace_array *tr = &global_trace;
1951         struct tracer *saved_tracer = tr->current_trace;
1952         int ret;
1953
1954         if (!type->selftest || tracing_selftest_disabled)
1955                 return 0;
1956
1957         /*
1958          * If a tracer registers early in boot up (before scheduling is
1959          * initialized and such), then do not run its selftests yet.
1960          * Instead, run it a little later in the boot process.
1961          */
1962         if (!selftests_can_run)
1963                 return save_selftest(type);
1964
1965         if (!tracing_is_on()) {
1966                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1967                         type->name);
1968                 return 0;
1969         }
1970
1971         /*
1972          * Run a selftest on this tracer.
1973          * Here we reset the trace buffer, and set the current
1974          * tracer to be this tracer. The tracer can then run some
1975          * internal tracing to verify that everything is in order.
1976          * If we fail, we do not register this tracer.
1977          */
1978         tracing_reset_online_cpus(&tr->array_buffer);
1979
1980         tr->current_trace = type;
1981
1982 #ifdef CONFIG_TRACER_MAX_TRACE
1983         if (type->use_max_tr) {
1984                 /* If we expanded the buffers, make sure the max is expanded too */
1985                 if (ring_buffer_expanded)
1986                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1987                                            RING_BUFFER_ALL_CPUS);
1988                 tr->allocated_snapshot = true;
1989         }
1990 #endif
1991
1992         /* the test is responsible for initializing and enabling */
1993         pr_info("Testing tracer %s: ", type->name);
1994         ret = type->selftest(type, tr);
1995         /* the test is responsible for resetting too */
1996         tr->current_trace = saved_tracer;
1997         if (ret) {
1998                 printk(KERN_CONT "FAILED!\n");
1999                 /* Add the warning after printing 'FAILED' */
2000                 WARN_ON(1);
2001                 return -1;
2002         }
2003         /* Only reset on passing, to avoid touching corrupted buffers */
2004         tracing_reset_online_cpus(&tr->array_buffer);
2005
2006 #ifdef CONFIG_TRACER_MAX_TRACE
2007         if (type->use_max_tr) {
2008                 tr->allocated_snapshot = false;
2009
2010                 /* Shrink the max buffer again */
2011                 if (ring_buffer_expanded)
2012                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2013                                            RING_BUFFER_ALL_CPUS);
2014         }
2015 #endif
2016
2017         printk(KERN_CONT "PASSED\n");
2018         return 0;
2019 }
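
/*
 * What a tracer-supplied selftest roughly looks like: it is handed the
 * tracer and the global trace array, generates some activity, and returns
 * 0 on success. This is a simplified, hypothetical example, not one of
 * the real selftests in trace_selftest.c.
 *
 *	static int example_selftest(struct tracer *trace, struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = trace->init(tr);		// start the tracer
 *		if (ret)
 *			return ret;
 *		msleep(100);			// let it record something
 *		tracing_stop();
 *		// ... examine tr->array_buffer entries here ...
 *		tracing_start();
 *		trace->reset(tr);
 *		return 0;
 *	}
 */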
2020
2021 static __init int init_trace_selftests(void)
2022 {
2023         struct trace_selftests *p, *n;
2024         struct tracer *t, **last;
2025         int ret;
2026
2027         selftests_can_run = true;
2028
2029         mutex_lock(&trace_types_lock);
2030
2031         if (list_empty(&postponed_selftests))
2032                 goto out;
2033
2034         pr_info("Running postponed tracer tests:\n");
2035
2036         tracing_selftest_running = true;
2037         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2038                 /* This loop can take minutes when sanitizers are enabled, so
2039                  * let's make sure we allow RCU processing.
2040                  */
2041                 cond_resched();
2042                 ret = run_tracer_selftest(p->type);
2043                 /* If the test fails, then warn and remove from available_tracers */
2044                 if (ret < 0) {
2045                         WARN(1, "tracer: %s failed selftest, disabling\n",
2046                              p->type->name);
2047                         last = &trace_types;
2048                         for (t = trace_types; t; t = t->next) {
2049                                 if (t == p->type) {
2050                                         *last = t->next;
2051                                         break;
2052                                 }
2053                                 last = &t->next;
2054                         }
2055                 }
2056                 list_del(&p->list);
2057                 kfree(p);
2058         }
2059         tracing_selftest_running = false;
2060
2061  out:
2062         mutex_unlock(&trace_types_lock);
2063
2064         return 0;
2065 }
2066 core_initcall(init_trace_selftests);
2067 #else
2068 static inline int run_tracer_selftest(struct tracer *type)
2069 {
2070         return 0;
2071 }
2072 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2073
2074 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2075
2076 static void __init apply_trace_boot_options(void);
2077
2078 /**
2079  * register_tracer - register a tracer with the ftrace system.
2080  * @type: the plugin for the tracer
2081  *
2082  * Register a new plugin tracer.
2083  */
2084 int __init register_tracer(struct tracer *type)
2085 {
2086         struct tracer *t;
2087         int ret = 0;
2088
2089         if (!type->name) {
2090                 pr_info("Tracer must have a name\n");
2091                 return -1;
2092         }
2093
2094         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2095                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2096                 return -1;
2097         }
2098
2099         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2100                 pr_warn("Can not register tracer %s due to lockdown\n",
2101                            type->name);
2102                 return -EPERM;
2103         }
2104
2105         mutex_lock(&trace_types_lock);
2106
2107         tracing_selftest_running = true;
2108
2109         for (t = trace_types; t; t = t->next) {
2110                 if (strcmp(type->name, t->name) == 0) {
2111                         /* already found */
2112                         pr_info("Tracer %s already registered\n",
2113                                 type->name);
2114                         ret = -1;
2115                         goto out;
2116                 }
2117         }
2118
2119         if (!type->set_flag)
2120                 type->set_flag = &dummy_set_flag;
2121         if (!type->flags) {
2122                 /* allocate a dummy tracer_flags */
2123                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2124                 if (!type->flags) {
2125                         ret = -ENOMEM;
2126                         goto out;
2127                 }
2128                 type->flags->val = 0;
2129                 type->flags->opts = dummy_tracer_opt;
2130         } else
2131                 if (!type->flags->opts)
2132                         type->flags->opts = dummy_tracer_opt;
2133
2134         /* store the tracer for __set_tracer_option */
2135         type->flags->trace = type;
2136
2137         ret = run_tracer_selftest(type);
2138         if (ret < 0)
2139                 goto out;
2140
2141         type->next = trace_types;
2142         trace_types = type;
2143         add_tracer_options(&global_trace, type);
2144
2145  out:
2146         tracing_selftest_running = false;
2147         mutex_unlock(&trace_types_lock);
2148
2149         if (ret || !default_bootup_tracer)
2150                 goto out_unlock;
2151
2152         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2153                 goto out_unlock;
2154
2155         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2156         /* Do we want this tracer to start on bootup? */
2157         tracing_set_tracer(&global_trace, type->name);
2158         default_bootup_tracer = NULL;
2159
2160         apply_trace_boot_options();
2161
2162         /* Disable other selftests, since running this tracer will break them. */
2163         disable_tracing_selftest("running a tracer");
2164
2165  out_unlock:
2166         return ret;
2167 }
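
/*
 * Registration is typically done from an __init function with a statically
 * defined struct tracer. A minimal, hypothetical example (the real in-tree
 * users live in the trace_*.c tracer files, and example_tracer_init/reset
 * stand in for the tracer's own callbacks):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */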
2168
2169 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2170 {
2171         struct trace_buffer *buffer = buf->buffer;
2172
2173         if (!buffer)
2174                 return;
2175
2176         ring_buffer_record_disable(buffer);
2177
2178         /* Make sure all commits have finished */
2179         synchronize_rcu();
2180         ring_buffer_reset_cpu(buffer, cpu);
2181
2182         ring_buffer_record_enable(buffer);
2183 }
2184
2185 void tracing_reset_online_cpus(struct array_buffer *buf)
2186 {
2187         struct trace_buffer *buffer = buf->buffer;
2188
2189         if (!buffer)
2190                 return;
2191
2192         ring_buffer_record_disable(buffer);
2193
2194         /* Make sure all commits have finished */
2195         synchronize_rcu();
2196
2197         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2198
2199         ring_buffer_reset_online_cpus(buffer);
2200
2201         ring_buffer_record_enable(buffer);
2202 }
2203
2204 /* Must have trace_types_lock held */
2205 void tracing_reset_all_online_cpus_unlocked(void)
2206 {
2207         struct trace_array *tr;
2208
2209         lockdep_assert_held(&trace_types_lock);
2210
2211         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2212                 if (!tr->clear_trace)
2213                         continue;
2214                 tr->clear_trace = false;
2215                 tracing_reset_online_cpus(&tr->array_buffer);
2216 #ifdef CONFIG_TRACER_MAX_TRACE
2217                 tracing_reset_online_cpus(&tr->max_buffer);
2218 #endif
2219         }
2220 }
2221
2222 void tracing_reset_all_online_cpus(void)
2223 {
2224         mutex_lock(&trace_types_lock);
2225         tracing_reset_all_online_cpus_unlocked();
2226         mutex_unlock(&trace_types_lock);
2227 }
2228
2229 /*
2230  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2231  * is the tgid last observed corresponding to pid=i.
2232  */
2233 static int *tgid_map;
2234
2235 /* The maximum valid index into tgid_map. */
2236 static size_t tgid_map_max;
2237
2238 #define SAVED_CMDLINES_DEFAULT 128
2239 #define NO_CMDLINE_MAP UINT_MAX
2240 /*
2241  * Preemption must be disabled before acquiring trace_cmdline_lock.
2242  * The various trace_arrays' max_lock must be acquired in a context
2243  * where interrupt is disabled.
2244  */
2245 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2246 struct saved_cmdlines_buffer {
2247         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2248         unsigned *map_cmdline_to_pid;
2249         unsigned cmdline_num;
2250         int cmdline_idx;
2251         char *saved_cmdlines;
2252 };
2253 static struct saved_cmdlines_buffer *savedcmd;
2254
2255 static inline char *get_saved_cmdlines(int idx)
2256 {
2257         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2258 }
2259
2260 static inline void set_cmdline(int idx, const char *cmdline)
2261 {
2262         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2263 }
2264
2265 static int allocate_cmdlines_buffer(unsigned int val,
2266                                     struct saved_cmdlines_buffer *s)
2267 {
2268         s->map_cmdline_to_pid = kmalloc_array(val,
2269                                               sizeof(*s->map_cmdline_to_pid),
2270                                               GFP_KERNEL);
2271         if (!s->map_cmdline_to_pid)
2272                 return -ENOMEM;
2273
2274         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2275         if (!s->saved_cmdlines) {
2276                 kfree(s->map_cmdline_to_pid);
2277                 return -ENOMEM;
2278         }
2279
2280         s->cmdline_idx = 0;
2281         s->cmdline_num = val;
2282         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2283                sizeof(s->map_pid_to_cmdline));
2284         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2285                val * sizeof(*s->map_cmdline_to_pid));
2286
2287         return 0;
2288 }
2289
2290 static int trace_create_savedcmd(void)
2291 {
2292         int ret;
2293
2294         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2295         if (!savedcmd)
2296                 return -ENOMEM;
2297
2298         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2299         if (ret < 0) {
2300                 kfree(savedcmd);
2301                 savedcmd = NULL;
2302                 return -ENOMEM;
2303         }
2304
2305         return 0;
2306 }
2307
2308 int is_tracing_stopped(void)
2309 {
2310         return global_trace.stop_count;
2311 }
2312
2313 /**
2314  * tracing_start - quick start of the tracer
2315  *
2316  * If tracing is enabled but was stopped by tracing_stop,
2317  * this will start the tracer back up.
2318  */
2319 void tracing_start(void)
2320 {
2321         struct trace_buffer *buffer;
2322         unsigned long flags;
2323
2324         if (tracing_disabled)
2325                 return;
2326
2327         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2328         if (--global_trace.stop_count) {
2329                 if (global_trace.stop_count < 0) {
2330                         /* Someone screwed up their debugging */
2331                         WARN_ON_ONCE(1);
2332                         global_trace.stop_count = 0;
2333                 }
2334                 goto out;
2335         }
2336
2337         /* Prevent the buffers from switching */
2338         arch_spin_lock(&global_trace.max_lock);
2339
2340         buffer = global_trace.array_buffer.buffer;
2341         if (buffer)
2342                 ring_buffer_record_enable(buffer);
2343
2344 #ifdef CONFIG_TRACER_MAX_TRACE
2345         buffer = global_trace.max_buffer.buffer;
2346         if (buffer)
2347                 ring_buffer_record_enable(buffer);
2348 #endif
2349
2350         arch_spin_unlock(&global_trace.max_lock);
2351
2352  out:
2353         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2354 }
2355
2356 static void tracing_start_tr(struct trace_array *tr)
2357 {
2358         struct trace_buffer *buffer;
2359         unsigned long flags;
2360
2361         if (tracing_disabled)
2362                 return;
2363
2364         /* If global, we need to also start the max tracer */
2365         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366                 return tracing_start();
2367
2368         raw_spin_lock_irqsave(&tr->start_lock, flags);
2369
2370         if (--tr->stop_count) {
2371                 if (tr->stop_count < 0) {
2372                         /* Someone screwed up their debugging */
2373                         WARN_ON_ONCE(1);
2374                         tr->stop_count = 0;
2375                 }
2376                 goto out;
2377         }
2378
2379         buffer = tr->array_buffer.buffer;
2380         if (buffer)
2381                 ring_buffer_record_enable(buffer);
2382
2383  out:
2384         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2385 }
2386
2387 /**
2388  * tracing_stop - quick stop of the tracer
2389  *
2390  * Lightweight way to stop tracing. Use in conjunction with
2391  * tracing_start.
2392  */
2393 void tracing_stop(void)
2394 {
2395         struct trace_buffer *buffer;
2396         unsigned long flags;
2397
2398         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2399         if (global_trace.stop_count++)
2400                 goto out;
2401
2402         /* Prevent the buffers from switching */
2403         arch_spin_lock(&global_trace.max_lock);
2404
2405         buffer = global_trace.array_buffer.buffer;
2406         if (buffer)
2407                 ring_buffer_record_disable(buffer);
2408
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410         buffer = global_trace.max_buffer.buffer;
2411         if (buffer)
2412                 ring_buffer_record_disable(buffer);
2413 #endif
2414
2415         arch_spin_unlock(&global_trace.max_lock);
2416
2417  out:
2418         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2419 }
2420
2421 static void tracing_stop_tr(struct trace_array *tr)
2422 {
2423         struct trace_buffer *buffer;
2424         unsigned long flags;
2425
2426         /* If global, we need to also stop the max tracer */
2427         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2428                 return tracing_stop();
2429
2430         raw_spin_lock_irqsave(&tr->start_lock, flags);
2431         if (tr->stop_count++)
2432                 goto out;
2433
2434         buffer = tr->array_buffer.buffer;
2435         if (buffer)
2436                 ring_buffer_record_disable(buffer);
2437
2438  out:
2439         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2440 }
2441
2442 static int trace_save_cmdline(struct task_struct *tsk)
2443 {
2444         unsigned tpid, idx;
2445
2446         /* treat recording of idle task as a success */
2447         if (!tsk->pid)
2448                 return 1;
2449
2450         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2451
2452         /*
2453          * It's not the end of the world if we don't get
2454          * the lock, but we also don't want to spin
2455          * nor do we want to disable interrupts,
2456          * so if we miss here, then better luck next time.
2457          *
2458          * This is called from within the scheduler and the wakeup path, so
2459          * interrupts had better be disabled and the run queue lock held.
2460          */
2461         lockdep_assert_preemption_disabled();
2462         if (!arch_spin_trylock(&trace_cmdline_lock))
2463                 return 0;
2464
2465         idx = savedcmd->map_pid_to_cmdline[tpid];
2466         if (idx == NO_CMDLINE_MAP) {
2467                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2468
2469                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2470                 savedcmd->cmdline_idx = idx;
2471         }
2472
2473         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2474         set_cmdline(idx, tsk->comm);
2475
2476         arch_spin_unlock(&trace_cmdline_lock);
2477
2478         return 1;
2479 }
2480
2481 static void __trace_find_cmdline(int pid, char comm[])
2482 {
2483         unsigned map;
2484         int tpid;
2485
2486         if (!pid) {
2487                 strcpy(comm, "<idle>");
2488                 return;
2489         }
2490
2491         if (WARN_ON_ONCE(pid < 0)) {
2492                 strcpy(comm, "<XXX>");
2493                 return;
2494         }
2495
2496         tpid = pid & (PID_MAX_DEFAULT - 1);
2497         map = savedcmd->map_pid_to_cmdline[tpid];
2498         if (map != NO_CMDLINE_MAP) {
2499                 tpid = savedcmd->map_cmdline_to_pid[map];
2500                 if (tpid == pid) {
2501                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2502                         return;
2503                 }
2504         }
2505         strcpy(comm, "<...>");
2506 }
2507
2508 void trace_find_cmdline(int pid, char comm[])
2509 {
2510         preempt_disable();
2511         arch_spin_lock(&trace_cmdline_lock);
2512
2513         __trace_find_cmdline(pid, comm);
2514
2515         arch_spin_unlock(&trace_cmdline_lock);
2516         preempt_enable();
2517 }
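
/*
 * Typical use from the output side, e.g. when rendering an event header
 * (sketch only; "entry" stands for any trace entry carrying a pid and
 * "s" for the trace_seq being filled):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d", comm, entry->pid);
 */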
2518
2519 static int *trace_find_tgid_ptr(int pid)
2520 {
2521         /*
2522          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2523          * if we observe a non-NULL tgid_map then we also observe the correct
2524          * tgid_map_max.
2525          */
2526         int *map = smp_load_acquire(&tgid_map);
2527
2528         if (unlikely(!map || pid > tgid_map_max))
2529                 return NULL;
2530
2531         return &map[pid];
2532 }
2533
2534 int trace_find_tgid(int pid)
2535 {
2536         int *ptr = trace_find_tgid_ptr(pid);
2537
2538         return ptr ? *ptr : 0;
2539 }
2540
2541 static int trace_save_tgid(struct task_struct *tsk)
2542 {
2543         int *ptr;
2544
2545         /* treat recording of idle task as a success */
2546         if (!tsk->pid)
2547                 return 1;
2548
2549         ptr = trace_find_tgid_ptr(tsk->pid);
2550         if (!ptr)
2551                 return 0;
2552
2553         *ptr = tsk->tgid;
2554         return 1;
2555 }
2556
2557 static bool tracing_record_taskinfo_skip(int flags)
2558 {
2559         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2560                 return true;
2561         if (!__this_cpu_read(trace_taskinfo_save))
2562                 return true;
2563         return false;
2564 }
2565
2566 /**
2567  * tracing_record_taskinfo - record the task info of a task
2568  *
2569  * @task:  task to record
2570  * @flags: TRACE_RECORD_CMDLINE for recording comm
2571  *         TRACE_RECORD_TGID for recording tgid
2572  */
2573 void tracing_record_taskinfo(struct task_struct *task, int flags)
2574 {
2575         bool done;
2576
2577         if (tracing_record_taskinfo_skip(flags))
2578                 return;
2579
2580         /*
2581          * Record as much task information as possible. If some fail, continue
2582          * to try to record the others.
2583          */
2584         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2585         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2586
2587         /* If recording any information failed, retry again soon. */
2588         if (!done)
2589                 return;
2590
2591         __this_cpu_write(trace_taskinfo_save, false);
2592 }
2593
2594 /**
2595  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2596  *
2597  * @prev: previous task during sched_switch
2598  * @next: next task during sched_switch
2599  * @flags: TRACE_RECORD_CMDLINE for recording comm
2600  *         TRACE_RECORD_TGID for recording tgid
2601  */
2602 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2603                                           struct task_struct *next, int flags)
2604 {
2605         bool done;
2606
2607         if (tracing_record_taskinfo_skip(flags))
2608                 return;
2609
2610         /*
2611          * Record as much task information as possible. If some fail, continue
2612          * to try to record the others.
2613          */
2614         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2615         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2616         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2617         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2618
2619         /* If recording any information failed, retry again soon. */
2620         if (!done)
2621                 return;
2622
2623         __this_cpu_write(trace_taskinfo_save, false);
2624 }
2625
2626 /* Helpers to record a specific task information */
2627 void tracing_record_cmdline(struct task_struct *task)
2628 {
2629         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2630 }
2631
2632 void tracing_record_tgid(struct task_struct *task)
2633 {
2634         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2635 }
2636
2637 /*
2638  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2639  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2640  * simplifies those functions and keeps them in sync.
2641  */
2642 enum print_line_t trace_handle_return(struct trace_seq *s)
2643 {
2644         return trace_seq_has_overflowed(s) ?
2645                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2646 }
2647 EXPORT_SYMBOL_GPL(trace_handle_return);
2648
2649 static unsigned short migration_disable_value(void)
2650 {
2651 #if defined(CONFIG_SMP)
2652         return current->migration_disabled;
2653 #else
2654         return 0;
2655 #endif
2656 }
2657
2658 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2659 {
2660         unsigned int trace_flags = irqs_status;
2661         unsigned int pc;
2662
2663         pc = preempt_count();
2664
2665         if (pc & NMI_MASK)
2666                 trace_flags |= TRACE_FLAG_NMI;
2667         if (pc & HARDIRQ_MASK)
2668                 trace_flags |= TRACE_FLAG_HARDIRQ;
2669         if (in_serving_softirq())
2670                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2671         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2672                 trace_flags |= TRACE_FLAG_BH_OFF;
2673
2674         if (tif_need_resched())
2675                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2676         if (test_preempt_need_resched())
2677                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2678         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2679                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2680 }
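
/*
 * The packed context word built above can be unpacked as follows; this is
 * only an illustration of the layout, the real decoding happens in the
 * output code when an entry's flags and preempt_count are printed.
 *
 *	unsigned int ctx = tracing_gen_ctx();
 *
 *	unsigned int irq_flags   = ctx >> 16;		// TRACE_FLAG_* bits
 *	unsigned int preempt_cnt = ctx & 0xf;		// capped at 15
 *	unsigned int migrate_dis = (ctx >> 4) & 0xf;	// capped at 15
 */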
2681
2682 struct ring_buffer_event *
2683 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2684                           int type,
2685                           unsigned long len,
2686                           unsigned int trace_ctx)
2687 {
2688         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2689 }
2690
2691 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2692 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2693 static int trace_buffered_event_ref;
2694
2695 /**
2696  * trace_buffered_event_enable - enable buffering events
2697  *
2698  * When events are being filtered, it is quicker to use a temporary
2699  * buffer to write the event data into if there's a likely chance
2700  * that it will not be committed. Discarding an event from the ring
2701  * buffer is not as fast as committing one, and is much slower than
2702  * copying the event into the ring buffer only when it matches.
2703  *
2704  * When an event is to be filtered, allocate per cpu buffers to
2705  * write the event data into, and if the event is filtered and discarded
2706  * it is simply dropped, otherwise, the entire data is to be committed
2707  * in one shot.
2708  */
2709 void trace_buffered_event_enable(void)
2710 {
2711         struct ring_buffer_event *event;
2712         struct page *page;
2713         int cpu;
2714
2715         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2716
2717         if (trace_buffered_event_ref++)
2718                 return;
2719
2720         for_each_tracing_cpu(cpu) {
2721                 page = alloc_pages_node(cpu_to_node(cpu),
2722                                         GFP_KERNEL | __GFP_NORETRY, 0);
2723                 if (!page)
2724                         goto failed;
2725
2726                 event = page_address(page);
2727                 memset(event, 0, sizeof(*event));
2728
2729                 per_cpu(trace_buffered_event, cpu) = event;
2730
2731                 preempt_disable();
2732                 if (cpu == smp_processor_id() &&
2733                     __this_cpu_read(trace_buffered_event) !=
2734                     per_cpu(trace_buffered_event, cpu))
2735                         WARN_ON_ONCE(1);
2736                 preempt_enable();
2737         }
2738
2739         return;
2740  failed:
2741         trace_buffered_event_disable();
2742 }
2743
2744 static void enable_trace_buffered_event(void *data)
2745 {
2746         /* Probably not needed, but do it anyway */
2747         smp_rmb();
2748         this_cpu_dec(trace_buffered_event_cnt);
2749 }
2750
2751 static void disable_trace_buffered_event(void *data)
2752 {
2753         this_cpu_inc(trace_buffered_event_cnt);
2754 }
2755
2756 /**
2757  * trace_buffered_event_disable - disable buffering events
2758  *
2759  * When a filter is removed, it is faster to not use the buffered
2760  * events, and to commit directly into the ring buffer. Free up
2761  * the temp buffers when there are no more users. This requires
2762  * special synchronization with current events.
2763  */
2764 void trace_buffered_event_disable(void)
2765 {
2766         int cpu;
2767
2768         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2769
2770         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2771                 return;
2772
2773         if (--trace_buffered_event_ref)
2774                 return;
2775
2776         preempt_disable();
2777         /* For each CPU, set the buffer as used. */
2778         smp_call_function_many(tracing_buffer_mask,
2779                                disable_trace_buffered_event, NULL, 1);
2780         preempt_enable();
2781
2782         /* Wait for all current users to finish */
2783         synchronize_rcu();
2784
2785         for_each_tracing_cpu(cpu) {
2786                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2787                 per_cpu(trace_buffered_event, cpu) = NULL;
2788         }
2789         /*
2790          * Make sure trace_buffered_event is NULL before clearing
2791          * trace_buffered_event_cnt.
2792          */
2793         smp_wmb();
2794
2795         preempt_disable();
2796         /* Do the work on each cpu */
2797         smp_call_function_many(tracing_buffer_mask,
2798                                enable_trace_buffered_event, NULL, 1);
2799         preempt_enable();
2800 }
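
/*
 * The enable/disable pair is reference counted and is meant to bracket the
 * lifetime of event filters, roughly like this (hypothetical call sites;
 * the real callers live in the event filtering code):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// first filter attached
 *	...
 *	trace_buffered_event_disable();		// last filter removed
 *	mutex_unlock(&event_mutex);
 */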
2801
2802 static struct trace_buffer *temp_buffer;
2803
2804 struct ring_buffer_event *
2805 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2806                           struct trace_event_file *trace_file,
2807                           int type, unsigned long len,
2808                           unsigned int trace_ctx)
2809 {
2810         struct ring_buffer_event *entry;
2811         struct trace_array *tr = trace_file->tr;
2812         int val;
2813
2814         *current_rb = tr->array_buffer.buffer;
2815
2816         if (!tr->no_filter_buffering_ref &&
2817             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2818                 preempt_disable_notrace();
2819                 /*
2820                  * Filtering is on, so try to use the per cpu buffer first.
2821                  * This buffer will simulate a ring_buffer_event,
2822                  * where the type_len is zero and the array[0] will
2823                  * hold the full length.
2824                  * (see include/linux/ring_buffer.h for details on
2825                  *  how the ring_buffer_event is structured).
2826                  *
2827                  * Using a temp buffer during filtering and copying it
2828                  * on a matched filter is quicker than writing directly
2829                  * into the ring buffer and then discarding it when
2830                  * it doesn't match. That is because the discard
2831                  * requires several atomic operations to get right.
2832                  * Copying on match and doing nothing on a failed match
2833                  * is still quicker than writing directly into the ring
2834                  * buffer and having to discard it on a failed match.
2835                  */
2836                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2837                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2838
2839                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2840
2841                         /*
2842                          * Preemption is disabled, but interrupts and NMIs
2843                          * can still come in now. If that happens after
2844                          * the above increment, then it will have to go
2845                          * back to the old method of allocating the event
2846                          * on the ring buffer, and if the filter fails, it
2847                          * will have to call ring_buffer_discard_commit()
2848                          * to remove it.
2849                          *
2850                          * Need to also check the unlikely case that the
2851                          * length is bigger than the temp buffer size.
2852                          * If that happens, then the reserve is pretty much
2853                          * guaranteed to fail, as the ring buffer currently
2854                          * only allows events less than a page. But that may
2855                          * change in the future, so let the ring buffer reserve
2856                          * handle the failure in that case.
2857                          */
2858                         if (val == 1 && likely(len <= max_len)) {
2859                                 trace_event_setup(entry, type, trace_ctx);
2860                                 entry->array[0] = len;
2861                                 /* Return with preemption disabled */
2862                                 return entry;
2863                         }
2864                         this_cpu_dec(trace_buffered_event_cnt);
2865                 }
2866                 /* __trace_buffer_lock_reserve() disables preemption */
2867                 preempt_enable_notrace();
2868         }
2869
2870         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2871                                             trace_ctx);
2872         /*
2873          * If tracing is off, but we have triggers enabled
2874          * we still need to look at the event data. Use the temp_buffer
2875          * to store the trace event for the trigger to use. It's recursion
2876          * safe and will not be recorded anywhere.
2877          */
2878         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2879                 *current_rb = temp_buffer;
2880                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2881                                                     trace_ctx);
2882         }
2883         return entry;
2884 }
2885 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2886
2887 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2888 static DEFINE_MUTEX(tracepoint_printk_mutex);
2889
2890 static void output_printk(struct trace_event_buffer *fbuffer)
2891 {
2892         struct trace_event_call *event_call;
2893         struct trace_event_file *file;
2894         struct trace_event *event;
2895         unsigned long flags;
2896         struct trace_iterator *iter = tracepoint_print_iter;
2897
2898         /* We should never get here if iter is NULL */
2899         if (WARN_ON_ONCE(!iter))
2900                 return;
2901
2902         event_call = fbuffer->trace_file->event_call;
2903         if (!event_call || !event_call->event.funcs ||
2904             !event_call->event.funcs->trace)
2905                 return;
2906
2907         file = fbuffer->trace_file;
2908         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2909             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2910              !filter_match_preds(file->filter, fbuffer->entry)))
2911                 return;
2912
2913         event = &fbuffer->trace_file->event_call->event;
2914
2915         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2916         trace_seq_init(&iter->seq);
2917         iter->ent = fbuffer->entry;
2918         event_call->event.funcs->trace(iter, 0, event);
2919         trace_seq_putc(&iter->seq, 0);
2920         printk("%s", iter->seq.buffer);
2921
2922         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2923 }
2924
2925 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2926                              void *buffer, size_t *lenp,
2927                              loff_t *ppos)
2928 {
2929         int save_tracepoint_printk;
2930         int ret;
2931
2932         mutex_lock(&tracepoint_printk_mutex);
2933         save_tracepoint_printk = tracepoint_printk;
2934
2935         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2936
2937         /*
2938          * This will force exiting early, as tracepoint_printk
2939          * is always zero when tracepoint_print_iter is not allocated.
2940          */
2941         if (!tracepoint_print_iter)
2942                 tracepoint_printk = 0;
2943
2944         if (save_tracepoint_printk == tracepoint_printk)
2945                 goto out;
2946
2947         if (tracepoint_printk)
2948                 static_key_enable(&tracepoint_printk_key.key);
2949         else
2950                 static_key_disable(&tracepoint_printk_key.key);
2951
2952  out:
2953         mutex_unlock(&tracepoint_printk_mutex);
2954
2955         return ret;
2956 }
2957
2958 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2959 {
2960         enum event_trigger_type tt = ETT_NONE;
2961         struct trace_event_file *file = fbuffer->trace_file;
2962
2963         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2964                         fbuffer->entry, &tt))
2965                 goto discard;
2966
2967         if (static_key_false(&tracepoint_printk_key.key))
2968                 output_printk(fbuffer);
2969
2970         if (static_branch_unlikely(&trace_event_exports_enabled))
2971                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2972
2973         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2974                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2975
2976 discard:
2977         if (tt)
2978                 event_triggers_post_call(file, tt);
2979
2980 }
2981 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2982
2983 /*
2984  * Skip 3:
2985  *
2986  *   trace_buffer_unlock_commit_regs()
2987  *   trace_event_buffer_commit()
2988  *   trace_event_raw_event_xxx()
2989  */
2990 # define STACK_SKIP 3
2991
2992 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2993                                      struct trace_buffer *buffer,
2994                                      struct ring_buffer_event *event,
2995                                      unsigned int trace_ctx,
2996                                      struct pt_regs *regs)
2997 {
2998         __buffer_unlock_commit(buffer, event);
2999
3000         /*
3001          * If regs is not set, then skip the necessary functions.
3002          * Note, we can still get here via blktrace, wakeup tracer
3003          * and mmiotrace, but that's ok if they lose a function or
3004          * two. They are not that meaningful.
3005          */
3006         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3007         ftrace_trace_userstack(tr, buffer, trace_ctx);
3008 }
3009
3010 /*
3011  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3012  */
3013 void
3014 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3015                                    struct ring_buffer_event *event)
3016 {
3017         __buffer_unlock_commit(buffer, event);
3018 }
3019
3020 void
3021 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3022                parent_ip, unsigned int trace_ctx)
3023 {
3024         struct trace_event_call *call = &event_function;
3025         struct trace_buffer *buffer = tr->array_buffer.buffer;
3026         struct ring_buffer_event *event;
3027         struct ftrace_entry *entry;
3028
3029         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3030                                             trace_ctx);
3031         if (!event)
3032                 return;
3033         entry   = ring_buffer_event_data(event);
3034         entry->ip                       = ip;
3035         entry->parent_ip                = parent_ip;
3036
3037         if (!call_filter_check_discard(call, entry, buffer, event)) {
3038                 if (static_branch_unlikely(&trace_function_exports_enabled))
3039                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3040                 __buffer_unlock_commit(buffer, event);
3041         }
3042 }
3043
3044 #ifdef CONFIG_STACKTRACE
3045
3046 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3047 #define FTRACE_KSTACK_NESTING   4
3048
3049 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3050
3051 struct ftrace_stack {
3052         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3053 };
3054
3055
3056 struct ftrace_stacks {
3057         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3058 };
3059
3060 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3061 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3062
3063 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3064                                  unsigned int trace_ctx,
3065                                  int skip, struct pt_regs *regs)
3066 {
3067         struct trace_event_call *call = &event_kernel_stack;
3068         struct ring_buffer_event *event;
3069         unsigned int size, nr_entries;
3070         struct ftrace_stack *fstack;
3071         struct stack_entry *entry;
3072         int stackidx;
3073
3074         /*
3075          * Add one, for this function and the call to stack_trace_save().
3076          * If regs is set, then these functions will not be in the way.
3077          */
3078 #ifndef CONFIG_UNWINDER_ORC
3079         if (!regs)
3080                 skip++;
3081 #endif
3082
3083         preempt_disable_notrace();
3084
3085         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3086
3087         /* This should never happen. If it does, yell once and skip */
3088         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3089                 goto out;
3090
3091         /*
3092          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3093          * interrupt will either see the value pre increment or post
3094          * increment. If the interrupt happens pre increment it will have
3095          * restored the counter when it returns.  We just need a barrier to
3096          * keep gcc from moving things around.
3097          */
3098         barrier();
3099
3100         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3101         size = ARRAY_SIZE(fstack->calls);
3102
3103         if (regs) {
3104                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3105                                                    size, skip);
3106         } else {
3107                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3108         }
3109
3110         size = nr_entries * sizeof(unsigned long);
3111         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3112                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3113                                     trace_ctx);
3114         if (!event)
3115                 goto out;
3116         entry = ring_buffer_event_data(event);
3117
3118         memcpy(&entry->caller, fstack->calls, size);
3119         entry->size = nr_entries;
3120
3121         if (!call_filter_check_discard(call, entry, buffer, event))
3122                 __buffer_unlock_commit(buffer, event);
3123
3124  out:
3125         /* Again, don't let gcc optimize things here */
3126         barrier();
3127         __this_cpu_dec(ftrace_stack_reserve);
3128         preempt_enable_notrace();
3129
3130 }
3131
3132 static inline void ftrace_trace_stack(struct trace_array *tr,
3133                                       struct trace_buffer *buffer,
3134                                       unsigned int trace_ctx,
3135                                       int skip, struct pt_regs *regs)
3136 {
3137         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3138                 return;
3139
3140         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3141 }
3142
3143 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3144                    int skip)
3145 {
3146         struct trace_buffer *buffer = tr->array_buffer.buffer;
3147
3148         if (rcu_is_watching()) {
3149                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3150                 return;
3151         }
3152
3153         /*
3154          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3155          * but if the above rcu_is_watching() failed, then the NMI
3156          * triggered someplace critical, and ct_irq_enter() should
3157          * not be called from NMI.
3158          */
3159         if (unlikely(in_nmi()))
3160                 return;
3161
3162         ct_irq_enter_irqson();
3163         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3164         ct_irq_exit_irqson();
3165 }
3166
3167 /**
3168  * trace_dump_stack - record a stack back trace in the trace buffer
3169  * @skip: Number of functions to skip (helper handlers)
3170  */
3171 void trace_dump_stack(int skip)
3172 {
3173         if (tracing_disabled || tracing_selftest_running)
3174                 return;
3175
3176 #ifndef CONFIG_UNWINDER_ORC
3177         /* Skip 1 to skip this function. */
3178         skip++;
3179 #endif
3180         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3181                              tracing_gen_ctx(), skip, NULL);
3182 }
3183 EXPORT_SYMBOL_GPL(trace_dump_stack);
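
/*
 * trace_dump_stack() is an ad-hoc debugging aid: drop it into a suspect
 * code path and the backtrace lands in the trace buffer instead of the
 * console. A hedged sketch of a call site:
 *
 *	if (unlikely(something_unexpected))	// hypothetical condition
 *		trace_dump_stack(0);
 */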
3184
3185 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3186 static DEFINE_PER_CPU(int, user_stack_count);
3187
3188 static void
3189 ftrace_trace_userstack(struct trace_array *tr,
3190                        struct trace_buffer *buffer, unsigned int trace_ctx)
3191 {
3192         struct trace_event_call *call = &event_user_stack;
3193         struct ring_buffer_event *event;
3194         struct userstack_entry *entry;
3195
3196         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3197                 return;
3198
3199         /*
3200          * NMIs cannot handle page faults, even with fixups.
3201          * Saving the user stack can (and often does) fault.
3202          */
3203         if (unlikely(in_nmi()))
3204                 return;
3205
3206         /*
3207          * prevent recursion, since the user stack tracing may
3208          * trigger other kernel events.
3209          */
3210         preempt_disable();
3211         if (__this_cpu_read(user_stack_count))
3212                 goto out;
3213
3214         __this_cpu_inc(user_stack_count);
3215
3216         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3217                                             sizeof(*entry), trace_ctx);
3218         if (!event)
3219                 goto out_drop_count;
3220         entry   = ring_buffer_event_data(event);
3221
3222         entry->tgid             = current->tgid;
3223         memset(&entry->caller, 0, sizeof(entry->caller));
3224
3225         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3226         if (!call_filter_check_discard(call, entry, buffer, event))
3227                 __buffer_unlock_commit(buffer, event);
3228
3229  out_drop_count:
3230         __this_cpu_dec(user_stack_count);
3231  out:
3232         preempt_enable();
3233 }
3234 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3235 static void ftrace_trace_userstack(struct trace_array *tr,
3236                                    struct trace_buffer *buffer,
3237                                    unsigned int trace_ctx)
3238 {
3239 }
3240 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3241
3242 #endif /* CONFIG_STACKTRACE */
3243
3244 static inline void
3245 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3246                           unsigned long long delta)
3247 {
3248         entry->bottom_delta_ts = delta & U32_MAX;
3249         entry->top_delta_ts = (delta >> 32);
3250 }
3251
3252 void trace_last_func_repeats(struct trace_array *tr,
3253                              struct trace_func_repeats *last_info,
3254                              unsigned int trace_ctx)
3255 {
3256         struct trace_buffer *buffer = tr->array_buffer.buffer;
3257         struct func_repeats_entry *entry;
3258         struct ring_buffer_event *event;
3259         u64 delta;
3260
3261         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3262                                             sizeof(*entry), trace_ctx);
3263         if (!event)
3264                 return;
3265
3266         delta = ring_buffer_event_time_stamp(buffer, event) -
3267                 last_info->ts_last_call;
3268
3269         entry = ring_buffer_event_data(event);
3270         entry->ip = last_info->ip;
3271         entry->parent_ip = last_info->parent_ip;
3272         entry->count = last_info->count;
3273         func_repeats_set_delta_ts(entry, delta);
3274
3275         __buffer_unlock_commit(buffer, event);
3276 }
3277
3278 /* created for use with alloc_percpu */
3279 struct trace_buffer_struct {
3280         int nesting;
3281         char buffer[4][TRACE_BUF_SIZE];
3282 };
3283
3284 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3285
3286 /*
3287  * This allows for lockless recording.  If we're nested too deeply, then
3288  * this returns NULL.
3289  */
3290 static char *get_trace_buf(void)
3291 {
3292         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3293
3294         if (!trace_percpu_buffer || buffer->nesting >= 4)
3295                 return NULL;
3296
3297         buffer->nesting++;
3298
3299         /* Interrupts must see nesting incremented before we use the buffer */
3300         barrier();
3301         return &buffer->buffer[buffer->nesting - 1][0];
3302 }
3303
3304 static void put_trace_buf(void)
3305 {
3306         /* Don't let the decrement of nesting leak before this */
3307         barrier();
3308         this_cpu_dec(trace_percpu_buffer->nesting);
3309 }
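/*
 * Callers pair get_trace_buf() with put_trace_buf() and keep preemption
 * disabled in between, as trace_vbprintk() and __trace_array_vprintk()
 * below do. The four nesting levels give room for each context level that
 * can interrupt another (task, softirq, hardirq, NMI) to get its own
 * scratch buffer on the same CPU, so a trace_printk() from an interrupt
 * does not clobber the buffer of the context it interrupted.
 */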
3310
3311 static int alloc_percpu_trace_buffer(void)
3312 {
3313         struct trace_buffer_struct __percpu *buffers;
3314
3315         if (trace_percpu_buffer)
3316                 return 0;
3317
3318         buffers = alloc_percpu(struct trace_buffer_struct);
3319         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3320                 return -ENOMEM;
3321
3322         trace_percpu_buffer = buffers;
3323         return 0;
3324 }
3325
3326 static int buffers_allocated;
3327
3328 void trace_printk_init_buffers(void)
3329 {
3330         if (buffers_allocated)
3331                 return;
3332
3333         if (alloc_percpu_trace_buffer())
3334                 return;
3335
3336         /* trace_printk() is for debug use only. Don't use it in production. */
3337
3338         pr_warn("\n");
3339         pr_warn("**********************************************************\n");
3340         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3341         pr_warn("**                                                      **\n");
3342         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3343         pr_warn("**                                                      **\n");
3344         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3345         pr_warn("** unsafe for production use.                           **\n");
3346         pr_warn("**                                                      **\n");
3347         pr_warn("** If you see this message and you are not debugging    **\n");
3348         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3349         pr_warn("**                                                      **\n");
3350         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3351         pr_warn("**********************************************************\n");
3352
3353         /* Expand the buffers to set size */
3354         tracing_update_buffers();
3355
3356         buffers_allocated = 1;
3357
3358         /*
3359          * trace_printk_init_buffers() can be called by modules.
3360          * If that happens, then we need to start cmdline recording
3361          * directly here. If the global_trace.buffer is already
3362          * allocated here, then this was called by module code.
3363          */
3364         if (global_trace.array_buffer.buffer)
3365                 tracing_start_cmdline_record();
3366 }
3367 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3368
3369 void trace_printk_start_comm(void)
3370 {
3371         /* Start tracing comms if trace printk is set */
3372         if (!buffers_allocated)
3373                 return;
3374         tracing_start_cmdline_record();
3375 }
3376
3377 static void trace_printk_start_stop_comm(int enabled)
3378 {
3379         if (!buffers_allocated)
3380                 return;
3381
3382         if (enabled)
3383                 tracing_start_cmdline_record();
3384         else
3385                 tracing_stop_cmdline_record();
3386 }
3387
3388 /**
3389  * trace_vbprintk - write binary msg to tracing buffer
3390  * @ip:    The address of the caller
3391  * @fmt:   The string format to write to the buffer
3392  * @args:  Arguments for @fmt
3393  */
3394 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3395 {
3396         struct trace_event_call *call = &event_bprint;
3397         struct ring_buffer_event *event;
3398         struct trace_buffer *buffer;
3399         struct trace_array *tr = &global_trace;
3400         struct bprint_entry *entry;
3401         unsigned int trace_ctx;
3402         char *tbuffer;
3403         int len = 0, size;
3404
3405         if (unlikely(tracing_selftest_running || tracing_disabled))
3406                 return 0;
3407
3408         /* Don't pollute graph traces with trace_vprintk internals */
3409         pause_graph_tracing();
3410
3411         trace_ctx = tracing_gen_ctx();
3412         preempt_disable_notrace();
3413
3414         tbuffer = get_trace_buf();
3415         if (!tbuffer) {
3416                 len = 0;
3417                 goto out_nobuffer;
3418         }
3419
3420         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3421
3422         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3423                 goto out_put;
3424
3425         size = sizeof(*entry) + sizeof(u32) * len;
3426         buffer = tr->array_buffer.buffer;
3427         ring_buffer_nest_start(buffer);
3428         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3429                                             trace_ctx);
3430         if (!event)
3431                 goto out;
3432         entry = ring_buffer_event_data(event);
3433         entry->ip                       = ip;
3434         entry->fmt                      = fmt;
3435
3436         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3437         if (!call_filter_check_discard(call, entry, buffer, event)) {
3438                 __buffer_unlock_commit(buffer, event);
3439                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3440         }
3441
3442 out:
3443         ring_buffer_nest_end(buffer);
3444 out_put:
3445         put_trace_buf();
3446
3447 out_nobuffer:
3448         preempt_enable_notrace();
3449         unpause_graph_tracing();
3450
3451         return len;
3452 }
3453 EXPORT_SYMBOL_GPL(trace_vbprintk);
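/*
 * This is the backend of the binary trace_printk() path: with a constant
 * format and at least one argument, the trace_printk() macro is expected
 * to route here via __trace_bprintk(). Only a pointer to the format and
 * the binary arguments are recorded; the string is formatted when the
 * buffer is read. An illustrative call (values are hypothetical):
 *
 *	trace_printk("page %lu mapped at %px\n", pfn, vaddr);
 */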
3454
3455 __printf(3, 0)
3456 static int
3457 __trace_array_vprintk(struct trace_buffer *buffer,
3458                       unsigned long ip, const char *fmt, va_list args)
3459 {
3460         struct trace_event_call *call = &event_print;
3461         struct ring_buffer_event *event;
3462         int len = 0, size;
3463         struct print_entry *entry;
3464         unsigned int trace_ctx;
3465         char *tbuffer;
3466
3467         if (tracing_disabled || tracing_selftest_running)
3468                 return 0;
3469
3470         /* Don't pollute graph traces with trace_vprintk internals */
3471         pause_graph_tracing();
3472
3473         trace_ctx = tracing_gen_ctx();
3474         preempt_disable_notrace();
3475
3476
3477         tbuffer = get_trace_buf();
3478         if (!tbuffer) {
3479                 len = 0;
3480                 goto out_nobuffer;
3481         }
3482
3483         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3484
3485         size = sizeof(*entry) + len + 1;
3486         ring_buffer_nest_start(buffer);
3487         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3488                                             trace_ctx);
3489         if (!event)
3490                 goto out;
3491         entry = ring_buffer_event_data(event);
3492         entry->ip = ip;
3493
3494         memcpy(&entry->buf, tbuffer, len + 1);
3495         if (!call_filter_check_discard(call, entry, buffer, event)) {
3496                 __buffer_unlock_commit(buffer, event);
3497                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3498         }
3499
3500 out:
3501         ring_buffer_nest_end(buffer);
3502         put_trace_buf();
3503
3504 out_nobuffer:
3505         preempt_enable_notrace();
3506         unpause_graph_tracing();
3507
3508         return len;
3509 }
3510
3511 __printf(3, 0)
3512 int trace_array_vprintk(struct trace_array *tr,
3513                         unsigned long ip, const char *fmt, va_list args)
3514 {
3515         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3516 }
3517
3518 /**
3519  * trace_array_printk - Print a message to a specific instance
3520  * @tr: The instance trace_array descriptor
3521  * @ip: The instruction pointer that this is called from.
3522  * @fmt: The format to print (printf format)
3523  *
3524  * If a subsystem sets up its own instance, it has the right to
3525  * printk strings into its tracing instance buffer using this
3526  * function. Note, this function will not write into the top level
3527  * buffer (use trace_printk() for that), as writing into the top level
3528  * buffer should only have events that can be individually disabled.
3529  * trace_printk() is only used for debugging a kernel, and should
3530  * never be incorporated into normal use.
3531  *
3532  * trace_array_printk() can be used, as it will not add noise to the
3533  * top level tracing buffer.
3534  *
3535  * Note, trace_array_init_printk() must be called on @tr before this
3536  * can be used.
3537  */
3538 __printf(3, 0)
3539 int trace_array_printk(struct trace_array *tr,
3540                        unsigned long ip, const char *fmt, ...)
3541 {
3542         int ret;
3543         va_list ap;
3544
3545         if (!tr)
3546                 return -ENOENT;
3547
3548         /* This is only allowed for created instances */
3549         if (tr == &global_trace)
3550                 return 0;
3551
3552         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3553                 return 0;
3554
3555         va_start(ap, fmt);
3556         ret = trace_array_vprintk(tr, ip, fmt, ap);
3557         va_end(ap);
3558         return ret;
3559 }
3560 EXPORT_SYMBOL_GPL(trace_array_printk);
3561
3562 /**
3563  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3564  * @tr: The trace array to initialize the buffers for
3565  *
3566  * As trace_array_printk() only writes into instances, such calls are OK to
3567  * have in the kernel (unlike trace_printk()). This needs to be called
3568  * before trace_array_printk() can be used on a trace_array.
3569  */
3570 int trace_array_init_printk(struct trace_array *tr)
3571 {
3572         if (!tr)
3573                 return -ENOENT;
3574
3575         /* This is only allowed for created instances */
3576         if (tr == &global_trace)
3577                 return -EINVAL;
3578
3579         return alloc_percpu_trace_buffer();
3580 }
3581 EXPORT_SYMBOL_GPL(trace_array_init_printk);
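/*
 * A usage sketch for the two helpers above (the instance name, id and
 * call site are hypothetical; trace_array_get_by_name() is assumed to be
 * the instance lookup/creation helper provided elsewhere in this file):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_,
 *				   "widget %d initialized\n", id);
 *
 * The important ordering is that trace_array_init_printk() must have
 * succeeded before trace_array_printk() is used on @tr.
 */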
3582
3583 __printf(3, 4)
3584 int trace_array_printk_buf(struct trace_buffer *buffer,
3585                            unsigned long ip, const char *fmt, ...)
3586 {
3587         int ret;
3588         va_list ap;
3589
3590         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3591                 return 0;
3592
3593         va_start(ap, fmt);
3594         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3595         va_end(ap);
3596         return ret;
3597 }
3598
3599 __printf(2, 0)
3600 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3601 {
3602         return trace_array_vprintk(&global_trace, ip, fmt, args);
3603 }
3604 EXPORT_SYMBOL_GPL(trace_vprintk);
3605
3606 static void trace_iterator_increment(struct trace_iterator *iter)
3607 {
3608         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3609
3610         iter->idx++;
3611         if (buf_iter)
3612                 ring_buffer_iter_advance(buf_iter);
3613 }
3614
3615 static struct trace_entry *
3616 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3617                 unsigned long *lost_events)
3618 {
3619         struct ring_buffer_event *event;
3620         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3621
3622         if (buf_iter) {
3623                 event = ring_buffer_iter_peek(buf_iter, ts);
3624                 if (lost_events)
3625                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3626                                 (unsigned long)-1 : 0;
3627         } else {
3628                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3629                                          lost_events);
3630         }
3631
3632         if (event) {
3633                 iter->ent_size = ring_buffer_event_length(event);
3634                 return ring_buffer_event_data(event);
3635         }
3636         iter->ent_size = 0;
3637         return NULL;
3638 }
3639
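/*
 * __find_next_entry() below effectively performs a k-way merge over the
 * per-CPU ring buffers: it peeks at the next event of every tracing CPU
 * and returns the one with the smallest timestamp, so the combined output
 * read from the "trace" file comes out in time order.
 */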
3640 static struct trace_entry *
3641 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3642                   unsigned long *missing_events, u64 *ent_ts)
3643 {
3644         struct trace_buffer *buffer = iter->array_buffer->buffer;
3645         struct trace_entry *ent, *next = NULL;
3646         unsigned long lost_events = 0, next_lost = 0;
3647         int cpu_file = iter->cpu_file;
3648         u64 next_ts = 0, ts;
3649         int next_cpu = -1;
3650         int next_size = 0;
3651         int cpu;
3652
3653         /*
3654          * If we are in a per_cpu trace file, don't bother iterating over
3655          * all CPUs; peek at that CPU directly.
3656          */
3657         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3658                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3659                         return NULL;
3660                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3661                 if (ent_cpu)
3662                         *ent_cpu = cpu_file;
3663
3664                 return ent;
3665         }
3666
3667         for_each_tracing_cpu(cpu) {
3668
3669                 if (ring_buffer_empty_cpu(buffer, cpu))
3670                         continue;
3671
3672                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3673
3674                 /*
3675                  * Pick the entry with the smallest timestamp:
3676                  */
3677                 if (ent && (!next || ts < next_ts)) {
3678                         next = ent;
3679                         next_cpu = cpu;
3680                         next_ts = ts;
3681                         next_lost = lost_events;
3682                         next_size = iter->ent_size;
3683                 }
3684         }
3685
3686         iter->ent_size = next_size;
3687
3688         if (ent_cpu)
3689                 *ent_cpu = next_cpu;
3690
3691         if (ent_ts)
3692                 *ent_ts = next_ts;
3693
3694         if (missing_events)
3695                 *missing_events = next_lost;
3696
3697         return next;
3698 }
3699
3700 #define STATIC_FMT_BUF_SIZE     128
3701 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3702
3703 static char *trace_iter_expand_format(struct trace_iterator *iter)
3704 {
3705         char *tmp;
3706
3707         /*
3708          * iter->tr is NULL when used with tp_printk, which means
3709          * this can get called where it is not safe to call krealloc().
3710          */
3711         if (!iter->tr || iter->fmt == static_fmt_buf)
3712                 return NULL;
3713
3714         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3715                        GFP_KERNEL);
3716         if (tmp) {
3717                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3718                 iter->fmt = tmp;
3719         }
3720
3721         return tmp;
3722 }
3723
3724 /* Returns true if the string is safe to dereference from an event */
3725 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3726                            bool star, int len)
3727 {
3728         unsigned long addr = (unsigned long)str;
3729         struct trace_event *trace_event;
3730         struct trace_event_call *event;
3731
3732         /* Ignore strings with no length */
3733         if (star && !len)
3734                 return true;
3735
3736         /* OK if part of the event data */
3737         if ((addr >= (unsigned long)iter->ent) &&
3738             (addr < (unsigned long)iter->ent + iter->ent_size))
3739                 return true;
3740
3741         /* OK if part of the temp seq buffer */
3742         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3743             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3744                 return true;
3745
3746         /* Core rodata can not be freed */
3747         if (is_kernel_rodata(addr))
3748                 return true;
3749
3750         if (trace_is_tracepoint_string(str))
3751                 return true;
3752
3753         /*
3754          * Now this could be a module event, referencing core module
3755          * data, which is OK.
3756          */
3757         if (!iter->ent)
3758                 return false;
3759
3760         trace_event = ftrace_find_event(iter->ent->type);
3761         if (!trace_event)
3762                 return false;
3763
3764         event = container_of(trace_event, struct trace_event_call, event);
3765         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3766                 return false;
3767
3768         /* Would rather have rodata, but this will suffice */
3769         if (within_module_core(addr, event->module))
3770                 return true;
3771
3772         return false;
3773 }
3774
3775 static const char *show_buffer(struct trace_seq *s)
3776 {
3777         struct seq_buf *seq = &s->seq;
3778
3779         seq_buf_terminate(seq);
3780
3781         return seq->buffer;
3782 }
3783
3784 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3785
3786 static int test_can_verify_check(const char *fmt, ...)
3787 {
3788         char buf[16];
3789         va_list ap;
3790         int ret;
3791
3792         /*
3793          * The verifier depends on vsnprintf() modifying the va_list passed
3794          * to it, i.e. on the va_list being passed by reference. Some
3795          * architectures (like x86_32) pass it by value, so vsnprintf()
3796          * does not advance the caller's va_list, and the verifier would
3797          * then need to understand every conversion that vsnprintf() can
3798          * consume. If the va_list is passed by value, the verifier is
3799          * therefore disabled.
3800          */
3801         va_start(ap, fmt);
3802         vsnprintf(buf, 16, "%d", ap);
3803         ret = va_arg(ap, int);
3804         va_end(ap);
3805
3806         return ret;
3807 }
3808
3809 static void test_can_verify(void)
3810 {
3811         if (!test_can_verify_check("%d %d", 0, 1)) {
3812                 pr_info("trace event string verifier disabled\n");
3813                 static_branch_inc(&trace_no_verify);
3814         }
3815 }
3816
3817 /**
3818  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3819  * @iter: The iterator that holds the seq buffer and the event being printed
3820  * @fmt: The format used to print the event
3821  * @ap: The va_list holding the data to print from @fmt.
3822  *
3823  * This writes the data into the @iter->seq buffer using the data from
3824  * @fmt and @ap. If the format has a %s, then the source of the string
3825  * is examined to make sure it is safe to print; otherwise it will
3826  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3827  * pointer.
3828  */
3829 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3830                          va_list ap)
3831 {
3832         const char *p = fmt;
3833         const char *str;
3834         int i, j;
3835
3836         if (WARN_ON_ONCE(!fmt))
3837                 return;
3838
3839         if (static_branch_unlikely(&trace_no_verify))
3840                 goto print;
3841
3842         /* Don't bother checking when doing a ftrace_dump() */
3843         if (iter->fmt == static_fmt_buf)
3844                 goto print;
3845
3846         while (*p) {
3847                 bool star = false;
3848                 int len = 0;
3849
3850                 j = 0;
3851
3852                 /* We only care about %s and variants */
3853                 for (i = 0; p[i]; i++) {
3854                         if (i + 1 >= iter->fmt_size) {
3855                                 /*
3856                                  * If we can't expand the copy buffer,
3857                                  * just print it.
3858                                  */
3859                                 if (!trace_iter_expand_format(iter))
3860                                         goto print;
3861                         }
3862
3863                         if (p[i] == '\\' && p[i+1]) {
3864                                 i++;
3865                                 continue;
3866                         }
3867                         if (p[i] == '%') {
3868                                 /* Need to test cases like %08.*s */
3869                                 for (j = 1; p[i+j]; j++) {
3870                                         if (isdigit(p[i+j]) ||
3871                                             p[i+j] == '.')
3872                                                 continue;
3873                                         if (p[i+j] == '*') {
3874                                                 star = true;
3875                                                 continue;
3876                                         }
3877                                         break;
3878                                 }
3879                                 if (p[i+j] == 's')
3880                                         break;
3881                                 star = false;
3882                         }
3883                         j = 0;
3884                 }
3885                 /* If no %s found then just print normally */
3886                 if (!p[i])
3887                         break;
3888
3889                 /* Copy up to the %s, and print that */
3890                 strncpy(iter->fmt, p, i);
3891                 iter->fmt[i] = '\0';
3892                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3893
3894                 /*
3895                  * If iter->seq is full, the above call no longer guarantees
3896                  * that ap is in sync with fmt processing, and further calls
3897                  * to va_arg() can return wrong positional arguments.
3898                  *
3899                  * Ensure that ap is no longer used in this case.
3900                  */
3901                 if (iter->seq.full) {
3902                         p = "";
3903                         break;
3904                 }
3905
3906                 if (star)
3907                         len = va_arg(ap, int);
3908
3909                 /* The ap now points to the string data of the %s */
3910                 str = va_arg(ap, const char *);
3911
3912                 /*
3913                  * If you hit this warning, it is likely that the
3914                  * trace event in question used %s on a string that
3915                  * was saved at the time of the event, but may not be
3916                  * around when the trace is read. Use __string(),
3917                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3918                  * instead. See samples/trace_events/trace-events-sample.h
3919                  * for reference.
3920                  */
3921                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3922                               "fmt: '%s' current_buffer: '%s'",
3923                               fmt, show_buffer(&iter->seq))) {
3924                         int ret;
3925
3926                         /* Try to safely read the string */
3927                         if (star) {
3928                                 if (len + 1 > iter->fmt_size)
3929                                         len = iter->fmt_size - 1;
3930                                 if (len < 0)
3931                                         len = 0;
3932                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3933                                 iter->fmt[len] = 0;
3934                                 star = false;
3935                         } else {
3936                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3937                                                                   iter->fmt_size);
3938                         }
3939                         if (ret < 0)
3940                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3941                         else
3942                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3943                                                  str, iter->fmt);
3944                         str = "[UNSAFE-MEMORY]";
3945                         strcpy(iter->fmt, "%s");
3946                 } else {
3947                         strncpy(iter->fmt, p + i, j + 1);
3948                         iter->fmt[j+1] = '\0';
3949                 }
3950                 if (star)
3951                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3952                 else
3953                         trace_seq_printf(&iter->seq, iter->fmt, str);
3954
3955                 p += i + j + 1;
3956         }
3957  print:
3958         if (*p)
3959                 trace_seq_vprintf(&iter->seq, p, ap);
3960 }
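/*
 * A sketch of the safe pattern that the "[UNSAFE-MEMORY]" check above
 * steers events toward (names are hypothetical and the macro layout
 * follows samples/trace_events/trace-events-sample.h):
 *
 *	TRACE_EVENT(foo_msg,
 *		TP_PROTO(const char *msg),
 *		TP_ARGS(msg),
 *		TP_STRUCT__entry(
 *			__string(msg, msg)
 *		),
 *		TP_fast_assign(
 *			__assign_str(msg, msg);
 *		),
 *		TP_printk("%s", __get_str(msg))
 *	);
 *
 * __string()/__assign_str() copy the string into the event itself when it
 * fires, so the pointer handed to "%s" always lies inside iter->ent and
 * trace_safe_str() accepts it. Printing a raw pointer that may be gone by
 * the time the trace is read is what triggers the warning.
 */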
3961
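/*
 * trace_event_format() below rewrites "%p" into "%px" when the instance
 * has the "hash-ptr" option cleared, so the real address is printed
 * instead of a hashed value. For example "ptr=%p" becomes "ptr=%px",
 * while "%%p" and extended specifiers such as "%pS" are left untouched
 * because of the isalnum(p[1]) check.
 */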
3962 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3963 {
3964         const char *p, *new_fmt;
3965         char *q;
3966
3967         if (WARN_ON_ONCE(!fmt))
3968                 return fmt;
3969
3970         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3971                 return fmt;
3972
3973         p = fmt;
3974         new_fmt = q = iter->fmt;
3975         while (*p) {
3976                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3977                         if (!trace_iter_expand_format(iter))
3978                                 return fmt;
3979
3980                         q += iter->fmt - new_fmt;
3981                         new_fmt = iter->fmt;
3982                 }
3983
3984                 *q++ = *p++;
3985
3986                 /* Replace %p with %px */
3987                 if (p[-1] == '%') {
3988                         if (p[0] == '%') {
3989                                 *q++ = *p++;
3990                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3991                                 *q++ = *p++;
3992                                 *q++ = 'x';
3993                         }
3994                 }
3995         }
3996         *q = '\0';
3997
3998         return new_fmt;
3999 }
4000
4001 #define STATIC_TEMP_BUF_SIZE    128
4002 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4003
4004 /* Find the next real entry, without updating the iterator itself */
4005 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4006                                           int *ent_cpu, u64 *ent_ts)
4007 {
4008         /* __find_next_entry will reset ent_size */
4009         int ent_size = iter->ent_size;
4010         struct trace_entry *entry;
4011
4012         /*
4013          * If called from ftrace_dump(), then the iter->temp buffer
4014          * will be the static_temp_buf and not created from kmalloc.
4015          * If the entry size is greater than the buffer, we cannot
4016          * save it. Just return NULL in that case. This is only
4017          * used to add markers when two consecutive events' time
4018          * stamps have a large delta. See trace_print_lat_context().
4019          */
4020         if (iter->temp == static_temp_buf &&
4021             STATIC_TEMP_BUF_SIZE < ent_size)
4022                 return NULL;
4023
4024         /*
4025          * The __find_next_entry() may call peek_next_entry(), which may
4026          * call ring_buffer_peek(), which may make the contents of iter->ent
4027          * undefined. Need to copy iter->ent now.
4028          */
4029         if (iter->ent && iter->ent != iter->temp) {
4030                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4031                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4032                         void *temp;
4033                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4034                         if (!temp)
4035                                 return NULL;
4036                         kfree(iter->temp);
4037                         iter->temp = temp;
4038                         iter->temp_size = iter->ent_size;
4039                 }
4040                 memcpy(iter->temp, iter->ent, iter->ent_size);
4041                 iter->ent = iter->temp;
4042         }
4043         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4044         /* Put back the original ent_size */
4045         iter->ent_size = ent_size;
4046
4047         return entry;
4048 }
4049
4050 /* Find the next real entry, and increment the iterator to the next entry */
4051 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4052 {
4053         iter->ent = __find_next_entry(iter, &iter->cpu,
4054                                       &iter->lost_events, &iter->ts);
4055
4056         if (iter->ent)
4057                 trace_iterator_increment(iter);
4058
4059         return iter->ent ? iter : NULL;
4060 }
4061
4062 static void trace_consume(struct trace_iterator *iter)
4063 {
4064         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4065                             &iter->lost_events);
4066 }
4067
4068 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4069 {
4070         struct trace_iterator *iter = m->private;
4071         int i = (int)*pos;
4072         void *ent;
4073
4074         WARN_ON_ONCE(iter->leftover);
4075
4076         (*pos)++;
4077
4078         /* can't go backwards */
4079         if (iter->idx > i)
4080                 return NULL;
4081
4082         if (iter->idx < 0)
4083                 ent = trace_find_next_entry_inc(iter);
4084         else
4085                 ent = iter;
4086
4087         while (ent && iter->idx < i)
4088                 ent = trace_find_next_entry_inc(iter);
4089
4090         iter->pos = *pos;
4091
4092         return ent;
4093 }
4094
4095 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4096 {
4097         struct ring_buffer_iter *buf_iter;
4098         unsigned long entries = 0;
4099         u64 ts;
4100
4101         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4102
4103         buf_iter = trace_buffer_iter(iter, cpu);
4104         if (!buf_iter)
4105                 return;
4106
4107         ring_buffer_iter_reset(buf_iter);
4108
4109         /*
4110          * With the max latency tracers we could have the case that
4111          * a reset never took place on a cpu. This shows up as
4112          * timestamps that are before the start of the buffer.
4113          */
4114         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4115                 if (ts >= iter->array_buffer->time_start)
4116                         break;
4117                 entries++;
4118                 ring_buffer_iter_advance(buf_iter);
4119         }
4120
4121         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4122 }
4123
4124 /*
4125  * The current tracer is copied to avoid taking a global lock
4126  * all around.
4127  */
4128 static void *s_start(struct seq_file *m, loff_t *pos)
4129 {
4130         struct trace_iterator *iter = m->private;
4131         struct trace_array *tr = iter->tr;
4132         int cpu_file = iter->cpu_file;
4133         void *p = NULL;
4134         loff_t l = 0;
4135         int cpu;
4136
4137         /*
4138          * Copy the tracer to avoid using a global lock all around.
4139          * iter->trace is a copy of current_trace, so the pointer to the
4140          * name may be used instead of a strcmp(), as iter->trace->name
4141          * will point to the same string as current_trace->name.
4142          */
4143         mutex_lock(&trace_types_lock);
4144         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4145                 *iter->trace = *tr->current_trace;
4146         mutex_unlock(&trace_types_lock);
4147
4148 #ifdef CONFIG_TRACER_MAX_TRACE
4149         if (iter->snapshot && iter->trace->use_max_tr)
4150                 return ERR_PTR(-EBUSY);
4151 #endif
4152
4153         if (*pos != iter->pos) {
4154                 iter->ent = NULL;
4155                 iter->cpu = 0;
4156                 iter->idx = -1;
4157
4158                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4159                         for_each_tracing_cpu(cpu)
4160                                 tracing_iter_reset(iter, cpu);
4161                 } else
4162                         tracing_iter_reset(iter, cpu_file);
4163
4164                 iter->leftover = 0;
4165                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4166                         ;
4167
4168         } else {
4169                 /*
4170                  * If we overflowed the seq_file before, then we want
4171                  * to just reuse the trace_seq buffer again.
4172                  */
4173                 if (iter->leftover)
4174                         p = iter;
4175                 else {
4176                         l = *pos - 1;
4177                         p = s_next(m, p, &l);
4178                 }
4179         }
4180
4181         trace_event_read_lock();
4182         trace_access_lock(cpu_file);
4183         return p;
4184 }
4185
4186 static void s_stop(struct seq_file *m, void *p)
4187 {
4188         struct trace_iterator *iter = m->private;
4189
4190 #ifdef CONFIG_TRACER_MAX_TRACE
4191         if (iter->snapshot && iter->trace->use_max_tr)
4192                 return;
4193 #endif
4194
4195         trace_access_unlock(iter->cpu_file);
4196         trace_event_read_unlock();
4197 }
4198
4199 static void
4200 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4201                       unsigned long *entries, int cpu)
4202 {
4203         unsigned long count;
4204
4205         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4206         /*
4207          * If this buffer has skipped entries, then we hold all
4208          * entries for the trace and we need to ignore the
4209          * ones before the time stamp.
4210          */
4211         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4212                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4213                 /* total is the same as the entries */
4214                 *total = count;
4215         } else
4216                 *total = count +
4217                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4218         *entries = count;
4219 }
4220
4221 static void
4222 get_total_entries(struct array_buffer *buf,
4223                   unsigned long *total, unsigned long *entries)
4224 {
4225         unsigned long t, e;
4226         int cpu;
4227
4228         *total = 0;
4229         *entries = 0;
4230
4231         for_each_tracing_cpu(cpu) {
4232                 get_total_entries_cpu(buf, &t, &e, cpu);
4233                 *total += t;
4234                 *entries += e;
4235         }
4236 }
4237
4238 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4239 {
4240         unsigned long total, entries;
4241
4242         if (!tr)
4243                 tr = &global_trace;
4244
4245         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4246
4247         return entries;
4248 }
4249
4250 unsigned long trace_total_entries(struct trace_array *tr)
4251 {
4252         unsigned long total, entries;
4253
4254         if (!tr)
4255                 tr = &global_trace;
4256
4257         get_total_entries(&tr->array_buffer, &total, &entries);
4258
4259         return entries;
4260 }
4261
4262 static void print_lat_help_header(struct seq_file *m)
4263 {
4264         seq_puts(m, "#                    _------=> CPU#            \n"
4265                     "#                   / _-----=> irqs-off/BH-disabled\n"
4266                     "#                  | / _----=> need-resched    \n"
4267                     "#                  || / _---=> hardirq/softirq \n"
4268                     "#                  ||| / _--=> preempt-depth   \n"
4269                     "#                  |||| / _-=> migrate-disable \n"
4270                     "#                  ||||| /     delay           \n"
4271                     "#  cmd     pid     |||||| time  |   caller     \n"
4272                     "#     \\   /        ||||||  \\    |    /       \n");
4273 }
4274
4275 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4276 {
4277         unsigned long total;
4278         unsigned long entries;
4279
4280         get_total_entries(buf, &total, &entries);
4281         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4282                    entries, total, num_online_cpus());
4283         seq_puts(m, "#\n");
4284 }
4285
4286 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4287                                    unsigned int flags)
4288 {
4289         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4290
4291         print_event_info(buf, m);
4292
4293         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4294         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4295 }
4296
4297 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4298                                        unsigned int flags)
4299 {
4300         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4301         static const char space[] = "            ";
4302         int prec = tgid ? 12 : 2;
4303
4304         print_event_info(buf, m);
4305
4306         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4307         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4308         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4309         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4310         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4311         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4312         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4313         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4314 }
4315
4316 void
4317 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4318 {
4319         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4320         struct array_buffer *buf = iter->array_buffer;
4321         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4322         struct tracer *type = iter->trace;
4323         unsigned long entries;
4324         unsigned long total;
4325         const char *name = type->name;
4326
4327         get_total_entries(buf, &total, &entries);
4328
4329         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4330                    name, UTS_RELEASE);
4331         seq_puts(m, "# -----------------------------------"
4332                  "---------------------------------\n");
4333         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4334                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4335                    nsecs_to_usecs(data->saved_latency),
4336                    entries,
4337                    total,
4338                    buf->cpu,
4339                    preempt_model_none()      ? "server" :
4340                    preempt_model_voluntary() ? "desktop" :
4341                    preempt_model_full()      ? "preempt" :
4342                    preempt_model_rt()        ? "preempt_rt" :
4343                    "unknown",
4344                    /* These are reserved for later use */
4345                    0, 0, 0, 0);
4346 #ifdef CONFIG_SMP
4347         seq_printf(m, " #P:%d)\n", num_online_cpus());
4348 #else
4349         seq_puts(m, ")\n");
4350 #endif
4351         seq_puts(m, "#    -----------------\n");
4352         seq_printf(m, "#    | task: %.16s-%d "
4353                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4354                    data->comm, data->pid,
4355                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4356                    data->policy, data->rt_priority);
4357         seq_puts(m, "#    -----------------\n");
4358
4359         if (data->critical_start) {
4360                 seq_puts(m, "#  => started at: ");
4361                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4362                 trace_print_seq(m, &iter->seq);
4363                 seq_puts(m, "\n#  => ended at:   ");
4364                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4365                 trace_print_seq(m, &iter->seq);
4366                 seq_puts(m, "\n#\n");
4367         }
4368
4369         seq_puts(m, "#\n");
4370 }
4371
4372 static void test_cpu_buff_start(struct trace_iterator *iter)
4373 {
4374         struct trace_seq *s = &iter->seq;
4375         struct trace_array *tr = iter->tr;
4376
4377         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4378                 return;
4379
4380         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4381                 return;
4382
4383         if (cpumask_available(iter->started) &&
4384             cpumask_test_cpu(iter->cpu, iter->started))
4385                 return;
4386
4387         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4388                 return;
4389
4390         if (cpumask_available(iter->started))
4391                 cpumask_set_cpu(iter->cpu, iter->started);
4392
4393         /* Don't print started cpu buffer for the first entry of the trace */
4394         if (iter->idx > 1)
4395                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4396                                 iter->cpu);
4397 }
4398
4399 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4400 {
4401         struct trace_array *tr = iter->tr;
4402         struct trace_seq *s = &iter->seq;
4403         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4404         struct trace_entry *entry;
4405         struct trace_event *event;
4406
4407         entry = iter->ent;
4408
4409         test_cpu_buff_start(iter);
4410
4411         event = ftrace_find_event(entry->type);
4412
4413         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4414                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4415                         trace_print_lat_context(iter);
4416                 else
4417                         trace_print_context(iter);
4418         }
4419
4420         if (trace_seq_has_overflowed(s))
4421                 return TRACE_TYPE_PARTIAL_LINE;
4422
4423         if (event)
4424                 return event->funcs->trace(iter, sym_flags, event);
4425
4426         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4427
4428         return trace_handle_return(s);
4429 }
4430
4431 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4432 {
4433         struct trace_array *tr = iter->tr;
4434         struct trace_seq *s = &iter->seq;
4435         struct trace_entry *entry;
4436         struct trace_event *event;
4437
4438         entry = iter->ent;
4439
4440         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4441                 trace_seq_printf(s, "%d %d %llu ",
4442                                  entry->pid, iter->cpu, iter->ts);
4443
4444         if (trace_seq_has_overflowed(s))
4445                 return TRACE_TYPE_PARTIAL_LINE;
4446
4447         event = ftrace_find_event(entry->type);
4448         if (event)
4449                 return event->funcs->raw(iter, 0, event);
4450
4451         trace_seq_printf(s, "%d ?\n", entry->type);
4452
4453         return trace_handle_return(s);
4454 }
4455
4456 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4457 {
4458         struct trace_array *tr = iter->tr;
4459         struct trace_seq *s = &iter->seq;
4460         unsigned char newline = '\n';
4461         struct trace_entry *entry;
4462         struct trace_event *event;
4463
4464         entry = iter->ent;
4465
4466         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4467                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4468                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4469                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4470                 if (trace_seq_has_overflowed(s))
4471                         return TRACE_TYPE_PARTIAL_LINE;
4472         }
4473
4474         event = ftrace_find_event(entry->type);
4475         if (event) {
4476                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4477                 if (ret != TRACE_TYPE_HANDLED)
4478                         return ret;
4479         }
4480
4481         SEQ_PUT_FIELD(s, newline);
4482
4483         return trace_handle_return(s);
4484 }
4485
4486 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4487 {
4488         struct trace_array *tr = iter->tr;
4489         struct trace_seq *s = &iter->seq;
4490         struct trace_entry *entry;
4491         struct trace_event *event;
4492
4493         entry = iter->ent;
4494
4495         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4496                 SEQ_PUT_FIELD(s, entry->pid);
4497                 SEQ_PUT_FIELD(s, iter->cpu);
4498                 SEQ_PUT_FIELD(s, iter->ts);
4499                 if (trace_seq_has_overflowed(s))
4500                         return TRACE_TYPE_PARTIAL_LINE;
4501         }
4502
4503         event = ftrace_find_event(entry->type);
4504         return event ? event->funcs->binary(iter, 0, event) :
4505                 TRACE_TYPE_HANDLED;
4506 }
4507
4508 int trace_empty(struct trace_iterator *iter)
4509 {
4510         struct ring_buffer_iter *buf_iter;
4511         int cpu;
4512
4513         /* If we are looking at one CPU buffer, only check that one */
4514         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4515                 cpu = iter->cpu_file;
4516                 buf_iter = trace_buffer_iter(iter, cpu);
4517                 if (buf_iter) {
4518                         if (!ring_buffer_iter_empty(buf_iter))
4519                                 return 0;
4520                 } else {
4521                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4522                                 return 0;
4523                 }
4524                 return 1;
4525         }
4526
4527         for_each_tracing_cpu(cpu) {
4528                 buf_iter = trace_buffer_iter(iter, cpu);
4529                 if (buf_iter) {
4530                         if (!ring_buffer_iter_empty(buf_iter))
4531                                 return 0;
4532                 } else {
4533                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4534                                 return 0;
4535                 }
4536         }
4537
4538         return 1;
4539 }
4540
4541 /*  Called with trace_event_read_lock() held. */
4542 enum print_line_t print_trace_line(struct trace_iterator *iter)
4543 {
4544         struct trace_array *tr = iter->tr;
4545         unsigned long trace_flags = tr->trace_flags;
4546         enum print_line_t ret;
4547
4548         if (iter->lost_events) {
4549                 if (iter->lost_events == (unsigned long)-1)
4550                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4551                                          iter->cpu);
4552                 else
4553                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4554                                          iter->cpu, iter->lost_events);
4555                 if (trace_seq_has_overflowed(&iter->seq))
4556                         return TRACE_TYPE_PARTIAL_LINE;
4557         }
4558
4559         if (iter->trace && iter->trace->print_line) {
4560                 ret = iter->trace->print_line(iter);
4561                 if (ret != TRACE_TYPE_UNHANDLED)
4562                         return ret;
4563         }
4564
4565         if (iter->ent->type == TRACE_BPUTS &&
4566                         trace_flags & TRACE_ITER_PRINTK &&
4567                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4568                 return trace_print_bputs_msg_only(iter);
4569
4570         if (iter->ent->type == TRACE_BPRINT &&
4571                         trace_flags & TRACE_ITER_PRINTK &&
4572                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4573                 return trace_print_bprintk_msg_only(iter);
4574
4575         if (iter->ent->type == TRACE_PRINT &&
4576                         trace_flags & TRACE_ITER_PRINTK &&
4577                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4578                 return trace_print_printk_msg_only(iter);
4579
4580         if (trace_flags & TRACE_ITER_BIN)
4581                 return print_bin_fmt(iter);
4582
4583         if (trace_flags & TRACE_ITER_HEX)
4584                 return print_hex_fmt(iter);
4585
4586         if (trace_flags & TRACE_ITER_RAW)
4587                 return print_raw_fmt(iter);
4588
4589         return print_trace_fmt(iter);
4590 }
4591
4592 void trace_latency_header(struct seq_file *m)
4593 {
4594         struct trace_iterator *iter = m->private;
4595         struct trace_array *tr = iter->tr;
4596
4597         /* print nothing if the buffers are empty */
4598         if (trace_empty(iter))
4599                 return;
4600
4601         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4602                 print_trace_header(m, iter);
4603
4604         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4605                 print_lat_help_header(m);
4606 }
4607
4608 void trace_default_header(struct seq_file *m)
4609 {
4610         struct trace_iterator *iter = m->private;
4611         struct trace_array *tr = iter->tr;
4612         unsigned long trace_flags = tr->trace_flags;
4613
4614         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4615                 return;
4616
4617         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4618                 /* print nothing if the buffers are empty */
4619                 if (trace_empty(iter))
4620                         return;
4621                 print_trace_header(m, iter);
4622                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4623                         print_lat_help_header(m);
4624         } else {
4625                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4626                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4627                                 print_func_help_header_irq(iter->array_buffer,
4628                                                            m, trace_flags);
4629                         else
4630                                 print_func_help_header(iter->array_buffer, m,
4631                                                        trace_flags);
4632                 }
4633         }
4634 }
4635
4636 static void test_ftrace_alive(struct seq_file *m)
4637 {
4638         if (!ftrace_is_dead())
4639                 return;
4640         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4641                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4642 }
4643
4644 #ifdef CONFIG_TRACER_MAX_TRACE
4645 static void show_snapshot_main_help(struct seq_file *m)
4646 {
4647         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4648                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4649                     "#                      Takes a snapshot of the main buffer.\n"
4650                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4651                     "#                      (Doesn't have to be '2'; works with any number that\n"
4652                     "#                       is not a '0' or '1')\n");
4653 }
4654
4655 static void show_snapshot_percpu_help(struct seq_file *m)
4656 {
4657         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4658 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4659         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4660                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4661 #else
4662         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4663                     "#                     Must use main snapshot file to allocate.\n");
4664 #endif
4665         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4666                     "#                      (Doesn't have to be '2'; works with any number that\n"
4667                     "#                       is not a '0' or '1')\n");
4668 }
4669
4670 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4671 {
4672         if (iter->tr->allocated_snapshot)
4673                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4674         else
4675                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4676
4677         seq_puts(m, "# Snapshot commands:\n");
4678         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4679                 show_snapshot_main_help(m);
4680         else
4681                 show_snapshot_percpu_help(m);
4682 }
4683 #else
4684 /* Should never be called */
4685 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4686 #endif
4687
4688 static int s_show(struct seq_file *m, void *v)
4689 {
4690         struct trace_iterator *iter = v;
4691         int ret;
4692
4693         if (iter->ent == NULL) {
4694                 if (iter->tr) {
4695                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4696                         seq_puts(m, "#\n");
4697                         test_ftrace_alive(m);
4698                 }
4699                 if (iter->snapshot && trace_empty(iter))
4700                         print_snapshot_help(m, iter);
4701                 else if (iter->trace && iter->trace->print_header)
4702                         iter->trace->print_header(m);
4703                 else
4704                         trace_default_header(m);
4705
4706         } else if (iter->leftover) {
4707                 /*
4708                  * If we filled the seq_file buffer earlier, we
4709                  * want to just show it now.
4710                  */
4711                 ret = trace_print_seq(m, &iter->seq);
4712
4713                 /* ret should this time be zero, but you never know */
4714                 iter->leftover = ret;
4715
4716         } else {
4717                 print_trace_line(iter);
4718                 ret = trace_print_seq(m, &iter->seq);
4719                 /*
4720                  * If we overflow the seq_file buffer, then it will
4721                  * ask us for this data again at start up.
4722                  * Use that instead.
4723                  *  ret is 0 if seq_file write succeeded.
4724                  *        -1 otherwise.
4725                  */
4726                 iter->leftover = ret;
4727         }
4728
4729         return 0;
4730 }
4731
4732 /*
4733  * Should be used after trace_array_get(); trace_types_lock
4734  * ensures that i_cdev was already initialized.
4735  */
4736 static inline int tracing_get_cpu(struct inode *inode)
4737 {
4738         if (inode->i_cdev) /* See trace_create_cpu_file() */
4739                 return (long)inode->i_cdev - 1;
4740         return RING_BUFFER_ALL_CPUS;
4741 }
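
/*
 * The value decoded above is "cpu + 1" as stored in inode->i_cdev by
 * trace_create_cpu_file(), so for example the per_cpu/cpu0 files carry
 * i_cdev == (void *)1 and map back to CPU 0, while a NULL i_cdev (such as
 * the top level "trace" file) means "read all CPUs".
 */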
4742
4743 static const struct seq_operations tracer_seq_ops = {
4744         .start          = s_start,
4745         .next           = s_next,
4746         .stop           = s_stop,
4747         .show           = s_show,
4748 };
4749
4750 static struct trace_iterator *
4751 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4752 {
4753         struct trace_array *tr = inode->i_private;
4754         struct trace_iterator *iter;
4755         int cpu;
4756
4757         if (tracing_disabled)
4758                 return ERR_PTR(-ENODEV);
4759
4760         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4761         if (!iter)
4762                 return ERR_PTR(-ENOMEM);
4763
4764         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4765                                     GFP_KERNEL);
4766         if (!iter->buffer_iter)
4767                 goto release;
4768
4769         /*
4770          * trace_find_next_entry() may need to save off iter->ent.
4771          * It will place it into the iter->temp buffer. As most
4772          * events are less than 128 bytes, allocate a buffer of that size.
4773          * If one is greater, then trace_find_next_entry() will
4774          * allocate a new buffer to adjust for the bigger iter->ent.
4775          * It's not critical if it fails to get allocated here.
4776          */
4777         iter->temp = kmalloc(128, GFP_KERNEL);
4778         if (iter->temp)
4779                 iter->temp_size = 128;
4780
4781         /*
4782          * trace_event_printf() may need to modify the given format
4783          * string to replace %p with %px so that it shows the real address
4784          * instead of a hashed value. However, that is only needed for
4785          * event tracing; other tracers may not need it. Defer the
4786          * allocation until it is needed.
4787          */
4788         iter->fmt = NULL;
4789         iter->fmt_size = 0;
4790
4791         /*
4792          * We make a copy of the current tracer to avoid concurrent
4793          * changes on it while we are reading.
4794          */
4795         mutex_lock(&trace_types_lock);
4796         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4797         if (!iter->trace)
4798                 goto fail;
4799
4800         *iter->trace = *tr->current_trace;
4801
4802         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4803                 goto fail;
4804
4805         iter->tr = tr;
4806
4807 #ifdef CONFIG_TRACER_MAX_TRACE
4808         /* Currently only the top directory has a snapshot */
4809         if (tr->current_trace->print_max || snapshot)
4810                 iter->array_buffer = &tr->max_buffer;
4811         else
4812 #endif
4813                 iter->array_buffer = &tr->array_buffer;
4814         iter->snapshot = snapshot;
4815         iter->pos = -1;
4816         iter->cpu_file = tracing_get_cpu(inode);
4817         mutex_init(&iter->mutex);
4818
4819         /* Notify the tracer early, before we stop tracing. */
4820         if (iter->trace->open)
4821                 iter->trace->open(iter);
4822
4823         /* Annotate start of buffers if we had overruns */
4824         if (ring_buffer_overruns(iter->array_buffer->buffer))
4825                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4826
4827         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4828         if (trace_clocks[tr->clock_id].in_ns)
4829                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4830
4831         /*
4832          * If pause-on-trace is enabled, then stop the trace while
4833          * dumping, unless this is the "snapshot" file
4834          */
4835         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4836                 tracing_stop_tr(tr);
4837
4838         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4839                 for_each_tracing_cpu(cpu) {
4840                         iter->buffer_iter[cpu] =
4841                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4842                                                          cpu, GFP_KERNEL);
4843                 }
4844                 ring_buffer_read_prepare_sync();
4845                 for_each_tracing_cpu(cpu) {
4846                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4847                         tracing_iter_reset(iter, cpu);
4848                 }
4849         } else {
4850                 cpu = iter->cpu_file;
4851                 iter->buffer_iter[cpu] =
4852                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4853                                                  cpu, GFP_KERNEL);
4854                 ring_buffer_read_prepare_sync();
4855                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4856                 tracing_iter_reset(iter, cpu);
4857         }
4858
4859         mutex_unlock(&trace_types_lock);
4860
4861         return iter;
4862
4863  fail:
4864         mutex_unlock(&trace_types_lock);
4865         kfree(iter->trace);
4866         kfree(iter->temp);
4867         kfree(iter->buffer_iter);
4868 release:
4869         seq_release_private(inode, file);
4870         return ERR_PTR(-ENOMEM);
4871 }
4872
4873 int tracing_open_generic(struct inode *inode, struct file *filp)
4874 {
4875         int ret;
4876
4877         ret = tracing_check_open_get_tr(NULL);
4878         if (ret)
4879                 return ret;
4880
4881         filp->private_data = inode->i_private;
4882         return 0;
4883 }
4884
4885 bool tracing_is_disabled(void)
4886 {
4887         return tracing_disabled ? true : false;
4888 }
4889
4890 /*
4891  * Open and update trace_array ref count.
4892  * Must have the current trace_array passed to it.
4893  */
4894 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4895 {
4896         struct trace_array *tr = inode->i_private;
4897         int ret;
4898
4899         ret = tracing_check_open_get_tr(tr);
4900         if (ret)
4901                 return ret;
4902
4903         filp->private_data = inode->i_private;
4904
4905         return 0;
4906 }
4907
4908 static int tracing_mark_open(struct inode *inode, struct file *filp)
4909 {
4910         stream_open(inode, filp);
4911         return tracing_open_generic_tr(inode, filp);
4912 }
4913
4914 static int tracing_release(struct inode *inode, struct file *file)
4915 {
4916         struct trace_array *tr = inode->i_private;
4917         struct seq_file *m = file->private_data;
4918         struct trace_iterator *iter;
4919         int cpu;
4920
4921         if (!(file->f_mode & FMODE_READ)) {
4922                 trace_array_put(tr);
4923                 return 0;
4924         }
4925
4926         /* Writes do not use seq_file */
4927         iter = m->private;
4928         mutex_lock(&trace_types_lock);
4929
4930         for_each_tracing_cpu(cpu) {
4931                 if (iter->buffer_iter[cpu])
4932                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4933         }
4934
4935         if (iter->trace && iter->trace->close)
4936                 iter->trace->close(iter);
4937
4938         if (!iter->snapshot && tr->stop_count)
4939                 /* reenable tracing if it was previously enabled */
4940                 tracing_start_tr(tr);
4941
4942         __trace_array_put(tr);
4943
4944         mutex_unlock(&trace_types_lock);
4945
4946         mutex_destroy(&iter->mutex);
4947         free_cpumask_var(iter->started);
4948         kfree(iter->fmt);
4949         kfree(iter->temp);
4950         kfree(iter->trace);
4951         kfree(iter->buffer_iter);
4952         seq_release_private(inode, file);
4953
4954         return 0;
4955 }
4956
4957 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4958 {
4959         struct trace_array *tr = inode->i_private;
4960
4961         trace_array_put(tr);
4962         return 0;
4963 }
4964
4965 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4966 {
4967         struct trace_array *tr = inode->i_private;
4968
4969         trace_array_put(tr);
4970
4971         return single_release(inode, file);
4972 }
4973
4974 static int tracing_open(struct inode *inode, struct file *file)
4975 {
4976         struct trace_array *tr = inode->i_private;
4977         struct trace_iterator *iter;
4978         int ret;
4979
4980         ret = tracing_check_open_get_tr(tr);
4981         if (ret)
4982                 return ret;
4983
4984         /* If this file was opened for write, then erase its contents */
4985         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4986                 int cpu = tracing_get_cpu(inode);
4987                 struct array_buffer *trace_buf = &tr->array_buffer;
4988
4989 #ifdef CONFIG_TRACER_MAX_TRACE
4990                 if (tr->current_trace->print_max)
4991                         trace_buf = &tr->max_buffer;
4992 #endif
4993
4994                 if (cpu == RING_BUFFER_ALL_CPUS)
4995                         tracing_reset_online_cpus(trace_buf);
4996                 else
4997                         tracing_reset_cpu(trace_buf, cpu);
4998         }
4999
5000         if (file->f_mode & FMODE_READ) {
5001                 iter = __tracing_open(inode, file, false);
5002                 if (IS_ERR(iter))
5003                         ret = PTR_ERR(iter);
5004                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5005                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5006         }
5007
5008         if (ret < 0)
5009                 trace_array_put(tr);
5010
5011         return ret;
5012 }
5013
5014 /*
5015  * Some tracers are not suitable for instance buffers.
5016  * A tracer is always available for the global array (toplevel)
5017  * or if it explicitly states that it is.
5018  */
5019 static bool
5020 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5021 {
5022         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5023 }
5024
5025 /* Find the next tracer that this trace array may use */
5026 static struct tracer *
5027 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5028 {
5029         while (t && !trace_ok_for_array(t, tr))
5030                 t = t->next;
5031
5032         return t;
5033 }
5034
5035 static void *
5036 t_next(struct seq_file *m, void *v, loff_t *pos)
5037 {
5038         struct trace_array *tr = m->private;
5039         struct tracer *t = v;
5040
5041         (*pos)++;
5042
5043         if (t)
5044                 t = get_tracer_for_array(tr, t->next);
5045
5046         return t;
5047 }
5048
5049 static void *t_start(struct seq_file *m, loff_t *pos)
5050 {
5051         struct trace_array *tr = m->private;
5052         struct tracer *t;
5053         loff_t l = 0;
5054
5055         mutex_lock(&trace_types_lock);
5056
5057         t = get_tracer_for_array(tr, trace_types);
5058         for (; t && l < *pos; t = t_next(m, t, &l))
5059                 ;
5060
5061         return t;
5062 }
5063
5064 static void t_stop(struct seq_file *m, void *p)
5065 {
5066         mutex_unlock(&trace_types_lock);
5067 }
5068
5069 static int t_show(struct seq_file *m, void *v)
5070 {
5071         struct tracer *t = v;
5072
5073         if (!t)
5074                 return 0;
5075
5076         seq_puts(m, t->name);
5077         if (t->next)
5078                 seq_putc(m, ' ');
5079         else
5080                 seq_putc(m, '\n');
5081
5082         return 0;
5083 }
5084
5085 static const struct seq_operations show_traces_seq_ops = {
5086         .start          = t_start,
5087         .next           = t_next,
5088         .stop           = t_stop,
5089         .show           = t_show,
5090 };
5091
5092 static int show_traces_open(struct inode *inode, struct file *file)
5093 {
5094         struct trace_array *tr = inode->i_private;
5095         struct seq_file *m;
5096         int ret;
5097
5098         ret = tracing_check_open_get_tr(tr);
5099         if (ret)
5100                 return ret;
5101
5102         ret = seq_open(file, &show_traces_seq_ops);
5103         if (ret) {
5104                 trace_array_put(tr);
5105                 return ret;
5106         }
5107
5108         m = file->private_data;
5109         m->private = tr;
5110
5111         return 0;
5112 }
5113
5114 static int show_traces_release(struct inode *inode, struct file *file)
5115 {
5116         struct trace_array *tr = inode->i_private;
5117
5118         trace_array_put(tr);
5119         return seq_release(inode, file);
5120 }
5121
5122 static ssize_t
5123 tracing_write_stub(struct file *filp, const char __user *ubuf,
5124                    size_t count, loff_t *ppos)
5125 {
5126         return count;
5127 }
5128
5129 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5130 {
5131         int ret;
5132
5133         if (file->f_mode & FMODE_READ)
5134                 ret = seq_lseek(file, offset, whence);
5135         else
5136                 file->f_pos = ret = 0;
5137
5138         return ret;
5139 }
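
/*
 * Write-only opens of the trace file never attach a seq_file (see
 * "Writes do not use seq_file" in tracing_release() above), so there is
 * nothing to seek in and the position is simply pinned to zero here.
 */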
5140
5141 static const struct file_operations tracing_fops = {
5142         .open           = tracing_open,
5143         .read           = seq_read,
5144         .write          = tracing_write_stub,
5145         .llseek         = tracing_lseek,
5146         .release        = tracing_release,
5147 };
5148
5149 static const struct file_operations show_traces_fops = {
5150         .open           = show_traces_open,
5151         .read           = seq_read,
5152         .llseek         = seq_lseek,
5153         .release        = show_traces_release,
5154 };
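
/*
 * Reading through these operations lists every tracer usable by the given
 * trace_array, space separated (see t_show() above); this appears to back
 * the "available_tracers" listing mentioned in the README text further
 * down. Tracers without ->allow_instances set are filtered out for
 * instance directories by trace_ok_for_array().
 */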
5155
5156 static ssize_t
5157 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5158                      size_t count, loff_t *ppos)
5159 {
5160         struct trace_array *tr = file_inode(filp)->i_private;
5161         char *mask_str;
5162         int len;
5163
5164         len = snprintf(NULL, 0, "%*pb\n",
5165                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5166         mask_str = kmalloc(len, GFP_KERNEL);
5167         if (!mask_str)
5168                 return -ENOMEM;
5169
5170         len = snprintf(mask_str, len, "%*pb\n",
5171                        cpumask_pr_args(tr->tracing_cpumask));
5172         if (len >= count) {
5173                 count = -EINVAL;
5174                 goto out_err;
5175         }
5176         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5177
5178 out_err:
5179         kfree(mask_str);
5180
5181         return count;
5182 }
5183
5184 int tracing_set_cpumask(struct trace_array *tr,
5185                         cpumask_var_t tracing_cpumask_new)
5186 {
5187         int cpu;
5188
5189         if (!tr)
5190                 return -EINVAL;
5191
5192         local_irq_disable();
5193         arch_spin_lock(&tr->max_lock);
5194         for_each_tracing_cpu(cpu) {
5195                 /*
5196                  * Increase/decrease the disabled counter if we are
5197                  * about to flip a bit in the cpumask:
5198                  */
5199                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5200                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5201                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5202                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5203                 }
5204                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5205                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5206                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5207                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5208                 }
5209         }
5210         arch_spin_unlock(&tr->max_lock);
5211         local_irq_enable();
5212
5213         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5214
5215         return 0;
5216 }
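
/*
 * Illustrative usage from user space, via the tracing_cpumask file
 * described in the README below: writing a hex CPU mask such as
 *
 *   # echo 3 > tracing_cpumask
 *
 * keeps recording enabled only on CPUs 0 and 1. Bits that get cleared
 * have their per-cpu recording disabled in the loop above, and bits that
 * get set have it re-enabled.
 */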
5217
5218 static ssize_t
5219 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5220                       size_t count, loff_t *ppos)
5221 {
5222         struct trace_array *tr = file_inode(filp)->i_private;
5223         cpumask_var_t tracing_cpumask_new;
5224         int err;
5225
5226         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5227                 return -ENOMEM;
5228
5229         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5230         if (err)
5231                 goto err_free;
5232
5233         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5234         if (err)
5235                 goto err_free;
5236
5237         free_cpumask_var(tracing_cpumask_new);
5238
5239         return count;
5240
5241 err_free:
5242         free_cpumask_var(tracing_cpumask_new);
5243
5244         return err;
5245 }
5246
5247 static const struct file_operations tracing_cpumask_fops = {
5248         .open           = tracing_open_generic_tr,
5249         .read           = tracing_cpumask_read,
5250         .write          = tracing_cpumask_write,
5251         .release        = tracing_release_generic_tr,
5252         .llseek         = generic_file_llseek,
5253 };
5254
5255 static int tracing_trace_options_show(struct seq_file *m, void *v)
5256 {
5257         struct tracer_opt *trace_opts;
5258         struct trace_array *tr = m->private;
5259         u32 tracer_flags;
5260         int i;
5261
5262         mutex_lock(&trace_types_lock);
5263         tracer_flags = tr->current_trace->flags->val;
5264         trace_opts = tr->current_trace->flags->opts;
5265
5266         for (i = 0; trace_options[i]; i++) {
5267                 if (tr->trace_flags & (1 << i))
5268                         seq_printf(m, "%s\n", trace_options[i]);
5269                 else
5270                         seq_printf(m, "no%s\n", trace_options[i]);
5271         }
5272
5273         for (i = 0; trace_opts[i].name; i++) {
5274                 if (tracer_flags & trace_opts[i].bit)
5275                         seq_printf(m, "%s\n", trace_opts[i].name);
5276                 else
5277                         seq_printf(m, "no%s\n", trace_opts[i].name);
5278         }
5279         mutex_unlock(&trace_types_lock);
5280
5281         return 0;
5282 }
5283
5284 static int __set_tracer_option(struct trace_array *tr,
5285                                struct tracer_flags *tracer_flags,
5286                                struct tracer_opt *opts, int neg)
5287 {
5288         struct tracer *trace = tracer_flags->trace;
5289         int ret;
5290
5291         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5292         if (ret)
5293                 return ret;
5294
5295         if (neg)
5296                 tracer_flags->val &= ~opts->bit;
5297         else
5298                 tracer_flags->val |= opts->bit;
5299         return 0;
5300 }
5301
5302 /* Try to assign a tracer specific option */
5303 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5304 {
5305         struct tracer *trace = tr->current_trace;
5306         struct tracer_flags *tracer_flags = trace->flags;
5307         struct tracer_opt *opts = NULL;
5308         int i;
5309
5310         for (i = 0; tracer_flags->opts[i].name; i++) {
5311                 opts = &tracer_flags->opts[i];
5312
5313                 if (strcmp(cmp, opts->name) == 0)
5314                         return __set_tracer_option(tr, trace->flags, opts, neg);
5315         }
5316
5317         return -EINVAL;
5318 }
5319
5320 /* Some tracers require overwrite to stay enabled */
5321 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5322 {
5323         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5324                 return -1;
5325
5326         return 0;
5327 }
5328
5329 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5330 {
5331         int *map;
5332
5333         if ((mask == TRACE_ITER_RECORD_TGID) ||
5334             (mask == TRACE_ITER_RECORD_CMD))
5335                 lockdep_assert_held(&event_mutex);
5336
5337         /* do nothing if flag is already set */
5338         if (!!(tr->trace_flags & mask) == !!enabled)
5339                 return 0;
5340
5341         /* Give the tracer a chance to approve the change */
5342         if (tr->current_trace->flag_changed)
5343                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5344                         return -EINVAL;
5345
5346         if (enabled)
5347                 tr->trace_flags |= mask;
5348         else
5349                 tr->trace_flags &= ~mask;
5350
5351         if (mask == TRACE_ITER_RECORD_CMD)
5352                 trace_event_enable_cmd_record(enabled);
5353
5354         if (mask == TRACE_ITER_RECORD_TGID) {
5355                 if (!tgid_map) {
5356                         tgid_map_max = pid_max;
5357                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5358                                        GFP_KERNEL);
5359
5360                         /*
5361                          * Pairs with smp_load_acquire() in
5362                          * trace_find_tgid_ptr() to ensure that if it observes
5363                          * the tgid_map we just allocated then it also observes
5364                          * the corresponding tgid_map_max value.
5365                          */
5366                         smp_store_release(&tgid_map, map);
5367                 }
5368                 if (!tgid_map) {
5369                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5370                         return -ENOMEM;
5371                 }
5372
5373                 trace_event_enable_tgid_record(enabled);
5374         }
5375
5376         if (mask == TRACE_ITER_EVENT_FORK)
5377                 trace_event_follow_fork(tr, enabled);
5378
5379         if (mask == TRACE_ITER_FUNC_FORK)
5380                 ftrace_pid_follow_fork(tr, enabled);
5381
5382         if (mask == TRACE_ITER_OVERWRITE) {
5383                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5384 #ifdef CONFIG_TRACER_MAX_TRACE
5385                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5386 #endif
5387         }
5388
5389         if (mask == TRACE_ITER_PRINTK) {
5390                 trace_printk_start_stop_comm(enabled);
5391                 trace_printk_control(enabled);
5392         }
5393
5394         return 0;
5395 }
5396
5397 int trace_set_options(struct trace_array *tr, char *option)
5398 {
5399         char *cmp;
5400         int neg = 0;
5401         int ret;
5402         size_t orig_len = strlen(option);
5403         int len;
5404
5405         cmp = strstrip(option);
5406
5407         len = str_has_prefix(cmp, "no");
5408         if (len)
5409                 neg = 1;
5410
5411         cmp += len;
5412
5413         mutex_lock(&event_mutex);
5414         mutex_lock(&trace_types_lock);
5415
5416         ret = match_string(trace_options, -1, cmp);
5417         /* If it is not a global option, test the tracer-specific options */
5418         if (ret < 0)
5419                 ret = set_tracer_option(tr, cmp, neg);
5420         else
5421                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5422
5423         mutex_unlock(&trace_types_lock);
5424         mutex_unlock(&event_mutex);
5425
5426         /*
5427          * If the first trailing whitespace is replaced with '\0' by strstrip,
5428          * turn it back into a space.
5429          */
5430         if (orig_len > strlen(option))
5431                 option[strlen(option)] = ' ';
5432
5433         return ret;
5434 }
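
/*
 * Example (illustrative): writing "sym-addr" to trace_options sets that
 * flag and writing "nosym-addr" clears it again; the "no" prefix stripped
 * above is what selects set versus clear. Names that are not global
 * options fall through to the current tracer's private flags via
 * set_tracer_option().
 */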
5435
5436 static void __init apply_trace_boot_options(void)
5437 {
5438         char *buf = trace_boot_options_buf;
5439         char *option;
5440
5441         while (true) {
5442                 option = strsep(&buf, ",");
5443
5444                 if (!option)
5445                         break;
5446
5447                 if (*option)
5448                         trace_set_options(&global_trace, option);
5449
5450                 /* Put back the comma to allow this to be called again */
5451                 if (buf)
5452                         *(buf - 1) = ',';
5453         }
5454 }
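
/*
 * The buffer walked here is filled from the kernel command line; assuming
 * the usual "trace_options=" boot parameter, a line such as
 *
 *   trace_options=sym-addr,nooverwrite
 *
 * is split on commas above and each piece is handed to trace_set_options()
 * for the global trace array.
 */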
5455
5456 static ssize_t
5457 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5458                         size_t cnt, loff_t *ppos)
5459 {
5460         struct seq_file *m = filp->private_data;
5461         struct trace_array *tr = m->private;
5462         char buf[64];
5463         int ret;
5464
5465         if (cnt >= sizeof(buf))
5466                 return -EINVAL;
5467
5468         if (copy_from_user(buf, ubuf, cnt))
5469                 return -EFAULT;
5470
5471         buf[cnt] = 0;
5472
5473         ret = trace_set_options(tr, buf);
5474         if (ret < 0)
5475                 return ret;
5476
5477         *ppos += cnt;
5478
5479         return cnt;
5480 }
5481
5482 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5483 {
5484         struct trace_array *tr = inode->i_private;
5485         int ret;
5486
5487         ret = tracing_check_open_get_tr(tr);
5488         if (ret)
5489                 return ret;
5490
5491         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5492         if (ret < 0)
5493                 trace_array_put(tr);
5494
5495         return ret;
5496 }
5497
5498 static const struct file_operations tracing_iter_fops = {
5499         .open           = tracing_trace_options_open,
5500         .read           = seq_read,
5501         .llseek         = seq_lseek,
5502         .release        = tracing_single_release_tr,
5503         .write          = tracing_trace_options_write,
5504 };
5505
5506 static const char readme_msg[] =
5507         "tracing mini-HOWTO:\n\n"
5508         "# echo 0 > tracing_on : quick way to disable tracing\n"
5509         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5510         " Important files:\n"
5511         "  trace\t\t\t- The static contents of the buffer\n"
5512         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5513         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5514         "  current_tracer\t- function and latency tracers\n"
5515         "  available_tracers\t- list of configured tracers for current_tracer\n"
5516         "  error_log\t- error log for failed commands (that support it)\n"
5517         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5518         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5519         "  trace_clock\t\t- change the clock used to order events\n"
5520         "       local:   Per cpu clock but may not be synced across CPUs\n"
5521         "      global:   Synced across CPUs but slows tracing down.\n"
5522         "     counter:   Not a clock, but just an increment\n"
5523         "      uptime:   Jiffy counter from time of boot\n"
5524         "        perf:   Same clock that perf events use\n"
5525 #ifdef CONFIG_X86_64
5526         "     x86-tsc:   TSC cycle counter\n"
5527 #endif
5528         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5529         "       delta:   Delta difference against a buffer-wide timestamp\n"
5530         "    absolute:   Absolute (standalone) timestamp\n"
5531         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5532         "\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5533         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5534         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5535         "\t\t\t  Remove sub-buffer with rmdir\n"
5536         "  trace_options\t\t- Set format or modify how tracing happens\n"
5537         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5538         "\t\t\t  option name\n"
5539         "  saved_cmdlines_size\t- echo the number of entries to keep in the comm-pid list\n"
5540 #ifdef CONFIG_DYNAMIC_FTRACE
5541         "\n  available_filter_functions - list of functions that can be filtered on\n"
5542         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5543         "\t\t\t  functions\n"
5544         "\t     accepts: func_full_name or glob-matching-pattern\n"
5545         "\t     modules: Can select a group via module\n"
5546         "\t      Format: :mod:<module-name>\n"
5547         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5548         "\t    triggers: a command to perform when function is hit\n"
5549         "\t      Format: <function>:<trigger>[:count]\n"
5550         "\t     trigger: traceon, traceoff\n"
5551         "\t\t      enable_event:<system>:<event>\n"
5552         "\t\t      disable_event:<system>:<event>\n"
5553 #ifdef CONFIG_STACKTRACE
5554         "\t\t      stacktrace\n"
5555 #endif
5556 #ifdef CONFIG_TRACER_SNAPSHOT
5557         "\t\t      snapshot\n"
5558 #endif
5559         "\t\t      dump\n"
5560         "\t\t      cpudump\n"
5561         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5562         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5563         "\t     The first one will disable tracing every time do_fault is hit\n"
5564         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5565         "\t       The first time do_trap is hit and it disables tracing, the\n"
5566         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5567         "\t       the counter will not decrement. It only decrements when the\n"
5568         "\t       trigger did work\n"
5569         "\t     To remove a trigger without a count:\n"
5570         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5571         "\t     To remove a trigger with a count:\n"
5572         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5573         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5574         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5575         "\t    modules: Can select a group via module command :mod:\n"
5576         "\t    Does not accept triggers\n"
5577 #endif /* CONFIG_DYNAMIC_FTRACE */
5578 #ifdef CONFIG_FUNCTION_TRACER
5579         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5580         "\t\t    (function)\n"
5581         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5582         "\t\t    (function)\n"
5583 #endif
5584 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5585         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5586         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5587         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5588 #endif
5589 #ifdef CONFIG_TRACER_SNAPSHOT
5590         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5591         "\t\t\t  snapshot buffer. Read the contents for more\n"
5592         "\t\t\t  information\n"
5593 #endif
5594 #ifdef CONFIG_STACK_TRACER
5595         "  stack_trace\t\t- Shows the max stack trace when active\n"
5596         "  stack_max_size\t- Shows current max stack size that was traced\n"
5597         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5598         "\t\t\t  new trace)\n"
5599 #ifdef CONFIG_DYNAMIC_FTRACE
5600         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5601         "\t\t\t  traces\n"
5602 #endif
5603 #endif /* CONFIG_STACK_TRACER */
5604 #ifdef CONFIG_DYNAMIC_EVENTS
5605         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5606         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5607 #endif
5608 #ifdef CONFIG_KPROBE_EVENTS
5609         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5610         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5611 #endif
5612 #ifdef CONFIG_UPROBE_EVENTS
5613         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5614         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5615 #endif
5616 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5617         "\t  accepts: event-definitions (one definition per line)\n"
5618         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5619         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5622 #endif
5623         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5624         "\t           -:[<group>/][<event>]\n"
5625 #ifdef CONFIG_KPROBE_EVENTS
5626         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5627         "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5628 #endif
5629 #ifdef CONFIG_UPROBE_EVENTS
5630         "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5631 #endif
5632         "\t     args: <name>=fetcharg[:type]\n"
5633         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5634 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5635         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5636 #else
5637         "\t           $stack<index>, $stack, $retval, $comm,\n"
5638 #endif
5639         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5640         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5641         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5642         "\t           symstr, <type>\\[<array-size>\\]\n"
5643 #ifdef CONFIG_HIST_TRIGGERS
5644         "\t    field: <stype> <name>;\n"
5645         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5646         "\t           [unsigned] char/int/long\n"
5647 #endif
5648         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5649         "\t            of the <attached-group>/<attached-event>.\n"
5650 #endif
5651         "  events/\t\t- Directory containing all trace event subsystems:\n"
5652         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5653         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5654         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5655         "\t\t\t  events\n"
5656         "      filter\t\t- If set, only events passing filter are traced\n"
5657         "  events/<system>/<event>/\t- Directory containing control files for\n"
5658         "\t\t\t  <event>:\n"
5659         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5660         "      filter\t\t- If set, only events passing filter are traced\n"
5661         "      trigger\t\t- If set, a command to perform when event is hit\n"
5662         "\t    Format: <trigger>[:count][if <filter>]\n"
5663         "\t   trigger: traceon, traceoff\n"
5664         "\t            enable_event:<system>:<event>\n"
5665         "\t            disable_event:<system>:<event>\n"
5666 #ifdef CONFIG_HIST_TRIGGERS
5667         "\t            enable_hist:<system>:<event>\n"
5668         "\t            disable_hist:<system>:<event>\n"
5669 #endif
5670 #ifdef CONFIG_STACKTRACE
5671         "\t\t    stacktrace\n"
5672 #endif
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5674         "\t\t    snapshot\n"
5675 #endif
5676 #ifdef CONFIG_HIST_TRIGGERS
5677         "\t\t    hist (see below)\n"
5678 #endif
5679         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5680         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5681         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5682         "\t                  events/block/block_unplug/trigger\n"
5683         "\t   The first disables tracing every time block_unplug is hit.\n"
5684         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5685         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5686         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5687         "\t   Like function triggers, the counter is only decremented if it\n"
5688         "\t    enabled or disabled tracing.\n"
5689         "\t   To remove a trigger without a count:\n"
5690         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5691         "\t   To remove a trigger with a count:\n"
5692         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5693         "\t   Filters can be ignored when removing a trigger.\n"
5694 #ifdef CONFIG_HIST_TRIGGERS
5695         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5696         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5697         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5698         "\t            [:values=<field1[,field2,...]>]\n"
5699         "\t            [:sort=<field1[,field2,...]>]\n"
5700         "\t            [:size=#entries]\n"
5701         "\t            [:pause][:continue][:clear]\n"
5702         "\t            [:name=histname1]\n"
5703         "\t            [:nohitcount]\n"
5704         "\t            [:<handler>.<action>]\n"
5705         "\t            [if <filter>]\n\n"
5706         "\t    Note, special fields can be used as well:\n"
5707         "\t            common_timestamp - to record current timestamp\n"
5708         "\t            common_cpu - to record the CPU the event happened on\n"
5709         "\n"
5710         "\t    A hist trigger variable can be:\n"
5711         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5712         "\t        - a reference to another variable e.g. y=$x,\n"
5713         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5714         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5715         "\n"
5716         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5717         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5718         "\t    variable reference, field or numeric literal.\n"
5719         "\n"
5720         "\t    When a matching event is hit, an entry is added to a hash\n"
5721         "\t    table using the key(s) and value(s) named, and the value of a\n"
5722         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5723         "\t    correspond to fields in the event's format description.  Keys\n"
5724         "\t    can be any field, or the special string 'stacktrace'.\n"
5725         "\t    Compound keys consisting of up to two fields can be specified\n"
5726         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5727         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5728         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5729         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5730         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5731         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5732         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5733         "\t    its histogram data will be shared with other triggers of the\n"
5734         "\t    same name, and trigger hits will update this common data.\n\n"
5735         "\t    Reading the 'hist' file for the event will dump the hash\n"
5736         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5737         "\t    triggers attached to an event, there will be a table for each\n"
5738         "\t    trigger in the output.  The table displayed for a named\n"
5739         "\t    trigger will be the same as any other instance having the\n"
5740         "\t    same name.  The default format used to display a given field\n"
5741         "\t    can be modified by appending any of the following modifiers\n"
5742         "\t    to the field name, as applicable:\n\n"
5743         "\t            .hex        display a number as a hex value\n"
5744         "\t            .sym        display an address as a symbol\n"
5745         "\t            .sym-offset display an address as a symbol and offset\n"
5746         "\t            .execname   display a common_pid as a program name\n"
5747         "\t            .syscall    display a syscall id as a syscall name\n"
5748         "\t            .log2       display log2 value rather than raw number\n"
5749         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5750         "\t            .usecs      display a common_timestamp in microseconds\n"
5751         "\t            .percent    display a number as a percentage value\n"
5752         "\t            .graph      display a bar-graph of a value\n\n"
5753         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5754         "\t    trigger or to start a hist trigger but not log any events\n"
5755         "\t    until told to do so.  'continue' can be used to start or\n"
5756         "\t    restart a paused hist trigger.\n\n"
5757         "\t    The 'clear' parameter will clear the contents of a running\n"
5758         "\t    hist trigger and leave its current paused/active state\n"
5759         "\t    unchanged.\n\n"
5760         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5761         "\t    raw hitcount in the histogram.\n\n"
5762         "\t    The enable_hist and disable_hist triggers can be used to\n"
5763         "\t    have one event conditionally start and stop another event's\n"
5764         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5765         "\t    the enable_event and disable_event triggers.\n\n"
5766         "\t    Hist trigger handlers and actions are executed whenever\n"
5767         "\t    a histogram entry is added or updated.  They take the form:\n\n"
5768         "\t        <handler>.<action>\n\n"
5769         "\t    The available handlers are:\n\n"
5770         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5771         "\t        onmax(var)               - invoke if var exceeds current max\n"
5772         "\t        onchange(var)            - invoke action if var changes\n\n"
5773         "\t    The available actions are:\n\n"
5774         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5775         "\t        save(field,...)                      - save current event fields\n"
5776 #ifdef CONFIG_TRACER_SNAPSHOT
5777         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5778 #endif
5779 #ifdef CONFIG_SYNTH_EVENTS
5780         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5781         "\t  Write into this file to define/undefine new synthetic events.\n"
5782         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5783 #endif
5784 #endif
5785 ;
5786
5787 static ssize_t
5788 tracing_readme_read(struct file *filp, char __user *ubuf,
5789                        size_t cnt, loff_t *ppos)
5790 {
5791         return simple_read_from_buffer(ubuf, cnt, ppos,
5792                                         readme_msg, strlen(readme_msg));
5793 }
5794
5795 static const struct file_operations tracing_readme_fops = {
5796         .open           = tracing_open_generic,
5797         .read           = tracing_readme_read,
5798         .llseek         = generic_file_llseek,
5799 };
5800
5801 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5802 {
5803         int pid = ++(*pos);
5804
5805         return trace_find_tgid_ptr(pid);
5806 }
5807
5808 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5809 {
5810         int pid = *pos;
5811
5812         return trace_find_tgid_ptr(pid);
5813 }
5814
5815 static void saved_tgids_stop(struct seq_file *m, void *v)
5816 {
5817 }
5818
5819 static int saved_tgids_show(struct seq_file *m, void *v)
5820 {
5821         int *entry = (int *)v;
5822         int pid = entry - tgid_map;
5823         int tgid = *entry;
5824
5825         if (tgid == 0)
5826                 return SEQ_SKIP;
5827
5828         seq_printf(m, "%d %d\n", pid, tgid);
5829         return 0;
5830 }
5831
5832 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5833         .start          = saved_tgids_start,
5834         .stop           = saved_tgids_stop,
5835         .next           = saved_tgids_next,
5836         .show           = saved_tgids_show,
5837 };
5838
5839 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5840 {
5841         int ret;
5842
5843         ret = tracing_check_open_get_tr(NULL);
5844         if (ret)
5845                 return ret;
5846
5847         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5848 }
5849
5850
5851 static const struct file_operations tracing_saved_tgids_fops = {
5852         .open           = tracing_saved_tgids_open,
5853         .read           = seq_read,
5854         .llseek         = seq_lseek,
5855         .release        = seq_release,
5856 };
5857
5858 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5859 {
5860         unsigned int *ptr = v;
5861
5862         if (*pos || m->count)
5863                 ptr++;
5864
5865         (*pos)++;
5866
5867         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5868              ptr++) {
5869                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5870                         continue;
5871
5872                 return ptr;
5873         }
5874
5875         return NULL;
5876 }
5877
5878 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5879 {
5880         void *v;
5881         loff_t l = 0;
5882
5883         preempt_disable();
5884         arch_spin_lock(&trace_cmdline_lock);
5885
5886         v = &savedcmd->map_cmdline_to_pid[0];
5887         while (l <= *pos) {
5888                 v = saved_cmdlines_next(m, v, &l);
5889                 if (!v)
5890                         return NULL;
5891         }
5892
5893         return v;
5894 }
5895
5896 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5897 {
5898         arch_spin_unlock(&trace_cmdline_lock);
5899         preempt_enable();
5900 }
5901
5902 static int saved_cmdlines_show(struct seq_file *m, void *v)
5903 {
5904         char buf[TASK_COMM_LEN];
5905         unsigned int *pid = v;
5906
5907         __trace_find_cmdline(*pid, buf);
5908         seq_printf(m, "%d %s\n", *pid, buf);
5909         return 0;
5910 }
5911
5912 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5913         .start          = saved_cmdlines_start,
5914         .next           = saved_cmdlines_next,
5915         .stop           = saved_cmdlines_stop,
5916         .show           = saved_cmdlines_show,
5917 };
5918
5919 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5920 {
5921         int ret;
5922
5923         ret = tracing_check_open_get_tr(NULL);
5924         if (ret)
5925                 return ret;
5926
5927         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5928 }
5929
5930 static const struct file_operations tracing_saved_cmdlines_fops = {
5931         .open           = tracing_saved_cmdlines_open,
5932         .read           = seq_read,
5933         .llseek         = seq_lseek,
5934         .release        = seq_release,
5935 };
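
/*
 * Reading saved_cmdlines walks the pid -> comm cache recorded while
 * tracing; each entry is printed as "<pid> <comm>" by saved_cmdlines_show()
 * above, so the output looks roughly like:
 *
 *   1 systemd
 *   1374 bash
 */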
5936
5937 static ssize_t
5938 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5939                                  size_t cnt, loff_t *ppos)
5940 {
5941         char buf[64];
5942         int r;
5943
5944         preempt_disable();
5945         arch_spin_lock(&trace_cmdline_lock);
5946         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5947         arch_spin_unlock(&trace_cmdline_lock);
5948         preempt_enable();
5949
5950         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5951 }
5952
5953 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5954 {
5955         kfree(s->saved_cmdlines);
5956         kfree(s->map_cmdline_to_pid);
5957         kfree(s);
5958 }
5959
5960 static int tracing_resize_saved_cmdlines(unsigned int val)
5961 {
5962         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5963
5964         s = kmalloc(sizeof(*s), GFP_KERNEL);
5965         if (!s)
5966                 return -ENOMEM;
5967
5968         if (allocate_cmdlines_buffer(val, s) < 0) {
5969                 kfree(s);
5970                 return -ENOMEM;
5971         }
5972
5973         preempt_disable();
5974         arch_spin_lock(&trace_cmdline_lock);
5975         savedcmd_temp = savedcmd;
5976         savedcmd = s;
5977         arch_spin_unlock(&trace_cmdline_lock);
5978         preempt_enable();
5979         free_saved_cmdlines_buffer(savedcmd_temp);
5980
5981         return 0;
5982 }
5983
5984 static ssize_t
5985 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5986                                   size_t cnt, loff_t *ppos)
5987 {
5988         unsigned long val;
5989         int ret;
5990
5991         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5992         if (ret)
5993                 return ret;
5994
5995         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5996         if (!val || val > PID_MAX_DEFAULT)
5997                 return -EINVAL;
5998
5999         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6000         if (ret < 0)
6001                 return ret;
6002
6003         *ppos += cnt;
6004
6005         return cnt;
6006 }
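
/*
 * Illustrative resize of the comm cache so that more tasks keep a
 * resolvable name in the trace output:
 *
 *   # echo 4096 > saved_cmdlines_size
 *
 * The write handler above allocates a new buffer of that many entries and
 * swaps it in under trace_cmdline_lock before freeing the old one.
 */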
6007
6008 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6009         .open           = tracing_open_generic,
6010         .read           = tracing_saved_cmdlines_size_read,
6011         .write          = tracing_saved_cmdlines_size_write,
6012 };
6013
6014 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6015 static union trace_eval_map_item *
6016 update_eval_map(union trace_eval_map_item *ptr)
6017 {
6018         if (!ptr->map.eval_string) {
6019                 if (ptr->tail.next) {
6020                         ptr = ptr->tail.next;
6021                         /* Set ptr to the next real item (skip head) */
6022                         ptr++;
6023                 } else
6024                         return NULL;
6025         }
6026         return ptr;
6027 }
6028
6029 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6030 {
6031         union trace_eval_map_item *ptr = v;
6032
6033         /*
6034          * Paranoid! If ptr points to end, we don't want to increment past it.
6035          * This really should never happen.
6036          */
6037         (*pos)++;
6038         ptr = update_eval_map(ptr);
6039         if (WARN_ON_ONCE(!ptr))
6040                 return NULL;
6041
6042         ptr++;
6043         ptr = update_eval_map(ptr);
6044
6045         return ptr;
6046 }
6047
6048 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6049 {
6050         union trace_eval_map_item *v;
6051         loff_t l = 0;
6052
6053         mutex_lock(&trace_eval_mutex);
6054
6055         v = trace_eval_maps;
6056         if (v)
6057                 v++;
6058
6059         while (v && l < *pos) {
6060                 v = eval_map_next(m, v, &l);
6061         }
6062
6063         return v;
6064 }
6065
6066 static void eval_map_stop(struct seq_file *m, void *v)
6067 {
6068         mutex_unlock(&trace_eval_mutex);
6069 }
6070
6071 static int eval_map_show(struct seq_file *m, void *v)
6072 {
6073         union trace_eval_map_item *ptr = v;
6074
6075         seq_printf(m, "%s %ld (%s)\n",
6076                    ptr->map.eval_string, ptr->map.eval_value,
6077                    ptr->map.system);
6078
6079         return 0;
6080 }
6081
6082 static const struct seq_operations tracing_eval_map_seq_ops = {
6083         .start          = eval_map_start,
6084         .next           = eval_map_next,
6085         .stop           = eval_map_stop,
6086         .show           = eval_map_show,
6087 };
6088
6089 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6090 {
6091         int ret;
6092
6093         ret = tracing_check_open_get_tr(NULL);
6094         if (ret)
6095                 return ret;
6096
6097         return seq_open(filp, &tracing_eval_map_seq_ops);
6098 }
6099
6100 static const struct file_operations tracing_eval_map_fops = {
6101         .open           = tracing_eval_map_open,
6102         .read           = seq_read,
6103         .llseek         = seq_lseek,
6104         .release        = seq_release,
6105 };
6106
6107 static inline union trace_eval_map_item *
6108 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6109 {
6110         /* Return tail of array given the head */
6111         return ptr + ptr->head.length + 1;
6112 }
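
/*
 * Layout of one chunk built by trace_insert_eval_map_file() below
 * (len + 2 items):
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * so jumping "length + 1" entries past the head lands on the tail item,
 * whose ->tail.next points at the next module's chunk (or stays zeroed
 * until one is appended).
 */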
6113
6114 static void
6115 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6116                            int len)
6117 {
6118         struct trace_eval_map **stop;
6119         struct trace_eval_map **map;
6120         union trace_eval_map_item *map_array;
6121         union trace_eval_map_item *ptr;
6122
6123         stop = start + len;
6124
6125         /*
6126          * The trace_eval_maps contains the map plus a head and tail item,
6127          * where the head holds the module and length of array, and the
6128          * tail holds a pointer to the next list.
6129          */
6130         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6131         if (!map_array) {
6132                 pr_warn("Unable to allocate trace eval mapping\n");
6133                 return;
6134         }
6135
6136         mutex_lock(&trace_eval_mutex);
6137
6138         if (!trace_eval_maps)
6139                 trace_eval_maps = map_array;
6140         else {
6141                 ptr = trace_eval_maps;
6142                 for (;;) {
6143                         ptr = trace_eval_jmp_to_tail(ptr);
6144                         if (!ptr->tail.next)
6145                                 break;
6146                         ptr = ptr->tail.next;
6147
6148                 }
6149                 ptr->tail.next = map_array;
6150         }
6151         map_array->head.mod = mod;
6152         map_array->head.length = len;
6153         map_array++;
6154
6155         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6156                 map_array->map = **map;
6157                 map_array++;
6158         }
6159         memset(map_array, 0, sizeof(*map_array));
6160
6161         mutex_unlock(&trace_eval_mutex);
6162 }
6163
6164 static void trace_create_eval_file(struct dentry *d_tracer)
6165 {
6166         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6167                           NULL, &tracing_eval_map_fops);
6168 }
6169
6170 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6171 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6172 static inline void trace_insert_eval_map_file(struct module *mod,
6173                               struct trace_eval_map **start, int len) { }
6174 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6175
6176 static void trace_insert_eval_map(struct module *mod,
6177                                   struct trace_eval_map **start, int len)
6178 {
6179         struct trace_eval_map **map;
6180
6181         if (len <= 0)
6182                 return;
6183
6184         map = start;
6185
6186         trace_event_eval_update(map, len);
6187
6188         trace_insert_eval_map_file(mod, start, len);
6189 }
6190
6191 static ssize_t
6192 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6193                        size_t cnt, loff_t *ppos)
6194 {
6195         struct trace_array *tr = filp->private_data;
6196         char buf[MAX_TRACER_SIZE+2];
6197         int r;
6198
6199         mutex_lock(&trace_types_lock);
6200         r = sprintf(buf, "%s\n", tr->current_trace->name);
6201         mutex_unlock(&trace_types_lock);
6202
6203         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6204 }
6205
6206 int tracer_init(struct tracer *t, struct trace_array *tr)
6207 {
6208         tracing_reset_online_cpus(&tr->array_buffer);
6209         return t->init(tr);
6210 }
6211
6212 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6213 {
6214         int cpu;
6215
6216         for_each_tracing_cpu(cpu)
6217                 per_cpu_ptr(buf->data, cpu)->entries = val;
6218 }
6219
6220 #ifdef CONFIG_TRACER_MAX_TRACE
6221 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6222 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6223                                         struct array_buffer *size_buf, int cpu_id)
6224 {
6225         int cpu, ret = 0;
6226
6227         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6228                 for_each_tracing_cpu(cpu) {
6229                         ret = ring_buffer_resize(trace_buf->buffer,
6230                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6231                         if (ret < 0)
6232                                 break;
6233                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6234                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6235                 }
6236         } else {
6237                 ret = ring_buffer_resize(trace_buf->buffer,
6238                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6239                 if (ret == 0)
6240                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6241                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6242         }
6243
6244         return ret;
6245 }
6246 #endif /* CONFIG_TRACER_MAX_TRACE */
6247
6248 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6249                                         unsigned long size, int cpu)
6250 {
6251         int ret;
6252
6253         /*
6254          * If kernel or user changes the size of the ring buffer
6255          * we use the size that was given, and we can forget about
6256          * expanding it later.
6257          */
6258         ring_buffer_expanded = true;
6259
6260         /* May be called before buffers are initialized */
6261         if (!tr->array_buffer.buffer)
6262                 return 0;
6263
6264         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6265         if (ret < 0)
6266                 return ret;
6267
6268 #ifdef CONFIG_TRACER_MAX_TRACE
6269         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6270             !tr->current_trace->use_max_tr)
6271                 goto out;
6272
6273         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6274         if (ret < 0) {
6275                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6276                                                      &tr->array_buffer, cpu);
6277                 if (r < 0) {
6278                         /*
6279                          * AARGH! We are left with a different
6280                          * size max buffer!!!!
6281                          * The max buffer is our "snapshot" buffer.
6282                          * When a tracer needs a snapshot (one of the
6283                          * latency tracers), it swaps the max buffer
6284                          * with the saved snapshot. We succeeded in
6285                          * updating the size of the main buffer, but failed
6286                          * to update the size of the max buffer. But when we
6287                          * tried to reset the main buffer to the original size, we
6288                          * failed there too. This is very unlikely to
6289                          * happen, but if it does, warn and kill all
6290                          * tracing.
6291                          */
6292                         WARN_ON(1);
6293                         tracing_disabled = 1;
6294                 }
6295                 return ret;
6296         }
6297
6298         if (cpu == RING_BUFFER_ALL_CPUS)
6299                 set_buffer_entries(&tr->max_buffer, size);
6300         else
6301                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6302
6303  out:
6304 #endif /* CONFIG_TRACER_MAX_TRACE */
6305
6306         if (cpu == RING_BUFFER_ALL_CPUS)
6307                 set_buffer_entries(&tr->array_buffer, size);
6308         else
6309                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6310
6311         return ret;
6312 }
6313
6314 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6315                                   unsigned long size, int cpu_id)
6316 {
6317         int ret;
6318
6319         mutex_lock(&trace_types_lock);
6320
6321         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6322                 /* make sure this CPU is enabled in the mask */
6323                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6324                         ret = -EINVAL;
6325                         goto out;
6326                 }
6327         }
6328
6329         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6330         if (ret < 0)
6331                 ret = -ENOMEM;
6332
6333 out:
6334         mutex_unlock(&trace_types_lock);
6335
6336         return ret;
6337 }
6338
6339
6340 /**
6341  * tracing_update_buffers - used by tracing facility to expand ring buffers
6342  *
6343  * To save memory when tracing is never used on a system that has it
6344  * configured in, the ring buffers are set to a minimum size. But once
6345  * a user starts to use the tracing facility, they need to grow
6346  * to their default size.
6347  *
6348  * This function is to be called when a tracer is about to be used.
6349  */
6350 int tracing_update_buffers(void)
6351 {
6352         int ret = 0;
6353
6354         mutex_lock(&trace_types_lock);
6355         if (!ring_buffer_expanded)
6356                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6357                                                 RING_BUFFER_ALL_CPUS);
6358         mutex_unlock(&trace_types_lock);
6359
6360         return ret;
6361 }
6362
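/*
 * Illustrative example: enabling the first trace event, e.g.
 *
 *   # echo 1 > /sys/kernel/tracing/events/sched/sched_switch/enable
 *
 * is one of the paths that ends up calling tracing_update_buffers(), so the
 * ring buffer grows from its minimal boot-time size to trace_buf_size before
 * any events are recorded.
 */
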
6363 struct trace_option_dentry;
6364
6365 static void
6366 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6367
6368 /*
6369  * Used to clear out the tracer before deletion of an instance.
6370  * Must have trace_types_lock held.
6371  */
6372 static void tracing_set_nop(struct trace_array *tr)
6373 {
6374         if (tr->current_trace == &nop_trace)
6375                 return;
6376
6377         tr->current_trace->enabled--;
6378
6379         if (tr->current_trace->reset)
6380                 tr->current_trace->reset(tr);
6381
6382         tr->current_trace = &nop_trace;
6383 }
6384
6385 static bool tracer_options_updated;
6386
6387 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6388 {
6389         /* Only enable if the directory has been created already. */
6390         if (!tr->dir)
6391                 return;
6392
6393         /* Only create trace option files after update_tracer_options has finished */
6394         if (!tracer_options_updated)
6395                 return;
6396
6397         create_trace_option_files(tr, t);
6398 }
6399
6400 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6401 {
6402         struct tracer *t;
6403 #ifdef CONFIG_TRACER_MAX_TRACE
6404         bool had_max_tr;
6405 #endif
6406         int ret = 0;
6407
6408         mutex_lock(&trace_types_lock);
6409
6410         if (!ring_buffer_expanded) {
6411                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6412                                                 RING_BUFFER_ALL_CPUS);
6413                 if (ret < 0)
6414                         goto out;
6415                 ret = 0;
6416         }
6417
6418         for (t = trace_types; t; t = t->next) {
6419                 if (strcmp(t->name, buf) == 0)
6420                         break;
6421         }
6422         if (!t) {
6423                 ret = -EINVAL;
6424                 goto out;
6425         }
6426         if (t == tr->current_trace)
6427                 goto out;
6428
6429 #ifdef CONFIG_TRACER_SNAPSHOT
6430         if (t->use_max_tr) {
6431                 local_irq_disable();
6432                 arch_spin_lock(&tr->max_lock);
6433                 if (tr->cond_snapshot)
6434                         ret = -EBUSY;
6435                 arch_spin_unlock(&tr->max_lock);
6436                 local_irq_enable();
6437                 if (ret)
6438                         goto out;
6439         }
6440 #endif
6441         /* Some tracers won't work on kernel command line */
6442         if (system_state < SYSTEM_RUNNING && t->noboot) {
6443                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6444                         t->name);
6445                 goto out;
6446         }
6447
6448         /* Some tracers are only allowed for the top level buffer */
6449         if (!trace_ok_for_array(t, tr)) {
6450                 ret = -EINVAL;
6451                 goto out;
6452         }
6453
6454         /* If trace pipe files are being read, we can't change the tracer */
6455         if (tr->trace_ref) {
6456                 ret = -EBUSY;
6457                 goto out;
6458         }
6459
6460         trace_branch_disable();
6461
6462         tr->current_trace->enabled--;
6463
6464         if (tr->current_trace->reset)
6465                 tr->current_trace->reset(tr);
6466
6467 #ifdef CONFIG_TRACER_MAX_TRACE
6468         had_max_tr = tr->current_trace->use_max_tr;
6469
6470         /* Current trace needs to be nop_trace before synchronize_rcu */
6471         tr->current_trace = &nop_trace;
6472
6473         if (had_max_tr && !t->use_max_tr) {
6474                 /*
6475                  * We need to make sure that the update_max_tr sees that
6476                  * current_trace changed to nop_trace to keep it from
6477                  * swapping the buffers after we resize it.
6478                  * update_max_tr() is called with interrupts disabled,
6479                  * so a synchronize_rcu() is sufficient.
6480                  */
6481                 synchronize_rcu();
6482                 free_snapshot(tr);
6483         }
6484
6485         if (t->use_max_tr && !tr->allocated_snapshot) {
6486                 ret = tracing_alloc_snapshot_instance(tr);
6487                 if (ret < 0)
6488                         goto out;
6489         }
6490 #else
6491         tr->current_trace = &nop_trace;
6492 #endif
6493
6494         if (t->init) {
6495                 ret = tracer_init(t, tr);
6496                 if (ret)
6497                         goto out;
6498         }
6499
6500         tr->current_trace = t;
6501         tr->current_trace->enabled++;
6502         trace_branch_enable(tr);
6503  out:
6504         mutex_unlock(&trace_types_lock);
6505
6506         return ret;
6507 }
6508
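/*
 * Illustrative use from user space: the available tracers can be listed and
 * the current one switched via tracefs, e.g.
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * The write is handled by tracing_set_trace_write() below, which trims the
 * string and hands it to tracing_set_tracer().
 */
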
6509 static ssize_t
6510 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6511                         size_t cnt, loff_t *ppos)
6512 {
6513         struct trace_array *tr = filp->private_data;
6514         char buf[MAX_TRACER_SIZE+1];
6515         char *name;
6516         size_t ret;
6517         int err;
6518
6519         ret = cnt;
6520
6521         if (cnt > MAX_TRACER_SIZE)
6522                 cnt = MAX_TRACER_SIZE;
6523
6524         if (copy_from_user(buf, ubuf, cnt))
6525                 return -EFAULT;
6526
6527         buf[cnt] = 0;
6528
6529         name = strim(buf);
6530
6531         err = tracing_set_tracer(tr, name);
6532         if (err)
6533                 return err;
6534
6535         *ppos += ret;
6536
6537         return ret;
6538 }
6539
6540 static ssize_t
6541 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6542                    size_t cnt, loff_t *ppos)
6543 {
6544         char buf[64];
6545         int r;
6546
6547         r = snprintf(buf, sizeof(buf), "%ld\n",
6548                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6549         if (r > sizeof(buf))
6550                 r = sizeof(buf);
6551         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6552 }
6553
6554 static ssize_t
6555 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6556                     size_t cnt, loff_t *ppos)
6557 {
6558         unsigned long val;
6559         int ret;
6560
6561         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6562         if (ret)
6563                 return ret;
6564
6565         *ptr = val * 1000;
6566
6567         return cnt;
6568 }
6569
6570 static ssize_t
6571 tracing_thresh_read(struct file *filp, char __user *ubuf,
6572                     size_t cnt, loff_t *ppos)
6573 {
6574         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6575 }
6576
6577 static ssize_t
6578 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6579                      size_t cnt, loff_t *ppos)
6580 {
6581         struct trace_array *tr = filp->private_data;
6582         int ret;
6583
6584         mutex_lock(&trace_types_lock);
6585         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6586         if (ret < 0)
6587                 goto out;
6588
6589         if (tr->current_trace->update_thresh) {
6590                 ret = tr->current_trace->update_thresh(tr);
6591                 if (ret < 0)
6592                         goto out;
6593         }
6594
6595         ret = cnt;
6596 out:
6597         mutex_unlock(&trace_types_lock);
6598
6599         return ret;
6600 }
6601
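/*
 * Illustrative tracing_thresh usage: the value is read and written in
 * microseconds, e.g.
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * stores 100000 nanoseconds via tracing_nsecs_write(); latency tracers that
 * support a threshold then only record latencies above it.
 */
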
6602 #ifdef CONFIG_TRACER_MAX_TRACE
6603
6604 static ssize_t
6605 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6606                      size_t cnt, loff_t *ppos)
6607 {
6608         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6609 }
6610
6611 static ssize_t
6612 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6613                       size_t cnt, loff_t *ppos)
6614 {
6615         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6616 }
6617
6618 #endif
6619
6620 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6621 {
6622         struct trace_array *tr = inode->i_private;
6623         struct trace_iterator *iter;
6624         int ret;
6625
6626         ret = tracing_check_open_get_tr(tr);
6627         if (ret)
6628                 return ret;
6629
6630         mutex_lock(&trace_types_lock);
6631
6632         /* create a buffer to store the information to pass to userspace */
6633         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6634         if (!iter) {
6635                 ret = -ENOMEM;
6636                 __trace_array_put(tr);
6637                 goto out;
6638         }
6639
6640         trace_seq_init(&iter->seq);
6641         iter->trace = tr->current_trace;
6642
6643         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6644                 ret = -ENOMEM;
6645                 goto fail;
6646         }
6647
6648         /* trace pipe does not show start of buffer */
6649         cpumask_setall(iter->started);
6650
6651         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6652                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6653
6654         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6655         if (trace_clocks[tr->clock_id].in_ns)
6656                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6657
6658         iter->tr = tr;
6659         iter->array_buffer = &tr->array_buffer;
6660         iter->cpu_file = tracing_get_cpu(inode);
6661         mutex_init(&iter->mutex);
6662         filp->private_data = iter;
6663
6664         if (iter->trace->pipe_open)
6665                 iter->trace->pipe_open(iter);
6666
6667         nonseekable_open(inode, filp);
6668
6669         tr->trace_ref++;
6670 out:
6671         mutex_unlock(&trace_types_lock);
6672         return ret;
6673
6674 fail:
6675         kfree(iter);
6676         __trace_array_put(tr);
6677         mutex_unlock(&trace_types_lock);
6678         return ret;
6679 }
6680
6681 static int tracing_release_pipe(struct inode *inode, struct file *file)
6682 {
6683         struct trace_iterator *iter = file->private_data;
6684         struct trace_array *tr = inode->i_private;
6685
6686         mutex_lock(&trace_types_lock);
6687
6688         tr->trace_ref--;
6689
6690         if (iter->trace->pipe_close)
6691                 iter->trace->pipe_close(iter);
6692
6693         mutex_unlock(&trace_types_lock);
6694
6695         free_cpumask_var(iter->started);
6696         kfree(iter->fmt);
6697         mutex_destroy(&iter->mutex);
6698         kfree(iter);
6699
6700         trace_array_put(tr);
6701
6702         return 0;
6703 }
6704
6705 static __poll_t
6706 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6707 {
6708         struct trace_array *tr = iter->tr;
6709
6710         /* Iterators are static; they should be either filled or empty */
6711         if (trace_buffer_iter(iter, iter->cpu_file))
6712                 return EPOLLIN | EPOLLRDNORM;
6713
6714         if (tr->trace_flags & TRACE_ITER_BLOCK)
6715                 /*
6716                  * Always select as readable when in blocking mode
6717                  */
6718                 return EPOLLIN | EPOLLRDNORM;
6719         else
6720                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6721                                              filp, poll_table, iter->tr->buffer_percent);
6722 }
6723
6724 static __poll_t
6725 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6726 {
6727         struct trace_iterator *iter = filp->private_data;
6728
6729         return trace_poll(iter, filp, poll_table);
6730 }
6731
6732 /* Must be called with iter->mutex held. */
6733 static int tracing_wait_pipe(struct file *filp)
6734 {
6735         struct trace_iterator *iter = filp->private_data;
6736         int ret;
6737
6738         while (trace_empty(iter)) {
6739
6740                 if ((filp->f_flags & O_NONBLOCK)) {
6741                         return -EAGAIN;
6742                 }
6743
6744                 /*
6745                  * We keep blocking while tracing is enabled, and also while
6746                  * tracing is disabled if we have never read anything. This
6747                  * allows a user to cat this file, and then enable tracing.
6748                  * But after we have read something, we give an EOF when
6749                  * tracing is disabled again.
6750                  *
6751                  * iter->pos will be 0 if we haven't read anything.
6752                  */
6753                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6754                         break;
6755
6756                 mutex_unlock(&iter->mutex);
6757
6758                 ret = wait_on_pipe(iter, 0);
6759
6760                 mutex_lock(&iter->mutex);
6761
6762                 if (ret)
6763                         return ret;
6764         }
6765
6766         return 1;
6767 }
6768
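/*
 * Illustrative behaviour of the consuming reader: with the default blocking
 * mode,
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *
 * sleeps until entries arrive and consumes them as they are printed, while
 * opening the file with O_NONBLOCK makes an empty read return -EAGAIN
 * instead of sleeping (see tracing_wait_pipe() above).
 */
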
6769 /*
6770  * Consumer reader.
6771  */
6772 static ssize_t
6773 tracing_read_pipe(struct file *filp, char __user *ubuf,
6774                   size_t cnt, loff_t *ppos)
6775 {
6776         struct trace_iterator *iter = filp->private_data;
6777         ssize_t sret;
6778
6779         /*
6780          * Avoid more than one consumer on a single file descriptor.
6781          * This is just a matter of trace coherency; the ring buffer itself
6782          * is protected.
6783          */
6784         mutex_lock(&iter->mutex);
6785
6786         /* return any leftover data */
6787         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6788         if (sret != -EBUSY)
6789                 goto out;
6790
6791         trace_seq_init(&iter->seq);
6792
6793         if (iter->trace->read) {
6794                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6795                 if (sret)
6796                         goto out;
6797         }
6798
6799 waitagain:
6800         sret = tracing_wait_pipe(filp);
6801         if (sret <= 0)
6802                 goto out;
6803
6804         /* stop when tracing is finished */
6805         if (trace_empty(iter)) {
6806                 sret = 0;
6807                 goto out;
6808         }
6809
6810         if (cnt >= PAGE_SIZE)
6811                 cnt = PAGE_SIZE - 1;
6812
6813         /* reset all but tr, trace, and overruns */
6814         trace_iterator_reset(iter);
6815         cpumask_clear(iter->started);
6816         trace_seq_init(&iter->seq);
6817
6818         trace_event_read_lock();
6819         trace_access_lock(iter->cpu_file);
6820         while (trace_find_next_entry_inc(iter) != NULL) {
6821                 enum print_line_t ret;
6822                 int save_len = iter->seq.seq.len;
6823
6824                 ret = print_trace_line(iter);
6825                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6826                         /*
6827                          * If one print_trace_line() fills the entire trace_seq in one shot,
6828                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6829                          * In this case, we need to consume it, otherwise the loop will peek
6830                          * this event next time, resulting in an infinite loop.
6831                          */
6832                         if (save_len == 0) {
6833                                 iter->seq.full = 0;
6834                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6835                                 trace_consume(iter);
6836                                 break;
6837                         }
6838
6839                         /* In other cases, don't print partial lines */
6840                         iter->seq.seq.len = save_len;
6841                         break;
6842                 }
6843                 if (ret != TRACE_TYPE_NO_CONSUME)
6844                         trace_consume(iter);
6845
6846                 if (trace_seq_used(&iter->seq) >= cnt)
6847                         break;
6848
6849                 /*
6850                  * Setting the full flag means we reached the trace_seq buffer
6851                  * size and should have left via the partial output condition above.
6852                  * One of the trace_seq_* functions is not used properly.
6853                  */
6854                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6855                           iter->ent->type);
6856         }
6857         trace_access_unlock(iter->cpu_file);
6858         trace_event_read_unlock();
6859
6860         /* Now copy what we have to the user */
6861         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6862         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6863                 trace_seq_init(&iter->seq);
6864
6865         /*
6866          * If there was nothing to send to user, in spite of consuming trace
6867          * entries, go back to wait for more entries.
6868          */
6869         if (sret == -EBUSY)
6870                 goto waitagain;
6871
6872 out:
6873         mutex_unlock(&iter->mutex);
6874
6875         return sret;
6876 }
6877
6878 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6879                                      unsigned int idx)
6880 {
6881         __free_page(spd->pages[idx]);
6882 }
6883
6884 static size_t
6885 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6886 {
6887         size_t count;
6888         int save_len;
6889         int ret;
6890
6891         /* Seq buffer is page-sized, exactly what we need. */
6892         for (;;) {
6893                 save_len = iter->seq.seq.len;
6894                 ret = print_trace_line(iter);
6895
6896                 if (trace_seq_has_overflowed(&iter->seq)) {
6897                         iter->seq.seq.len = save_len;
6898                         break;
6899                 }
6900
6901                 /*
6902                  * This should not be hit, because it should only
6903                  * be set if the iter->seq overflowed. But check it
6904                  * anyway to be safe.
6905                  */
6906                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6907                         iter->seq.seq.len = save_len;
6908                         break;
6909                 }
6910
6911                 count = trace_seq_used(&iter->seq) - save_len;
6912                 if (rem < count) {
6913                         rem = 0;
6914                         iter->seq.seq.len = save_len;
6915                         break;
6916                 }
6917
6918                 if (ret != TRACE_TYPE_NO_CONSUME)
6919                         trace_consume(iter);
6920                 rem -= count;
6921                 if (!trace_find_next_entry_inc(iter))   {
6922                         rem = 0;
6923                         iter->ent = NULL;
6924                         break;
6925                 }
6926         }
6927
6928         return rem;
6929 }
6930
6931 static ssize_t tracing_splice_read_pipe(struct file *filp,
6932                                         loff_t *ppos,
6933                                         struct pipe_inode_info *pipe,
6934                                         size_t len,
6935                                         unsigned int flags)
6936 {
6937         struct page *pages_def[PIPE_DEF_BUFFERS];
6938         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6939         struct trace_iterator *iter = filp->private_data;
6940         struct splice_pipe_desc spd = {
6941                 .pages          = pages_def,
6942                 .partial        = partial_def,
6943                 .nr_pages       = 0, /* This gets updated below. */
6944                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6945                 .ops            = &default_pipe_buf_ops,
6946                 .spd_release    = tracing_spd_release_pipe,
6947         };
6948         ssize_t ret;
6949         size_t rem;
6950         unsigned int i;
6951
6952         if (splice_grow_spd(pipe, &spd))
6953                 return -ENOMEM;
6954
6955         mutex_lock(&iter->mutex);
6956
6957         if (iter->trace->splice_read) {
6958                 ret = iter->trace->splice_read(iter, filp,
6959                                                ppos, pipe, len, flags);
6960                 if (ret)
6961                         goto out_err;
6962         }
6963
6964         ret = tracing_wait_pipe(filp);
6965         if (ret <= 0)
6966                 goto out_err;
6967
6968         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6969                 ret = -EFAULT;
6970                 goto out_err;
6971         }
6972
6973         trace_event_read_lock();
6974         trace_access_lock(iter->cpu_file);
6975
6976         /* Fill as many pages as possible. */
6977         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6978                 spd.pages[i] = alloc_page(GFP_KERNEL);
6979                 if (!spd.pages[i])
6980                         break;
6981
6982                 rem = tracing_fill_pipe_page(rem, iter);
6983
6984                 /* Copy the data into the page, so we can start over. */
6985                 ret = trace_seq_to_buffer(&iter->seq,
6986                                           page_address(spd.pages[i]),
6987                                           trace_seq_used(&iter->seq));
6988                 if (ret < 0) {
6989                         __free_page(spd.pages[i]);
6990                         break;
6991                 }
6992                 spd.partial[i].offset = 0;
6993                 spd.partial[i].len = trace_seq_used(&iter->seq);
6994
6995                 trace_seq_init(&iter->seq);
6996         }
6997
6998         trace_access_unlock(iter->cpu_file);
6999         trace_event_read_unlock();
7000         mutex_unlock(&iter->mutex);
7001
7002         spd.nr_pages = i;
7003
7004         if (i)
7005                 ret = splice_to_pipe(pipe, &spd);
7006         else
7007                 ret = 0;
7008 out:
7009         splice_shrink_spd(&spd);
7010         return ret;
7011
7012 out_err:
7013         mutex_unlock(&iter->mutex);
7014         goto out;
7015 }
7016
7017 static ssize_t
7018 tracing_entries_read(struct file *filp, char __user *ubuf,
7019                      size_t cnt, loff_t *ppos)
7020 {
7021         struct inode *inode = file_inode(filp);
7022         struct trace_array *tr = inode->i_private;
7023         int cpu = tracing_get_cpu(inode);
7024         char buf[64];
7025         int r = 0;
7026         ssize_t ret;
7027
7028         mutex_lock(&trace_types_lock);
7029
7030         if (cpu == RING_BUFFER_ALL_CPUS) {
7031                 int cpu, buf_size_same;
7032                 unsigned long size;
7033
7034                 size = 0;
7035                 buf_size_same = 1;
7036                 /* check if all CPU sizes are the same */
7037                 for_each_tracing_cpu(cpu) {
7038                         /* fill in the size from the first enabled CPU */
7039                         if (size == 0)
7040                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7041                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7042                                 buf_size_same = 0;
7043                                 break;
7044                         }
7045                 }
7046
7047                 if (buf_size_same) {
7048                         if (!ring_buffer_expanded)
7049                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7050                                             size >> 10,
7051                                             trace_buf_size >> 10);
7052                         else
7053                                 r = sprintf(buf, "%lu\n", size >> 10);
7054                 } else
7055                         r = sprintf(buf, "X\n");
7056         } else
7057                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7058
7059         mutex_unlock(&trace_types_lock);
7060
7061         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7062         return ret;
7063 }
7064
7065 static ssize_t
7066 tracing_entries_write(struct file *filp, const char __user *ubuf,
7067                       size_t cnt, loff_t *ppos)
7068 {
7069         struct inode *inode = file_inode(filp);
7070         struct trace_array *tr = inode->i_private;
7071         unsigned long val;
7072         int ret;
7073
7074         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7075         if (ret)
7076                 return ret;
7077
7078         /* must have at least 1 entry */
7079         if (!val)
7080                 return -EINVAL;
7081
7082         /* value is in KB */
7083         val <<= 10;
7084         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7085         if (ret < 0)
7086                 return ret;
7087
7088         *ppos += cnt;
7089
7090         return cnt;
7091 }
7092
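/*
 * Illustrative buffer_size_kb usage: the value read and written is the
 * per-CPU ring buffer size in KiB, e.g.
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   4096
 *
 * A read shows "X" when the per-CPU sizes differ and appends
 * "(expanded: N)" while the buffer is still at its minimal boot-time size.
 */
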
7093 static ssize_t
7094 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7095                                 size_t cnt, loff_t *ppos)
7096 {
7097         struct trace_array *tr = filp->private_data;
7098         char buf[64];
7099         int r, cpu;
7100         unsigned long size = 0, expanded_size = 0;
7101
7102         mutex_lock(&trace_types_lock);
7103         for_each_tracing_cpu(cpu) {
7104                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7105                 if (!ring_buffer_expanded)
7106                         expanded_size += trace_buf_size >> 10;
7107         }
7108         if (ring_buffer_expanded)
7109                 r = sprintf(buf, "%lu\n", size);
7110         else
7111                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7112         mutex_unlock(&trace_types_lock);
7113
7114         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7115 }
7116
7117 static ssize_t
7118 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7119                           size_t cnt, loff_t *ppos)
7120 {
7121         /*
7122          * There is no need to read what the user has written; this function
7123          * exists just so that using "echo" on this file does not return an error.
7124          */
7125
7126         *ppos += cnt;
7127
7128         return cnt;
7129 }
7130
7131 static int
7132 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7133 {
7134         struct trace_array *tr = inode->i_private;
7135
7136         /* disable tracing ? */
7137         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7138                 tracer_tracing_off(tr);
7139         /* resize the ring buffer to 0 */
7140         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7141
7142         trace_array_put(tr);
7143
7144         return 0;
7145 }
7146
7147 static ssize_t
7148 tracing_mark_write(struct file *filp, const char __user *ubuf,
7149                                         size_t cnt, loff_t *fpos)
7150 {
7151         struct trace_array *tr = filp->private_data;
7152         struct ring_buffer_event *event;
7153         enum event_trigger_type tt = ETT_NONE;
7154         struct trace_buffer *buffer;
7155         struct print_entry *entry;
7156         ssize_t written;
7157         int size;
7158         int len;
7159
7160 /* Used in tracing_mark_raw_write() as well */
7161 #define FAULTED_STR "<faulted>"
7162 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7163
7164         if (tracing_disabled)
7165                 return -EINVAL;
7166
7167         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7168                 return -EINVAL;
7169
7170         if (cnt > TRACE_BUF_SIZE)
7171                 cnt = TRACE_BUF_SIZE;
7172
7173         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7174
7175         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7176
7177         /* If shorter than "<faulted>", make sure we can still store that string */
7178         if (cnt < FAULTED_SIZE)
7179                 size += FAULTED_SIZE - cnt;
7180
7181         buffer = tr->array_buffer.buffer;
7182         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7183                                             tracing_gen_ctx());
7184         if (unlikely(!event))
7185                 /* Ring buffer disabled, return as if not open for write */
7186                 return -EBADF;
7187
7188         entry = ring_buffer_event_data(event);
7189         entry->ip = _THIS_IP_;
7190
7191         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7192         if (len) {
7193                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7194                 cnt = FAULTED_SIZE;
7195                 written = -EFAULT;
7196         } else
7197                 written = cnt;
7198
7199         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7200                 /* do not add \n before testing triggers, but add \0 */
7201                 entry->buf[cnt] = '\0';
7202                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7203         }
7204
7205         if (entry->buf[cnt - 1] != '\n') {
7206                 entry->buf[cnt] = '\n';
7207                 entry->buf[cnt + 1] = '\0';
7208         } else
7209                 entry->buf[cnt] = '\0';
7210
7211         if (static_branch_unlikely(&trace_marker_exports_enabled))
7212                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7213         __buffer_unlock_commit(buffer, event);
7214
7215         if (tt)
7216                 event_triggers_post_call(tr->trace_marker_file, tt);
7217
7218         return written;
7219 }
7220
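/*
 * Illustrative trace_marker usage: user space annotations are recorded as
 * TRACE_PRINT entries, e.g.
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * A trailing newline is added when missing, writes are clamped to
 * TRACE_BUF_SIZE, and data that faults while being copied shows up as the
 * "<faulted>" string instead.
 */
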
7221 /* Limit it for now to 3K (including tag) */
7222 #define RAW_DATA_MAX_SIZE (1024*3)
7223
7224 static ssize_t
7225 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7226                                         size_t cnt, loff_t *fpos)
7227 {
7228         struct trace_array *tr = filp->private_data;
7229         struct ring_buffer_event *event;
7230         struct trace_buffer *buffer;
7231         struct raw_data_entry *entry;
7232         ssize_t written;
7233         int size;
7234         int len;
7235
7236 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7237
7238         if (tracing_disabled)
7239                 return -EINVAL;
7240
7241         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7242                 return -EINVAL;
7243
7244         /* The marker must at least have a tag id */
7245         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7246                 return -EINVAL;
7247
7248         if (cnt > TRACE_BUF_SIZE)
7249                 cnt = TRACE_BUF_SIZE;
7250
7251         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7252
7253         size = sizeof(*entry) + cnt;
7254         if (cnt < FAULT_SIZE_ID)
7255                 size += FAULT_SIZE_ID - cnt;
7256
7257         buffer = tr->array_buffer.buffer;
7258         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7259                                             tracing_gen_ctx());
7260         if (!event)
7261                 /* Ring buffer disabled, return as if not open for write */
7262                 return -EBADF;
7263
7264         entry = ring_buffer_event_data(event);
7265
7266         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7267         if (len) {
7268                 entry->id = -1;
7269                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7270                 written = -EFAULT;
7271         } else
7272                 written = cnt;
7273
7274         __buffer_unlock_commit(buffer, event);
7275
7276         return written;
7277 }
7278
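/*
 * Illustrative trace_marker_raw usage (a sketch with hypothetical names):
 * a write must start with an 'unsigned int' tag id followed by the raw
 * payload, e.g. from user space:
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 *
 * where fd is an open file descriptor for trace_marker_raw. If the copy
 * faults, the entry is stored with id == -1 and the "<faulted>" string.
 */
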
7279 static int tracing_clock_show(struct seq_file *m, void *v)
7280 {
7281         struct trace_array *tr = m->private;
7282         int i;
7283
7284         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7285                 seq_printf(m,
7286                         "%s%s%s%s", i ? " " : "",
7287                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7288                         i == tr->clock_id ? "]" : "");
7289         seq_putc(m, '\n');
7290
7291         return 0;
7292 }
7293
7294 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7295 {
7296         int i;
7297
7298         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7299                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7300                         break;
7301         }
7302         if (i == ARRAY_SIZE(trace_clocks))
7303                 return -EINVAL;
7304
7305         mutex_lock(&trace_types_lock);
7306
7307         tr->clock_id = i;
7308
7309         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7310
7311         /*
7312          * New clock may not be consistent with the previous clock.
7313          * Reset the buffer so that it doesn't have incomparable timestamps.
7314          */
7315         tracing_reset_online_cpus(&tr->array_buffer);
7316
7317 #ifdef CONFIG_TRACER_MAX_TRACE
7318         if (tr->max_buffer.buffer)
7319                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7320         tracing_reset_online_cpus(&tr->max_buffer);
7321 #endif
7322
7323         mutex_unlock(&trace_types_lock);
7324
7325         return 0;
7326 }
7327
7328 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7329                                    size_t cnt, loff_t *fpos)
7330 {
7331         struct seq_file *m = filp->private_data;
7332         struct trace_array *tr = m->private;
7333         char buf[64];
7334         const char *clockstr;
7335         int ret;
7336
7337         if (cnt >= sizeof(buf))
7338                 return -EINVAL;
7339
7340         if (copy_from_user(buf, ubuf, cnt))
7341                 return -EFAULT;
7342
7343         buf[cnt] = 0;
7344
7345         clockstr = strstrip(buf);
7346
7347         ret = tracing_set_clock(tr, clockstr);
7348         if (ret)
7349                 return ret;
7350
7351         *fpos += cnt;
7352
7353         return cnt;
7354 }
7355
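/*
 * Illustrative trace_clock usage: a read lists the available clocks with the
 * current one in brackets (the exact list depends on the configuration), and
 * a write switches clocks, which also resets the buffers (see
 * tracing_set_clock() above):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo mono > /sys/kernel/tracing/trace_clock
 */
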
7356 static int tracing_clock_open(struct inode *inode, struct file *file)
7357 {
7358         struct trace_array *tr = inode->i_private;
7359         int ret;
7360
7361         ret = tracing_check_open_get_tr(tr);
7362         if (ret)
7363                 return ret;
7364
7365         ret = single_open(file, tracing_clock_show, inode->i_private);
7366         if (ret < 0)
7367                 trace_array_put(tr);
7368
7369         return ret;
7370 }
7371
7372 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7373 {
7374         struct trace_array *tr = m->private;
7375
7376         mutex_lock(&trace_types_lock);
7377
7378         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7379                 seq_puts(m, "delta [absolute]\n");
7380         else
7381                 seq_puts(m, "[delta] absolute\n");
7382
7383         mutex_unlock(&trace_types_lock);
7384
7385         return 0;
7386 }
7387
7388 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7389 {
7390         struct trace_array *tr = inode->i_private;
7391         int ret;
7392
7393         ret = tracing_check_open_get_tr(tr);
7394         if (ret)
7395                 return ret;
7396
7397         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7398         if (ret < 0)
7399                 trace_array_put(tr);
7400
7401         return ret;
7402 }
7403
7404 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7405 {
7406         if (rbe == this_cpu_read(trace_buffered_event))
7407                 return ring_buffer_time_stamp(buffer);
7408
7409         return ring_buffer_event_time_stamp(buffer, rbe);
7410 }
7411
7412 /*
7413  * Set or disable using the per CPU trace_buffered_event when possible.
7414  */
7415 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7416 {
7417         int ret = 0;
7418
7419         mutex_lock(&trace_types_lock);
7420
7421         if (set && tr->no_filter_buffering_ref++)
7422                 goto out;
7423
7424         if (!set) {
7425                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7426                         ret = -EINVAL;
7427                         goto out;
7428                 }
7429
7430                 --tr->no_filter_buffering_ref;
7431         }
7432  out:
7433         mutex_unlock(&trace_types_lock);
7434
7435         return ret;
7436 }
7437
7438 struct ftrace_buffer_info {
7439         struct trace_iterator   iter;
7440         void                    *spare;
7441         unsigned int            spare_cpu;
7442         unsigned int            read;
7443 };
7444
7445 #ifdef CONFIG_TRACER_SNAPSHOT
7446 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7447 {
7448         struct trace_array *tr = inode->i_private;
7449         struct trace_iterator *iter;
7450         struct seq_file *m;
7451         int ret;
7452
7453         ret = tracing_check_open_get_tr(tr);
7454         if (ret)
7455                 return ret;
7456
7457         if (file->f_mode & FMODE_READ) {
7458                 iter = __tracing_open(inode, file, true);
7459                 if (IS_ERR(iter))
7460                         ret = PTR_ERR(iter);
7461         } else {
7462                 /* Writes still need the seq_file to hold the private data */
7463                 ret = -ENOMEM;
7464                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7465                 if (!m)
7466                         goto out;
7467                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7468                 if (!iter) {
7469                         kfree(m);
7470                         goto out;
7471                 }
7472                 ret = 0;
7473
7474                 iter->tr = tr;
7475                 iter->array_buffer = &tr->max_buffer;
7476                 iter->cpu_file = tracing_get_cpu(inode);
7477                 m->private = iter;
7478                 file->private_data = m;
7479         }
7480 out:
7481         if (ret < 0)
7482                 trace_array_put(tr);
7483
7484         return ret;
7485 }
7486
7487 static ssize_t
7488 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7489                        loff_t *ppos)
7490 {
7491         struct seq_file *m = filp->private_data;
7492         struct trace_iterator *iter = m->private;
7493         struct trace_array *tr = iter->tr;
7494         unsigned long val;
7495         int ret;
7496
7497         ret = tracing_update_buffers();
7498         if (ret < 0)
7499                 return ret;
7500
7501         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7502         if (ret)
7503                 return ret;
7504
7505         mutex_lock(&trace_types_lock);
7506
7507         if (tr->current_trace->use_max_tr) {
7508                 ret = -EBUSY;
7509                 goto out;
7510         }
7511
7512         local_irq_disable();
7513         arch_spin_lock(&tr->max_lock);
7514         if (tr->cond_snapshot)
7515                 ret = -EBUSY;
7516         arch_spin_unlock(&tr->max_lock);
7517         local_irq_enable();
7518         if (ret)
7519                 goto out;
7520
7521         switch (val) {
7522         case 0:
7523                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7524                         ret = -EINVAL;
7525                         break;
7526                 }
7527                 if (tr->allocated_snapshot)
7528                         free_snapshot(tr);
7529                 break;
7530         case 1:
7531 /* Only allow per-cpu swap if the ring buffer supports it */
7532 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7533                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7534                         ret = -EINVAL;
7535                         break;
7536                 }
7537 #endif
7538                 if (tr->allocated_snapshot)
7539                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7540                                         &tr->array_buffer, iter->cpu_file);
7541                 else
7542                         ret = tracing_alloc_snapshot_instance(tr);
7543                 if (ret < 0)
7544                         break;
7545                 local_irq_disable();
7546                 /* Now, we're going to swap */
7547                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7548                         update_max_tr(tr, current, smp_processor_id(), NULL);
7549                 else
7550                         update_max_tr_single(tr, current, iter->cpu_file);
7551                 local_irq_enable();
7552                 break;
7553         default:
7554                 if (tr->allocated_snapshot) {
7555                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7556                                 tracing_reset_online_cpus(&tr->max_buffer);
7557                         else
7558                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7559                 }
7560                 break;
7561         }
7562
7563         if (ret >= 0) {
7564                 *ppos += cnt;
7565                 ret = cnt;
7566         }
7567 out:
7568         mutex_unlock(&trace_types_lock);
7569         return ret;
7570 }
7571
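/*
 * Illustrative snapshot file usage, matching the switch statement above:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot    allocate if needed and swap
 *   # echo 0 > /sys/kernel/tracing/snapshot    free the snapshot buffer
 *   # echo 2 > /sys/kernel/tracing/snapshot    clear the snapshot buffer
 *                                              without freeing it
 *
 * The per-CPU snapshot files only allow the swap when the ring buffer
 * supports it (CONFIG_RING_BUFFER_ALLOW_SWAP).
 */
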
7572 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7573 {
7574         struct seq_file *m = file->private_data;
7575         int ret;
7576
7577         ret = tracing_release(inode, file);
7578
7579         if (file->f_mode & FMODE_READ)
7580                 return ret;
7581
7582         /* If write only, the seq_file is just a stub */
7583         if (m)
7584                 kfree(m->private);
7585         kfree(m);
7586
7587         return 0;
7588 }
7589
7590 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7591 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7592                                     size_t count, loff_t *ppos);
7593 static int tracing_buffers_release(struct inode *inode, struct file *file);
7594 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7595                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7596
7597 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7598 {
7599         struct ftrace_buffer_info *info;
7600         int ret;
7601
7602         /* The following checks for tracefs lockdown */
7603         ret = tracing_buffers_open(inode, filp);
7604         if (ret < 0)
7605                 return ret;
7606
7607         info = filp->private_data;
7608
7609         if (info->iter.trace->use_max_tr) {
7610                 tracing_buffers_release(inode, filp);
7611                 return -EBUSY;
7612         }
7613
7614         info->iter.snapshot = true;
7615         info->iter.array_buffer = &info->iter.tr->max_buffer;
7616
7617         return ret;
7618 }
7619
7620 #endif /* CONFIG_TRACER_SNAPSHOT */
7621
7622
7623 static const struct file_operations tracing_thresh_fops = {
7624         .open           = tracing_open_generic,
7625         .read           = tracing_thresh_read,
7626         .write          = tracing_thresh_write,
7627         .llseek         = generic_file_llseek,
7628 };
7629
7630 #ifdef CONFIG_TRACER_MAX_TRACE
7631 static const struct file_operations tracing_max_lat_fops = {
7632         .open           = tracing_open_generic,
7633         .read           = tracing_max_lat_read,
7634         .write          = tracing_max_lat_write,
7635         .llseek         = generic_file_llseek,
7636 };
7637 #endif
7638
7639 static const struct file_operations set_tracer_fops = {
7640         .open           = tracing_open_generic,
7641         .read           = tracing_set_trace_read,
7642         .write          = tracing_set_trace_write,
7643         .llseek         = generic_file_llseek,
7644 };
7645
7646 static const struct file_operations tracing_pipe_fops = {
7647         .open           = tracing_open_pipe,
7648         .poll           = tracing_poll_pipe,
7649         .read           = tracing_read_pipe,
7650         .splice_read    = tracing_splice_read_pipe,
7651         .release        = tracing_release_pipe,
7652         .llseek         = no_llseek,
7653 };
7654
7655 static const struct file_operations tracing_entries_fops = {
7656         .open           = tracing_open_generic_tr,
7657         .read           = tracing_entries_read,
7658         .write          = tracing_entries_write,
7659         .llseek         = generic_file_llseek,
7660         .release        = tracing_release_generic_tr,
7661 };
7662
7663 static const struct file_operations tracing_total_entries_fops = {
7664         .open           = tracing_open_generic_tr,
7665         .read           = tracing_total_entries_read,
7666         .llseek         = generic_file_llseek,
7667         .release        = tracing_release_generic_tr,
7668 };
7669
7670 static const struct file_operations tracing_free_buffer_fops = {
7671         .open           = tracing_open_generic_tr,
7672         .write          = tracing_free_buffer_write,
7673         .release        = tracing_free_buffer_release,
7674 };
7675
7676 static const struct file_operations tracing_mark_fops = {
7677         .open           = tracing_mark_open,
7678         .write          = tracing_mark_write,
7679         .release        = tracing_release_generic_tr,
7680 };
7681
7682 static const struct file_operations tracing_mark_raw_fops = {
7683         .open           = tracing_mark_open,
7684         .write          = tracing_mark_raw_write,
7685         .release        = tracing_release_generic_tr,
7686 };
7687
7688 static const struct file_operations trace_clock_fops = {
7689         .open           = tracing_clock_open,
7690         .read           = seq_read,
7691         .llseek         = seq_lseek,
7692         .release        = tracing_single_release_tr,
7693         .write          = tracing_clock_write,
7694 };
7695
7696 static const struct file_operations trace_time_stamp_mode_fops = {
7697         .open           = tracing_time_stamp_mode_open,
7698         .read           = seq_read,
7699         .llseek         = seq_lseek,
7700         .release        = tracing_single_release_tr,
7701 };
7702
7703 #ifdef CONFIG_TRACER_SNAPSHOT
7704 static const struct file_operations snapshot_fops = {
7705         .open           = tracing_snapshot_open,
7706         .read           = seq_read,
7707         .write          = tracing_snapshot_write,
7708         .llseek         = tracing_lseek,
7709         .release        = tracing_snapshot_release,
7710 };
7711
7712 static const struct file_operations snapshot_raw_fops = {
7713         .open           = snapshot_raw_open,
7714         .read           = tracing_buffers_read,
7715         .release        = tracing_buffers_release,
7716         .splice_read    = tracing_buffers_splice_read,
7717         .llseek         = no_llseek,
7718 };
7719
7720 #endif /* CONFIG_TRACER_SNAPSHOT */
7721
7722 /*
7723  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7724  * @filp: The active open file structure
7725  * @ubuf: The userspace provided buffer containing the value to write
7726  * @cnt: The number of bytes to read from @ubuf
7727  * @ppos: The current "file" position
7728  *
7729  * This function implements the write interface for a struct trace_min_max_param.
7730  * The filp->private_data must point to a trace_min_max_param structure that
7731  * defines where to write the value, the min and the max acceptable values,
7732  * and a lock to protect the write.
7733  */
7734 static ssize_t
7735 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7736 {
7737         struct trace_min_max_param *param = filp->private_data;
7738         u64 val;
7739         int err;
7740
7741         if (!param)
7742                 return -EFAULT;
7743
7744         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7745         if (err)
7746                 return err;
7747
7748         if (param->lock)
7749                 mutex_lock(param->lock);
7750
7751         if (param->min && val < *param->min)
7752                 err = -EINVAL;
7753
7754         if (param->max && val > *param->max)
7755                 err = -EINVAL;
7756
7757         if (!err)
7758                 *param->val = val;
7759
7760         if (param->lock)
7761                 mutex_unlock(param->lock);
7762
7763         if (err)
7764                 return err;
7765
7766         return cnt;
7767 }
7768
7769 /*
7770  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7771  * @filp: The active open file structure
7772  * @ubuf: The userspace provided buffer to read value into
7773  * @cnt: The maximum number of bytes to read
7774  * @ppos: The current "file" position
7775  *
7776  * This function implements the read interface for a struct trace_min_max_param.
7777  * The filp->private_data must point to a trace_min_max_param struct with valid
7778  * data.
7779  */
7780 static ssize_t
7781 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7782 {
7783         struct trace_min_max_param *param = filp->private_data;
7784         char buf[U64_STR_SIZE];
7785         int len;
7786         u64 val;
7787
7788         if (!param)
7789                 return -EFAULT;
7790
7791         val = *param->val;
7792
7793         if (cnt > sizeof(buf))
7794                 cnt = sizeof(buf);
7795
7796         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7797
7798         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7799 }
7800
7801 const struct file_operations trace_min_max_fops = {
7802         .open           = tracing_open_generic,
7803         .read           = trace_min_max_read,
7804         .write          = trace_min_max_write,
7805 };
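
/*
 * Illustrative sketch (not part of the original file): a caller would
 * typically wire a u64 tunable to trace_min_max_fops along these lines.
 * The names "foo_val", "foo_min", "foo_max" and "foo_lock" are
 * hypothetical; only the trace_min_max_param fields (lock/val/min/max),
 * TRACE_MODE_WRITE and trace_create_file() come from the tracing code.
 *
 *	static u64 foo_val = 10, foo_min = 1, foo_max = 1000;
 *	static DEFINE_MUTEX(foo_lock);
 *
 *	static struct trace_min_max_param foo_param = {
 *		.lock	= &foo_lock,
 *		.val	= &foo_val,
 *		.min	= &foo_min,
 *		.max	= &foo_max,
 *	};
 *
 *	trace_create_file("foo", TRACE_MODE_WRITE, parent,
 *			  &foo_param, &trace_min_max_fops);
 */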
7806
7807 #define TRACING_LOG_ERRS_MAX    8
7808 #define TRACING_LOG_LOC_MAX     128
7809
7810 #define CMD_PREFIX "  Command: "
7811
7812 struct err_info {
7813         const char      **errs; /* ptr to loc-specific array of err strings */
7814         u8              type;   /* index into errs -> specific err string */
7815         u16             pos;    /* caret position */
7816         u64             ts;
7817 };
7818
7819 struct tracing_log_err {
7820         struct list_head        list;
7821         struct err_info         info;
7822         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7823         char                    *cmd;                     /* what caused err */
7824 };
7825
7826 static DEFINE_MUTEX(tracing_err_log_lock);
7827
7828 static struct tracing_log_err *alloc_tracing_log_err(int len)
7829 {
7830         struct tracing_log_err *err;
7831
7832         err = kzalloc(sizeof(*err), GFP_KERNEL);
7833         if (!err)
7834                 return ERR_PTR(-ENOMEM);
7835
7836         err->cmd = kzalloc(len, GFP_KERNEL);
7837         if (!err->cmd) {
7838                 kfree(err);
7839                 return ERR_PTR(-ENOMEM);
7840         }
7841
7842         return err;
7843 }
7844
7845 static void free_tracing_log_err(struct tracing_log_err *err)
7846 {
7847         kfree(err->cmd);
7848         kfree(err);
7849 }
7850
7851 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7852                                                    int len)
7853 {
7854         struct tracing_log_err *err;
7855         char *cmd;
7856
7857         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7858                 err = alloc_tracing_log_err(len);
7859                 if (PTR_ERR(err) != -ENOMEM)
7860                         tr->n_err_log_entries++;
7861
7862                 return err;
7863         }
7864         cmd = kzalloc(len, GFP_KERNEL);
7865         if (!cmd)
7866                 return ERR_PTR(-ENOMEM);
7867         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7868         kfree(err->cmd);
7869         err->cmd = cmd;
7870         list_del(&err->list);
7871
7872         return err;
7873 }
7874
7875 /**
7876  * err_pos - find the position of a string within a command for error careting
7877  * @cmd: The tracing command that caused the error
7878  * @str: The string to position the caret at within @cmd
7879  *
7880  * Finds the position of the first occurrence of @str within @cmd.  The
7881  * return value can be passed to tracing_log_err() for caret placement
7882  * within @cmd.
7883  *
7884  * Returns the index within @cmd of the first occurrence of @str or 0
7885  * if @str was not found.
7886  */
7887 unsigned int err_pos(char *cmd, const char *str)
7888 {
7889         char *found;
7890
7891         if (WARN_ON(!strlen(cmd)))
7892                 return 0;
7893
7894         found = strstr(cmd, str);
7895         if (found)
7896                 return found - cmd;
7897
7898         return 0;
7899 }
7900
7901 /**
7902  * tracing_log_err - write an error to the tracing error log
7903  * @tr: The associated trace array for the error (NULL for top level array)
7904  * @loc: A string describing where the error occurred
7905  * @cmd: The tracing command that caused the error
7906  * @errs: The array of loc-specific static error strings
7907  * @type: The index into errs[], which produces the specific static err string
7908  * @pos: The position the caret should be placed in the cmd
7909  *
7910  * Writes an error into tracing/error_log of the form:
7911  *
7912  * <loc>: error: <text>
7913  *   Command: <cmd>
7914  *              ^
7915  *
7916  * tracing/error_log is a small log file containing the last
7917  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7918  * unless there has been a tracing error, and the error log can be
7919  * cleared and its memory freed by writing the empty string to it in
7920  * truncation mode, i.e. echo > tracing/error_log.
7921  *
7922  * NOTE: the @errs array along with the @type param are used to
7923  * produce a static error string - this string is not copied and saved
7924  * when the error is logged - only a pointer to it is saved.  See
7925  * existing callers for examples of how static strings are typically
7926  * defined for use with tracing_log_err().
7927  */
7928 void tracing_log_err(struct trace_array *tr,
7929                      const char *loc, const char *cmd,
7930                      const char **errs, u8 type, u16 pos)
7931 {
7932         struct tracing_log_err *err;
7933         int len = 0;
7934
7935         if (!tr)
7936                 tr = &global_trace;
7937
7938         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7939
7940         mutex_lock(&tracing_err_log_lock);
7941         err = get_tracing_log_err(tr, len);
7942         if (PTR_ERR(err) == -ENOMEM) {
7943                 mutex_unlock(&tracing_err_log_lock);
7944                 return;
7945         }
7946
7947         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7948         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7949
7950         err->info.errs = errs;
7951         err->info.type = type;
7952         err->info.pos = pos;
7953         err->info.ts = local_clock();
7954
7955         list_add_tail(&err->list, &tr->err_log);
7956         mutex_unlock(&tracing_err_log_lock);
7957 }
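
/*
 * Illustrative sketch (not part of the original file) of the calling
 * convention described above: a caller keeps a static, location-specific
 * array of error strings and passes an index into it plus a caret
 * position obtained from err_pos().  "foo_errs", "FOO_ERR_BAD_ARG" and
 * "foo_cmd" are hypothetical names.
 *
 *	static const char *foo_errs[] = { "Bad argument" };
 *	enum { FOO_ERR_BAD_ARG };
 *
 *	tracing_log_err(tr, "foo: parse", foo_cmd, foo_errs,
 *			FOO_ERR_BAD_ARG, err_pos(foo_cmd, "badtoken"));
 */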
7958
7959 static void clear_tracing_err_log(struct trace_array *tr)
7960 {
7961         struct tracing_log_err *err, *next;
7962
7963         mutex_lock(&tracing_err_log_lock);
7964         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7965                 list_del(&err->list);
7966                 free_tracing_log_err(err);
7967         }
7968
7969         tr->n_err_log_entries = 0;
7970         mutex_unlock(&tracing_err_log_lock);
7971 }
7972
7973 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7974 {
7975         struct trace_array *tr = m->private;
7976
7977         mutex_lock(&tracing_err_log_lock);
7978
7979         return seq_list_start(&tr->err_log, *pos);
7980 }
7981
7982 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7983 {
7984         struct trace_array *tr = m->private;
7985
7986         return seq_list_next(v, &tr->err_log, pos);
7987 }
7988
7989 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7990 {
7991         mutex_unlock(&tracing_err_log_lock);
7992 }
7993
7994 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7995 {
7996         u16 i;
7997
7998         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7999                 seq_putc(m, ' ');
8000         for (i = 0; i < pos; i++)
8001                 seq_putc(m, ' ');
8002         seq_puts(m, "^\n");
8003 }
8004
8005 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8006 {
8007         struct tracing_log_err *err = v;
8008
8009         if (err) {
8010                 const char *err_text = err->info.errs[err->info.type];
8011                 u64 sec = err->info.ts;
8012                 u32 nsec;
8013
8014                 nsec = do_div(sec, NSEC_PER_SEC);
8015                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8016                            err->loc, err_text);
8017                 seq_printf(m, "%s", err->cmd);
8018                 tracing_err_log_show_pos(m, err->info.pos);
8019         }
8020
8021         return 0;
8022 }
8023
8024 static const struct seq_operations tracing_err_log_seq_ops = {
8025         .start  = tracing_err_log_seq_start,
8026         .next   = tracing_err_log_seq_next,
8027         .stop   = tracing_err_log_seq_stop,
8028         .show   = tracing_err_log_seq_show
8029 };
8030
8031 static int tracing_err_log_open(struct inode *inode, struct file *file)
8032 {
8033         struct trace_array *tr = inode->i_private;
8034         int ret = 0;
8035
8036         ret = tracing_check_open_get_tr(tr);
8037         if (ret)
8038                 return ret;
8039
8040         /* If this file was opened for write, then erase contents */
8041         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8042                 clear_tracing_err_log(tr);
8043
8044         if (file->f_mode & FMODE_READ) {
8045                 ret = seq_open(file, &tracing_err_log_seq_ops);
8046                 if (!ret) {
8047                         struct seq_file *m = file->private_data;
8048                         m->private = tr;
8049                 } else {
8050                         trace_array_put(tr);
8051                 }
8052         }
8053         return ret;
8054 }
8055
8056 static ssize_t tracing_err_log_write(struct file *file,
8057                                      const char __user *buffer,
8058                                      size_t count, loff_t *ppos)
8059 {
8060         return count;
8061 }
8062
8063 static int tracing_err_log_release(struct inode *inode, struct file *file)
8064 {
8065         struct trace_array *tr = inode->i_private;
8066
8067         trace_array_put(tr);
8068
8069         if (file->f_mode & FMODE_READ)
8070                 seq_release(inode, file);
8071
8072         return 0;
8073 }
8074
8075 static const struct file_operations tracing_err_log_fops = {
8076         .open           = tracing_err_log_open,
8077         .write          = tracing_err_log_write,
8078         .read           = seq_read,
8079         .llseek         = seq_lseek,
8080         .release        = tracing_err_log_release,
8081 };
8082
8083 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8084 {
8085         struct trace_array *tr = inode->i_private;
8086         struct ftrace_buffer_info *info;
8087         int ret;
8088
8089         ret = tracing_check_open_get_tr(tr);
8090         if (ret)
8091                 return ret;
8092
8093         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8094         if (!info) {
8095                 trace_array_put(tr);
8096                 return -ENOMEM;
8097         }
8098
8099         mutex_lock(&trace_types_lock);
8100
8101         info->iter.tr           = tr;
8102         info->iter.cpu_file     = tracing_get_cpu(inode);
8103         info->iter.trace        = tr->current_trace;
8104         info->iter.array_buffer = &tr->array_buffer;
8105         info->spare             = NULL;
8106         /* Force reading ring buffer for first read */
8107         info->read              = (unsigned int)-1;
8108
8109         filp->private_data = info;
8110
8111         tr->trace_ref++;
8112
8113         mutex_unlock(&trace_types_lock);
8114
8115         ret = nonseekable_open(inode, filp);
8116         if (ret < 0)
8117                 trace_array_put(tr);
8118
8119         return ret;
8120 }
8121
8122 static __poll_t
8123 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8124 {
8125         struct ftrace_buffer_info *info = filp->private_data;
8126         struct trace_iterator *iter = &info->iter;
8127
8128         return trace_poll(iter, filp, poll_table);
8129 }
8130
8131 static ssize_t
8132 tracing_buffers_read(struct file *filp, char __user *ubuf,
8133                      size_t count, loff_t *ppos)
8134 {
8135         struct ftrace_buffer_info *info = filp->private_data;
8136         struct trace_iterator *iter = &info->iter;
8137         ssize_t ret = 0;
8138         ssize_t size;
8139
8140         if (!count)
8141                 return 0;
8142
8143 #ifdef CONFIG_TRACER_MAX_TRACE
8144         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8145                 return -EBUSY;
8146 #endif
8147
8148         if (!info->spare) {
8149                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8150                                                           iter->cpu_file);
8151                 if (IS_ERR(info->spare)) {
8152                         ret = PTR_ERR(info->spare);
8153                         info->spare = NULL;
8154                 } else {
8155                         info->spare_cpu = iter->cpu_file;
8156                 }
8157         }
8158         if (!info->spare)
8159                 return ret;
8160
8161         /* Do we have previous read data to read? */
8162         if (info->read < PAGE_SIZE)
8163                 goto read;
8164
8165  again:
8166         trace_access_lock(iter->cpu_file);
8167         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8168                                     &info->spare,
8169                                     count,
8170                                     iter->cpu_file, 0);
8171         trace_access_unlock(iter->cpu_file);
8172
8173         if (ret < 0) {
8174                 if (trace_empty(iter)) {
8175                         if ((filp->f_flags & O_NONBLOCK))
8176                                 return -EAGAIN;
8177
8178                         ret = wait_on_pipe(iter, 0);
8179                         if (ret)
8180                                 return ret;
8181
8182                         goto again;
8183                 }
8184                 return 0;
8185         }
8186
8187         info->read = 0;
8188  read:
8189         size = PAGE_SIZE - info->read;
8190         if (size > count)
8191                 size = count;
8192
8193         ret = copy_to_user(ubuf, info->spare + info->read, size);
8194         if (ret == size)
8195                 return -EFAULT;
8196
8197         size -= ret;
8198
8199         *ppos += size;
8200         info->read += size;
8201
8202         return size;
8203 }
8204
8205 static int tracing_buffers_release(struct inode *inode, struct file *file)
8206 {
8207         struct ftrace_buffer_info *info = file->private_data;
8208         struct trace_iterator *iter = &info->iter;
8209
8210         mutex_lock(&trace_types_lock);
8211
8212         iter->tr->trace_ref--;
8213
8214         __trace_array_put(iter->tr);
8215
8216         iter->wait_index++;
8217         /* Make sure the waiters see the new wait_index */
8218         smp_wmb();
8219
8220         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8221
8222         if (info->spare)
8223                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8224                                            info->spare_cpu, info->spare);
8225         kvfree(info);
8226
8227         mutex_unlock(&trace_types_lock);
8228
8229         return 0;
8230 }
8231
8232 struct buffer_ref {
8233         struct trace_buffer     *buffer;
8234         void                    *page;
8235         int                     cpu;
8236         refcount_t              refcount;
8237 };
8238
8239 static void buffer_ref_release(struct buffer_ref *ref)
8240 {
8241         if (!refcount_dec_and_test(&ref->refcount))
8242                 return;
8243         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8244         kfree(ref);
8245 }
8246
8247 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8248                                     struct pipe_buffer *buf)
8249 {
8250         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8251
8252         buffer_ref_release(ref);
8253         buf->private = 0;
8254 }
8255
8256 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8257                                 struct pipe_buffer *buf)
8258 {
8259         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8260
8261         if (refcount_read(&ref->refcount) > INT_MAX/2)
8262                 return false;
8263
8264         refcount_inc(&ref->refcount);
8265         return true;
8266 }
8267
8268 /* Pipe buffer operations for a buffer. */
8269 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8270         .release                = buffer_pipe_buf_release,
8271         .get                    = buffer_pipe_buf_get,
8272 };
8273
8274 /*
8275  * Callback from splice_to_pipe(): release any pages still attached to
8276  * the spd in case we errored out while filling the pipe.
8277  */
8278 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8279 {
8280         struct buffer_ref *ref =
8281                 (struct buffer_ref *)spd->partial[i].private;
8282
8283         buffer_ref_release(ref);
8284         spd->partial[i].private = 0;
8285 }
8286
8287 static ssize_t
8288 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8289                             struct pipe_inode_info *pipe, size_t len,
8290                             unsigned int flags)
8291 {
8292         struct ftrace_buffer_info *info = file->private_data;
8293         struct trace_iterator *iter = &info->iter;
8294         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8295         struct page *pages_def[PIPE_DEF_BUFFERS];
8296         struct splice_pipe_desc spd = {
8297                 .pages          = pages_def,
8298                 .partial        = partial_def,
8299                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8300                 .ops            = &buffer_pipe_buf_ops,
8301                 .spd_release    = buffer_spd_release,
8302         };
8303         struct buffer_ref *ref;
8304         int entries, i;
8305         ssize_t ret = 0;
8306
8307 #ifdef CONFIG_TRACER_MAX_TRACE
8308         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8309                 return -EBUSY;
8310 #endif
8311
8312         if (*ppos & (PAGE_SIZE - 1))
8313                 return -EINVAL;
8314
8315         if (len & (PAGE_SIZE - 1)) {
8316                 if (len < PAGE_SIZE)
8317                         return -EINVAL;
8318                 len &= PAGE_MASK;
8319         }
8320
8321         if (splice_grow_spd(pipe, &spd))
8322                 return -ENOMEM;
8323
8324  again:
8325         trace_access_lock(iter->cpu_file);
8326         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8327
8328         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8329                 struct page *page;
8330                 int r;
8331
8332                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8333                 if (!ref) {
8334                         ret = -ENOMEM;
8335                         break;
8336                 }
8337
8338                 refcount_set(&ref->refcount, 1);
8339                 ref->buffer = iter->array_buffer->buffer;
8340                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8341                 if (IS_ERR(ref->page)) {
8342                         ret = PTR_ERR(ref->page);
8343                         ref->page = NULL;
8344                         kfree(ref);
8345                         break;
8346                 }
8347                 ref->cpu = iter->cpu_file;
8348
8349                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8350                                           len, iter->cpu_file, 1);
8351                 if (r < 0) {
8352                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8353                                                    ref->page);
8354                         kfree(ref);
8355                         break;
8356                 }
8357
8358                 page = virt_to_page(ref->page);
8359
8360                 spd.pages[i] = page;
8361                 spd.partial[i].len = PAGE_SIZE;
8362                 spd.partial[i].offset = 0;
8363                 spd.partial[i].private = (unsigned long)ref;
8364                 spd.nr_pages++;
8365                 *ppos += PAGE_SIZE;
8366
8367                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8368         }
8369
8370         trace_access_unlock(iter->cpu_file);
8371         spd.nr_pages = i;
8372
8373         /* did we read anything? */
8374         if (!spd.nr_pages) {
8375                 long wait_index;
8376
8377                 if (ret)
8378                         goto out;
8379
8380                 ret = -EAGAIN;
8381                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8382                         goto out;
8383
8384                 wait_index = READ_ONCE(iter->wait_index);
8385
8386                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8387                 if (ret)
8388                         goto out;
8389
8390                 /* No need to wait after waking up when tracing is off */
8391                 if (!tracer_tracing_is_on(iter->tr))
8392                         goto out;
8393
8394                 /* Make sure we see the new wait_index */
8395                 smp_rmb();
8396                 if (wait_index != iter->wait_index)
8397                         goto out;
8398
8399                 goto again;
8400         }
8401
8402         ret = splice_to_pipe(pipe, &spd);
8403 out:
8404         splice_shrink_spd(&spd);
8405
8406         return ret;
8407 }
8408
8409 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8410 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8411 {
8412         struct ftrace_buffer_info *info = file->private_data;
8413         struct trace_iterator *iter = &info->iter;
8414
8415         if (cmd)
8416                 return -ENOIOCTLCMD;
8417
8418         mutex_lock(&trace_types_lock);
8419
8420         iter->wait_index++;
8421         /* Make sure the waiters see the new wait_index */
8422         smp_wmb();
8423
8424         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8425
8426         mutex_unlock(&trace_types_lock);
8427         return 0;
8428 }
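
/*
 * Illustrative user-space sketch (not part of the original file): a
 * thread blocked reading or splicing a per_cpu/cpuN/trace_pipe_raw file
 * can be woken by another thread issuing the cmd == 0 ioctl on the same
 * file descriptor ("raw_fd" is a hypothetical name):
 *
 *	ioctl(raw_fd, 0);
 */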
8429
8430 static const struct file_operations tracing_buffers_fops = {
8431         .open           = tracing_buffers_open,
8432         .read           = tracing_buffers_read,
8433         .poll           = tracing_buffers_poll,
8434         .release        = tracing_buffers_release,
8435         .splice_read    = tracing_buffers_splice_read,
8436         .unlocked_ioctl = tracing_buffers_ioctl,
8437         .llseek         = no_llseek,
8438 };
8439
8440 static ssize_t
8441 tracing_stats_read(struct file *filp, char __user *ubuf,
8442                    size_t count, loff_t *ppos)
8443 {
8444         struct inode *inode = file_inode(filp);
8445         struct trace_array *tr = inode->i_private;
8446         struct array_buffer *trace_buf = &tr->array_buffer;
8447         int cpu = tracing_get_cpu(inode);
8448         struct trace_seq *s;
8449         unsigned long cnt;
8450         unsigned long long t;
8451         unsigned long usec_rem;
8452
8453         s = kmalloc(sizeof(*s), GFP_KERNEL);
8454         if (!s)
8455                 return -ENOMEM;
8456
8457         trace_seq_init(s);
8458
8459         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8460         trace_seq_printf(s, "entries: %ld\n", cnt);
8461
8462         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8463         trace_seq_printf(s, "overrun: %ld\n", cnt);
8464
8465         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8466         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8467
8468         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8469         trace_seq_printf(s, "bytes: %ld\n", cnt);
8470
8471         if (trace_clocks[tr->clock_id].in_ns) {
8472                 /* local or global for trace_clock */
8473                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8474                 usec_rem = do_div(t, USEC_PER_SEC);
8475                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8476                                                                 t, usec_rem);
8477
8478                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8479                 usec_rem = do_div(t, USEC_PER_SEC);
8480                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8481         } else {
8482                 /* counter or tsc mode for trace_clock */
8483                 trace_seq_printf(s, "oldest event ts: %llu\n",
8484                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8485
8486                 trace_seq_printf(s, "now ts: %llu\n",
8487                                 ring_buffer_time_stamp(trace_buf->buffer));
8488         }
8489
8490         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8491         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8492
8493         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8494         trace_seq_printf(s, "read events: %ld\n", cnt);
8495
8496         count = simple_read_from_buffer(ubuf, count, ppos,
8497                                         s->buffer, trace_seq_used(s));
8498
8499         kfree(s);
8500
8501         return count;
8502 }
8503
8504 static const struct file_operations tracing_stats_fops = {
8505         .open           = tracing_open_generic_tr,
8506         .read           = tracing_stats_read,
8507         .llseek         = generic_file_llseek,
8508         .release        = tracing_release_generic_tr,
8509 };
8510
8511 #ifdef CONFIG_DYNAMIC_FTRACE
8512
8513 static ssize_t
8514 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8515                   size_t cnt, loff_t *ppos)
8516 {
8517         ssize_t ret;
8518         char *buf;
8519         int r;
8520
8521         /* 256 should be plenty to hold the amount needed */
8522         buf = kmalloc(256, GFP_KERNEL);
8523         if (!buf)
8524                 return -ENOMEM;
8525
8526         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8527                       ftrace_update_tot_cnt,
8528                       ftrace_number_of_pages,
8529                       ftrace_number_of_groups);
8530
8531         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8532         kfree(buf);
8533         return ret;
8534 }
8535
8536 static const struct file_operations tracing_dyn_info_fops = {
8537         .open           = tracing_open_generic,
8538         .read           = tracing_read_dyn_info,
8539         .llseek         = generic_file_llseek,
8540 };
8541 #endif /* CONFIG_DYNAMIC_FTRACE */
8542
8543 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8544 static void
8545 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8546                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8547                 void *data)
8548 {
8549         tracing_snapshot_instance(tr);
8550 }
8551
8552 static void
8553 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8554                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8555                       void *data)
8556 {
8557         struct ftrace_func_mapper *mapper = data;
8558         long *count = NULL;
8559
8560         if (mapper)
8561                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8562
8563         if (count) {
8564
8565                 if (*count <= 0)
8566                         return;
8567
8568                 (*count)--;
8569         }
8570
8571         tracing_snapshot_instance(tr);
8572 }
8573
8574 static int
8575 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8576                       struct ftrace_probe_ops *ops, void *data)
8577 {
8578         struct ftrace_func_mapper *mapper = data;
8579         long *count = NULL;
8580
8581         seq_printf(m, "%ps:", (void *)ip);
8582
8583         seq_puts(m, "snapshot");
8584
8585         if (mapper)
8586                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8587
8588         if (count)
8589                 seq_printf(m, ":count=%ld\n", *count);
8590         else
8591                 seq_puts(m, ":unlimited\n");
8592
8593         return 0;
8594 }
8595
8596 static int
8597 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8598                      unsigned long ip, void *init_data, void **data)
8599 {
8600         struct ftrace_func_mapper *mapper = *data;
8601
8602         if (!mapper) {
8603                 mapper = allocate_ftrace_func_mapper();
8604                 if (!mapper)
8605                         return -ENOMEM;
8606                 *data = mapper;
8607         }
8608
8609         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8610 }
8611
8612 static void
8613 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8614                      unsigned long ip, void *data)
8615 {
8616         struct ftrace_func_mapper *mapper = data;
8617
8618         if (!ip) {
8619                 if (!mapper)
8620                         return;
8621                 free_ftrace_func_mapper(mapper, NULL);
8622                 return;
8623         }
8624
8625         ftrace_func_mapper_remove_ip(mapper, ip);
8626 }
8627
8628 static struct ftrace_probe_ops snapshot_probe_ops = {
8629         .func                   = ftrace_snapshot,
8630         .print                  = ftrace_snapshot_print,
8631 };
8632
8633 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8634         .func                   = ftrace_count_snapshot,
8635         .print                  = ftrace_snapshot_print,
8636         .init                   = ftrace_snapshot_init,
8637         .free                   = ftrace_snapshot_free,
8638 };
8639
8640 static int
8641 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8642                                char *glob, char *cmd, char *param, int enable)
8643 {
8644         struct ftrace_probe_ops *ops;
8645         void *count = (void *)-1;
8646         char *number;
8647         int ret;
8648
8649         if (!tr)
8650                 return -ENODEV;
8651
8652         /* hash funcs only work with set_ftrace_filter */
8653         if (!enable)
8654                 return -EINVAL;
8655
8656         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8657
8658         if (glob[0] == '!')
8659                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8660
8661         if (!param)
8662                 goto out_reg;
8663
8664         number = strsep(&param, ":");
8665
8666         if (!strlen(number))
8667                 goto out_reg;
8668
8669         /*
8670          * We use the callback data field (which is a pointer)
8671          * as our counter.
8672          */
8673         ret = kstrtoul(number, 0, (unsigned long *)&count);
8674         if (ret)
8675                 return ret;
8676
8677  out_reg:
8678         ret = tracing_alloc_snapshot_instance(tr);
8679         if (ret < 0)
8680                 goto out;
8681
8682         ret = register_ftrace_function_probe(glob, tr, ops, count);
8683
8684  out:
8685         return ret < 0 ? ret : 0;
8686 }
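
/*
 * Illustrative usage of the "snapshot" command registered below, as
 * documented in Documentation/trace/ftrace.rst: arm a snapshot on a
 * traced function, optionally limited to N hits, or remove it again.
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter
 */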
8687
8688 static struct ftrace_func_command ftrace_snapshot_cmd = {
8689         .name                   = "snapshot",
8690         .func                   = ftrace_trace_snapshot_callback,
8691 };
8692
8693 static __init int register_snapshot_cmd(void)
8694 {
8695         return register_ftrace_command(&ftrace_snapshot_cmd);
8696 }
8697 #else
8698 static inline __init int register_snapshot_cmd(void) { return 0; }
8699 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8700
8701 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8702 {
8703         if (WARN_ON(!tr->dir))
8704                 return ERR_PTR(-ENODEV);
8705
8706         /* Top directory uses NULL as the parent */
8707         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8708                 return NULL;
8709
8710         /* All sub buffers have a descriptor */
8711         return tr->dir;
8712 }
8713
8714 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8715 {
8716         struct dentry *d_tracer;
8717
8718         if (tr->percpu_dir)
8719                 return tr->percpu_dir;
8720
8721         d_tracer = tracing_get_dentry(tr);
8722         if (IS_ERR(d_tracer))
8723                 return NULL;
8724
8725         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8726
8727         MEM_FAIL(!tr->percpu_dir,
8728                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8729
8730         return tr->percpu_dir;
8731 }
8732
8733 static struct dentry *
8734 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8735                       void *data, long cpu, const struct file_operations *fops)
8736 {
8737         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8738
8739         if (ret) /* See tracing_get_cpu() */
8740                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8741         return ret;
8742 }
8743
8744 static void
8745 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8746 {
8747         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8748         struct dentry *d_cpu;
8749         char cpu_dir[30]; /* 30 characters should be more than enough */
8750
8751         if (!d_percpu)
8752                 return;
8753
8754         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8755         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8756         if (!d_cpu) {
8757                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8758                 return;
8759         }
8760
8761         /* per cpu trace_pipe */
8762         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8763                                 tr, cpu, &tracing_pipe_fops);
8764
8765         /* per cpu trace */
8766         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8767                                 tr, cpu, &tracing_fops);
8768
8769         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8770                                 tr, cpu, &tracing_buffers_fops);
8771
8772         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8773                                 tr, cpu, &tracing_stats_fops);
8774
8775         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8776                                 tr, cpu, &tracing_entries_fops);
8777
8778 #ifdef CONFIG_TRACER_SNAPSHOT
8779         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8780                                 tr, cpu, &snapshot_fops);
8781
8782         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8783                                 tr, cpu, &snapshot_raw_fops);
8784 #endif
8785 }
8786
8787 #ifdef CONFIG_FTRACE_SELFTEST
8788 /* Let selftest have access to static functions in this file */
8789 #include "trace_selftest.c"
8790 #endif
8791
8792 static ssize_t
8793 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8794                         loff_t *ppos)
8795 {
8796         struct trace_option_dentry *topt = filp->private_data;
8797         char *buf;
8798
8799         if (topt->flags->val & topt->opt->bit)
8800                 buf = "1\n";
8801         else
8802                 buf = "0\n";
8803
8804         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8805 }
8806
8807 static ssize_t
8808 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8809                          loff_t *ppos)
8810 {
8811         struct trace_option_dentry *topt = filp->private_data;
8812         unsigned long val;
8813         int ret;
8814
8815         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8816         if (ret)
8817                 return ret;
8818
8819         if (val != 0 && val != 1)
8820                 return -EINVAL;
8821
8822         if (!!(topt->flags->val & topt->opt->bit) != val) {
8823                 mutex_lock(&trace_types_lock);
8824                 ret = __set_tracer_option(topt->tr, topt->flags,
8825                                           topt->opt, !val);
8826                 mutex_unlock(&trace_types_lock);
8827                 if (ret)
8828                         return ret;
8829         }
8830
8831         *ppos += cnt;
8832
8833         return cnt;
8834 }
8835
8836
8837 static const struct file_operations trace_options_fops = {
8838         .open = tracing_open_generic,
8839         .read = trace_options_read,
8840         .write = trace_options_write,
8841         .llseek = generic_file_llseek,
8842 };
8843
8844 /*
8845  * In order to pass in both the trace_array descriptor as well as the index
8846  * to the flag that the trace option file represents, the trace_array
8847  * has a character array of trace_flags_index[], which holds the index
8848  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8849  * The address of this character array is passed to the flag option file
8850  * read/write callbacks.
8851  *
8852  * In order to extract both the index and the trace_array descriptor,
8853  * get_tr_index() uses the following algorithm.
8854  *
8855  *   idx = *ptr;
8856  *
8857  * The data pointer is the address of one entry of the index array, and
8858  * the value stored there is that entry's own position (index[1] == 1).
8859  *
8860  * Then, to get the trace_array descriptor, subtracting that index from
8861  * the pointer gets us to the start of the array:
8862  *
8863  *   ptr - idx == &index[0]
8864  *
8865  * Then a simple container_of() from that pointer gets us to the
8866  * trace_array descriptor.
8867  */
8868 static void get_tr_index(void *data, struct trace_array **ptr,
8869                          unsigned int *pindex)
8870 {
8871         *pindex = *(unsigned char *)data;
8872
8873         *ptr = container_of(data - *pindex, struct trace_array,
8874                             trace_flags_index);
8875 }
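
/*
 * Worked example of the scheme above (illustrative only): if data points
 * at tr->trace_flags_index[3], then *data == 3 and data - 3 is
 * &tr->trace_flags_index[0]; container_of() on that address recovers the
 * enclosing trace_array, so the caller ends up with the pair (tr, 3).
 */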
8876
8877 static ssize_t
8878 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8879                         loff_t *ppos)
8880 {
8881         void *tr_index = filp->private_data;
8882         struct trace_array *tr;
8883         unsigned int index;
8884         char *buf;
8885
8886         get_tr_index(tr_index, &tr, &index);
8887
8888         if (tr->trace_flags & (1 << index))
8889                 buf = "1\n";
8890         else
8891                 buf = "0\n";
8892
8893         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8894 }
8895
8896 static ssize_t
8897 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8898                          loff_t *ppos)
8899 {
8900         void *tr_index = filp->private_data;
8901         struct trace_array *tr;
8902         unsigned int index;
8903         unsigned long val;
8904         int ret;
8905
8906         get_tr_index(tr_index, &tr, &index);
8907
8908         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8909         if (ret)
8910                 return ret;
8911
8912         if (val != 0 && val != 1)
8913                 return -EINVAL;
8914
8915         mutex_lock(&event_mutex);
8916         mutex_lock(&trace_types_lock);
8917         ret = set_tracer_flag(tr, 1 << index, val);
8918         mutex_unlock(&trace_types_lock);
8919         mutex_unlock(&event_mutex);
8920
8921         if (ret < 0)
8922                 return ret;
8923
8924         *ppos += cnt;
8925
8926         return cnt;
8927 }
8928
8929 static const struct file_operations trace_options_core_fops = {
8930         .open = tracing_open_generic,
8931         .read = trace_options_core_read,
8932         .write = trace_options_core_write,
8933         .llseek = generic_file_llseek,
8934 };
8935
8936 struct dentry *trace_create_file(const char *name,
8937                                  umode_t mode,
8938                                  struct dentry *parent,
8939                                  void *data,
8940                                  const struct file_operations *fops)
8941 {
8942         struct dentry *ret;
8943
8944         ret = tracefs_create_file(name, mode, parent, data, fops);
8945         if (!ret)
8946                 pr_warn("Could not create tracefs '%s' entry\n", name);
8947
8948         return ret;
8949 }
8950
8951
8952 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8953 {
8954         struct dentry *d_tracer;
8955
8956         if (tr->options)
8957                 return tr->options;
8958
8959         d_tracer = tracing_get_dentry(tr);
8960         if (IS_ERR(d_tracer))
8961                 return NULL;
8962
8963         tr->options = tracefs_create_dir("options", d_tracer);
8964         if (!tr->options) {
8965                 pr_warn("Could not create tracefs directory 'options'\n");
8966                 return NULL;
8967         }
8968
8969         return tr->options;
8970 }
8971
8972 static void
8973 create_trace_option_file(struct trace_array *tr,
8974                          struct trace_option_dentry *topt,
8975                          struct tracer_flags *flags,
8976                          struct tracer_opt *opt)
8977 {
8978         struct dentry *t_options;
8979
8980         t_options = trace_options_init_dentry(tr);
8981         if (!t_options)
8982                 return;
8983
8984         topt->flags = flags;
8985         topt->opt = opt;
8986         topt->tr = tr;
8987
8988         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8989                                         t_options, topt, &trace_options_fops);
8990
8991 }
8992
8993 static void
8994 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8995 {
8996         struct trace_option_dentry *topts;
8997         struct trace_options *tr_topts;
8998         struct tracer_flags *flags;
8999         struct tracer_opt *opts;
9000         int cnt;
9001         int i;
9002
9003         if (!tracer)
9004                 return;
9005
9006         flags = tracer->flags;
9007
9008         if (!flags || !flags->opts)
9009                 return;
9010
9011         /*
9012          * If this is an instance, only create flags for tracers
9013          * the instance may have.
9014          */
9015         if (!trace_ok_for_array(tracer, tr))
9016                 return;
9017
9018         for (i = 0; i < tr->nr_topts; i++) {
9019                 /* Make sure there are no duplicate flags. */
9020                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9021                         return;
9022         }
9023
9024         opts = flags->opts;
9025
9026         for (cnt = 0; opts[cnt].name; cnt++)
9027                 ;
9028
9029         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9030         if (!topts)
9031                 return;
9032
9033         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9034                             GFP_KERNEL);
9035         if (!tr_topts) {
9036                 kfree(topts);
9037                 return;
9038         }
9039
9040         tr->topts = tr_topts;
9041         tr->topts[tr->nr_topts].tracer = tracer;
9042         tr->topts[tr->nr_topts].topts = topts;
9043         tr->nr_topts++;
9044
9045         for (cnt = 0; opts[cnt].name; cnt++) {
9046                 create_trace_option_file(tr, &topts[cnt], flags,
9047                                          &opts[cnt]);
9048                 MEM_FAIL(topts[cnt].entry == NULL,
9049                           "Failed to create trace option: %s",
9050                           opts[cnt].name);
9051         }
9052 }
9053
9054 static struct dentry *
9055 create_trace_option_core_file(struct trace_array *tr,
9056                               const char *option, long index)
9057 {
9058         struct dentry *t_options;
9059
9060         t_options = trace_options_init_dentry(tr);
9061         if (!t_options)
9062                 return NULL;
9063
9064         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9065                                  (void *)&tr->trace_flags_index[index],
9066                                  &trace_options_core_fops);
9067 }
9068
9069 static void create_trace_options_dir(struct trace_array *tr)
9070 {
9071         struct dentry *t_options;
9072         bool top_level = tr == &global_trace;
9073         int i;
9074
9075         t_options = trace_options_init_dentry(tr);
9076         if (!t_options)
9077                 return;
9078
9079         for (i = 0; trace_options[i]; i++) {
9080                 if (top_level ||
9081                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9082                         create_trace_option_core_file(tr, trace_options[i], i);
9083         }
9084 }
9085
9086 static ssize_t
9087 rb_simple_read(struct file *filp, char __user *ubuf,
9088                size_t cnt, loff_t *ppos)
9089 {
9090         struct trace_array *tr = filp->private_data;
9091         char buf[64];
9092         int r;
9093
9094         r = tracer_tracing_is_on(tr);
9095         r = sprintf(buf, "%d\n", r);
9096
9097         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9098 }
9099
9100 static ssize_t
9101 rb_simple_write(struct file *filp, const char __user *ubuf,
9102                 size_t cnt, loff_t *ppos)
9103 {
9104         struct trace_array *tr = filp->private_data;
9105         struct trace_buffer *buffer = tr->array_buffer.buffer;
9106         unsigned long val;
9107         int ret;
9108
9109         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9110         if (ret)
9111                 return ret;
9112
9113         if (buffer) {
9114                 mutex_lock(&trace_types_lock);
9115                 if (!!val == tracer_tracing_is_on(tr)) {
9116                         val = 0; /* do nothing */
9117                 } else if (val) {
9118                         tracer_tracing_on(tr);
9119                         if (tr->current_trace->start)
9120                                 tr->current_trace->start(tr);
9121                 } else {
9122                         tracer_tracing_off(tr);
9123                         if (tr->current_trace->stop)
9124                                 tr->current_trace->stop(tr);
9125                         /* Wake up any waiters */
9126                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9127                 }
9128                 mutex_unlock(&trace_types_lock);
9129         }
9130
9131         (*ppos)++;
9132
9133         return cnt;
9134 }
9135
9136 static const struct file_operations rb_simple_fops = {
9137         .open           = tracing_open_generic_tr,
9138         .read           = rb_simple_read,
9139         .write          = rb_simple_write,
9140         .release        = tracing_release_generic_tr,
9141         .llseek         = default_llseek,
9142 };
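
/*
 * rb_simple_fops backs the per-instance "tracing_on" control file.
 * Illustrative usage from user space, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 *	echo 1 > /sys/kernel/tracing/tracing_on
 */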
9143
9144 static ssize_t
9145 buffer_percent_read(struct file *filp, char __user *ubuf,
9146                     size_t cnt, loff_t *ppos)
9147 {
9148         struct trace_array *tr = filp->private_data;
9149         char buf[64];
9150         int r;
9151
9152         r = tr->buffer_percent;
9153         r = sprintf(buf, "%d\n", r);
9154
9155         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9156 }
9157
9158 static ssize_t
9159 buffer_percent_write(struct file *filp, const char __user *ubuf,
9160                      size_t cnt, loff_t *ppos)
9161 {
9162         struct trace_array *tr = filp->private_data;
9163         unsigned long val;
9164         int ret;
9165
9166         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9167         if (ret)
9168                 return ret;
9169
9170         if (val > 100)
9171                 return -EINVAL;
9172
9173         if (!val)
9174                 val = 1;
9175
9176         tr->buffer_percent = val;
9177
9178         (*ppos)++;
9179
9180         return cnt;
9181 }
9182
9183 static const struct file_operations buffer_percent_fops = {
9184         .open           = tracing_open_generic_tr,
9185         .read           = buffer_percent_read,
9186         .write          = buffer_percent_write,
9187         .release        = tracing_release_generic_tr,
9188         .llseek         = default_llseek,
9189 };
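
/*
 * buffer_percent_fops backs the "buffer_percent" file.  Writes accept
 * 0-100 (a write of 0 is stored as 1), and the value is handed to
 * wait_on_pipe() by the splice path above as the "how full before a
 * waiter wakes" watermark.  Illustrative usage, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	echo 50 > /sys/kernel/tracing/buffer_percent
 */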
9190
9191 static struct dentry *trace_instance_dir;
9192
9193 static void
9194 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9195
9196 static int
9197 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9198 {
9199         enum ring_buffer_flags rb_flags;
9200
9201         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9202
9203         buf->tr = tr;
9204
9205         buf->buffer = ring_buffer_alloc(size, rb_flags);
9206         if (!buf->buffer)
9207                 return -ENOMEM;
9208
9209         buf->data = alloc_percpu(struct trace_array_cpu);
9210         if (!buf->data) {
9211                 ring_buffer_free(buf->buffer);
9212                 buf->buffer = NULL;
9213                 return -ENOMEM;
9214         }
9215
9216         /* Allocate the first page for all buffers */
9217         set_buffer_entries(&tr->array_buffer,
9218                            ring_buffer_size(tr->array_buffer.buffer, 0));
9219
9220         return 0;
9221 }
9222
9223 static void free_trace_buffer(struct array_buffer *buf)
9224 {
9225         if (buf->buffer) {
9226                 ring_buffer_free(buf->buffer);
9227                 buf->buffer = NULL;
9228                 free_percpu(buf->data);
9229                 buf->data = NULL;
9230         }
9231 }
9232
9233 static int allocate_trace_buffers(struct trace_array *tr, int size)
9234 {
9235         int ret;
9236
9237         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9238         if (ret)
9239                 return ret;
9240
9241 #ifdef CONFIG_TRACER_MAX_TRACE
9242         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9243                                     allocate_snapshot ? size : 1);
9244         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9245                 free_trace_buffer(&tr->array_buffer);
9246                 return -ENOMEM;
9247         }
9248         tr->allocated_snapshot = allocate_snapshot;
9249
9250         /*
9251          * Only the top level trace array gets its snapshot allocated
9252          * from the kernel command line.
9253          */
9254         allocate_snapshot = false;
9255 #endif
9256
9257         return 0;
9258 }
9259
9260 static void free_trace_buffers(struct trace_array *tr)
9261 {
9262         if (!tr)
9263                 return;
9264
9265         free_trace_buffer(&tr->array_buffer);
9266
9267 #ifdef CONFIG_TRACER_MAX_TRACE
9268         free_trace_buffer(&tr->max_buffer);
9269 #endif
9270 }
9271
9272 static void init_trace_flags_index(struct trace_array *tr)
9273 {
9274         int i;
9275
9276         /* Used by the trace options files */
9277         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9278                 tr->trace_flags_index[i] = i;
9279 }
9280
9281 static void __update_tracer_options(struct trace_array *tr)
9282 {
9283         struct tracer *t;
9284
9285         for (t = trace_types; t; t = t->next)
9286                 add_tracer_options(tr, t);
9287 }
9288
9289 static void update_tracer_options(struct trace_array *tr)
9290 {
9291         mutex_lock(&trace_types_lock);
9292         tracer_options_updated = true;
9293         __update_tracer_options(tr);
9294         mutex_unlock(&trace_types_lock);
9295 }
9296
9297 /* Must have trace_types_lock held */
9298 struct trace_array *trace_array_find(const char *instance)
9299 {
9300         struct trace_array *tr, *found = NULL;
9301
9302         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9303                 if (tr->name && strcmp(tr->name, instance) == 0) {
9304                         found = tr;
9305                         break;
9306                 }
9307         }
9308
9309         return found;
9310 }
9311
9312 struct trace_array *trace_array_find_get(const char *instance)
9313 {
9314         struct trace_array *tr;
9315
9316         mutex_lock(&trace_types_lock);
9317         tr = trace_array_find(instance);
9318         if (tr)
9319                 tr->ref++;
9320         mutex_unlock(&trace_types_lock);
9321
9322         return tr;
9323 }
9324
9325 static int trace_array_create_dir(struct trace_array *tr)
9326 {
9327         int ret;
9328
9329         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9330         if (!tr->dir)
9331                 return -EINVAL;
9332
9333         ret = event_trace_add_tracer(tr->dir, tr);
9334         if (ret) {
9335                 tracefs_remove(tr->dir);
9336                 return ret;
9337         }
9338
9339         init_tracer_tracefs(tr, tr->dir);
9340         __update_tracer_options(tr);
9341
9342         return ret;
9343 }
9344
9345 static struct trace_array *trace_array_create(const char *name)
9346 {
9347         struct trace_array *tr;
9348         int ret;
9349
9350         ret = -ENOMEM;
9351         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9352         if (!tr)
9353                 return ERR_PTR(ret);
9354
9355         tr->name = kstrdup(name, GFP_KERNEL);
9356         if (!tr->name)
9357                 goto out_free_tr;
9358
9359         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9360                 goto out_free_tr;
9361
9362         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9363
9364         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9365
9366         raw_spin_lock_init(&tr->start_lock);
9367
9368         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9369
9370         tr->current_trace = &nop_trace;
9371
9372         INIT_LIST_HEAD(&tr->systems);
9373         INIT_LIST_HEAD(&tr->events);
9374         INIT_LIST_HEAD(&tr->hist_vars);
9375         INIT_LIST_HEAD(&tr->err_log);
9376
9377         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9378                 goto out_free_tr;
9379
9380         if (ftrace_allocate_ftrace_ops(tr) < 0)
9381                 goto out_free_tr;
9382
9383         ftrace_init_trace_array(tr);
9384
9385         init_trace_flags_index(tr);
9386
9387         if (trace_instance_dir) {
9388                 ret = trace_array_create_dir(tr);
9389                 if (ret)
9390                         goto out_free_tr;
9391         } else
9392                 __trace_early_add_events(tr);
9393
9394         list_add(&tr->list, &ftrace_trace_arrays);
9395
9396         tr->ref++;
9397
9398         return tr;
9399
9400  out_free_tr:
9401         ftrace_free_ftrace_ops(tr);
9402         free_trace_buffers(tr);
9403         free_cpumask_var(tr->tracing_cpumask);
9404         kfree(tr->name);
9405         kfree(tr);
9406
9407         return ERR_PTR(ret);
9408 }
9409
9410 static int instance_mkdir(const char *name)
9411 {
9412         struct trace_array *tr;
9413         int ret;
9414
9415         mutex_lock(&event_mutex);
9416         mutex_lock(&trace_types_lock);
9417
9418         ret = -EEXIST;
9419         if (trace_array_find(name))
9420                 goto out_unlock;
9421
9422         tr = trace_array_create(name);
9423
9424         ret = PTR_ERR_OR_ZERO(tr);
9425
9426 out_unlock:
9427         mutex_unlock(&trace_types_lock);
9428         mutex_unlock(&event_mutex);
9429         return ret;
9430 }
9431
9432 /**
9433  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9434  * @name: The name of the trace array to be looked up/created.
9435  *
9436  * Returns a pointer to the trace array with the given name, or NULL
9437  * if it cannot be created.
9438  *
9439  * NOTE: This function increments the reference counter associated with the
9440  * trace array returned. This makes sure it cannot be freed while in use.
9441  * Use trace_array_put() once the trace array is no longer needed.
9442  * If the trace_array is to be freed, trace_array_destroy() needs to
9443  * be called after the trace_array_put(), or simply let user space delete
9444  * it from the tracefs instances directory. But until the
9445  * trace_array_put() is called, user space cannot delete it.
9446  *
9447  */
9448 struct trace_array *trace_array_get_by_name(const char *name)
9449 {
9450         struct trace_array *tr;
9451
9452         mutex_lock(&event_mutex);
9453         mutex_lock(&trace_types_lock);
9454
9455         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9456                 if (tr->name && strcmp(tr->name, name) == 0)
9457                         goto out_unlock;
9458         }
9459
9460         tr = trace_array_create(name);
9461
9462         if (IS_ERR(tr))
9463                 tr = NULL;
9464 out_unlock:
9465         if (tr)
9466                 tr->ref++;
9467
9468         mutex_unlock(&trace_types_lock);
9469         mutex_unlock(&event_mutex);
9470         return tr;
9471 }
9472 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
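
/*
 * For illustration only: a minimal sketch of how a kernel module might use
 * the exported instance API above. The instance name and message are made
 * up for the example, and error handling is kept minimal.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	trace_array_init_printk(tr);
 *	trace_array_printk(tr, _THIS_IP_, "hello from my module\n");
 *
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);
 */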
9473
9474 static int __remove_instance(struct trace_array *tr)
9475 {
9476         int i;
9477
9478         /* Reference counter for a newly created trace array = 1. */
9479         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9480                 return -EBUSY;
9481
9482         list_del(&tr->list);
9483
9484         /* Disable all the flags that were enabled coming in */
9485         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9486                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9487                         set_tracer_flag(tr, 1 << i, 0);
9488         }
9489
9490         tracing_set_nop(tr);
9491         clear_ftrace_function_probes(tr);
9492         event_trace_del_tracer(tr);
9493         ftrace_clear_pids(tr);
9494         ftrace_destroy_function_files(tr);
9495         tracefs_remove(tr->dir);
9496         free_percpu(tr->last_func_repeats);
9497         free_trace_buffers(tr);
9498
9499         for (i = 0; i < tr->nr_topts; i++) {
9500                 kfree(tr->topts[i].topts);
9501         }
9502         kfree(tr->topts);
9503
9504         free_cpumask_var(tr->tracing_cpumask);
9505         kfree(tr->name);
9506         kfree(tr);
9507
9508         return 0;
9509 }
9510
9511 int trace_array_destroy(struct trace_array *this_tr)
9512 {
9513         struct trace_array *tr;
9514         int ret;
9515
9516         if (!this_tr)
9517                 return -EINVAL;
9518
9519         mutex_lock(&event_mutex);
9520         mutex_lock(&trace_types_lock);
9521
9522         ret = -ENODEV;
9523
9524         /* Make sure the trace array exists before destroying it. */
9525         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9526                 if (tr == this_tr) {
9527                         ret = __remove_instance(tr);
9528                         break;
9529                 }
9530         }
9531
9532         mutex_unlock(&trace_types_lock);
9533         mutex_unlock(&event_mutex);
9534
9535         return ret;
9536 }
9537 EXPORT_SYMBOL_GPL(trace_array_destroy);
9538
9539 static int instance_rmdir(const char *name)
9540 {
9541         struct trace_array *tr;
9542         int ret;
9543
9544         mutex_lock(&event_mutex);
9545         mutex_lock(&trace_types_lock);
9546
9547         ret = -ENODEV;
9548         tr = trace_array_find(name);
9549         if (tr)
9550                 ret = __remove_instance(tr);
9551
9552         mutex_unlock(&trace_types_lock);
9553         mutex_unlock(&event_mutex);
9554
9555         return ret;
9556 }
9557
9558 static __init void create_trace_instances(struct dentry *d_tracer)
9559 {
9560         struct trace_array *tr;
9561
9562         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9563                                                          instance_mkdir,
9564                                                          instance_rmdir);
9565         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9566                 return;
9567
9568         mutex_lock(&event_mutex);
9569         mutex_lock(&trace_types_lock);
9570
9571         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9572                 if (!tr->name)
9573                         continue;
9574                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9575                              "Failed to create instance directory\n"))
9576                         break;
9577         }
9578
9579         mutex_unlock(&trace_types_lock);
9580         mutex_unlock(&event_mutex);
9581 }
9582
9583 static void
9584 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9585 {
9586         struct trace_event_file *file;
9587         int cpu;
9588
9589         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9590                         tr, &show_traces_fops);
9591
9592         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9593                         tr, &set_tracer_fops);
9594
9595         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9596                           tr, &tracing_cpumask_fops);
9597
9598         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9599                           tr, &tracing_iter_fops);
9600
9601         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9602                           tr, &tracing_fops);
9603
9604         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9605                           tr, &tracing_pipe_fops);
9606
9607         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9608                           tr, &tracing_entries_fops);
9609
9610         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9611                           tr, &tracing_total_entries_fops);
9612
9613         trace_create_file("free_buffer", 0200, d_tracer,
9614                           tr, &tracing_free_buffer_fops);
9615
9616         trace_create_file("trace_marker", 0220, d_tracer,
9617                           tr, &tracing_mark_fops);
9618
9619         file = __find_event_file(tr, "ftrace", "print");
9620         if (file && file->dir)
9621                 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9622                                   file, &event_trigger_fops);
9623         tr->trace_marker_file = file;
9624
9625         trace_create_file("trace_marker_raw", 0220, d_tracer,
9626                           tr, &tracing_mark_raw_fops);
9627
9628         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9629                           &trace_clock_fops);
9630
9631         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9632                           tr, &rb_simple_fops);
9633
9634         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9635                           &trace_time_stamp_mode_fops);
9636
9637         tr->buffer_percent = 50;
9638
9639         trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9640                         tr, &buffer_percent_fops);
9641
9642         create_trace_options_dir(tr);
9643
9644 #ifdef CONFIG_TRACER_MAX_TRACE
9645         trace_create_maxlat_file(tr, d_tracer);
9646 #endif
9647
9648         if (ftrace_create_function_files(tr, d_tracer))
9649                 MEM_FAIL(1, "Could not allocate function filter files");
9650
9651 #ifdef CONFIG_TRACER_SNAPSHOT
9652         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9653                           tr, &snapshot_fops);
9654 #endif
9655
9656         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9657                           tr, &tracing_err_log_fops);
9658
9659         for_each_tracing_cpu(cpu)
9660                 tracing_init_tracefs_percpu(tr, cpu);
9661
9662         ftrace_init_tracefs(tr, d_tracer);
9663 }
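
/*
 * For illustration only: once an instance exists, the control files created
 * above are used the same way as the top level ones, e.g. (hypothetical
 * instance name, assuming the function tracer is built in):
 *
 *	echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *	echo 1 > /sys/kernel/tracing/instances/foo/tracing_on
 *	cat /sys/kernel/tracing/instances/foo/trace
 */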
9664
9665 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9666 {
9667         struct vfsmount *mnt;
9668         struct file_system_type *type;
9669
9670         /*
9671          * To maintain backward compatibility for tools that mount
9672          * debugfs to get to the tracing facility, tracefs is automatically
9673          * mounted to the debugfs/tracing directory.
9674          */
9675         type = get_fs_type("tracefs");
9676         if (!type)
9677                 return NULL;
9678         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9679         put_filesystem(type);
9680         if (IS_ERR(mnt))
9681                 return NULL;
9682         mntget(mnt);
9683
9684         return mnt;
9685 }
9686
9687 /**
9688  * tracing_init_dentry - initialize top level trace array
9689  *
9690  * This is called when creating files or directories in the tracing
9691  * directory. It is called via fs_initcall() by any of the boot up code,
9692  * and returns 0 on success or a negative error code on failure.
9693  */
9694 int tracing_init_dentry(void)
9695 {
9696         struct trace_array *tr = &global_trace;
9697
9698         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9699                 pr_warn("Tracing disabled due to lockdown\n");
9700                 return -EPERM;
9701         }
9702
9703         /* The top level trace array uses NULL as parent */
9704         if (tr->dir)
9705                 return 0;
9706
9707         if (WARN_ON(!tracefs_initialized()))
9708                 return -ENODEV;
9709
9710         /*
9711          * As there may still be users that expect the tracing
9712          * files to exist in debugfs/tracing, we must automount
9713          * the tracefs file system there, so older tools still
9714          * work with the newer kernel.
9715          */
9716         tr->dir = debugfs_create_automount("tracing", NULL,
9717                                            trace_automount, NULL);
9718
9719         return 0;
9720 }
9721
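/*
 * The eval maps below record the values of enums (and sizeof() expressions)
 * that trace event print formats reference by name, so user space tooling
 * can resolve the symbols. The __start/__stop_ftrace_eval_maps section is
 * populated at build time by the TRACE_DEFINE_ENUM()/TRACE_DEFINE_SIZEOF()
 * macros.
 */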
9722 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9723 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9724
9725 static struct workqueue_struct *eval_map_wq __initdata;
9726 static struct work_struct eval_map_work __initdata;
9727 static struct work_struct tracerfs_init_work __initdata;
9728
9729 static void __init eval_map_work_func(struct work_struct *work)
9730 {
9731         int len;
9732
9733         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9734         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9735 }
9736
9737 static int __init trace_eval_init(void)
9738 {
9739         INIT_WORK(&eval_map_work, eval_map_work_func);
9740
9741         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9742         if (!eval_map_wq) {
9743                 pr_err("Unable to allocate eval_map_wq\n");
9744                 /* Do the work synchronously instead */
9745                 eval_map_work_func(&eval_map_work);
9746                 return -ENOMEM;
9747         }
9748
9749         queue_work(eval_map_wq, &eval_map_work);
9750         return 0;
9751 }
9752
9753 subsys_initcall(trace_eval_init);
9754
9755 static int __init trace_eval_sync(void)
9756 {
9757         /* Make sure the eval map updates are finished */
9758         if (eval_map_wq)
9759                 destroy_workqueue(eval_map_wq);
9760         return 0;
9761 }
9762
9763 late_initcall_sync(trace_eval_sync);
9764
9765
9766 #ifdef CONFIG_MODULES
9767 static void trace_module_add_evals(struct module *mod)
9768 {
9769         if (!mod->num_trace_evals)
9770                 return;
9771
9772         /*
9773          * Modules with bad taint do not have events created; do not
9774          * bother with their eval maps either.
9775          */
9776         if (trace_module_has_bad_taint(mod))
9777                 return;
9778
9779         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9780 }
9781
9782 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9783 static void trace_module_remove_evals(struct module *mod)
9784 {
9785         union trace_eval_map_item *map;
9786         union trace_eval_map_item **last = &trace_eval_maps;
9787
9788         if (!mod->num_trace_evals)
9789                 return;
9790
9791         mutex_lock(&trace_eval_mutex);
9792
9793         map = trace_eval_maps;
9794
9795         while (map) {
9796                 if (map->head.mod == mod)
9797                         break;
9798                 map = trace_eval_jmp_to_tail(map);
9799                 last = &map->tail.next;
9800                 map = map->tail.next;
9801         }
9802         if (!map)
9803                 goto out;
9804
9805         *last = trace_eval_jmp_to_tail(map)->tail.next;
9806         kfree(map);
9807  out:
9808         mutex_unlock(&trace_eval_mutex);
9809 }
9810 #else
9811 static inline void trace_module_remove_evals(struct module *mod) { }
9812 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9813
9814 static int trace_module_notify(struct notifier_block *self,
9815                                unsigned long val, void *data)
9816 {
9817         struct module *mod = data;
9818
9819         switch (val) {
9820         case MODULE_STATE_COMING:
9821                 trace_module_add_evals(mod);
9822                 break;
9823         case MODULE_STATE_GOING:
9824                 trace_module_remove_evals(mod);
9825                 break;
9826         }
9827
9828         return NOTIFY_OK;
9829 }
9830
9831 static struct notifier_block trace_module_nb = {
9832         .notifier_call = trace_module_notify,
9833         .priority = 0,
9834 };
9835 #endif /* CONFIG_MODULES */
9836
9837 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9838 {
9839
9840         event_trace_init();
9841
9842         init_tracer_tracefs(&global_trace, NULL);
9843         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9844
9845         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9846                         &global_trace, &tracing_thresh_fops);
9847
9848         trace_create_file("README", TRACE_MODE_READ, NULL,
9849                         NULL, &tracing_readme_fops);
9850
9851         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9852                         NULL, &tracing_saved_cmdlines_fops);
9853
9854         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9855                           NULL, &tracing_saved_cmdlines_size_fops);
9856
9857         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9858                         NULL, &tracing_saved_tgids_fops);
9859
9860         trace_create_eval_file(NULL);
9861
9862 #ifdef CONFIG_MODULES
9863         register_module_notifier(&trace_module_nb);
9864 #endif
9865
9866 #ifdef CONFIG_DYNAMIC_FTRACE
9867         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9868                         NULL, &tracing_dyn_info_fops);
9869 #endif
9870
9871         create_trace_instances(NULL);
9872
9873         update_tracer_options(&global_trace);
9874 }
9875
9876 static __init int tracer_init_tracefs(void)
9877 {
9878         int ret;
9879
9880         trace_access_lock_init();
9881
9882         ret = tracing_init_dentry();
9883         if (ret)
9884                 return 0;
9885
9886         if (eval_map_wq) {
9887                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9888                 queue_work(eval_map_wq, &tracerfs_init_work);
9889         } else {
9890                 tracer_init_tracefs_work_func(NULL);
9891         }
9892
9893         rv_init_interface();
9894
9895         return 0;
9896 }
9897
9898 fs_initcall(tracer_init_tracefs);
9899
9900 static int trace_die_panic_handler(struct notifier_block *self,
9901                                 unsigned long ev, void *unused);
9902
9903 static struct notifier_block trace_panic_notifier = {
9904         .notifier_call = trace_die_panic_handler,
9905         .priority = INT_MAX - 1,
9906 };
9907
9908 static struct notifier_block trace_die_notifier = {
9909         .notifier_call = trace_die_panic_handler,
9910         .priority = INT_MAX - 1,
9911 };
9912
9913 /*
9914  * The idea is to execute the following die/panic callback early, in order
9915  * to avoid showing irrelevant information in the trace (like other panic
9916  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9917  * warnings get disabled (to prevent potential log flooding).
9918  */
9919 static int trace_die_panic_handler(struct notifier_block *self,
9920                                 unsigned long ev, void *unused)
9921 {
9922         if (!ftrace_dump_on_oops)
9923                 return NOTIFY_DONE;
9924
9925         /* The die notifier requires DIE_OOPS to trigger */
9926         if (self == &trace_die_notifier && ev != DIE_OOPS)
9927                 return NOTIFY_DONE;
9928
9929         ftrace_dump(ftrace_dump_on_oops);
9930
9931         return NOTIFY_DONE;
9932 }
9933
9934 /*
9935  * printk() allows a maximum of 1024 characters; we really don't need
9936  * it that big. Nothing should be printing 1000 characters anyway.
9937  */
9938 #define TRACE_MAX_PRINT         1000
9939
9940 /*
9941  * Define here KERN_TRACE so that we have one place to modify
9942  * it if we decide to change what log level the ftrace dump
9943  * should be at.
9944  */
9945 #define KERN_TRACE              KERN_EMERG
9946
9947 void
9948 trace_printk_seq(struct trace_seq *s)
9949 {
9950         /* Probably should print a warning here. */
9951         if (s->seq.len >= TRACE_MAX_PRINT)
9952                 s->seq.len = TRACE_MAX_PRINT;
9953
9954         /*
9955          * More paranoid code. Although the buffer size is set to
9956          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9957          * an extra layer of protection.
9958          */
9959         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9960                 s->seq.len = s->seq.size - 1;
9961
9962         /* Should already be NUL terminated, but we are paranoid. */
9963         s->buffer[s->seq.len] = 0;
9964
9965         printk(KERN_TRACE "%s", s->buffer);
9966
9967         trace_seq_init(s);
9968 }
9969
9970 void trace_init_global_iter(struct trace_iterator *iter)
9971 {
9972         iter->tr = &global_trace;
9973         iter->trace = iter->tr->current_trace;
9974         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9975         iter->array_buffer = &global_trace.array_buffer;
9976
9977         if (iter->trace && iter->trace->open)
9978                 iter->trace->open(iter);
9979
9980         /* Annotate start of buffers if we had overruns */
9981         if (ring_buffer_overruns(iter->array_buffer->buffer))
9982                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9983
9984         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9985         if (trace_clocks[iter->tr->clock_id].in_ns)
9986                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9987
9988         /* Cannot use kmalloc for iter->temp and iter->fmt */
9989         iter->temp = static_temp_buf;
9990         iter->temp_size = STATIC_TEMP_BUF_SIZE;
9991         iter->fmt = static_fmt_buf;
9992         iter->fmt_size = STATIC_FMT_BUF_SIZE;
9993 }
9994
9995 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9996 {
9997         /* use static because iter can be a bit big for the stack */
9998         static struct trace_iterator iter;
9999         static atomic_t dump_running;
10000         struct trace_array *tr = &global_trace;
10001         unsigned int old_userobj;
10002         unsigned long flags;
10003         int cnt = 0, cpu;
10004
10005         /* Only allow one dump user at a time. */
10006         if (atomic_inc_return(&dump_running) != 1) {
10007                 atomic_dec(&dump_running);
10008                 return;
10009         }
10010
10011         /*
10012          * Always turn off tracing when we dump.
10013          * We don't need to show trace output of what happens
10014          * between multiple crashes.
10015          *
10016          * If the user does a sysrq-z, then they can re-enable
10017          * tracing with echo 1 > tracing_on.
10018          */
10019         tracing_off();
10020
10021         local_irq_save(flags);
10022
10023         /* Simulate the iterator */
10024         trace_init_global_iter(&iter);
10025
10026         for_each_tracing_cpu(cpu) {
10027                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10028         }
10029
10030         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10031
10032         /* don't look at user memory in panic mode */
10033         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10034
10035         switch (oops_dump_mode) {
10036         case DUMP_ALL:
10037                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10038                 break;
10039         case DUMP_ORIG:
10040                 iter.cpu_file = raw_smp_processor_id();
10041                 break;
10042         case DUMP_NONE:
10043                 goto out_enable;
10044         default:
10045                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10046                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10047         }
10048
10049         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10050
10051         /* Did function tracer already get disabled? */
10052         if (ftrace_is_dead()) {
10053                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10054                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10055         }
10056
10057         /*
10058          * We need to stop all tracing on all CPUS to read
10059          * the next buffer. This is a bit expensive, but is
10060          * not done often. We print all that we can read,
10061          * and then release the locks again.
10062          */
10063
10064         while (!trace_empty(&iter)) {
10065
10066                 if (!cnt)
10067                         printk(KERN_TRACE "---------------------------------\n");
10068
10069                 cnt++;
10070
10071                 trace_iterator_reset(&iter);
10072                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10073
10074                 if (trace_find_next_entry_inc(&iter) != NULL) {
10075                         int ret;
10076
10077                         ret = print_trace_line(&iter);
10078                         if (ret != TRACE_TYPE_NO_CONSUME)
10079                                 trace_consume(&iter);
10080                 }
10081                 touch_nmi_watchdog();
10082
10083                 trace_printk_seq(&iter.seq);
10084         }
10085
10086         if (!cnt)
10087                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10088         else
10089                 printk(KERN_TRACE "---------------------------------\n");
10090
10091  out_enable:
10092         tr->trace_flags |= old_userobj;
10093
10094         for_each_tracing_cpu(cpu) {
10095                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10096         }
10097         atomic_dec(&dump_running);
10098         local_irq_restore(flags);
10099 }
10100 EXPORT_SYMBOL_GPL(ftrace_dump);
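
/*
 * For illustration only: kernel code can dump the global ring buffer to the
 * console directly, e.g.
 *
 *	if (something_went_badly_wrong())
 *		ftrace_dump(DUMP_ALL);
 *
 * where something_went_badly_wrong() is a hypothetical condition. The
 * die/panic notifiers above do the same automatically when the
 * "ftrace_dump_on_oops" kernel command line option is set.
 */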
10101
10102 #define WRITE_BUFSIZE  4096
10103
10104 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10105                                 size_t count, loff_t *ppos,
10106                                 int (*createfn)(const char *))
10107 {
10108         char *kbuf, *buf, *tmp;
10109         int ret = 0;
10110         size_t done = 0;
10111         size_t size;
10112
10113         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10114         if (!kbuf)
10115                 return -ENOMEM;
10116
10117         while (done < count) {
10118                 size = count - done;
10119
10120                 if (size >= WRITE_BUFSIZE)
10121                         size = WRITE_BUFSIZE - 1;
10122
10123                 if (copy_from_user(kbuf, buffer + done, size)) {
10124                         ret = -EFAULT;
10125                         goto out;
10126                 }
10127                 kbuf[size] = '\0';
10128                 buf = kbuf;
10129                 do {
10130                         tmp = strchr(buf, '\n');
10131                         if (tmp) {
10132                                 *tmp = '\0';
10133                                 size = tmp - buf + 1;
10134                         } else {
10135                                 size = strlen(buf);
10136                                 if (done + size < count) {
10137                                         if (buf != kbuf)
10138                                                 break;
10139                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10140                                         pr_warn("Line length is too long: Should be less than %d\n",
10141                                                 WRITE_BUFSIZE - 2);
10142                                         ret = -EINVAL;
10143                                         goto out;
10144                                 }
10145                         }
10146                         done += size;
10147
10148                         /* Remove comments */
10149                         tmp = strchr(buf, '#');
10150
10151                         if (tmp)
10152                                 *tmp = '\0';
10153
10154                         ret = createfn(buf);
10155                         if (ret)
10156                                 goto out;
10157                         buf += size;
10158
10159                 } while (done < count);
10160         }
10161         ret = done;
10162
10163 out:
10164         kfree(kbuf);
10165
10166         return ret;
10167 }
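
/*
 * For illustration only: a minimal sketch of a createfn callback as consumed
 * by trace_parse_run_command(). It is handed one '\0'-terminated command at
 * a time, with '#' comments already stripped; an empty line is passed through
 * as an empty string. The name my_create_cmd is hypothetical.
 *
 *	static int my_create_cmd(const char *raw_command)
 *	{
 *		if (!strlen(raw_command))
 *			return 0;
 *		pr_info("parsed command: %s\n", raw_command);
 *		return 0;
 *	}
 *
 * The dynamic event interfaces (e.g. kprobe_events) pass their parsers in as
 * createfn from their write() handlers; a non-zero return aborts the write.
 */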
10168
10169 __init static void enable_instances(void)
10170 {
10171         struct trace_array *tr;
10172         char *curr_str;
10173         char *str;
10174         char *tok;
10175
10176         /* A tab is always appended; replace the last one with a terminator. */
10177         boot_instance_info[boot_instance_index - 1] = '\0';
10178         str = boot_instance_info;
10179
10180         while ((curr_str = strsep(&str, "\t"))) {
10181
10182                 tok = strsep(&curr_str, ",");
10183
10184                 tr = trace_array_get_by_name(tok);
10185                 if (!tr) {
10186                         pr_warn("Failed to create instance buffer %s\n", tok);
10187                         continue;
10188                 }
10189                 /* Allow user space to delete it */
10190                 trace_array_put(tr);
10191
10192                 while ((tok = strsep(&curr_str, ","))) {
10193                         early_enable_events(tr, tok, true);
10194                 }
10195         }
10196 }
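
/*
 * For illustration only: with the above, a boot instance plus events can be
 * requested on the kernel command line, e.g. (instance and event names are
 * examples):
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * which creates the "foo" instance and enables the listed events in it
 * before user space comes up.
 */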
10197
10198 __init static int tracer_alloc_buffers(void)
10199 {
10200         int ring_buf_size;
10201         int ret = -ENOMEM;
10202
10203
10204         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10205                 pr_warn("Tracing disabled due to lockdown\n");
10206                 return -EPERM;
10207         }
10208
10209         /*
10210          * Make sure we don't accidentally add more trace options
10211          * than we have bits for.
10212          */
10213         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10214
10215         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10216                 goto out;
10217
10218         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10219                 goto out_free_buffer_mask;
10220
10221         /* Only allocate trace_printk buffers if a trace_printk exists */
10222         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10223                 /* Must be called before global_trace.buffer is allocated */
10224                 trace_printk_init_buffers();
10225
10226         /* To save memory, keep the ring buffer size to its minimum */
10227         if (ring_buffer_expanded)
10228                 ring_buf_size = trace_buf_size;
10229         else
10230                 ring_buf_size = 1;
10231
10232         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10233         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10234
10235         raw_spin_lock_init(&global_trace.start_lock);
10236
10237         /*
10238          * The prepare callback allocates some memory for the ring buffer. We
10239          * don't free the buffer if the CPU goes down. If we were to free
10240          * the buffer, then the user would lose any trace that was in the
10241          * buffer. The memory will be removed once the "instance" is removed.
10242          */
10243         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10244                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10245                                       NULL);
10246         if (ret < 0)
10247                 goto out_free_cpumask;
10248         /* Used for event triggers */
10249         ret = -ENOMEM;
10250         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10251         if (!temp_buffer)
10252                 goto out_rm_hp_state;
10253
10254         if (trace_create_savedcmd() < 0)
10255                 goto out_free_temp_buffer;
10256
10257         /* TODO: make the number of buffers hot pluggable with CPUS */
10258         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10259                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10260                 goto out_free_savedcmd;
10261         }
10262
10263         if (global_trace.buffer_disabled)
10264                 tracing_off();
10265
10266         if (trace_boot_clock) {
10267                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10268                 if (ret < 0)
10269                         pr_warn("Trace clock %s not defined, going back to default\n",
10270                                 trace_boot_clock);
10271         }
10272
10273         /*
10274          * register_tracer() might reference current_trace, so it
10275          * needs to be set before we register anything. This is
10276          * just a bootstrap of current_trace anyway.
10277          */
10278         global_trace.current_trace = &nop_trace;
10279
10280         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10281
10282         ftrace_init_global_array_ops(&global_trace);
10283
10284         init_trace_flags_index(&global_trace);
10285
10286         register_tracer(&nop_trace);
10287
10288         /* Function tracing may start here (via kernel command line) */
10289         init_function_trace();
10290
10291         /* All seems OK, enable tracing */
10292         tracing_disabled = 0;
10293
10294         atomic_notifier_chain_register(&panic_notifier_list,
10295                                        &trace_panic_notifier);
10296
10297         register_die_notifier(&trace_die_notifier);
10298
10299         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10300
10301         INIT_LIST_HEAD(&global_trace.systems);
10302         INIT_LIST_HEAD(&global_trace.events);
10303         INIT_LIST_HEAD(&global_trace.hist_vars);
10304         INIT_LIST_HEAD(&global_trace.err_log);
10305         list_add(&global_trace.list, &ftrace_trace_arrays);
10306
10307         apply_trace_boot_options();
10308
10309         register_snapshot_cmd();
10310
10311         test_can_verify();
10312
10313         return 0;
10314
10315 out_free_savedcmd:
10316         free_saved_cmdlines_buffer(savedcmd);
10317 out_free_temp_buffer:
10318         ring_buffer_free(temp_buffer);
10319 out_rm_hp_state:
10320         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10321 out_free_cpumask:
10322         free_cpumask_var(global_trace.tracing_cpumask);
10323 out_free_buffer_mask:
10324         free_cpumask_var(tracing_buffer_mask);
10325 out:
10326         return ret;
10327 }
10328
10329 void __init ftrace_boot_snapshot(void)
10330 {
10331         if (snapshot_at_boot) {
10332                 tracing_snapshot();
10333                 internal_trace_puts("** Boot snapshot taken **\n");
10334         }
10335 }
10336
10337 void __init early_trace_init(void)
10338 {
10339         if (tracepoint_printk) {
10340                 tracepoint_print_iter =
10341                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10342                 if (MEM_FAIL(!tracepoint_print_iter,
10343                              "Failed to allocate trace iterator\n"))
10344                         tracepoint_printk = 0;
10345                 else
10346                         static_key_enable(&tracepoint_printk_key.key);
10347         }
10348         tracer_alloc_buffers();
10349
10350         init_events();
10351 }
10352
10353 void __init trace_init(void)
10354 {
10355         trace_event_init();
10356
10357         if (boot_instance_index)
10358                 enable_instances();
10359 }
10360
10361 __init static void clear_boot_tracer(void)
10362 {
10363         /*
10364          * The name of the default bootup tracer lives in an init section.
10365          * This function is called at late_initcall time. If the boot
10366          * tracer was never registered, clear the pointer out, to prevent
10367          * later registration from accessing memory that is
10368          * about to be freed.
10369          */
10370         if (!default_bootup_tracer)
10371                 return;
10372
10373         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10374                default_bootup_tracer);
10375         default_bootup_tracer = NULL;
10376 }
10377
10378 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10379 __init static void tracing_set_default_clock(void)
10380 {
10381         /* sched_clock_stable() is determined in late_initcall */
10382         if (!trace_boot_clock && !sched_clock_stable()) {
10383                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10384                         pr_warn("Can not set tracing clock due to lockdown\n");
10385                         return;
10386                 }
10387
10388                 printk(KERN_WARNING
10389                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10390                        "If you want to keep using the local clock, then add:\n"
10391                        "  \"trace_clock=local\"\n"
10392                        "on the kernel command line\n");
10393                 tracing_set_clock(&global_trace, "global");
10394         }
10395 }
10396 #else
10397 static inline void tracing_set_default_clock(void) { }
10398 #endif
10399
10400 __init static int late_trace_init(void)
10401 {
10402         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10403                 static_key_disable(&tracepoint_printk_key.key);
10404                 tracepoint_printk = 0;
10405         }
10406
10407         tracing_set_default_clock();
10408         clear_boot_tracer();
10409         return 0;
10410 }
10411
10412 late_initcall_sync(late_trace_init);