1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52
53 #include "trace.h"
54 #include "trace_output.h"
55
56 /*
57  * On boot up, the ring buffer is set to the minimum size, so that
58  * we do not waste memory on systems that are not using tracing.
59  */
60 bool ring_buffer_expanded;
61
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 #ifdef CONFIG_FTRACE_STARTUP_TEST
78 void __init disable_tracing_selftest(const char *reason)
79 {
80         if (!tracing_selftest_disabled) {
81                 tracing_selftest_disabled = true;
82                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
83         }
84 }
85 #endif
86
87 /* Pipe tracepoints to printk */
88 struct trace_iterator *tracepoint_print_iter;
89 int tracepoint_printk;
90 static bool tracepoint_printk_stop_on_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95         { }
96 };
97
98 static int
99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101         return 0;
102 }
103
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
110
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118
119 cpumask_var_t __read_mostly     tracing_buffer_mask;
120
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputting them to a
128  * serial console.
129  *
130  * It is off by default, but you can enable it either by specifying
131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump the buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
135  */
136
137 enum ftrace_dump_mode ftrace_dump_on_oops;
138
139 /* When set, tracing will stop when a WARN*() is hit */
140 int __disable_trace_on_warning;
141
142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
143 /* Map of enums to their values, for "eval_map" file */
144 struct trace_eval_map_head {
145         struct module                   *mod;
146         unsigned long                   length;
147 };
148
149 union trace_eval_map_item;
150
151 struct trace_eval_map_tail {
152         /*
153          * "end" is first and points to NULL as it must be different
154          * than "mod" or "eval_string"
155          */
156         union trace_eval_map_item       *next;
157         const char                      *end;   /* points to NULL */
158 };
159
160 static DEFINE_MUTEX(trace_eval_mutex);
161
162 /*
163  * The trace_eval_maps are saved in an array with two extra elements,
164  * one at the beginning, and one at the end. The beginning item contains
165  * the count of the saved maps (head.length), and the module they
166  * belong to if not built in (head.mod). The ending item contains a
167  * pointer to the next array of saved eval_map items.
168  */
169 union trace_eval_map_item {
170         struct trace_eval_map           map;
171         struct trace_eval_map_head      head;
172         struct trace_eval_map_tail      tail;
173 };
174
175 static union trace_eval_map_item *trace_eval_maps;
176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
177
178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
179 static void ftrace_trace_userstack(struct trace_array *tr,
180                                    struct trace_buffer *buffer,
181                                    unsigned int trace_ctx);
182
183 #define MAX_TRACER_SIZE         100
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188
189 static int __init set_cmdline_ftrace(char *str)
190 {
191         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
192         default_bootup_tracer = bootup_tracer_buf;
193         /* We are using ftrace early, expand it */
194         ring_buffer_expanded = true;
195         return 1;
196 }
197 __setup("ftrace=", set_cmdline_ftrace);
198
199 static int __init set_ftrace_dump_on_oops(char *str)
200 {
201         if (*str++ != '=' || !*str || !strcmp("1", str)) {
202                 ftrace_dump_on_oops = DUMP_ALL;
203                 return 1;
204         }
205
206         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
207                 ftrace_dump_on_oops = DUMP_ORIG;
208                 return 1;
209         }
210
211         return 0;
212 }
213 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
214
215 static int __init stop_trace_on_warning(char *str)
216 {
217         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
218                 __disable_trace_on_warning = 1;
219         return 1;
220 }
221 __setup("traceoff_on_warning", stop_trace_on_warning);
222
223 static int __init boot_alloc_snapshot(char *str)
224 {
225         allocate_snapshot = true;
226         /* We also need the main ring buffer expanded */
227         ring_buffer_expanded = true;
228         return 1;
229 }
230 __setup("alloc_snapshot", boot_alloc_snapshot);
231
232
233 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
234
235 static int __init set_trace_boot_options(char *str)
236 {
237         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
238         return 0;
239 }
240 __setup("trace_options=", set_trace_boot_options);
241
242 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
243 static char *trace_boot_clock __initdata;
244
245 static int __init set_trace_boot_clock(char *str)
246 {
247         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
248         trace_boot_clock = trace_boot_clock_buf;
249         return 0;
250 }
251 __setup("trace_clock=", set_trace_boot_clock);
252
253 static int __init set_tracepoint_printk(char *str)
254 {
255         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
256                 tracepoint_printk = 1;
257         return 1;
258 }
259 __setup("tp_printk", set_tracepoint_printk);
260
261 static int __init set_tracepoint_printk_stop(char *str)
262 {
263         tracepoint_printk_stop_on_boot = true;
264         return 1;
265 }
266 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
267
268 unsigned long long ns2usecs(u64 nsec)
269 {
270         nsec += 500;
271         do_div(nsec, 1000);
272         return nsec;
273 }
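/*
 * Note: the +500 rounds to the nearest microsecond, e.g. (illustrative
 * values only) ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */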
274
275 static void
276 trace_process_export(struct trace_export *export,
277                struct ring_buffer_event *event, int flag)
278 {
279         struct trace_entry *entry;
280         unsigned int size = 0;
281
282         if (export->flags & flag) {
283                 entry = ring_buffer_event_data(event);
284                 size = ring_buffer_event_length(event);
285                 export->write(export, entry, size);
286         }
287 }
288
289 static DEFINE_MUTEX(ftrace_export_lock);
290
291 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
292
293 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
294 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
295 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
296
297 static inline void ftrace_exports_enable(struct trace_export *export)
298 {
299         if (export->flags & TRACE_EXPORT_FUNCTION)
300                 static_branch_inc(&trace_function_exports_enabled);
301
302         if (export->flags & TRACE_EXPORT_EVENT)
303                 static_branch_inc(&trace_event_exports_enabled);
304
305         if (export->flags & TRACE_EXPORT_MARKER)
306                 static_branch_inc(&trace_marker_exports_enabled);
307 }
308
309 static inline void ftrace_exports_disable(struct trace_export *export)
310 {
311         if (export->flags & TRACE_EXPORT_FUNCTION)
312                 static_branch_dec(&trace_function_exports_enabled);
313
314         if (export->flags & TRACE_EXPORT_EVENT)
315                 static_branch_dec(&trace_event_exports_enabled);
316
317         if (export->flags & TRACE_EXPORT_MARKER)
318                 static_branch_dec(&trace_marker_exports_enabled);
319 }
320
321 static void ftrace_exports(struct ring_buffer_event *event, int flag)
322 {
323         struct trace_export *export;
324
325         preempt_disable_notrace();
326
327         export = rcu_dereference_raw_check(ftrace_exports_list);
328         while (export) {
329                 trace_process_export(export, event, flag);
330                 export = rcu_dereference_raw_check(export->next);
331         }
332
333         preempt_enable_notrace();
334 }
335
336 static inline void
337 add_trace_export(struct trace_export **list, struct trace_export *export)
338 {
339         rcu_assign_pointer(export->next, *list);
340         /*
341          * We are inserting export into the list, but another
342          * CPU might be walking that list. We need to make sure
343          * the export->next pointer is valid before another CPU sees
344          * the export pointer included in the list.
345          */
346         rcu_assign_pointer(*list, export);
347 }
348
349 static inline int
350 rm_trace_export(struct trace_export **list, struct trace_export *export)
351 {
352         struct trace_export **p;
353
354         for (p = list; *p != NULL; p = &(*p)->next)
355                 if (*p == export)
356                         break;
357
358         if (*p != export)
359                 return -1;
360
361         rcu_assign_pointer(*p, (*p)->next);
362
363         return 0;
364 }
365
366 static inline void
367 add_ftrace_export(struct trace_export **list, struct trace_export *export)
368 {
369         ftrace_exports_enable(export);
370
371         add_trace_export(list, export);
372 }
373
374 static inline int
375 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
376 {
377         int ret;
378
379         ret = rm_trace_export(list, export);
380         ftrace_exports_disable(export);
381
382         return ret;
383 }
384
385 int register_ftrace_export(struct trace_export *export)
386 {
387         if (WARN_ON_ONCE(!export->write))
388                 return -1;
389
390         mutex_lock(&ftrace_export_lock);
391
392         add_ftrace_export(&ftrace_exports_list, export);
393
394         mutex_unlock(&ftrace_export_lock);
395
396         return 0;
397 }
398 EXPORT_SYMBOL_GPL(register_ftrace_export);
399
400 int unregister_ftrace_export(struct trace_export *export)
401 {
402         int ret;
403
404         mutex_lock(&ftrace_export_lock);
405
406         ret = rm_ftrace_export(&ftrace_exports_list, export);
407
408         mutex_unlock(&ftrace_export_lock);
409
410         return ret;
411 }
412 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
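/*
 * Illustrative sketch only (hypothetical names, not part of this file):
 * a module that wants to mirror trace data into its own sink could do
 * roughly the following, using the trace_export definitions from
 * linux/trace.h:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward 'size' bytes of the raw trace entry to the sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */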
413
414 /* trace_flags holds trace_options default values */
415 #define TRACE_DEFAULT_FLAGS                                             \
416         (FUNCTION_DEFAULT_FLAGS |                                       \
417          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
418          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
419          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
420          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
421          TRACE_ITER_HASH_PTR)
422
423 /* trace_options that are only supported by global_trace */
424 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
425                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
426
427 /* trace_flags that are default zero for instances */
428 #define ZEROED_TRACE_FLAGS \
429         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
430
431 /*
432  * The global_trace is the descriptor that holds the top-level tracing
433  * buffers for the live tracing.
434  */
435 static struct trace_array global_trace = {
436         .trace_flags = TRACE_DEFAULT_FLAGS,
437 };
438
439 LIST_HEAD(ftrace_trace_arrays);
440
441 int trace_array_get(struct trace_array *this_tr)
442 {
443         struct trace_array *tr;
444         int ret = -ENODEV;
445
446         mutex_lock(&trace_types_lock);
447         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
448                 if (tr == this_tr) {
449                         tr->ref++;
450                         ret = 0;
451                         break;
452                 }
453         }
454         mutex_unlock(&trace_types_lock);
455
456         return ret;
457 }
458
459 static void __trace_array_put(struct trace_array *this_tr)
460 {
461         WARN_ON(!this_tr->ref);
462         this_tr->ref--;
463 }
464
465 /**
466  * trace_array_put - Decrement the reference counter for this trace array.
467  * @this_tr : pointer to the trace array
468  *
469  * NOTE: Use this when we no longer need the trace array returned by
470  * trace_array_get_by_name(). This ensures the trace array can be later
471  * destroyed.
472  *
473  */
474 void trace_array_put(struct trace_array *this_tr)
475 {
476         if (!this_tr)
477                 return;
478
479         mutex_lock(&trace_types_lock);
480         __trace_array_put(this_tr);
481         mutex_unlock(&trace_types_lock);
482 }
483 EXPORT_SYMBOL_GPL(trace_array_put);
484
485 int tracing_check_open_get_tr(struct trace_array *tr)
486 {
487         int ret;
488
489         ret = security_locked_down(LOCKDOWN_TRACEFS);
490         if (ret)
491                 return ret;
492
493         if (tracing_disabled)
494                 return -ENODEV;
495
496         if (tr && trace_array_get(tr) < 0)
497                 return -ENODEV;
498
499         return 0;
500 }
501
502 int call_filter_check_discard(struct trace_event_call *call, void *rec,
503                               struct trace_buffer *buffer,
504                               struct ring_buffer_event *event)
505 {
506         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
507             !filter_match_preds(call->filter, rec)) {
508                 __trace_event_discard_commit(buffer, event);
509                 return 1;
510         }
511
512         return 0;
513 }
514
515 void trace_free_pid_list(struct trace_pid_list *pid_list)
516 {
517         vfree(pid_list->pids);
518         kfree(pid_list);
519 }
520
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531         /*
532          * If pid_max changed after filtered_pids was created, we
533          * by default ignore all pids greater than the previous pid_max.
534          */
535         if (search_pid >= filtered_pids->pid_max)
536                 return false;
537
538         return test_bit(search_pid, filtered_pids->pids);
539 }
540
541 /**
542  * trace_ignore_this_task - should a task be ignored for tracing
543  * @filtered_pids: The list of pids to check
544  * @filtered_no_pids: The list of pids not to be traced
545  * @task: The task that should be ignored if not filtered
546  *
547  * Checks if @task should be traced or not from @filtered_pids.
548  * Returns true if @task should *NOT* be traced.
549  * Returns false if @task should be traced.
550  */
551 bool
552 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
553                        struct trace_pid_list *filtered_no_pids,
554                        struct task_struct *task)
555 {
556         /*
557          * If filtered_no_pids is not empty, and the task's pid is listed
558          * in filtered_no_pids, then return true.
559          * Otherwise, if filtered_pids is empty, that means we can
560          * trace all tasks. If it has content, then only trace pids
561          * within filtered_pids.
562          */
563
564         return (filtered_pids &&
565                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
566                 (filtered_no_pids &&
567                  trace_find_filtered_pid(filtered_no_pids, task->pid));
568 }
569
570 /**
571  * trace_filter_add_remove_task - Add or remove a task from a pid_list
572  * @pid_list: The list to modify
573  * @self: The current task for fork or NULL for exit
574  * @task: The task to add or remove
575  *
576  * If adding a task, if @self is defined, the task is only added if @self
577  * is also included in @pid_list. This happens on fork and tasks should
578  * only be added when the parent is listed. If @self is NULL, then the
579  * @task pid will be removed from the list, which would happen on exit
580  * of a task.
581  */
582 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
583                                   struct task_struct *self,
584                                   struct task_struct *task)
585 {
586         if (!pid_list)
587                 return;
588
589         /* For forks, we only add if the forking task is listed */
590         if (self) {
591                 if (!trace_find_filtered_pid(pid_list, self->pid))
592                         return;
593         }
594
595         /* Sorry, but we don't support pid_max changing after setting */
596         if (task->pid >= pid_list->pid_max)
597                 return;
598
599         /* "self" is set for forks, and NULL for exits */
600         if (self)
601                 set_bit(task->pid, pid_list->pids);
602         else
603                 clear_bit(task->pid, pid_list->pids);
604 }
605
606 /**
607  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
608  * @pid_list: The pid list to show
609  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
610  * @pos: The position of the file
611  *
612  * This is used by the seq_file "next" operation to iterate the pids
613  * listed in a trace_pid_list structure.
614  *
615  * Returns the pid+1 as we want to display pid of zero, but NULL would
616  * stop the iteration.
617  */
618 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
619 {
620         unsigned long pid = (unsigned long)v;
621
622         (*pos)++;
623
624         /* pid already is +1 of the actual previous bit */
625         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
626
627         /* Return pid + 1 to allow zero to be represented */
628         if (pid < pid_list->pid_max)
629                 return (void *)(pid + 1);
630
631         return NULL;
632 }
633
634 /**
635  * trace_pid_start - Used for seq_file to start reading pid lists
636  * @pid_list: The pid list to show
637  * @pos: The position of the file
638  *
639  * This is used by seq_file "start" operation to start the iteration
640  * of listing pids.
641  *
642  * Returns the pid+1 as we want to display pid of zero, but NULL would
643  * stop the iteration.
644  */
645 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
646 {
647         unsigned long pid;
648         loff_t l = 0;
649
650         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
651         if (pid >= pid_list->pid_max)
652                 return NULL;
653
654         /* Return pid + 1 so that zero can be the exit value */
655         for (pid++; pid && l < *pos;
656              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
657                 ;
658         return (void *)pid;
659 }
660
661 /**
662  * trace_pid_show - show the current pid in seq_file processing
663  * @m: The seq_file structure to write into
664  * @v: A void pointer of the pid (+1) value to display
665  *
666  * Can be directly used by seq_file operations to display the current
667  * pid value.
668  */
669 int trace_pid_show(struct seq_file *m, void *v)
670 {
671         unsigned long pid = (unsigned long)v - 1;
672
673         seq_printf(m, "%lu\n", pid);
674         return 0;
675 }
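/*
 * A minimal sketch (hypothetical wrapper names) of how the trace_pid_*
 * helpers above are typically wired into a seq_file; real users such as
 * the set_event_pid file supply their own pid_list lookup and locking:
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_get_pid_list(m), pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_get_pid_list(m), v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */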
676
677 /* 128 should be much more than enough */
678 #define PID_BUF_SIZE            127
679
680 int trace_pid_write(struct trace_pid_list *filtered_pids,
681                     struct trace_pid_list **new_pid_list,
682                     const char __user *ubuf, size_t cnt)
683 {
684         struct trace_pid_list *pid_list;
685         struct trace_parser parser;
686         unsigned long val;
687         int nr_pids = 0;
688         ssize_t read = 0;
689         ssize_t ret = 0;
690         loff_t pos;
691         pid_t pid;
692
693         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
694                 return -ENOMEM;
695
696         /*
697          * The write is an all-or-nothing operation: always create a new
698          * array when the user adds new pids, rather than modifying the
699          * existing one in place. If the operation fails, then the current
700          * list is not modified.
701          */
702         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
703         if (!pid_list) {
704                 trace_parser_put(&parser);
705                 return -ENOMEM;
706         }
707
708         pid_list->pid_max = READ_ONCE(pid_max);
709
710         /* Only truncating will shrink pid_max */
711         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
712                 pid_list->pid_max = filtered_pids->pid_max;
713
714         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
715         if (!pid_list->pids) {
716                 trace_parser_put(&parser);
717                 kfree(pid_list);
718                 return -ENOMEM;
719         }
720
721         if (filtered_pids) {
722                 /* copy the current bits to the new max */
723                 for_each_set_bit(pid, filtered_pids->pids,
724                                  filtered_pids->pid_max) {
725                         set_bit(pid, pid_list->pids);
726                         nr_pids++;
727                 }
728         }
729
730         while (cnt > 0) {
731
732                 pos = 0;
733
734                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
735                 if (ret < 0 || !trace_parser_loaded(&parser))
736                         break;
737
738                 read += ret;
739                 ubuf += ret;
740                 cnt -= ret;
741
742                 ret = -EINVAL;
743                 if (kstrtoul(parser.buffer, 0, &val))
744                         break;
745                 if (val >= pid_list->pid_max)
746                         break;
747
748                 pid = (pid_t)val;
749
750                 set_bit(pid, pid_list->pids);
751                 nr_pids++;
752
753                 trace_parser_clear(&parser);
754                 ret = 0;
755         }
756         trace_parser_put(&parser);
757
758         if (ret < 0) {
759                 trace_free_pid_list(pid_list);
760                 return ret;
761         }
762
763         if (!nr_pids) {
764                 /* Cleared the list of pids */
765                 trace_free_pid_list(pid_list);
766                 read = ret;
767                 pid_list = NULL;
768         }
769
770         *new_pid_list = pid_list;
771
772         return read;
773 }
774
775 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
776 {
777         u64 ts;
778
779         /* Early boot up does not have a buffer yet */
780         if (!buf->buffer)
781                 return trace_clock_local();
782
783         ts = ring_buffer_time_stamp(buf->buffer);
784         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
785
786         return ts;
787 }
788
789 u64 ftrace_now(int cpu)
790 {
791         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
792 }
793
794 /**
795  * tracing_is_enabled - Show if global_trace has been enabled
796  *
797  * Shows if the global trace has been enabled or not. It uses the
798  * mirror flag "buffer_disabled" to be used in fast paths such as for
799  * the irqsoff tracer. But it may be inaccurate due to races. If you
800  * need to know the accurate state, use tracing_is_on() which is a little
801  * slower, but accurate.
802  */
803 int tracing_is_enabled(void)
804 {
805         /*
806          * For quick access (irqsoff uses this in fast path), just
807          * return the mirror variable of the state of the ring buffer.
808          * It's a little racy, but we don't really care.
809          */
810         smp_rmb();
811         return !global_trace.buffer_disabled;
812 }
813
814 /*
815  * trace_buf_size is the size in bytes that is allocated
816  * for a buffer. Note, the number of bytes is always rounded
817  * to page size.
818  *
819  * This number is purposely set to a low number of 16384.
820  * If a dump on oops happens, it is much appreciated not to
821  * have to wait for all that output. In any case, this is
822  * configurable at both boot time and run time.
823  */
824 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
825
826 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
827
828 /* trace_types holds a link list of available tracers. */
829 static struct tracer            *trace_types __read_mostly;
830
831 /*
832  * trace_types_lock is used to protect the trace_types list.
833  */
834 DEFINE_MUTEX(trace_types_lock);
835
836 /*
837  * Serialize access to the ring buffer.
838  *
839  * The ring buffer serializes readers, but that is only low-level protection.
840  * The validity of the events (returned by ring_buffer_peek() etc.)
841  * is not protected by the ring buffer.
842  *
843  * The content of events may become garbage if we allow other processes to
844  * consume these events concurrently:
845  *   A) the page of the consumed events may become a normal page
846  *      (not a reader page) in the ring buffer, and this page will be
847  *      rewritten by the events producer.
848  *   B) the page of the consumed events may become a page for splice_read,
849  *      and this page will be returned to the system.
850  *
851  * These primitives allow multiple processes to access different cpu ring
852  * buffers concurrently.
853  *
854  * These primitives don't distinguish read-only and read-consume access.
855  * Multiple read-only accesses are also serialized.
856  */
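/*
 * Illustrative usage only: readers in this file bracket their per-cpu
 * buffer access roughly like
 *
 *	trace_access_lock(cpu);
 *	... read or consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while passing RING_BUFFER_ALL_CPUS locks out every per-cpu reader.
 */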
857
858 #ifdef CONFIG_SMP
859 static DECLARE_RWSEM(all_cpu_access_lock);
860 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
861
862 static inline void trace_access_lock(int cpu)
863 {
864         if (cpu == RING_BUFFER_ALL_CPUS) {
865                 /* gain it for accessing the whole ring buffer. */
866                 down_write(&all_cpu_access_lock);
867         } else {
868                 /* gain it for accessing a cpu ring buffer. */
869
870                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
871                 down_read(&all_cpu_access_lock);
872
873                 /* Secondly block other access to this @cpu ring buffer. */
874                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
875         }
876 }
877
878 static inline void trace_access_unlock(int cpu)
879 {
880         if (cpu == RING_BUFFER_ALL_CPUS) {
881                 up_write(&all_cpu_access_lock);
882         } else {
883                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
884                 up_read(&all_cpu_access_lock);
885         }
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890         int cpu;
891
892         for_each_possible_cpu(cpu)
893                 mutex_init(&per_cpu(cpu_access_lock, cpu));
894 }
895
896 #else
897
898 static DEFINE_MUTEX(access_lock);
899
900 static inline void trace_access_lock(int cpu)
901 {
902         (void)cpu;
903         mutex_lock(&access_lock);
904 }
905
906 static inline void trace_access_unlock(int cpu)
907 {
908         (void)cpu;
909         mutex_unlock(&access_lock);
910 }
911
912 static inline void trace_access_lock_init(void)
913 {
914 }
915
916 #endif
917
918 #ifdef CONFIG_STACKTRACE
919 static void __ftrace_trace_stack(struct trace_buffer *buffer,
920                                  unsigned int trace_ctx,
921                                  int skip, struct pt_regs *regs);
922 static inline void ftrace_trace_stack(struct trace_array *tr,
923                                       struct trace_buffer *buffer,
924                                       unsigned int trace_ctx,
925                                       int skip, struct pt_regs *regs);
926
927 #else
928 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
929                                         unsigned int trace_ctx,
930                                         int skip, struct pt_regs *regs)
931 {
932 }
933 static inline void ftrace_trace_stack(struct trace_array *tr,
934                                       struct trace_buffer *buffer,
935                                       unsigned int trace_ctx,
936                                       int skip, struct pt_regs *regs)
937 {
938 }
939
940 #endif
941
942 static __always_inline void
943 trace_event_setup(struct ring_buffer_event *event,
944                   int type, unsigned int trace_ctx)
945 {
946         struct trace_entry *ent = ring_buffer_event_data(event);
947
948         tracing_generic_entry_update(ent, type, trace_ctx);
949 }
950
951 static __always_inline struct ring_buffer_event *
952 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
953                           int type,
954                           unsigned long len,
955                           unsigned int trace_ctx)
956 {
957         struct ring_buffer_event *event;
958
959         event = ring_buffer_lock_reserve(buffer, len);
960         if (event != NULL)
961                 trace_event_setup(event, type, trace_ctx);
962
963         return event;
964 }
965
966 void tracer_tracing_on(struct trace_array *tr)
967 {
968         if (tr->array_buffer.buffer)
969                 ring_buffer_record_on(tr->array_buffer.buffer);
970         /*
971          * This flag is looked at when buffers haven't been allocated
972          * yet, or by some tracers (like irqsoff) that just want to
973          * know if the ring buffer has been disabled, but it can handle
974          * races where it gets disabled while we still do a record.
975          * As the check is in the fast path of the tracers, it is more
976          * important to be fast than accurate.
977          */
978         tr->buffer_disabled = 0;
979         /* Make the flag seen by readers */
980         smp_wmb();
981 }
982
983 /**
984  * tracing_on - enable tracing buffers
985  *
986  * This function enables tracing buffers that may have been
987  * disabled with tracing_off.
988  */
989 void tracing_on(void)
990 {
991         tracer_tracing_on(&global_trace);
992 }
993 EXPORT_SYMBOL_GPL(tracing_on);
994
995
996 static __always_inline void
997 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
998 {
999         __this_cpu_write(trace_taskinfo_save, true);
1000
1001         /* If this is the temp buffer, we need to commit fully */
1002         if (this_cpu_read(trace_buffered_event) == event) {
1003                 /* Length is in event->array[0] */
1004                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1005                 /* Release the temp buffer */
1006                 this_cpu_dec(trace_buffered_event_cnt);
1007         } else
1008                 ring_buffer_unlock_commit(buffer, event);
1009 }
1010
1011 /**
1012  * __trace_puts - write a constant string into the trace buffer.
1013  * @ip:    The address of the caller
1014  * @str:   The constant string to write
1015  * @size:  The size of the string.
1016  */
1017 int __trace_puts(unsigned long ip, const char *str, int size)
1018 {
1019         struct ring_buffer_event *event;
1020         struct trace_buffer *buffer;
1021         struct print_entry *entry;
1022         unsigned int trace_ctx;
1023         int alloc;
1024
1025         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1026                 return 0;
1027
1028         if (unlikely(tracing_selftest_running || tracing_disabled))
1029                 return 0;
1030
1031         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1032
1033         trace_ctx = tracing_gen_ctx();
1034         buffer = global_trace.array_buffer.buffer;
1035         ring_buffer_nest_start(buffer);
1036         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1037                                             trace_ctx);
1038         if (!event) {
1039                 size = 0;
1040                 goto out;
1041         }
1042
1043         entry = ring_buffer_event_data(event);
1044         entry->ip = ip;
1045
1046         memcpy(&entry->buf, str, size);
1047
1048         /* Add a newline if necessary */
1049         if (entry->buf[size - 1] != '\n') {
1050                 entry->buf[size] = '\n';
1051                 entry->buf[size + 1] = '\0';
1052         } else
1053                 entry->buf[size] = '\0';
1054
1055         __buffer_unlock_commit(buffer, event);
1056         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1057  out:
1058         ring_buffer_nest_end(buffer);
1059         return size;
1060 }
1061 EXPORT_SYMBOL_GPL(__trace_puts);
1062
1063 /**
1064  * __trace_bputs - write the pointer to a constant string into trace buffer
1065  * @ip:    The address of the caller
1066  * @str:   The constant string to write to the buffer to
1067  */
1068 int __trace_bputs(unsigned long ip, const char *str)
1069 {
1070         struct ring_buffer_event *event;
1071         struct trace_buffer *buffer;
1072         struct bputs_entry *entry;
1073         unsigned int trace_ctx;
1074         int size = sizeof(struct bputs_entry);
1075         int ret = 0;
1076
1077         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1078                 return 0;
1079
1080         if (unlikely(tracing_selftest_running || tracing_disabled))
1081                 return 0;
1082
1083         trace_ctx = tracing_gen_ctx();
1084         buffer = global_trace.array_buffer.buffer;
1085
1086         ring_buffer_nest_start(buffer);
1087         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1088                                             trace_ctx);
1089         if (!event)
1090                 goto out;
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip                       = ip;
1094         entry->str                      = str;
1095
1096         __buffer_unlock_commit(buffer, event);
1097         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1098
1099         ret = 1;
1100  out:
1101         ring_buffer_nest_end(buffer);
1102         return ret;
1103 }
1104 EXPORT_SYMBOL_GPL(__trace_bputs);
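/*
 * Both helpers above are normally reached through the trace_puts() macro
 * (defined alongside trace_printk()), which supplies _THIS_IP_ and selects
 * __trace_bputs() for compile-time constant strings or __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("hit the slow path\n");
 */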
1105
1106 #ifdef CONFIG_TRACER_SNAPSHOT
1107 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1108                                            void *cond_data)
1109 {
1110         struct tracer *tracer = tr->current_trace;
1111         unsigned long flags;
1112
1113         if (in_nmi()) {
1114                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1115                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1116                 return;
1117         }
1118
1119         if (!tr->allocated_snapshot) {
1120                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1121                 internal_trace_puts("*** stopping trace here!   ***\n");
1122                 tracing_off();
1123                 return;
1124         }
1125
1126         /* Note, snapshot can not be used when the tracer uses it */
1127         if (tracer->use_max_tr) {
1128                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1129                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1130                 return;
1131         }
1132
1133         local_irq_save(flags);
1134         update_max_tr(tr, current, smp_processor_id(), cond_data);
1135         local_irq_restore(flags);
1136 }
1137
1138 void tracing_snapshot_instance(struct trace_array *tr)
1139 {
1140         tracing_snapshot_instance_cond(tr, NULL);
1141 }
1142
1143 /**
1144  * tracing_snapshot - take a snapshot of the current buffer.
1145  *
1146  * This causes a swap between the snapshot buffer and the current live
1147  * tracing buffer. You can use this to take snapshots of the live
1148  * trace when some condition is triggered, but continue to trace.
1149  *
1150  * Note, make sure to allocate the snapshot either with
1151  * tracing_snapshot_alloc(), or by doing it manually
1152  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1153  *
1154  * If the snapshot buffer is not allocated, this will stop tracing,
1155  * basically making a permanent snapshot.
1156  */
1157 void tracing_snapshot(void)
1158 {
1159         struct trace_array *tr = &global_trace;
1160
1161         tracing_snapshot_instance(tr);
1162 }
1163 EXPORT_SYMBOL_GPL(tracing_snapshot);
1164
1165 /**
1166  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1167  * @tr:         The tracing instance to snapshot
1168  * @cond_data:  The data to be tested conditionally, and possibly saved
1169  *
1170  * This is the same as tracing_snapshot() except that the snapshot is
1171  * conditional - the snapshot will only happen if the
1172  * cond_snapshot.update() implementation receiving the cond_data
1173  * returns true, which means that the trace array's cond_snapshot
1174  * update() operation used the cond_data to determine whether the
1175  * snapshot should be taken, and if it was, presumably saved it along
1176  * with the snapshot.
1177  */
1178 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1179 {
1180         tracing_snapshot_instance_cond(tr, cond_data);
1181 }
1182 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1183
1184 /**
1185  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1186  * @tr:         The tracing instance
1187  *
1188  * When the user enables a conditional snapshot using
1189  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1190  * with the snapshot.  This accessor is used to retrieve it.
1191  *
1192  * Should not be called from cond_snapshot.update(), since it takes
1193  * the tr->max_lock lock, which the code calling
1194  * cond_snapshot.update() has already taken.
1195  *
1196  * Returns the cond_data associated with the trace array's snapshot.
1197  */
1198 void *tracing_cond_snapshot_data(struct trace_array *tr)
1199 {
1200         void *cond_data = NULL;
1201
1202         arch_spin_lock(&tr->max_lock);
1203
1204         if (tr->cond_snapshot)
1205                 cond_data = tr->cond_snapshot->cond_data;
1206
1207         arch_spin_unlock(&tr->max_lock);
1208
1209         return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214                                         struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219         int ret;
1220
1221         if (!tr->allocated_snapshot) {
1222
1223                 /* allocate spare buffer */
1224                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226                 if (ret < 0)
1227                         return ret;
1228
1229                 tr->allocated_snapshot = true;
1230         }
1231
1232         return 0;
1233 }
1234
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237         /*
1238          * We don't free the ring buffer; instead, we resize it because
1239          * the max_tr ring buffer has some state (e.g. ring->clock) that
1240          * we want to preserve.
1241          */
1242         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243         set_buffer_entries(&tr->max_buffer, 1);
1244         tracing_reset_online_cpus(&tr->max_buffer);
1245         tr->allocated_snapshot = false;
1246 }
1247
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260         struct trace_array *tr = &global_trace;
1261         int ret;
1262
1263         ret = tracing_alloc_snapshot_instance(tr);
1264         WARN_ON(ret < 0);
1265
1266         return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283         int ret;
1284
1285         ret = tracing_alloc_snapshot();
1286         if (ret < 0)
1287                 return;
1288
1289         tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
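/*
 * Typical pattern, as a sketch only: allocate the snapshot buffer from a
 * context that may sleep, then trigger it later from the (possibly atomic)
 * path being debugged. 'interesting_condition' is a placeholder:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (interesting_condition)
 *		tracing_snapshot();
 */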
1292
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:         The tracing instance
1296  * @cond_data:  User data to associate with the snapshot
1297  * @update:     Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307                                  cond_update_fn_t update)
1308 {
1309         struct cond_snapshot *cond_snapshot;
1310         int ret = 0;
1311
1312         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313         if (!cond_snapshot)
1314                 return -ENOMEM;
1315
1316         cond_snapshot->cond_data = cond_data;
1317         cond_snapshot->update = update;
1318
1319         mutex_lock(&trace_types_lock);
1320
1321         ret = tracing_alloc_snapshot_instance(tr);
1322         if (ret)
1323                 goto fail_unlock;
1324
1325         if (tr->current_trace->use_max_tr) {
1326                 ret = -EBUSY;
1327                 goto fail_unlock;
1328         }
1329
1330         /*
1331          * The cond_snapshot can only change to NULL without the
1332          * trace_types_lock. We don't care if we race with it going
1333          * to NULL, but we want to make sure that it's not set to
1334          * something other than NULL when we get here, which we can
1335          * do safely with only holding the trace_types_lock and not
1336          * having to take the max_lock.
1337          */
1338         if (tr->cond_snapshot) {
1339                 ret = -EBUSY;
1340                 goto fail_unlock;
1341         }
1342
1343         arch_spin_lock(&tr->max_lock);
1344         tr->cond_snapshot = cond_snapshot;
1345         arch_spin_unlock(&tr->max_lock);
1346
1347         mutex_unlock(&trace_types_lock);
1348
1349         return ret;
1350
1351  fail_unlock:
1352         mutex_unlock(&trace_types_lock);
1353         kfree(cond_snapshot);
1354         return ret;
1355 }
1356 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
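/*
 * Minimal sketch of a conditional-snapshot user (hypothetical names,
 * assuming the cond_update_fn_t callback signature used by this API):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		// snapshot only when the condition of interest is met
 *		return s->value > s->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */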
1357
1358 /**
1359  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1360  * @tr:         The tracing instance
1361  *
1362  * Check whether the conditional snapshot for the given instance is
1363  * enabled; if so, free the cond_snapshot associated with it,
1364  * otherwise return -EINVAL.
1365  *
1366  * Returns 0 if successful, error otherwise.
1367  */
1368 int tracing_snapshot_cond_disable(struct trace_array *tr)
1369 {
1370         int ret = 0;
1371
1372         arch_spin_lock(&tr->max_lock);
1373
1374         if (!tr->cond_snapshot)
1375                 ret = -EINVAL;
1376         else {
1377                 kfree(tr->cond_snapshot);
1378                 tr->cond_snapshot = NULL;
1379         }
1380
1381         arch_spin_unlock(&tr->max_lock);
1382
1383         return ret;
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1386 #else
1387 void tracing_snapshot(void)
1388 {
1389         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1390 }
1391 EXPORT_SYMBOL_GPL(tracing_snapshot);
1392 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1393 {
1394         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1397 int tracing_alloc_snapshot(void)
1398 {
1399         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1400         return -ENODEV;
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1403 void tracing_snapshot_alloc(void)
1404 {
1405         /* Give warning */
1406         tracing_snapshot();
1407 }
1408 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1409 void *tracing_cond_snapshot_data(struct trace_array *tr)
1410 {
1411         return NULL;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1414 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1415 {
1416         return -ENODEV;
1417 }
1418 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1419 int tracing_snapshot_cond_disable(struct trace_array *tr)
1420 {
1421         return false;
1422 }
1423 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1424 #endif /* CONFIG_TRACER_SNAPSHOT */
1425
1426 void tracer_tracing_off(struct trace_array *tr)
1427 {
1428         if (tr->array_buffer.buffer)
1429                 ring_buffer_record_off(tr->array_buffer.buffer);
1430         /*
1431          * This flag is looked at when buffers haven't been allocated
1432          * yet, or by some tracers (like irqsoff) that just want to
1433          * know if the ring buffer has been disabled, but it can handle
1434          * races where it gets disabled while we still do a record.
1435          * As the check is in the fast path of the tracers, it is more
1436          * important to be fast than accurate.
1437          */
1438         tr->buffer_disabled = 1;
1439         /* Make the flag seen by readers */
1440         smp_wmb();
1441 }
1442
1443 /**
1444  * tracing_off - turn off tracing buffers
1445  *
1446  * This function stops the tracing buffers from recording data.
1447  * It does not disable any overhead the tracers themselves may
1448  * be causing. This function simply causes all recording to
1449  * the ring buffers to fail.
1450  */
1451 void tracing_off(void)
1452 {
1453         tracer_tracing_off(&global_trace);
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_off);
1456
1457 void disable_trace_on_warning(void)
1458 {
1459         if (__disable_trace_on_warning) {
1460                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1461                         "Disabling tracing due to warning\n");
1462                 tracing_off();
1463         }
1464 }
1465
1466 /**
1467  * tracer_tracing_is_on - show real state of ring buffer enabled
1468  * @tr : the trace array to know if ring buffer is enabled
1469  *
1470  * Shows real state of the ring buffer if it is enabled or not.
1471  */
1472 bool tracer_tracing_is_on(struct trace_array *tr)
1473 {
1474         if (tr->array_buffer.buffer)
1475                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1476         return !tr->buffer_disabled;
1477 }
1478
1479 /**
1480  * tracing_is_on - show state of ring buffers enabled
1481  */
1482 int tracing_is_on(void)
1483 {
1484         return tracer_tracing_is_on(&global_trace);
1485 }
1486 EXPORT_SYMBOL_GPL(tracing_is_on);
1487
1488 static int __init set_buf_size(char *str)
1489 {
1490         unsigned long buf_size;
1491
1492         if (!str)
1493                 return 0;
1494         buf_size = memparse(str, &str);
1495         /* nr_entries can not be zero */
1496         if (buf_size == 0)
1497                 return 0;
1498         trace_buf_size = buf_size;
1499         return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505         unsigned long threshold;
1506         int ret;
1507
1508         if (!str)
1509                 return 0;
1510         ret = kstrtoul(str, 0, &threshold);
1511         if (ret < 0)
1512                 return 0;
1513         tracing_thresh = threshold * 1000;
1514         return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520         return nsecs / 1000;
1521 }
1522
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534         TRACE_FLAGS
1535         NULL
1536 };
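/*
 * For example, with C(a, b) defined as b, an entry list such as
 * C(PRINT_PARENT, "print-parent"), C(SYM_OFFSET, "sym-offset"), ...
 * expands to "print-parent", "sym-offset", ..., which is exactly the
 * string array that trace_options[] needs (illustrative entries only;
 * the real list lives in TRACE_FLAGS in trace.h).
 */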
1537
1538 static struct {
1539         u64 (*func)(void);
1540         const char *name;
1541         int in_ns;              /* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543         { trace_clock_local,            "local",        1 },
1544         { trace_clock_global,           "global",       1 },
1545         { trace_clock_counter,          "counter",      0 },
1546         { trace_clock_jiffies,          "uptime",       0 },
1547         { trace_clock,                  "perf",         1 },
1548         { ktime_get_mono_fast_ns,       "mono",         1 },
1549         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1550         { ktime_get_boot_fast_ns,       "boot",         1 },
1551         ARCH_TRACE_CLOCKS
1552 };
1553
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556         if (trace_clocks[tr->clock_id].in_ns)
1557                 return true;
1558
1559         return false;
1560 }
1561
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567         memset(parser, 0, sizeof(*parser));
1568
1569         parser->buffer = kmalloc(size, GFP_KERNEL);
1570         if (!parser->buffer)
1571                 return 1;
1572
1573         parser->size = size;
1574         return 0;
1575 }
1576
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582         kfree(parser->buffer);
1583         parser->buffer = NULL;
1584 }
1585
1586 /*
1587  * trace_get_user - reads the user input string separated by  space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598         size_t cnt, loff_t *ppos)
1599 {
1600         char ch;
1601         size_t read = 0;
1602         ssize_t ret;
1603
1604         if (!*ppos)
1605                 trace_parser_clear(parser);
1606
1607         ret = get_user(ch, ubuf++);
1608         if (ret)
1609                 goto out;
1610
1611         read++;
1612         cnt--;
1613
1614         /*
1615          * If the parser did not finish with the last write, continue reading
1616          * the user input without skipping spaces; otherwise skip whitespace first.
1617          */
1618         if (!parser->cont) {
1619                 /* skip white space */
1620                 while (cnt && isspace(ch)) {
1621                         ret = get_user(ch, ubuf++);
1622                         if (ret)
1623                                 goto out;
1624                         read++;
1625                         cnt--;
1626                 }
1627
1628                 parser->idx = 0;
1629
1630                 /* only spaces were written */
1631                 if (isspace(ch) || !ch) {
1632                         *ppos += read;
1633                         ret = read;
1634                         goto out;
1635                 }
1636         }
1637
1638         /* read the non-space input */
1639         while (cnt && !isspace(ch) && ch) {
1640                 if (parser->idx < parser->size - 1)
1641                         parser->buffer[parser->idx++] = ch;
1642                 else {
1643                         ret = -EINVAL;
1644                         goto out;
1645                 }
1646                 ret = get_user(ch, ubuf++);
1647                 if (ret)
1648                         goto out;
1649                 read++;
1650                 cnt--;
1651         }
1652
1653         /* We either got finished input or we have to wait for another call. */
1654         if (isspace(ch) || !ch) {
1655                 parser->buffer[parser->idx] = 0;
1656                 parser->cont = false;
1657         } else if (parser->idx < parser->size - 1) {
1658                 parser->cont = true;
1659                 parser->buffer[parser->idx++] = ch;
1660                 /* Make sure the parsed string always terminates with '\0'. */
1661                 parser->buffer[parser->idx] = 0;
1662         } else {
1663                 ret = -EINVAL;
1664                 goto out;
1665         }
1666
1667         *ppos += read;
1668         ret = read;
1669
1670 out:
1671         return ret;
1672 }
1673
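/*
 * Illustrative sketch of how the parser above is typically consumed by a
 * tracefs write() handler: one whitespace-separated token is pulled out of
 * the user buffer per call. example_write() and the pr_info() consumer are
 * hypothetical; trace_parser_get_init(), trace_get_user(),
 * trace_parser_loaded() and trace_parser_put() are the real interfaces.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t read;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		read = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (read > 0 && trace_parser_loaded(&parser))
 *			pr_info("parsed token: %s\n", parser.buffer);
 *
 *		trace_parser_put(&parser);
 *		return read;
 *	}
 */
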
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677         int len;
1678
1679         if (trace_seq_used(s) <= s->seq.readpos)
1680                 return -EBUSY;
1681
1682         len = trace_seq_used(s) - s->seq.readpos;
1683         if (cnt > len)
1684                 cnt = len;
1685         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687         s->seq.readpos += cnt;
1688         return cnt;
1689 }
1690
1691 unsigned long __read_mostly     tracing_thresh;
1692 static const struct file_operations tracing_max_lat_fops;
1693
1694 #ifdef LATENCY_FS_NOTIFY
1695
1696 static struct workqueue_struct *fsnotify_wq;
1697
1698 static void latency_fsnotify_workfn(struct work_struct *work)
1699 {
1700         struct trace_array *tr = container_of(work, struct trace_array,
1701                                               fsnotify_work);
1702         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1703 }
1704
1705 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1706 {
1707         struct trace_array *tr = container_of(iwork, struct trace_array,
1708                                               fsnotify_irqwork);
1709         queue_work(fsnotify_wq, &tr->fsnotify_work);
1710 }
1711
1712 static void trace_create_maxlat_file(struct trace_array *tr,
1713                                      struct dentry *d_tracer)
1714 {
1715         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1716         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1717         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1718                                               d_tracer, &tr->max_latency,
1719                                               &tracing_max_lat_fops);
1720 }
1721
1722 __init static int latency_fsnotify_init(void)
1723 {
1724         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1725                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1726         if (!fsnotify_wq) {
1727                 pr_err("Unable to allocate tr_max_lat_wq\n");
1728                 return -ENOMEM;
1729         }
1730         return 0;
1731 }
1732
1733 late_initcall_sync(latency_fsnotify_init);
1734
1735 void latency_fsnotify(struct trace_array *tr)
1736 {
1737         if (!fsnotify_wq)
1738                 return;
1739         /*
1740          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1741          * possible that we are called from __schedule() or do_idle(), which
1742          * could cause a deadlock.
1743          */
1744         irq_work_queue(&tr->fsnotify_irqwork);
1745 }
1746
1747 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1748         || defined(CONFIG_OSNOISE_TRACER)
1749
1750 #define trace_create_maxlat_file(tr, d_tracer)                          \
1751         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1752                           &tr->max_latency, &tracing_max_lat_fops)
1753
1754 #else
1755 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1756 #endif
1757
1758 #ifdef CONFIG_TRACER_MAX_TRACE
1759 /*
1760  * Copy the new maximum trace into the separate maximum-trace
1761  * structure. (This way the maximum trace is permanently saved
1762  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1763  */
1764 static void
1765 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1766 {
1767         struct array_buffer *trace_buf = &tr->array_buffer;
1768         struct array_buffer *max_buf = &tr->max_buffer;
1769         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1770         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1771
1772         max_buf->cpu = cpu;
1773         max_buf->time_start = data->preempt_timestamp;
1774
1775         max_data->saved_latency = tr->max_latency;
1776         max_data->critical_start = data->critical_start;
1777         max_data->critical_end = data->critical_end;
1778
1779         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1780         max_data->pid = tsk->pid;
1781         /*
1782          * If tsk == current, then use current_uid(), as that does not use
1783          * RCU. The irq tracer can be called out of RCU scope.
1784          */
1785         if (tsk == current)
1786                 max_data->uid = current_uid();
1787         else
1788                 max_data->uid = task_uid(tsk);
1789
1790         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1791         max_data->policy = tsk->policy;
1792         max_data->rt_priority = tsk->rt_priority;
1793
1794         /* record this task's comm */
1795         tracing_record_cmdline(tsk);
1796         latency_fsnotify(tr);
1797 }
1798
1799 /**
1800  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1801  * @tr: tracer
1802  * @tsk: the task with the latency
1803  * @cpu: The cpu that initiated the trace.
1804  * @cond_data: User data associated with a conditional snapshot
1805  *
1806  * Flip the buffers between the @tr and the max_tr and record information
1807  * about which task was the cause of this latency.
1808  */
1809 void
1810 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1811               void *cond_data)
1812 {
1813         if (tr->stop_count)
1814                 return;
1815
1816         WARN_ON_ONCE(!irqs_disabled());
1817
1818         if (!tr->allocated_snapshot) {
1819                 /* Only the nop tracer should hit this when disabling */
1820                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1821                 return;
1822         }
1823
1824         arch_spin_lock(&tr->max_lock);
1825
1826         /* Inherit the recordable setting from array_buffer */
1827         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1828                 ring_buffer_record_on(tr->max_buffer.buffer);
1829         else
1830                 ring_buffer_record_off(tr->max_buffer.buffer);
1831
1832 #ifdef CONFIG_TRACER_SNAPSHOT
1833         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1834                 goto out_unlock;
1835 #endif
1836         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1837
1838         __update_max_tr(tr, tsk, cpu);
1839
1840  out_unlock:
1841         arch_spin_unlock(&tr->max_lock);
1842 }
1843
1844 /**
1845  * update_max_tr_single - only copy one trace over, and reset the rest
1846  * @tr: tracer
1847  * @tsk: task with the latency
1848  * @cpu: the cpu of the buffer to copy.
1849  *
1850  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1851  */
1852 void
1853 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1854 {
1855         int ret;
1856
1857         if (tr->stop_count)
1858                 return;
1859
1860         WARN_ON_ONCE(!irqs_disabled());
1861         if (!tr->allocated_snapshot) {
1862                 /* Only the nop tracer should hit this when disabling */
1863                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1864                 return;
1865         }
1866
1867         arch_spin_lock(&tr->max_lock);
1868
1869         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1870
1871         if (ret == -EBUSY) {
1872                 /*
1873                  * We failed to swap the buffer due to a commit taking
1874                  * place on this CPU. We fail to record, but we reset
1875                  * the max trace buffer (no one writes directly to it)
1876                  * and flag that it failed.
1877                  */
1878                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1879                         "Failed to swap buffers due to commit in progress\n");
1880         }
1881
1882         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1883
1884         __update_max_tr(tr, tsk, cpu);
1885         arch_spin_unlock(&tr->max_lock);
1886 }
1887 #endif /* CONFIG_TRACER_MAX_TRACE */
1888
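/*
 * Rough sketch of how a latency-style tracer is expected to use the helpers
 * above: once it measures a latency larger than the current maximum, it
 * updates tr->max_latency and snapshots the relevant CPU buffer. The
 * check_new_max() helper and its 'delta' argument are hypothetical; note
 * that, as the WARN_ON_ONCE() above documents, the caller must already have
 * interrupts disabled.
 *
 *	static void check_new_max(struct trace_array *tr, unsigned long delta,
 *				  int cpu)
 *	{
 *		if (delta <= tr->max_latency)
 *			return;
 *
 *		tr->max_latency = delta;
 *		update_max_tr_single(tr, current, cpu);
 *	}
 */
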
1889 static int wait_on_pipe(struct trace_iterator *iter, int full)
1890 {
1891         /* Iterators are static; they should be filled or empty */
1892         if (trace_buffer_iter(iter, iter->cpu_file))
1893                 return 0;
1894
1895         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1896                                 full);
1897 }
1898
1899 #ifdef CONFIG_FTRACE_STARTUP_TEST
1900 static bool selftests_can_run;
1901
1902 struct trace_selftests {
1903         struct list_head                list;
1904         struct tracer                   *type;
1905 };
1906
1907 static LIST_HEAD(postponed_selftests);
1908
1909 static int save_selftest(struct tracer *type)
1910 {
1911         struct trace_selftests *selftest;
1912
1913         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1914         if (!selftest)
1915                 return -ENOMEM;
1916
1917         selftest->type = type;
1918         list_add(&selftest->list, &postponed_selftests);
1919         return 0;
1920 }
1921
1922 static int run_tracer_selftest(struct tracer *type)
1923 {
1924         struct trace_array *tr = &global_trace;
1925         struct tracer *saved_tracer = tr->current_trace;
1926         int ret;
1927
1928         if (!type->selftest || tracing_selftest_disabled)
1929                 return 0;
1930
1931         /*
1932          * If a tracer registers early in boot up (before scheduling is
1933          * initialized and such), then do not run its selftests yet.
1934          * Instead, run it a little later in the boot process.
1935          */
1936         if (!selftests_can_run)
1937                 return save_selftest(type);
1938
1939         if (!tracing_is_on()) {
1940                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1941                         type->name);
1942                 return 0;
1943         }
1944
1945         /*
1946          * Run a selftest on this tracer.
1947          * Here we reset the trace buffer, and set the current
1948          * tracer to be this tracer. The tracer can then run some
1949          * internal tracing to verify that everything is in order.
1950          * If we fail, we do not register this tracer.
1951          */
1952         tracing_reset_online_cpus(&tr->array_buffer);
1953
1954         tr->current_trace = type;
1955
1956 #ifdef CONFIG_TRACER_MAX_TRACE
1957         if (type->use_max_tr) {
1958                 /* If we expanded the buffers, make sure the max is expanded too */
1959                 if (ring_buffer_expanded)
1960                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1961                                            RING_BUFFER_ALL_CPUS);
1962                 tr->allocated_snapshot = true;
1963         }
1964 #endif
1965
1966         /* the test is responsible for initializing and enabling */
1967         pr_info("Testing tracer %s: ", type->name);
1968         ret = type->selftest(type, tr);
1969         /* the test is responsible for resetting too */
1970         tr->current_trace = saved_tracer;
1971         if (ret) {
1972                 printk(KERN_CONT "FAILED!\n");
1973                 /* Add the warning after printing 'FAILED' */
1974                 WARN_ON(1);
1975                 return -1;
1976         }
1977         /* Only reset on passing, to avoid touching corrupted buffers */
1978         tracing_reset_online_cpus(&tr->array_buffer);
1979
1980 #ifdef CONFIG_TRACER_MAX_TRACE
1981         if (type->use_max_tr) {
1982                 tr->allocated_snapshot = false;
1983
1984                 /* Shrink the max buffer again */
1985                 if (ring_buffer_expanded)
1986                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1987                                            RING_BUFFER_ALL_CPUS);
1988         }
1989 #endif
1990
1991         printk(KERN_CONT "PASSED\n");
1992         return 0;
1993 }
1994
1995 static __init int init_trace_selftests(void)
1996 {
1997         struct trace_selftests *p, *n;
1998         struct tracer *t, **last;
1999         int ret;
2000
2001         selftests_can_run = true;
2002
2003         mutex_lock(&trace_types_lock);
2004
2005         if (list_empty(&postponed_selftests))
2006                 goto out;
2007
2008         pr_info("Running postponed tracer tests:\n");
2009
2010         tracing_selftest_running = true;
2011         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2012                 /* This loop can take minutes when sanitizers are enabled, so
2013                  * let's make sure we allow RCU processing.
2014                  */
2015                 cond_resched();
2016                 ret = run_tracer_selftest(p->type);
2017                 /* If the test fails, then warn and remove from available_tracers */
2018                 if (ret < 0) {
2019                         WARN(1, "tracer: %s failed selftest, disabling\n",
2020                              p->type->name);
2021                         last = &trace_types;
2022                         for (t = trace_types; t; t = t->next) {
2023                                 if (t == p->type) {
2024                                         *last = t->next;
2025                                         break;
2026                                 }
2027                                 last = &t->next;
2028                         }
2029                 }
2030                 list_del(&p->list);
2031                 kfree(p);
2032         }
2033         tracing_selftest_running = false;
2034
2035  out:
2036         mutex_unlock(&trace_types_lock);
2037
2038         return 0;
2039 }
2040 core_initcall(init_trace_selftests);
2041 #else
2042 static inline int run_tracer_selftest(struct tracer *type)
2043 {
2044         return 0;
2045 }
2046 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2047
2048 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2049
2050 static void __init apply_trace_boot_options(void);
2051
2052 /**
2053  * register_tracer - register a tracer with the ftrace system.
2054  * @type: the plugin for the tracer
2055  *
2056  * Register a new plugin tracer.
2057  */
2058 int __init register_tracer(struct tracer *type)
2059 {
2060         struct tracer *t;
2061         int ret = 0;
2062
2063         if (!type->name) {
2064                 pr_info("Tracer must have a name\n");
2065                 return -1;
2066         }
2067
2068         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2069                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2070                 return -1;
2071         }
2072
2073         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2074                 pr_warn("Can not register tracer %s due to lockdown\n",
2075                            type->name);
2076                 return -EPERM;
2077         }
2078
2079         mutex_lock(&trace_types_lock);
2080
2081         tracing_selftest_running = true;
2082
2083         for (t = trace_types; t; t = t->next) {
2084                 if (strcmp(type->name, t->name) == 0) {
2085                         /* already found */
2086                         pr_info("Tracer %s already registered\n",
2087                                 type->name);
2088                         ret = -1;
2089                         goto out;
2090                 }
2091         }
2092
2093         if (!type->set_flag)
2094                 type->set_flag = &dummy_set_flag;
2095         if (!type->flags) {
2096                 /* allocate a dummy tracer_flags */
2097                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2098                 if (!type->flags) {
2099                         ret = -ENOMEM;
2100                         goto out;
2101                 }
2102                 type->flags->val = 0;
2103                 type->flags->opts = dummy_tracer_opt;
2104         } else
2105                 if (!type->flags->opts)
2106                         type->flags->opts = dummy_tracer_opt;
2107
2108         /* store the tracer for __set_tracer_option */
2109         type->flags->trace = type;
2110
2111         ret = run_tracer_selftest(type);
2112         if (ret < 0)
2113                 goto out;
2114
2115         type->next = trace_types;
2116         trace_types = type;
2117         add_tracer_options(&global_trace, type);
2118
2119  out:
2120         tracing_selftest_running = false;
2121         mutex_unlock(&trace_types_lock);
2122
2123         if (ret || !default_bootup_tracer)
2124                 goto out_unlock;
2125
2126         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2127                 goto out_unlock;
2128
2129         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2130         /* Do we want this tracer to start on bootup? */
2131         tracing_set_tracer(&global_trace, type->name);
2132         default_bootup_tracer = NULL;
2133
2134         apply_trace_boot_options();
2135
2136         /* disable other selftests, since they would interfere with the now-running tracer. */
2137         disable_tracing_selftest("running a tracer");
2138
2139  out_unlock:
2140         return ret;
2141 }
2142
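/*
 * Illustrative sketch of the registration flow above: a minimal tracer only
 * needs a name plus init/reset callbacks and registers itself from an __init
 * function. The "example" tracer and its callbacks are hypothetical; the
 * struct tracer fields and the register_tracer() call are the real API.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */
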
2143 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2144 {
2145         struct trace_buffer *buffer = buf->buffer;
2146
2147         if (!buffer)
2148                 return;
2149
2150         ring_buffer_record_disable(buffer);
2151
2152         /* Make sure all commits have finished */
2153         synchronize_rcu();
2154         ring_buffer_reset_cpu(buffer, cpu);
2155
2156         ring_buffer_record_enable(buffer);
2157 }
2158
2159 void tracing_reset_online_cpus(struct array_buffer *buf)
2160 {
2161         struct trace_buffer *buffer = buf->buffer;
2162
2163         if (!buffer)
2164                 return;
2165
2166         ring_buffer_record_disable(buffer);
2167
2168         /* Make sure all commits have finished */
2169         synchronize_rcu();
2170
2171         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2172
2173         ring_buffer_reset_online_cpus(buffer);
2174
2175         ring_buffer_record_enable(buffer);
2176 }
2177
2178 /* Must have trace_types_lock held */
2179 void tracing_reset_all_online_cpus(void)
2180 {
2181         struct trace_array *tr;
2182
2183         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2184                 if (!tr->clear_trace)
2185                         continue;
2186                 tr->clear_trace = false;
2187                 tracing_reset_online_cpus(&tr->array_buffer);
2188 #ifdef CONFIG_TRACER_MAX_TRACE
2189                 tracing_reset_online_cpus(&tr->max_buffer);
2190 #endif
2191         }
2192 }
2193
2194 /*
2195  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2196  * is the tgid last observed corresponding to pid=i.
2197  */
2198 static int *tgid_map;
2199
2200 /* The maximum valid index into tgid_map. */
2201 static size_t tgid_map_max;
2202
2203 #define SAVED_CMDLINES_DEFAULT 128
2204 #define NO_CMDLINE_MAP UINT_MAX
2205 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2206 struct saved_cmdlines_buffer {
2207         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2208         unsigned *map_cmdline_to_pid;
2209         unsigned cmdline_num;
2210         int cmdline_idx;
2211         char *saved_cmdlines;
2212 };
2213 static struct saved_cmdlines_buffer *savedcmd;
2214
2215 static inline char *get_saved_cmdlines(int idx)
2216 {
2217         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2218 }
2219
2220 static inline void set_cmdline(int idx, const char *cmdline)
2221 {
2222         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2223 }
2224
2225 static int allocate_cmdlines_buffer(unsigned int val,
2226                                     struct saved_cmdlines_buffer *s)
2227 {
2228         s->map_cmdline_to_pid = kmalloc_array(val,
2229                                               sizeof(*s->map_cmdline_to_pid),
2230                                               GFP_KERNEL);
2231         if (!s->map_cmdline_to_pid)
2232                 return -ENOMEM;
2233
2234         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2235         if (!s->saved_cmdlines) {
2236                 kfree(s->map_cmdline_to_pid);
2237                 return -ENOMEM;
2238         }
2239
2240         s->cmdline_idx = 0;
2241         s->cmdline_num = val;
2242         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2243                sizeof(s->map_pid_to_cmdline));
2244         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2245                val * sizeof(*s->map_cmdline_to_pid));
2246
2247         return 0;
2248 }
2249
2250 static int trace_create_savedcmd(void)
2251 {
2252         int ret;
2253
2254         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2255         if (!savedcmd)
2256                 return -ENOMEM;
2257
2258         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2259         if (ret < 0) {
2260                 kfree(savedcmd);
2261                 savedcmd = NULL;
2262                 return -ENOMEM;
2263         }
2264
2265         return 0;
2266 }
2267
2268 int is_tracing_stopped(void)
2269 {
2270         return global_trace.stop_count;
2271 }
2272
2273 /**
2274  * tracing_start - quick start of the tracer
2275  *
2276  * If tracing is enabled but was stopped by tracing_stop,
2277  * this will start the tracer back up.
2278  */
2279 void tracing_start(void)
2280 {
2281         struct trace_buffer *buffer;
2282         unsigned long flags;
2283
2284         if (tracing_disabled)
2285                 return;
2286
2287         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2288         if (--global_trace.stop_count) {
2289                 if (global_trace.stop_count < 0) {
2290                         /* Someone screwed up their debugging */
2291                         WARN_ON_ONCE(1);
2292                         global_trace.stop_count = 0;
2293                 }
2294                 goto out;
2295         }
2296
2297         /* Prevent the buffers from switching */
2298         arch_spin_lock(&global_trace.max_lock);
2299
2300         buffer = global_trace.array_buffer.buffer;
2301         if (buffer)
2302                 ring_buffer_record_enable(buffer);
2303
2304 #ifdef CONFIG_TRACER_MAX_TRACE
2305         buffer = global_trace.max_buffer.buffer;
2306         if (buffer)
2307                 ring_buffer_record_enable(buffer);
2308 #endif
2309
2310         arch_spin_unlock(&global_trace.max_lock);
2311
2312  out:
2313         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2314 }
2315
2316 static void tracing_start_tr(struct trace_array *tr)
2317 {
2318         struct trace_buffer *buffer;
2319         unsigned long flags;
2320
2321         if (tracing_disabled)
2322                 return;
2323
2324         /* If global, we need to also start the max tracer */
2325         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2326                 return tracing_start();
2327
2328         raw_spin_lock_irqsave(&tr->start_lock, flags);
2329
2330         if (--tr->stop_count) {
2331                 if (tr->stop_count < 0) {
2332                         /* Someone screwed up their debugging */
2333                         WARN_ON_ONCE(1);
2334                         tr->stop_count = 0;
2335                 }
2336                 goto out;
2337         }
2338
2339         buffer = tr->array_buffer.buffer;
2340         if (buffer)
2341                 ring_buffer_record_enable(buffer);
2342
2343  out:
2344         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2345 }
2346
2347 /**
2348  * tracing_stop - quick stop of the tracer
2349  *
2350  * Lightweight way to stop tracing. Use in conjunction with
2351  * tracing_start.
2352  */
2353 void tracing_stop(void)
2354 {
2355         struct trace_buffer *buffer;
2356         unsigned long flags;
2357
2358         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2359         if (global_trace.stop_count++)
2360                 goto out;
2361
2362         /* Prevent the buffers from switching */
2363         arch_spin_lock(&global_trace.max_lock);
2364
2365         buffer = global_trace.array_buffer.buffer;
2366         if (buffer)
2367                 ring_buffer_record_disable(buffer);
2368
2369 #ifdef CONFIG_TRACER_MAX_TRACE
2370         buffer = global_trace.max_buffer.buffer;
2371         if (buffer)
2372                 ring_buffer_record_disable(buffer);
2373 #endif
2374
2375         arch_spin_unlock(&global_trace.max_lock);
2376
2377  out:
2378         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2379 }
2380
2381 static void tracing_stop_tr(struct trace_array *tr)
2382 {
2383         struct trace_buffer *buffer;
2384         unsigned long flags;
2385
2386         /* If global, we need to also stop the max tracer */
2387         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2388                 return tracing_stop();
2389
2390         raw_spin_lock_irqsave(&tr->start_lock, flags);
2391         if (tr->stop_count++)
2392                 goto out;
2393
2394         buffer = tr->array_buffer.buffer;
2395         if (buffer)
2396                 ring_buffer_record_disable(buffer);
2397
2398  out:
2399         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2400 }
2401
2402 static int trace_save_cmdline(struct task_struct *tsk)
2403 {
2404         unsigned tpid, idx;
2405
2406         /* treat recording of idle task as a success */
2407         if (!tsk->pid)
2408                 return 1;
2409
2410         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2411
2412         /*
2413          * It's not the end of the world if we don't get
2414          * the lock, but we also don't want to spin
2415          * nor do we want to disable interrupts,
2416          * so if we miss here, then better luck next time.
2417          */
2418         if (!arch_spin_trylock(&trace_cmdline_lock))
2419                 return 0;
2420
2421         idx = savedcmd->map_pid_to_cmdline[tpid];
2422         if (idx == NO_CMDLINE_MAP) {
2423                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2424
2425                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2426                 savedcmd->cmdline_idx = idx;
2427         }
2428
2429         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2430         set_cmdline(idx, tsk->comm);
2431
2432         arch_spin_unlock(&trace_cmdline_lock);
2433
2434         return 1;
2435 }
2436
2437 static void __trace_find_cmdline(int pid, char comm[])
2438 {
2439         unsigned map;
2440         int tpid;
2441
2442         if (!pid) {
2443                 strcpy(comm, "<idle>");
2444                 return;
2445         }
2446
2447         if (WARN_ON_ONCE(pid < 0)) {
2448                 strcpy(comm, "<XXX>");
2449                 return;
2450         }
2451
2452         tpid = pid & (PID_MAX_DEFAULT - 1);
2453         map = savedcmd->map_pid_to_cmdline[tpid];
2454         if (map != NO_CMDLINE_MAP) {
2455                 tpid = savedcmd->map_cmdline_to_pid[map];
2456                 if (tpid == pid) {
2457                         strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2458                         return;
2459                 }
2460         }
2461         strcpy(comm, "<...>");
2462 }
2463
2464 void trace_find_cmdline(int pid, char comm[])
2465 {
2466         preempt_disable();
2467         arch_spin_lock(&trace_cmdline_lock);
2468
2469         __trace_find_cmdline(pid, comm);
2470
2471         arch_spin_unlock(&trace_cmdline_lock);
2472         preempt_enable();
2473 }
2474
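/*
 * Example of the intended use of trace_find_cmdline(): the output path
 * resolves a recorded pid back to a task comm and prints both. The 'entry'
 * and 's' variables below stand in for a trace entry and its trace_seq and
 * are illustrative only.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid);
 */
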
2475 static int *trace_find_tgid_ptr(int pid)
2476 {
2477         /*
2478          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2479          * if we observe a non-NULL tgid_map then we also observe the correct
2480          * tgid_map_max.
2481          */
2482         int *map = smp_load_acquire(&tgid_map);
2483
2484         if (unlikely(!map || pid > tgid_map_max))
2485                 return NULL;
2486
2487         return &map[pid];
2488 }
2489
2490 int trace_find_tgid(int pid)
2491 {
2492         int *ptr = trace_find_tgid_ptr(pid);
2493
2494         return ptr ? *ptr : 0;
2495 }
2496
2497 static int trace_save_tgid(struct task_struct *tsk)
2498 {
2499         int *ptr;
2500
2501         /* treat recording of idle task as a success */
2502         if (!tsk->pid)
2503                 return 1;
2504
2505         ptr = trace_find_tgid_ptr(tsk->pid);
2506         if (!ptr)
2507                 return 0;
2508
2509         *ptr = tsk->tgid;
2510         return 1;
2511 }
2512
2513 static bool tracing_record_taskinfo_skip(int flags)
2514 {
2515         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2516                 return true;
2517         if (!__this_cpu_read(trace_taskinfo_save))
2518                 return true;
2519         return false;
2520 }
2521
2522 /**
2523  * tracing_record_taskinfo - record the task info of a task
2524  *
2525  * @task:  task to record
2526  * @flags: TRACE_RECORD_CMDLINE for recording comm
2527  *         TRACE_RECORD_TGID for recording tgid
2528  */
2529 void tracing_record_taskinfo(struct task_struct *task, int flags)
2530 {
2531         bool done;
2532
2533         if (tracing_record_taskinfo_skip(flags))
2534                 return;
2535
2536         /*
2537          * Record as much task information as possible. If some fail, continue
2538          * to try to record the others.
2539          */
2540         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2541         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2542
2543         /* If recording any information failed, retry again soon. */
2544         if (!done)
2545                 return;
2546
2547         __this_cpu_write(trace_taskinfo_save, false);
2548 }
2549
2550 /**
2551  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2552  *
2553  * @prev: previous task during sched_switch
2554  * @next: next task during sched_switch
2555  * @flags: TRACE_RECORD_CMDLINE for recording comm
2556  *         TRACE_RECORD_TGID for recording tgid
2557  */
2558 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2559                                           struct task_struct *next, int flags)
2560 {
2561         bool done;
2562
2563         if (tracing_record_taskinfo_skip(flags))
2564                 return;
2565
2566         /*
2567          * Record as much task information as possible. If some fail, continue
2568          * to try to record the others.
2569          */
2570         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2571         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2572         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2573         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2574
2575         /* If recording any information failed, retry again soon. */
2576         if (!done)
2577                 return;
2578
2579         __this_cpu_write(trace_taskinfo_save, false);
2580 }
2581
2582 /* Helpers to record a specific task information */
2583 void tracing_record_cmdline(struct task_struct *task)
2584 {
2585         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2586 }
2587
2588 void tracing_record_tgid(struct task_struct *task)
2589 {
2590         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2591 }
2592
2593 /*
2594  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2595  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2596  * simplifies those functions and keeps them in sync.
2597  */
2598 enum print_line_t trace_handle_return(struct trace_seq *s)
2599 {
2600         return trace_seq_has_overflowed(s) ?
2601                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2602 }
2603 EXPORT_SYMBOL_GPL(trace_handle_return);
2604
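/*
 * Example of the pattern trace_handle_return() exists for: an event's output
 * callback writes into the iterator's trace_seq and reports overflow through
 * this helper. trace_example_print() is hypothetical; the signature matches
 * the trace_event output callbacks that use this idiom.
 *
 *	static enum print_line_t trace_example_print(struct trace_iterator *iter,
 *						     int flags,
 *						     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event from pid %d\n",
 *				 iter->ent->pid);
 *		return trace_handle_return(&iter->seq);
 *	}
 */
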
2605 static unsigned short migration_disable_value(void)
2606 {
2607 #if defined(CONFIG_SMP)
2608         return current->migration_disabled;
2609 #else
2610         return 0;
2611 #endif
2612 }
2613
2614 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2615 {
2616         unsigned int trace_flags = irqs_status;
2617         unsigned int pc;
2618
2619         pc = preempt_count();
2620
2621         if (pc & NMI_MASK)
2622                 trace_flags |= TRACE_FLAG_NMI;
2623         if (pc & HARDIRQ_MASK)
2624                 trace_flags |= TRACE_FLAG_HARDIRQ;
2625         if (in_serving_softirq())
2626                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2627
2628         if (tif_need_resched())
2629                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630         if (test_preempt_need_resched())
2631                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
2635
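/*
 * Layout of the trace_ctx word built above, as implied by the shifts in
 * tracing_gen_ctx_irq_test() (the field names are descriptive only):
 *
 *	bits  0- 3: preempt_count() depth, clamped to 15
 *	bits  4- 7: migration-disable depth, clamped to 15
 *	bits  8-15: unused
 *	bits 16+  : TRACE_FLAG_* context bits, including the irqs_status
 *		    flags passed in by the caller
 */
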
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638                           int type,
2639                           unsigned long len,
2640                           unsigned int trace_ctx)
2641 {
2642         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to use a temporary
2653  * buffer to write the event data into if there's a likely chance
2654  * that it will not be committed. Discarding an event from the ring
2655  * buffer is not as fast as committing one, and is much slower than
2656  * copying the data and then committing it.
2657  *
2658  * When an event is to be filtered, allocate per-CPU buffers to
2659  * write the event data into. If the event is filtered and discarded,
2660  * it is simply dropped; otherwise, the entire data is committed
2661  * in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665         struct ring_buffer_event *event;
2666         struct page *page;
2667         int cpu;
2668
2669         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670
2671         if (trace_buffered_event_ref++)
2672                 return;
2673
2674         for_each_tracing_cpu(cpu) {
2675                 page = alloc_pages_node(cpu_to_node(cpu),
2676                                         GFP_KERNEL | __GFP_NORETRY, 0);
2677                 if (!page)
2678                         goto failed;
2679
2680                 event = page_address(page);
2681                 memset(event, 0, sizeof(*event));
2682
2683                 per_cpu(trace_buffered_event, cpu) = event;
2684
2685                 preempt_disable();
2686                 if (cpu == smp_processor_id() &&
2687                     __this_cpu_read(trace_buffered_event) !=
2688                     per_cpu(trace_buffered_event, cpu))
2689                         WARN_ON_ONCE(1);
2690                 preempt_enable();
2691         }
2692
2693         return;
2694  failed:
2695         trace_buffered_event_disable();
2696 }
2697
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700         /* Probably not needed, but do it anyway */
2701         smp_rmb();
2702         this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707         this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720         int cpu;
2721
2722         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723
2724         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725                 return;
2726
2727         if (--trace_buffered_event_ref)
2728                 return;
2729
2730         preempt_disable();
2731         /* For each CPU, set the buffer as used. */
2732         smp_call_function_many(tracing_buffer_mask,
2733                                disable_trace_buffered_event, NULL, 1);
2734         preempt_enable();
2735
2736         /* Wait for all current users to finish */
2737         synchronize_rcu();
2738
2739         for_each_tracing_cpu(cpu) {
2740                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741                 per_cpu(trace_buffered_event, cpu) = NULL;
2742         }
2743         /*
2744          * Make sure trace_buffered_event is NULL before clearing
2745          * trace_buffered_event_cnt.
2746          */
2747         smp_wmb();
2748
2749         preempt_disable();
2750         /* Do the work on each cpu */
2751         smp_call_function_many(tracing_buffer_mask,
2752                                enable_trace_buffered_event, NULL, 1);
2753         preempt_enable();
2754 }
2755
2756 static struct trace_buffer *temp_buffer;
2757
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760                           struct trace_event_file *trace_file,
2761                           int type, unsigned long len,
2762                           unsigned int trace_ctx)
2763 {
2764         struct ring_buffer_event *entry;
2765         struct trace_array *tr = trace_file->tr;
2766         int val;
2767
2768         *current_rb = tr->array_buffer.buffer;
2769
2770         if (!tr->no_filter_buffering_ref &&
2771             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2772             (entry = this_cpu_read(trace_buffered_event))) {
2773                 /*
2774                  * Filtering is on, so try to use the per cpu buffer first.
2775                  * This buffer will simulate a ring_buffer_event,
2776                  * where the type_len is zero and the array[0] will
2777                  * hold the full length.
2778                  * (see include/linux/ring_buffer.h for details on
2779                  *  how the ring_buffer_event is structured).
2780                  *
2781                  * Using a temp buffer during filtering and copying it
2782                  * on a matched filter is quicker than writing directly
2783                  * into the ring buffer and then discarding it when
2784                  * it doesn't match. That is because the discard
2785                  * requires several atomic operations to get right.
2786                  * Copying on match and doing nothing on a failed match
2787                  * is still quicker than no copy on match, but having
2788                  * to discard out of the ring buffer on a failed match.
2789                  */
2790                 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2791
2792                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2793
2794                 /*
2795                  * Preemption is disabled, but interrupts and NMIs
2796                  * can still come in now. If that happens after
2797                  * the above increment, then it will have to go
2798                  * back to the old method of allocating the event
2799                  * on the ring buffer, and if the filter fails, it
2800                  * will have to call ring_buffer_discard_commit()
2801                  * to remove it.
2802                  *
2803                  * Need to also check the unlikely case that the
2804                  * length is bigger than the temp buffer size.
2805                  * If that happens, then the reserve is pretty much
2806                  * guaranteed to fail, as the ring buffer currently
2807                  * only allows events less than a page. But that may
2808                  * change in the future, so let the ring buffer reserve
2809                  * handle the failure in that case.
2810                  */
2811                 if (val == 1 && likely(len <= max_len)) {
2812                         trace_event_setup(entry, type, trace_ctx);
2813                         entry->array[0] = len;
2814                         return entry;
2815                 }
2816                 this_cpu_dec(trace_buffered_event_cnt);
2817         }
2818
2819         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2820                                             trace_ctx);
2821         /*
2822          * If tracing is off, but we have triggers enabled,
2823          * we still need to look at the event data. Use the temp_buffer
2824          * to store the trace event for the trigger to use. It's recursion
2825          * safe and will not be recorded anywhere.
2826          */
2827         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2828                 *current_rb = temp_buffer;
2829                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2830                                                     trace_ctx);
2831         }
2832         return entry;
2833 }
2834 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2835
2836 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2837 static DEFINE_MUTEX(tracepoint_printk_mutex);
2838
2839 static void output_printk(struct trace_event_buffer *fbuffer)
2840 {
2841         struct trace_event_call *event_call;
2842         struct trace_event_file *file;
2843         struct trace_event *event;
2844         unsigned long flags;
2845         struct trace_iterator *iter = tracepoint_print_iter;
2846
2847         /* We should never get here if iter is NULL */
2848         if (WARN_ON_ONCE(!iter))
2849                 return;
2850
2851         event_call = fbuffer->trace_file->event_call;
2852         if (!event_call || !event_call->event.funcs ||
2853             !event_call->event.funcs->trace)
2854                 return;
2855
2856         file = fbuffer->trace_file;
2857         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2858             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2859              !filter_match_preds(file->filter, fbuffer->entry)))
2860                 return;
2861
2862         event = &fbuffer->trace_file->event_call->event;
2863
2864         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2865         trace_seq_init(&iter->seq);
2866         iter->ent = fbuffer->entry;
2867         event_call->event.funcs->trace(iter, 0, event);
2868         trace_seq_putc(&iter->seq, 0);
2869         printk("%s", iter->seq.buffer);
2870
2871         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2872 }
2873
2874 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2875                              void *buffer, size_t *lenp,
2876                              loff_t *ppos)
2877 {
2878         int save_tracepoint_printk;
2879         int ret;
2880
2881         mutex_lock(&tracepoint_printk_mutex);
2882         save_tracepoint_printk = tracepoint_printk;
2883
2884         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2885
2886         /*
2887          * This will force exiting early, as tracepoint_printk
2888          * is always zero when tracepoint_print_iter is not allocated.
2889          */
2890         if (!tracepoint_print_iter)
2891                 tracepoint_printk = 0;
2892
2893         if (save_tracepoint_printk == tracepoint_printk)
2894                 goto out;
2895
2896         if (tracepoint_printk)
2897                 static_key_enable(&tracepoint_printk_key.key);
2898         else
2899                 static_key_disable(&tracepoint_printk_key.key);
2900
2901  out:
2902         mutex_unlock(&tracepoint_printk_mutex);
2903
2904         return ret;
2905 }
2906
2907 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2908 {
2909         enum event_trigger_type tt = ETT_NONE;
2910         struct trace_event_file *file = fbuffer->trace_file;
2911
2912         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2913                         fbuffer->entry, &tt))
2914                 goto discard;
2915
2916         if (static_key_false(&tracepoint_printk_key.key))
2917                 output_printk(fbuffer);
2918
2919         if (static_branch_unlikely(&trace_event_exports_enabled))
2920                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2921
2922         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2923                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2924
2925 discard:
2926         if (tt)
2927                 event_triggers_post_call(file, tt);
2928
2929 }
2930 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2931
2932 /*
2933  * Skip 3:
2934  *
2935  *   trace_buffer_unlock_commit_regs()
2936  *   trace_event_buffer_commit()
2937  *   trace_event_raw_event_xxx()
2938  */
2939 # define STACK_SKIP 3
2940
2941 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2942                                      struct trace_buffer *buffer,
2943                                      struct ring_buffer_event *event,
2944                                      unsigned int trace_ctx,
2945                                      struct pt_regs *regs)
2946 {
2947         __buffer_unlock_commit(buffer, event);
2948
2949         /*
2950          * If regs is not set, then skip the necessary functions.
2951          * Note, we can still get here via blktrace, wakeup tracer
2952          * and mmiotrace, but that's ok if they lose a function or
2953          * two. They are not that meaningful.
2954          */
2955         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2956         ftrace_trace_userstack(tr, buffer, trace_ctx);
2957 }
2958
2959 /*
2960  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2961  */
2962 void
2963 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2964                                    struct ring_buffer_event *event)
2965 {
2966         __buffer_unlock_commit(buffer, event);
2967 }
2968
2969 void
2970 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2971                parent_ip, unsigned int trace_ctx)
2972 {
2973         struct trace_event_call *call = &event_function;
2974         struct trace_buffer *buffer = tr->array_buffer.buffer;
2975         struct ring_buffer_event *event;
2976         struct ftrace_entry *entry;
2977
2978         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2979                                             trace_ctx);
2980         if (!event)
2981                 return;
2982         entry   = ring_buffer_event_data(event);
2983         entry->ip                       = ip;
2984         entry->parent_ip                = parent_ip;
2985
2986         if (!call_filter_check_discard(call, entry, buffer, event)) {
2987                 if (static_branch_unlikely(&trace_function_exports_enabled))
2988                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2989                 __buffer_unlock_commit(buffer, event);
2990         }
2991 }
2992
2993 #ifdef CONFIG_STACKTRACE
2994
2995 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2996 #define FTRACE_KSTACK_NESTING   4
2997
2998 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2999
3000 struct ftrace_stack {
3001         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3002 };
3003
3004
3005 struct ftrace_stacks {
3006         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3007 };
3008
3009 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3010 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3011
3012 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3013                                  unsigned int trace_ctx,
3014                                  int skip, struct pt_regs *regs)
3015 {
3016         struct trace_event_call *call = &event_kernel_stack;
3017         struct ring_buffer_event *event;
3018         unsigned int size, nr_entries;
3019         struct ftrace_stack *fstack;
3020         struct stack_entry *entry;
3021         int stackidx;
3022
3023         /*
3024          * Add one, for this function and the call to stack_trace_save().
3025          * If regs is set, then these functions will not be in the way.
3026          */
3027 #ifndef CONFIG_UNWINDER_ORC
3028         if (!regs)
3029                 skip++;
3030 #endif
3031
3032         preempt_disable_notrace();
3033
3034         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3035
3036         /* This should never happen. If it does, yell once and skip */
3037         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3038                 goto out;
3039
3040         /*
3041          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3042          * interrupt will either see the value pre increment or post
3043          * increment. If the interrupt happens pre increment it will have
3044          * restored the counter when it returns.  We just need a barrier to
3045          * keep gcc from moving things around.
3046          */
3047         barrier();
3048
3049         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3050         size = ARRAY_SIZE(fstack->calls);
3051
3052         if (regs) {
3053                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3054                                                    size, skip);
3055         } else {
3056                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3057         }
3058
3059         size = nr_entries * sizeof(unsigned long);
3060         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3061                                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3062                                     trace_ctx);
3063         if (!event)
3064                 goto out;
3065         entry = ring_buffer_event_data(event);
3066
3067         memcpy(&entry->caller, fstack->calls, size);
3068         entry->size = nr_entries;
3069
3070         if (!call_filter_check_discard(call, entry, buffer, event))
3071                 __buffer_unlock_commit(buffer, event);
3072
3073  out:
3074         /* Again, don't let gcc optimize things here */
3075         barrier();
3076         __this_cpu_dec(ftrace_stack_reserve);
3077         preempt_enable_notrace();
3078
3079 }
3080
3081 static inline void ftrace_trace_stack(struct trace_array *tr,
3082                                       struct trace_buffer *buffer,
3083                                       unsigned int trace_ctx,
3084                                       int skip, struct pt_regs *regs)
3085 {
3086         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3087                 return;
3088
3089         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3090 }
3091
3092 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3093                    int skip)
3094 {
3095         struct trace_buffer *buffer = tr->array_buffer.buffer;
3096
3097         if (rcu_is_watching()) {
3098                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3099                 return;
3100         }
3101
3102         /*
3103          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3104          * but if the above rcu_is_watching() failed, then the NMI
3105          * triggered someplace critical, and rcu_irq_enter() should
3106          * not be called from NMI.
3107          */
3108         if (unlikely(in_nmi()))
3109                 return;
3110
3111         rcu_irq_enter_irqson();
3112         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3113         rcu_irq_exit_irqson();
3114 }
3115
3116 /**
3117  * trace_dump_stack - record a stack back trace in the trace buffer
3118  * @skip: Number of functions to skip (helper handlers)
3119  */
3120 void trace_dump_stack(int skip)
3121 {
3122         if (tracing_disabled || tracing_selftest_running)
3123                 return;
3124
3125 #ifndef CONFIG_UNWINDER_ORC
3126         /* Skip 1 to skip this function. */
3127         skip++;
3128 #endif
3129         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3130                              tracing_gen_ctx(), skip, NULL);
3131 }
3132 EXPORT_SYMBOL_GPL(trace_dump_stack);
3133
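/*
 * Example: trace_dump_stack() is typically dropped into code being debugged
 * to record how a path was reached; the condition shown is hypothetical.
 *
 *	if (suspicious_condition)
 *		trace_dump_stack(0);
 *
 * The resulting stack-trace entry appears inline in the trace output,
 * honoring the @skip argument documented above.
 */
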
3134 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3135 static DEFINE_PER_CPU(int, user_stack_count);
3136
3137 static void
3138 ftrace_trace_userstack(struct trace_array *tr,
3139                        struct trace_buffer *buffer, unsigned int trace_ctx)
3140 {
3141         struct trace_event_call *call = &event_user_stack;
3142         struct ring_buffer_event *event;
3143         struct userstack_entry *entry;
3144
3145         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3146                 return;
3147
3148         /*
3149          * NMIs cannot handle page faults, even with fixups.
3150          * Saving the user stack can (and often does) fault.
3151          */
3152         if (unlikely(in_nmi()))
3153                 return;
3154
3155         /*
3156          * prevent recursion, since the user stack tracing may
3157          * trigger other kernel events.
3158          */
3159         preempt_disable();
3160         if (__this_cpu_read(user_stack_count))
3161                 goto out;
3162
3163         __this_cpu_inc(user_stack_count);
3164
3165         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3166                                             sizeof(*entry), trace_ctx);
3167         if (!event)
3168                 goto out_drop_count;
3169         entry   = ring_buffer_event_data(event);
3170
3171         entry->tgid             = current->tgid;
3172         memset(&entry->caller, 0, sizeof(entry->caller));
3173
3174         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3175         if (!call_filter_check_discard(call, entry, buffer, event))
3176                 __buffer_unlock_commit(buffer, event);
3177
3178  out_drop_count:
3179         __this_cpu_dec(user_stack_count);
3180  out:
3181         preempt_enable();
3182 }
3183 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3184 static void ftrace_trace_userstack(struct trace_array *tr,
3185                                    struct trace_buffer *buffer,
3186                                    unsigned int trace_ctx)
3187 {
3188 }
3189 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3190
3191 #endif /* CONFIG_STACKTRACE */
3192
3193 static inline void
3194 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3195                           unsigned long long delta)
3196 {
3197         entry->bottom_delta_ts = delta & U32_MAX;
3198         entry->top_delta_ts = (delta >> 32);
3199 }
3200
3201 void trace_last_func_repeats(struct trace_array *tr,
3202                              struct trace_func_repeats *last_info,
3203                              unsigned int trace_ctx)
3204 {
3205         struct trace_buffer *buffer = tr->array_buffer.buffer;
3206         struct func_repeats_entry *entry;
3207         struct ring_buffer_event *event;
3208         u64 delta;
3209
3210         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3211                                             sizeof(*entry), trace_ctx);
3212         if (!event)
3213                 return;
3214
3215         delta = ring_buffer_event_time_stamp(buffer, event) -
3216                 last_info->ts_last_call;
3217
3218         entry = ring_buffer_event_data(event);
3219         entry->ip = last_info->ip;
3220         entry->parent_ip = last_info->parent_ip;
3221         entry->count = last_info->count;
3222         func_repeats_set_delta_ts(entry, delta);
3223
3224         __buffer_unlock_commit(buffer, event);
3225 }
3226
3227 /* created for use with alloc_percpu */
3228 struct trace_buffer_struct {
3229         int nesting;
3230         char buffer[4][TRACE_BUF_SIZE];
3231 };
3232
3233 static struct trace_buffer_struct *trace_percpu_buffer;
3234
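/*
 * trace_printk() output is first formatted into one of these per-CPU
 * buffers and only then copied into the ring buffer. The usual pattern,
 * as used by trace_vbprintk() and __trace_array_vprintk() below, is
 * roughly:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... fill buf with vbin_printf()/vscnprintf() ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The nesting counter gives each of up to four nesting levels (e.g.
 * normal, softirq, irq and NMI context) its own slot, so no locking
 * is needed.
 */
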
3235 /*
3236  * This allows for lockless recording.  If we're nested too deeply, then
3237  * this returns NULL.
3238  */
3239 static char *get_trace_buf(void)
3240 {
3241         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3242
3243         if (!buffer || buffer->nesting >= 4)
3244                 return NULL;
3245
3246         buffer->nesting++;
3247
3248         /* Interrupts must see nesting incremented before we use the buffer */
3249         barrier();
3250         return &buffer->buffer[buffer->nesting - 1][0];
3251 }
3252
3253 static void put_trace_buf(void)
3254 {
3255         /* Don't let the decrement of nesting leak before this */
3256         barrier();
3257         this_cpu_dec(trace_percpu_buffer->nesting);
3258 }
3259
3260 static int alloc_percpu_trace_buffer(void)
3261 {
3262         struct trace_buffer_struct *buffers;
3263
3264         if (trace_percpu_buffer)
3265                 return 0;
3266
3267         buffers = alloc_percpu(struct trace_buffer_struct);
3268         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3269                 return -ENOMEM;
3270
3271         trace_percpu_buffer = buffers;
3272         return 0;
3273 }
3274
3275 static int buffers_allocated;
3276
3277 void trace_printk_init_buffers(void)
3278 {
3279         if (buffers_allocated)
3280                 return;
3281
3282         if (alloc_percpu_trace_buffer())
3283                 return;
3284
3285         /* trace_printk() is for debug use only. Don't use it in production. */
3286
3287         pr_warn("\n");
3288         pr_warn("**********************************************************\n");
3289         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3290         pr_warn("**                                                      **\n");
3291         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3292         pr_warn("**                                                      **\n");
3293         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3294         pr_warn("** unsafe for production use.                           **\n");
3295         pr_warn("**                                                      **\n");
3296         pr_warn("** If you see this message and you are not debugging    **\n");
3297         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3298         pr_warn("**                                                      **\n");
3299         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3300         pr_warn("**********************************************************\n");
3301
3302         /* Expand the buffers to set size */
3303         tracing_update_buffers();
3304
3305         buffers_allocated = 1;
3306
3307         /*
3308          * trace_printk_init_buffers() can be called by modules.
3309          * If that happens, we need to start cmdline recording
3310          * directly here. If global_trace.array_buffer.buffer is
3311          * already allocated, then this was called by module code.
3312          */
3313         if (global_trace.array_buffer.buffer)
3314                 tracing_start_cmdline_record();
3315 }
3316 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3317
3318 void trace_printk_start_comm(void)
3319 {
3320         /* Start tracing comms if trace printk is set */
3321         if (!buffers_allocated)
3322                 return;
3323         tracing_start_cmdline_record();
3324 }
3325
3326 static void trace_printk_start_stop_comm(int enabled)
3327 {
3328         if (!buffers_allocated)
3329                 return;
3330
3331         if (enabled)
3332                 tracing_start_cmdline_record();
3333         else
3334                 tracing_stop_cmdline_record();
3335 }
3336
3337 /**
3338  * trace_vbprintk - write binary msg to tracing buffer
3339  * @ip:    The address of the caller
3340  * @fmt:   The string format to write to the buffer
3341  * @args:  Arguments for @fmt
3342  */
3343 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3344 {
3345         struct trace_event_call *call = &event_bprint;
3346         struct ring_buffer_event *event;
3347         struct trace_buffer *buffer;
3348         struct trace_array *tr = &global_trace;
3349         struct bprint_entry *entry;
3350         unsigned int trace_ctx;
3351         char *tbuffer;
3352         int len = 0, size;
3353
3354         if (unlikely(tracing_selftest_running || tracing_disabled))
3355                 return 0;
3356
3357         /* Don't pollute graph traces with trace_vprintk internals */
3358         pause_graph_tracing();
3359
3360         trace_ctx = tracing_gen_ctx();
3361         preempt_disable_notrace();
3362
3363         tbuffer = get_trace_buf();
3364         if (!tbuffer) {
3365                 len = 0;
3366                 goto out_nobuffer;
3367         }
3368
3369         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3370
3371         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3372                 goto out_put;
3373
3374         size = sizeof(*entry) + sizeof(u32) * len;
3375         buffer = tr->array_buffer.buffer;
3376         ring_buffer_nest_start(buffer);
3377         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3378                                             trace_ctx);
3379         if (!event)
3380                 goto out;
3381         entry = ring_buffer_event_data(event);
3382         entry->ip                       = ip;
3383         entry->fmt                      = fmt;
3384
3385         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3386         if (!call_filter_check_discard(call, entry, buffer, event)) {
3387                 __buffer_unlock_commit(buffer, event);
3388                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3389         }
3390
3391 out:
3392         ring_buffer_nest_end(buffer);
3393 out_put:
3394         put_trace_buf();
3395
3396 out_nobuffer:
3397         preempt_enable_notrace();
3398         unpause_graph_tracing();
3399
3400         return len;
3401 }
3402 EXPORT_SYMBOL_GPL(trace_vbprintk);
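
/*
 * Note that a TRACE_BPRINT entry stores only the format pointer and the
 * vbin_printf()-packed argument words (len is counted in u32s); the
 * final string is formatted later, when the buffer is read, which keeps
 * the write-side path cheap.
 */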
3403
3404 __printf(3, 0)
3405 static int
3406 __trace_array_vprintk(struct trace_buffer *buffer,
3407                       unsigned long ip, const char *fmt, va_list args)
3408 {
3409         struct trace_event_call *call = &event_print;
3410         struct ring_buffer_event *event;
3411         int len = 0, size;
3412         struct print_entry *entry;
3413         unsigned int trace_ctx;
3414         char *tbuffer;
3415
3416         if (tracing_disabled || tracing_selftest_running)
3417                 return 0;
3418
3419         /* Don't pollute graph traces with trace_vprintk internals */
3420         pause_graph_tracing();
3421
3422         trace_ctx = tracing_gen_ctx();
3423         preempt_disable_notrace();
3424
3426         tbuffer = get_trace_buf();
3427         if (!tbuffer) {
3428                 len = 0;
3429                 goto out_nobuffer;
3430         }
3431
3432         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3433
3434         size = sizeof(*entry) + len + 1;
3435         ring_buffer_nest_start(buffer);
3436         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3437                                             trace_ctx);
3438         if (!event)
3439                 goto out;
3440         entry = ring_buffer_event_data(event);
3441         entry->ip = ip;
3442
3443         memcpy(&entry->buf, tbuffer, len + 1);
3444         if (!call_filter_check_discard(call, entry, buffer, event)) {
3445                 __buffer_unlock_commit(buffer, event);
3446                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3447         }
3448
3449 out:
3450         ring_buffer_nest_end(buffer);
3451         put_trace_buf();
3452
3453 out_nobuffer:
3454         preempt_enable_notrace();
3455         unpause_graph_tracing();
3456
3457         return len;
3458 }
3459
3460 __printf(3, 0)
3461 int trace_array_vprintk(struct trace_array *tr,
3462                         unsigned long ip, const char *fmt, va_list args)
3463 {
3464         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3465 }
3466
3467 /**
3468  * trace_array_printk - Print a message to a specific instance
3469  * @tr: The instance trace_array descriptor
3470  * @ip: The instruction pointer that this is called from.
3471  * @fmt: The format to print (printf format)
3472  *
3473  * If a subsystem sets up its own instance, it has the right to
3474  * printk strings into its tracing instance buffer using this
3475  * function. Note, this function will not write into the top level
3476  * buffer (use trace_printk() for that), as the top level buffer
3477  * should only contain events that can be individually disabled.
3478  * trace_printk() is only for debugging a kernel, and should never
3479  * be incorporated into normal use.
3480  *
3481  * trace_array_printk() can be used, as it will not add noise to the
3482  * top level tracing buffer.
3483  *
3484  * Note, trace_array_init_printk() must be called on @tr before this
3485  * can be used.
3486  */
3487 __printf(3, 0)
3488 int trace_array_printk(struct trace_array *tr,
3489                        unsigned long ip, const char *fmt, ...)
3490 {
3491         int ret;
3492         va_list ap;
3493
3494         if (!tr)
3495                 return -ENOENT;
3496
3497         /* This is only allowed for created instances */
3498         if (tr == &global_trace)
3499                 return 0;
3500
3501         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3502                 return 0;
3503
3504         va_start(ap, fmt);
3505         ret = trace_array_vprintk(tr, ip, fmt, ap);
3506         va_end(ap);
3507         return ret;
3508 }
3509 EXPORT_SYMBOL_GPL(trace_array_printk);
3510
3511 /**
3512  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3513  * @tr: The trace array to initialize the buffers for
3514  *
3515  * As trace_array_printk() only writes into instances, such calls are OK
3516  * to have in the kernel (unlike trace_printk()). This needs to be called
3517  * before trace_array_printk() can be used on a trace_array.
3518  */
3519 int trace_array_init_printk(struct trace_array *tr)
3520 {
3521         if (!tr)
3522                 return -ENOENT;
3523
3524         /* This is only allowed for created instances */
3525         if (tr == &global_trace)
3526                 return -EINVAL;
3527
3528         return alloc_percpu_trace_buffer();
3529 }
3530 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3531
3532 __printf(3, 4)
3533 int trace_array_printk_buf(struct trace_buffer *buffer,
3534                            unsigned long ip, const char *fmt, ...)
3535 {
3536         int ret;
3537         va_list ap;
3538
3539         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3540                 return 0;
3541
3542         va_start(ap, fmt);
3543         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3544         va_end(ap);
3545         return ret;
3546 }
3547
3548 __printf(2, 0)
3549 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3550 {
3551         return trace_array_vprintk(&global_trace, ip, fmt, args);
3552 }
3553 EXPORT_SYMBOL_GPL(trace_vprintk);
3554
3555 static void trace_iterator_increment(struct trace_iterator *iter)
3556 {
3557         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3558
3559         iter->idx++;
3560         if (buf_iter)
3561                 ring_buffer_iter_advance(buf_iter);
3562 }
3563
3564 static struct trace_entry *
3565 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3566                 unsigned long *lost_events)
3567 {
3568         struct ring_buffer_event *event;
3569         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3570
3571         if (buf_iter) {
3572                 event = ring_buffer_iter_peek(buf_iter, ts);
3573                 if (lost_events)
3574                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3575                                 (unsigned long)-1 : 0;
3576         } else {
3577                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3578                                          lost_events);
3579         }
3580
3581         if (event) {
3582                 iter->ent_size = ring_buffer_event_length(event);
3583                 return ring_buffer_event_data(event);
3584         }
3585         iter->ent_size = 0;
3586         return NULL;
3587 }
3588
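/*
 * Return the entry with the earliest timestamp across all per-CPU
 * buffers (or from the single buffer when iterating a per_cpu trace
 * file), along with the CPU it came from, its timestamp and the number
 * of events lost before it.
 */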
3589 static struct trace_entry *
3590 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3591                   unsigned long *missing_events, u64 *ent_ts)
3592 {
3593         struct trace_buffer *buffer = iter->array_buffer->buffer;
3594         struct trace_entry *ent, *next = NULL;
3595         unsigned long lost_events = 0, next_lost = 0;
3596         int cpu_file = iter->cpu_file;
3597         u64 next_ts = 0, ts;
3598         int next_cpu = -1;
3599         int next_size = 0;
3600         int cpu;
3601
3602         /*
3603          * If we are in a per_cpu trace file, don't bother iterating over
3604          * all CPUs; peek at that CPU directly.
3605          */
3606         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3607                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3608                         return NULL;
3609                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3610                 if (ent_cpu)
3611                         *ent_cpu = cpu_file;
3612
3613                 return ent;
3614         }
3615
3616         for_each_tracing_cpu(cpu) {
3617
3618                 if (ring_buffer_empty_cpu(buffer, cpu))
3619                         continue;
3620
3621                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3622
3623                 /*
3624                  * Pick the entry with the smallest timestamp:
3625                  */
3626                 if (ent && (!next || ts < next_ts)) {
3627                         next = ent;
3628                         next_cpu = cpu;
3629                         next_ts = ts;
3630                         next_lost = lost_events;
3631                         next_size = iter->ent_size;
3632                 }
3633         }
3634
3635         iter->ent_size = next_size;
3636
3637         if (ent_cpu)
3638                 *ent_cpu = next_cpu;
3639
3640         if (ent_ts)
3641                 *ent_ts = next_ts;
3642
3643         if (missing_events)
3644                 *missing_events = next_lost;
3645
3646         return next;
3647 }
3648
3649 #define STATIC_FMT_BUF_SIZE     128
3650 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3651
3652 static char *trace_iter_expand_format(struct trace_iterator *iter)
3653 {
3654         char *tmp;
3655
3656         /*
3657          * iter->tr is NULL when used with tp_printk, which means
3658          * this can get called where it is not safe to call krealloc().
3659          */
3660         if (!iter->tr || iter->fmt == static_fmt_buf)
3661                 return NULL;
3662
3663         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3664                        GFP_KERNEL);
3665         if (tmp) {
3666                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3667                 iter->fmt = tmp;
3668         }
3669
3670         return tmp;
3671 }
3672
3673 /* Returns true if the string is safe to dereference from an event */
3674 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3675 {
3676         unsigned long addr = (unsigned long)str;
3677         struct trace_event *trace_event;
3678         struct trace_event_call *event;
3679
3680         /* OK if part of the event data */
3681         if ((addr >= (unsigned long)iter->ent) &&
3682             (addr < (unsigned long)iter->ent + iter->ent_size))
3683                 return true;
3684
3685         /* OK if part of the temp seq buffer */
3686         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3687             (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3688                 return true;
3689
3690         /* Core rodata can not be freed */
3691         if (is_kernel_rodata(addr))
3692                 return true;
3693
3694         if (trace_is_tracepoint_string(str))
3695                 return true;
3696
3697         /*
3698          * Now this could be a module event, referencing core module
3699          * data, which is OK.
3700          */
3701         if (!iter->ent)
3702                 return false;
3703
3704         trace_event = ftrace_find_event(iter->ent->type);
3705         if (!trace_event)
3706                 return false;
3707
3708         event = container_of(trace_event, struct trace_event_call, event);
3709         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3710                 return false;
3711
3712         /* Would rather have rodata, but this will suffice */
3713         if (within_module_core(addr, event->module))
3714                 return true;
3715
3716         return false;
3717 }
3718
3719 static const char *show_buffer(struct trace_seq *s)
3720 {
3721         struct seq_buf *seq = &s->seq;
3722
3723         seq_buf_terminate(seq);
3724
3725         return seq->buffer;
3726 }
3727
3728 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3729
3730 static int test_can_verify_check(const char *fmt, ...)
3731 {
3732         char buf[16];
3733         va_list ap;
3734         int ret;
3735
3736         /*
3737          * The verifier depends on vsnprintf() modifying the va_list
3738          * passed to it, i.e. on the va_list being passed by reference.
3739          * Some architectures (like x86_32) pass it by value, which means
3740          * that vsnprintf() does not modify the caller's va_list, and the
3741          * verifier would then need to understand all the values that
3742          * vsnprintf can consume. If the va_list is passed by value, the
3743          * verifier is disabled.
3744          */
3745         va_start(ap, fmt);
3746         vsnprintf(buf, 16, "%d", ap);
3747         ret = va_arg(ap, int);
3748         va_end(ap);
3749
3750         return ret;
3751 }
3752
3753 static void test_can_verify(void)
3754 {
3755         if (!test_can_verify_check("%d %d", 0, 1)) {
3756                 pr_info("trace event string verifier disabled\n");
3757                 static_branch_inc(&trace_no_verify);
3758         }
3759 }
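
/*
 * Worked example of the check above: test_can_verify_check("%d %d", 0, 1)
 * gives vsnprintf() a "%d" format, so it consumes the first argument (0)
 * only when the architecture passes the va_list by reference. The
 * following va_arg() then returns 1 and the verifier stays enabled;
 * with a by-value va_list, va_arg() returns 0 and the verifier is
 * disabled.
 */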
3760
3761 /**
3762  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3763  * @iter: The iterator that holds the seq buffer and the event being printed
3764  * @fmt: The format used to print the event
3765  * @ap: The va_list holding the data to print from @fmt.
3766  *
3767  * This writes the data into the @iter->seq buffer using the data from
3768  * @fmt and @ap. If the format has a %s, then the source of the string
3769  * is examined to make sure it is safe to print; otherwise it will
3770  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3771  * pointer.
3772  */
3773 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3774                          va_list ap)
3775 {
3776         const char *p = fmt;
3777         const char *str;
3778         int i, j;
3779
3780         if (WARN_ON_ONCE(!fmt))
3781                 return;
3782
3783         if (static_branch_unlikely(&trace_no_verify))
3784                 goto print;
3785
3786         /* Don't bother checking when doing a ftrace_dump() */
3787         if (iter->fmt == static_fmt_buf)
3788                 goto print;
3789
3790         while (*p) {
3791                 bool star = false;
3792                 int len = 0;
3793
3794                 j = 0;
3795
3796                 /* We only care about %s and variants */
3797                 for (i = 0; p[i]; i++) {
3798                         if (i + 1 >= iter->fmt_size) {
3799                                 /*
3800                                  * If we can't expand the copy buffer,
3801                                  * just print it.
3802                                  */
3803                                 if (!trace_iter_expand_format(iter))
3804                                         goto print;
3805                         }
3806
3807                         if (p[i] == '\\' && p[i+1]) {
3808                                 i++;
3809                                 continue;
3810                         }
3811                         if (p[i] == '%') {
3812                                 /* Need to test cases like %08.*s */
3813                                 for (j = 1; p[i+j]; j++) {
3814                                         if (isdigit(p[i+j]) ||
3815                                             p[i+j] == '.')
3816                                                 continue;
3817                                         if (p[i+j] == '*') {
3818                                                 star = true;
3819                                                 continue;
3820                                         }
3821                                         break;
3822                                 }
3823                                 if (p[i+j] == 's')
3824                                         break;
3825                                 star = false;
3826                         }
3827                         j = 0;
3828                 }
3829                 /* If no %s found then just print normally */
3830                 if (!p[i])
3831                         break;
3832
3833                 /* Copy up to the %s, and print that */
3834                 strncpy(iter->fmt, p, i);
3835                 iter->fmt[i] = '\0';
3836                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3837
3838                 if (star)
3839                         len = va_arg(ap, int);
3840
3841                 /* The ap now points to the string data of the %s */
3842                 str = va_arg(ap, const char *);
3843
3844                 /*
3845                  * If you hit this warning, it is likely that the
3846                  * trace event in question used %s on a string that
3847                  * was saved at the time of the event, but may not be
3848                  * around when the trace is read. Use __string(),
3849                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3850                  * instead. See samples/trace_events/trace-events-sample.h
3851                  * for reference.
3852                  */
3853                 if (WARN_ONCE(!trace_safe_str(iter, str),
3854                               "fmt: '%s' current_buffer: '%s'",
3855                               fmt, show_buffer(&iter->seq))) {
3856                         int ret;
3857
3858                         /* Try to safely read the string */
3859                         if (star) {
3860                                 if (len + 1 > iter->fmt_size)
3861                                         len = iter->fmt_size - 1;
3862                                 if (len < 0)
3863                                         len = 0;
3864                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3865                                 iter->fmt[len] = 0;
3866                                 star = false;
3867                         } else {
3868                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3869                                                                   iter->fmt_size);
3870                         }
3871                         if (ret < 0)
3872                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3873                         else
3874                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3875                                                  str, iter->fmt);
3876                         str = "[UNSAFE-MEMORY]";
3877                         strcpy(iter->fmt, "%s");
3878                 } else {
3879                         strncpy(iter->fmt, p + i, j + 1);
3880                         iter->fmt[j+1] = '\0';
3881                 }
3882                 if (star)
3883                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3884                 else
3885                         trace_seq_printf(&iter->seq, iter->fmt, str);
3886
3887                 p += i + j + 1;
3888         }
3889  print:
3890         if (*p)
3891                 trace_seq_vprintf(&iter->seq, p, ap);
3892 }
3893
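/*
 * Rewrite an event's format string so that pointers are shown unhashed
 * when TRACE_ITER_HASH_PTR is cleared for this instance: each bare "%p"
 * becomes "%px", while "%%p" and extended specifiers such as "%pS"
 * (where the next character is alphanumeric) are left untouched. For
 * example, "addr=%p\n" would be emitted as "addr=%px\n".
 */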
3894 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3895 {
3896         const char *p, *new_fmt;
3897         char *q;
3898
3899         if (WARN_ON_ONCE(!fmt))
3900                 return fmt;
3901
3902         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3903                 return fmt;
3904
3905         p = fmt;
3906         new_fmt = q = iter->fmt;
3907         while (*p) {
3908                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3909                         if (!trace_iter_expand_format(iter))
3910                                 return fmt;
3911
3912                         q += iter->fmt - new_fmt;
3913                         new_fmt = iter->fmt;
3914                 }
3915
3916                 *q++ = *p++;
3917
3918                 /* Replace %p with %px */
3919                 if (p[-1] == '%') {
3920                         if (p[0] == '%') {
3921                                 *q++ = *p++;
3922                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3923                                 *q++ = *p++;
3924                                 *q++ = 'x';
3925                         }
3926                 }
3927         }
3928         *q = '\0';
3929
3930         return new_fmt;
3931 }
3932
3933 #define STATIC_TEMP_BUF_SIZE    128
3934 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3935
3936 /* Find the next real entry, without updating the iterator itself */
3937 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3938                                           int *ent_cpu, u64 *ent_ts)
3939 {
3940         /* __find_next_entry will reset ent_size */
3941         int ent_size = iter->ent_size;
3942         struct trace_entry *entry;
3943
3944         /*
3945          * If called from ftrace_dump(), then the iter->temp buffer
3946          * will be the static_temp_buf and not created from kmalloc.
3947          * If the entry size is greater than the buffer, we cannot
3948          * save it. Just return NULL in that case. This is only
3949          * used to add markers when two consecutive events' time
3950          * stamps have a large delta. See trace_print_lat_context().
3951          */
3952         if (iter->temp == static_temp_buf &&
3953             STATIC_TEMP_BUF_SIZE < ent_size)
3954                 return NULL;
3955
3956         /*
3957          * The __find_next_entry() may call peek_next_entry(), which may
3958          * call ring_buffer_peek() that may make the contents of iter->ent
3959          * undefined. Need to copy iter->ent now.
3960          */
3961         if (iter->ent && iter->ent != iter->temp) {
3962                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3963                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3964                         void *temp;
3965                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3966                         if (!temp)
3967                                 return NULL;
3968                         kfree(iter->temp);
3969                         iter->temp = temp;
3970                         iter->temp_size = iter->ent_size;
3971                 }
3972                 memcpy(iter->temp, iter->ent, iter->ent_size);
3973                 iter->ent = iter->temp;
3974         }
3975         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3976         /* Put back the original ent_size */
3977         iter->ent_size = ent_size;
3978
3979         return entry;
3980 }
3981
3982 /* Find the next real entry, and increment the iterator to the next entry */
3983 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3984 {
3985         iter->ent = __find_next_entry(iter, &iter->cpu,
3986                                       &iter->lost_events, &iter->ts);
3987
3988         if (iter->ent)
3989                 trace_iterator_increment(iter);
3990
3991         return iter->ent ? iter : NULL;
3992 }
3993
3994 static void trace_consume(struct trace_iterator *iter)
3995 {
3996         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3997                             &iter->lost_events);
3998 }
3999
4000 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4001 {
4002         struct trace_iterator *iter = m->private;
4003         int i = (int)*pos;
4004         void *ent;
4005
4006         WARN_ON_ONCE(iter->leftover);
4007
4008         (*pos)++;
4009
4010         /* can't go backwards */
4011         if (iter->idx > i)
4012                 return NULL;
4013
4014         if (iter->idx < 0)
4015                 ent = trace_find_next_entry_inc(iter);
4016         else
4017                 ent = iter;
4018
4019         while (ent && iter->idx < i)
4020                 ent = trace_find_next_entry_inc(iter);
4021
4022         iter->pos = *pos;
4023
4024         return ent;
4025 }
4026
4027 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4028 {
4029         struct ring_buffer_iter *buf_iter;
4030         unsigned long entries = 0;
4031         u64 ts;
4032
4033         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4034
4035         buf_iter = trace_buffer_iter(iter, cpu);
4036         if (!buf_iter)
4037                 return;
4038
4039         ring_buffer_iter_reset(buf_iter);
4040
4041         /*
4042          * With the max latency tracers, it is possible that a reset
4043          * never took place on a CPU. This is evident from the
4044          * timestamp being before the start of the buffer.
4045          */
4046         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4047                 if (ts >= iter->array_buffer->time_start)
4048                         break;
4049                 entries++;
4050                 ring_buffer_iter_advance(buf_iter);
4051         }
4052
4053         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4054 }
4055
4056 /*
4057  * The current tracer is copied to avoid holding a global lock
4058  * all around.
4059  */
4060 static void *s_start(struct seq_file *m, loff_t *pos)
4061 {
4062         struct trace_iterator *iter = m->private;
4063         struct trace_array *tr = iter->tr;
4064         int cpu_file = iter->cpu_file;
4065         void *p = NULL;
4066         loff_t l = 0;
4067         int cpu;
4068
4069         /*
4070          * copy the tracer to avoid using a global lock all around.
4071          * iter->trace is a copy of current_trace, the pointer to the
4072          * name may be used instead of a strcmp(), as iter->trace->name
4073          * will point to the same string as current_trace->name.
4074          */
4075         mutex_lock(&trace_types_lock);
4076         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4077                 *iter->trace = *tr->current_trace;
4078         mutex_unlock(&trace_types_lock);
4079
4080 #ifdef CONFIG_TRACER_MAX_TRACE
4081         if (iter->snapshot && iter->trace->use_max_tr)
4082                 return ERR_PTR(-EBUSY);
4083 #endif
4084
4085         if (*pos != iter->pos) {
4086                 iter->ent = NULL;
4087                 iter->cpu = 0;
4088                 iter->idx = -1;
4089
4090                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4091                         for_each_tracing_cpu(cpu)
4092                                 tracing_iter_reset(iter, cpu);
4093                 } else
4094                         tracing_iter_reset(iter, cpu_file);
4095
4096                 iter->leftover = 0;
4097                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4098                         ;
4099
4100         } else {
4101                 /*
4102                  * If we overflowed the seq_file before, then we want
4103                  * to just reuse the trace_seq buffer again.
4104                  */
4105                 if (iter->leftover)
4106                         p = iter;
4107                 else {
4108                         l = *pos - 1;
4109                         p = s_next(m, p, &l);
4110                 }
4111         }
4112
4113         trace_event_read_lock();
4114         trace_access_lock(cpu_file);
4115         return p;
4116 }
4117
4118 static void s_stop(struct seq_file *m, void *p)
4119 {
4120         struct trace_iterator *iter = m->private;
4121
4122 #ifdef CONFIG_TRACER_MAX_TRACE
4123         if (iter->snapshot && iter->trace->use_max_tr)
4124                 return;
4125 #endif
4126
4127         trace_access_unlock(iter->cpu_file);
4128         trace_event_read_unlock();
4129 }
4130
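/*
 * For one CPU buffer: *entries is the number of events still present,
 * while *total also counts events lost to ring buffer overruns. If
 * entries were deliberately skipped (see tracing_iter_reset()), the two
 * are reported as equal.
 */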
4131 static void
4132 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4133                       unsigned long *entries, int cpu)
4134 {
4135         unsigned long count;
4136
4137         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4138         /*
4139          * If this buffer has skipped entries, then we hold all
4140          * entries for the trace and we need to ignore the
4141          * ones before the time stamp.
4142          */
4143         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4144                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4145                 /* total is the same as the entries */
4146                 *total = count;
4147         } else
4148                 *total = count +
4149                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4150         *entries = count;
4151 }
4152
4153 static void
4154 get_total_entries(struct array_buffer *buf,
4155                   unsigned long *total, unsigned long *entries)
4156 {
4157         unsigned long t, e;
4158         int cpu;
4159
4160         *total = 0;
4161         *entries = 0;
4162
4163         for_each_tracing_cpu(cpu) {
4164                 get_total_entries_cpu(buf, &t, &e, cpu);
4165                 *total += t;
4166                 *entries += e;
4167         }
4168 }
4169
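/*
 * trace_total_entries_cpu() and trace_total_entries() report how many
 * events are currently held in @tr's buffer (for one CPU or all CPUs
 * respectively), not counting overwritten events. A NULL @tr means the
 * top level (global) trace array.
 */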
4170 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4171 {
4172         unsigned long total, entries;
4173
4174         if (!tr)
4175                 tr = &global_trace;
4176
4177         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4178
4179         return entries;
4180 }
4181
4182 unsigned long trace_total_entries(struct trace_array *tr)
4183 {
4184         unsigned long total, entries;
4185
4186         if (!tr)
4187                 tr = &global_trace;
4188
4189         get_total_entries(&tr->array_buffer, &total, &entries);
4190
4191         return entries;
4192 }
4193
4194 static void print_lat_help_header(struct seq_file *m)
4195 {
4196         seq_puts(m, "#                    _------=> CPU#            \n"
4197                     "#                   / _-----=> irqs-off        \n"
4198                     "#                  | / _----=> need-resched    \n"
4199                     "#                  || / _---=> hardirq/softirq \n"
4200                     "#                  ||| / _--=> preempt-depth   \n"
4201                     "#                  |||| / _-=> migrate-disable \n"
4202                     "#                  ||||| /     delay           \n"
4203                     "#  cmd     pid     |||||| time  |   caller     \n"
4204                     "#     \\   /        ||||||  \\    |    /       \n");
4205 }
4206
4207 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4208 {
4209         unsigned long total;
4210         unsigned long entries;
4211
4212         get_total_entries(buf, &total, &entries);
4213         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4214                    entries, total, num_online_cpus());
4215         seq_puts(m, "#\n");
4216 }
4217
4218 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4219                                    unsigned int flags)
4220 {
4221         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4222
4223         print_event_info(buf, m);
4224
4225         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4226         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4227 }
4228
4229 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4230                                        unsigned int flags)
4231 {
4232         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4233         const char *space = "            ";
4234         int prec = tgid ? 12 : 2;
4235
4236         print_event_info(buf, m);
4237
4238         seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4239         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4240         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4241         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4242         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4243         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4244         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4245         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4246 }
4247
4248 void
4249 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4250 {
4251         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4252         struct array_buffer *buf = iter->array_buffer;
4253         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4254         struct tracer *type = iter->trace;
4255         unsigned long entries;
4256         unsigned long total;
4257         const char *name = type->name;
4260
4261         get_total_entries(buf, &total, &entries);
4262
4263         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4264                    name, UTS_RELEASE);
4265         seq_puts(m, "# -----------------------------------"
4266                  "---------------------------------\n");
4267         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4268                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4269                    nsecs_to_usecs(data->saved_latency),
4270                    entries,
4271                    total,
4272                    buf->cpu,
4273 #if defined(CONFIG_PREEMPT_NONE)
4274                    "server",
4275 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4276                    "desktop",
4277 #elif defined(CONFIG_PREEMPT)
4278                    "preempt",
4279 #elif defined(CONFIG_PREEMPT_RT)
4280                    "preempt_rt",
4281 #else
4282                    "unknown",
4283 #endif
4284                    /* These are reserved for later use */
4285                    0, 0, 0, 0);
4286 #ifdef CONFIG_SMP
4287         seq_printf(m, " #P:%d)\n", num_online_cpus());
4288 #else
4289         seq_puts(m, ")\n");
4290 #endif
4291         seq_puts(m, "#    -----------------\n");
4292         seq_printf(m, "#    | task: %.16s-%d "
4293                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4294                    data->comm, data->pid,
4295                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4296                    data->policy, data->rt_priority);
4297         seq_puts(m, "#    -----------------\n");
4298
4299         if (data->critical_start) {
4300                 seq_puts(m, "#  => started at: ");
4301                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4302                 trace_print_seq(m, &iter->seq);
4303                 seq_puts(m, "\n#  => ended at:   ");
4304                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4305                 trace_print_seq(m, &iter->seq);
4306                 seq_puts(m, "\n#\n");
4307         }
4308
4309         seq_puts(m, "#\n");
4310 }
4311
4312 static void test_cpu_buff_start(struct trace_iterator *iter)
4313 {
4314         struct trace_seq *s = &iter->seq;
4315         struct trace_array *tr = iter->tr;
4316
4317         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4318                 return;
4319
4320         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4321                 return;
4322
4323         if (cpumask_available(iter->started) &&
4324             cpumask_test_cpu(iter->cpu, iter->started))
4325                 return;
4326
4327         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4328                 return;
4329
4330         if (cpumask_available(iter->started))
4331                 cpumask_set_cpu(iter->cpu, iter->started);
4332
4333         /* Don't print started cpu buffer for the first entry of the trace */
4334         if (iter->idx > 1)
4335                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4336                                 iter->cpu);
4337 }
4338
4339 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4340 {
4341         struct trace_array *tr = iter->tr;
4342         struct trace_seq *s = &iter->seq;
4343         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4344         struct trace_entry *entry;
4345         struct trace_event *event;
4346
4347         entry = iter->ent;
4348
4349         test_cpu_buff_start(iter);
4350
4351         event = ftrace_find_event(entry->type);
4352
4353         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4354                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4355                         trace_print_lat_context(iter);
4356                 else
4357                         trace_print_context(iter);
4358         }
4359
4360         if (trace_seq_has_overflowed(s))
4361                 return TRACE_TYPE_PARTIAL_LINE;
4362
4363         if (event)
4364                 return event->funcs->trace(iter, sym_flags, event);
4365
4366         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4367
4368         return trace_handle_return(s);
4369 }
4370
4371 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4372 {
4373         struct trace_array *tr = iter->tr;
4374         struct trace_seq *s = &iter->seq;
4375         struct trace_entry *entry;
4376         struct trace_event *event;
4377
4378         entry = iter->ent;
4379
4380         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4381                 trace_seq_printf(s, "%d %d %llu ",
4382                                  entry->pid, iter->cpu, iter->ts);
4383
4384         if (trace_seq_has_overflowed(s))
4385                 return TRACE_TYPE_PARTIAL_LINE;
4386
4387         event = ftrace_find_event(entry->type);
4388         if (event)
4389                 return event->funcs->raw(iter, 0, event);
4390
4391         trace_seq_printf(s, "%d ?\n", entry->type);
4392
4393         return trace_handle_return(s);
4394 }
4395
4396 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4397 {
4398         struct trace_array *tr = iter->tr;
4399         struct trace_seq *s = &iter->seq;
4400         unsigned char newline = '\n';
4401         struct trace_entry *entry;
4402         struct trace_event *event;
4403
4404         entry = iter->ent;
4405
4406         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4407                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4408                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4409                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4410                 if (trace_seq_has_overflowed(s))
4411                         return TRACE_TYPE_PARTIAL_LINE;
4412         }
4413
4414         event = ftrace_find_event(entry->type);
4415         if (event) {
4416                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4417                 if (ret != TRACE_TYPE_HANDLED)
4418                         return ret;
4419         }
4420
4421         SEQ_PUT_FIELD(s, newline);
4422
4423         return trace_handle_return(s);
4424 }
4425
4426 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4427 {
4428         struct trace_array *tr = iter->tr;
4429         struct trace_seq *s = &iter->seq;
4430         struct trace_entry *entry;
4431         struct trace_event *event;
4432
4433         entry = iter->ent;
4434
4435         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436                 SEQ_PUT_FIELD(s, entry->pid);
4437                 SEQ_PUT_FIELD(s, iter->cpu);
4438                 SEQ_PUT_FIELD(s, iter->ts);
4439                 if (trace_seq_has_overflowed(s))
4440                         return TRACE_TYPE_PARTIAL_LINE;
4441         }
4442
4443         event = ftrace_find_event(entry->type);
4444         return event ? event->funcs->binary(iter, 0, event) :
4445                 TRACE_TYPE_HANDLED;
4446 }
4447
4448 int trace_empty(struct trace_iterator *iter)
4449 {
4450         struct ring_buffer_iter *buf_iter;
4451         int cpu;
4452
4453         /* If we are looking at one CPU buffer, only check that one */
4454         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4455                 cpu = iter->cpu_file;
4456                 buf_iter = trace_buffer_iter(iter, cpu);
4457                 if (buf_iter) {
4458                         if (!ring_buffer_iter_empty(buf_iter))
4459                                 return 0;
4460                 } else {
4461                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4462                                 return 0;
4463                 }
4464                 return 1;
4465         }
4466
4467         for_each_tracing_cpu(cpu) {
4468                 buf_iter = trace_buffer_iter(iter, cpu);
4469                 if (buf_iter) {
4470                         if (!ring_buffer_iter_empty(buf_iter))
4471                                 return 0;
4472                 } else {
4473                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4474                                 return 0;
4475                 }
4476         }
4477
4478         return 1;
4479 }
4480
4481 /*  Called with trace_event_read_lock() held. */
4482 enum print_line_t print_trace_line(struct trace_iterator *iter)
4483 {
4484         struct trace_array *tr = iter->tr;
4485         unsigned long trace_flags = tr->trace_flags;
4486         enum print_line_t ret;
4487
4488         if (iter->lost_events) {
4489                 if (iter->lost_events == (unsigned long)-1)
4490                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4491                                          iter->cpu);
4492                 else
4493                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4494                                          iter->cpu, iter->lost_events);
4495                 if (trace_seq_has_overflowed(&iter->seq))
4496                         return TRACE_TYPE_PARTIAL_LINE;
4497         }
4498
4499         if (iter->trace && iter->trace->print_line) {
4500                 ret = iter->trace->print_line(iter);
4501                 if (ret != TRACE_TYPE_UNHANDLED)
4502                         return ret;
4503         }
4504
4505         if (iter->ent->type == TRACE_BPUTS &&
4506                         trace_flags & TRACE_ITER_PRINTK &&
4507                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4508                 return trace_print_bputs_msg_only(iter);
4509
4510         if (iter->ent->type == TRACE_BPRINT &&
4511                         trace_flags & TRACE_ITER_PRINTK &&
4512                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4513                 return trace_print_bprintk_msg_only(iter);
4514
4515         if (iter->ent->type == TRACE_PRINT &&
4516                         trace_flags & TRACE_ITER_PRINTK &&
4517                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4518                 return trace_print_printk_msg_only(iter);
4519
4520         if (trace_flags & TRACE_ITER_BIN)
4521                 return print_bin_fmt(iter);
4522
4523         if (trace_flags & TRACE_ITER_HEX)
4524                 return print_hex_fmt(iter);
4525
4526         if (trace_flags & TRACE_ITER_RAW)
4527                 return print_raw_fmt(iter);
4528
4529         return print_trace_fmt(iter);
4530 }
4531
4532 void trace_latency_header(struct seq_file *m)
4533 {
4534         struct trace_iterator *iter = m->private;
4535         struct trace_array *tr = iter->tr;
4536
4537         /* print nothing if the buffers are empty */
4538         if (trace_empty(iter))
4539                 return;
4540
4541         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4542                 print_trace_header(m, iter);
4543
4544         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4545                 print_lat_help_header(m);
4546 }
4547
4548 void trace_default_header(struct seq_file *m)
4549 {
4550         struct trace_iterator *iter = m->private;
4551         struct trace_array *tr = iter->tr;
4552         unsigned long trace_flags = tr->trace_flags;
4553
4554         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4555                 return;
4556
4557         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4558                 /* print nothing if the buffers are empty */
4559                 if (trace_empty(iter))
4560                         return;
4561                 print_trace_header(m, iter);
4562                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4563                         print_lat_help_header(m);
4564         } else {
4565                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4566                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4567                                 print_func_help_header_irq(iter->array_buffer,
4568                                                            m, trace_flags);
4569                         else
4570                                 print_func_help_header(iter->array_buffer, m,
4571                                                        trace_flags);
4572                 }
4573         }
4574 }
4575
4576 static void test_ftrace_alive(struct seq_file *m)
4577 {
4578         if (!ftrace_is_dead())
4579                 return;
4580         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4581                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4582 }
4583
4584 #ifdef CONFIG_TRACER_MAX_TRACE
4585 static void show_snapshot_main_help(struct seq_file *m)
4586 {
4587         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4588                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4589                     "#                      Takes a snapshot of the main buffer.\n"
4590                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4591                     "#                      (Doesn't have to be '2'; works with any number that\n"
4592                     "#                       is not a '0' or '1')\n");
4593 }
4594
4595 static void show_snapshot_percpu_help(struct seq_file *m)
4596 {
4597         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4598 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4599         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4600                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4601 #else
4602         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4603                     "#                     Must use main snapshot file to allocate.\n");
4604 #endif
4605         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4606                     "#                      (Doesn't have to be '2'; works with any number that\n"
4607                     "#                       is not a '0' or '1')\n");
4608 }
4609
4610 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4611 {
4612         if (iter->tr->allocated_snapshot)
4613                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4614         else
4615                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4616
4617         seq_puts(m, "# Snapshot commands:\n");
4618         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4619                 show_snapshot_main_help(m);
4620         else
4621                 show_snapshot_percpu_help(m);
4622 }
4623 #else
4624 /* Should never be called */
4625 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4626 #endif
4627
4628 static int s_show(struct seq_file *m, void *v)
4629 {
4630         struct trace_iterator *iter = v;
4631         int ret;
4632
4633         if (iter->ent == NULL) {
4634                 if (iter->tr) {
4635                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4636                         seq_puts(m, "#\n");
4637                         test_ftrace_alive(m);
4638                 }
4639                 if (iter->snapshot && trace_empty(iter))
4640                         print_snapshot_help(m, iter);
4641                 else if (iter->trace && iter->trace->print_header)
4642                         iter->trace->print_header(m);
4643                 else
4644                         trace_default_header(m);
4645
4646         } else if (iter->leftover) {
4647                 /*
4648                  * If we filled the seq_file buffer earlier, we
4649                  * want to just show it now.
4650                  */
4651                 ret = trace_print_seq(m, &iter->seq);
4652
4653                 /* ret should this time be zero, but you never know */
4654                 iter->leftover = ret;
4655
4656         } else {
4657                 print_trace_line(iter);
4658                 ret = trace_print_seq(m, &iter->seq);
4659                 /*
4660                  * If we overflow the seq_file buffer, then it will
4661                  * ask us for this data again at start up.
4662                  * Use that instead.
4663                  *  ret is 0 if seq_file write succeeded.
4664                  *        -1 otherwise.
4665                  */
4666                 iter->leftover = ret;
4667         }
4668
4669         return 0;
4670 }
4671
4672 /*
4673  * Should be used after trace_array_get(), trace_types_lock
4674  * ensures that i_cdev was already initialized.
4675  */
4676 static inline int tracing_get_cpu(struct inode *inode)
4677 {
4678         if (inode->i_cdev) /* See trace_create_cpu_file() */
4679                 return (long)inode->i_cdev - 1;
4680         return RING_BUFFER_ALL_CPUS;
4681 }
4682
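/*
 * seq_file operations for dumping the trace: the seq_file core calls
 * s_start() at the beginning of each read session, then s_show()/s_next()
 * for every record, and s_stop() at the end. s_start() and s_stop() also
 * take and drop the locks that keep the buffers stable while dumping.
 */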
4683 static const struct seq_operations tracer_seq_ops = {
4684         .start          = s_start,
4685         .next           = s_next,
4686         .stop           = s_stop,
4687         .show           = s_show,
4688 };
4689
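/*
 * Set up a trace_iterator for reading the "trace" file (or the snapshot
 * buffer when @snapshot is true): allocate per-CPU ring buffer iterators,
 * the temp and format scratch buffers and a private copy of the current
 * tracer, then optionally pause tracing while the buffer is dumped.
 */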
4690 static struct trace_iterator *
4691 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4692 {
4693         struct trace_array *tr = inode->i_private;
4694         struct trace_iterator *iter;
4695         int cpu;
4696
4697         if (tracing_disabled)
4698                 return ERR_PTR(-ENODEV);
4699
4700         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4701         if (!iter)
4702                 return ERR_PTR(-ENOMEM);
4703
4704         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4705                                     GFP_KERNEL);
4706         if (!iter->buffer_iter)
4707                 goto release;
4708
4709         /*
4710          * trace_find_next_entry() may need to save off iter->ent.
4711          * It will place it into the iter->temp buffer. As most
4712          * events are less than 128 bytes, allocate a buffer of that size.
4713          * If one is greater, then trace_find_next_entry() will
4714          * allocate a new buffer to adjust for the bigger iter->ent.
4715          * It's not critical if it fails to get allocated here.
4716          */
4717         iter->temp = kmalloc(128, GFP_KERNEL);
4718         if (iter->temp)
4719                 iter->temp_size = 128;
4720
4721         /*
4722          * trace_event_printf() may need to modify the given format
4723          * string to replace %p with %px so that it shows the real address
4724          * instead of a hashed value. However, that is only needed for
4725          * event tracing; other tracers may not need it. Defer the
4726          * allocation until it is needed.
4727          */
4728         iter->fmt = NULL;
4729         iter->fmt_size = 0;
4730
4731         /*
4732          * We make a copy of the current tracer to avoid concurrent
4733          * changes on it while we are reading.
4734          */
4735         mutex_lock(&trace_types_lock);
4736         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4737         if (!iter->trace)
4738                 goto fail;
4739
4740         *iter->trace = *tr->current_trace;
4741
4742         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4743                 goto fail;
4744
4745         iter->tr = tr;
4746
4747 #ifdef CONFIG_TRACER_MAX_TRACE
4748         /* Currently only the top directory has a snapshot */
4749         if (tr->current_trace->print_max || snapshot)
4750                 iter->array_buffer = &tr->max_buffer;
4751         else
4752 #endif
4753                 iter->array_buffer = &tr->array_buffer;
4754         iter->snapshot = snapshot;
4755         iter->pos = -1;
4756         iter->cpu_file = tracing_get_cpu(inode);
4757         mutex_init(&iter->mutex);
4758
4759         /* Notify the tracer early; before we stop tracing. */
4760         if (iter->trace->open)
4761                 iter->trace->open(iter);
4762
4763         /* Annotate start of buffers if we had overruns */
4764         if (ring_buffer_overruns(iter->array_buffer->buffer))
4765                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4766
4767         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4768         if (trace_clocks[tr->clock_id].in_ns)
4769                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4770
4771         /*
4772          * If pause-on-trace is enabled, then stop the trace while
4773          * dumping, unless this is the "snapshot" file
4774          */
4775         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4776                 tracing_stop_tr(tr);
4777
4778         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4779                 for_each_tracing_cpu(cpu) {
4780                         iter->buffer_iter[cpu] =
4781                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4782                                                          cpu, GFP_KERNEL);
4783                 }
4784                 ring_buffer_read_prepare_sync();
4785                 for_each_tracing_cpu(cpu) {
4786                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4787                         tracing_iter_reset(iter, cpu);
4788                 }
4789         } else {
4790                 cpu = iter->cpu_file;
4791                 iter->buffer_iter[cpu] =
4792                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4793                                                  cpu, GFP_KERNEL);
4794                 ring_buffer_read_prepare_sync();
4795                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4796                 tracing_iter_reset(iter, cpu);
4797         }
4798
4799         mutex_unlock(&trace_types_lock);
4800
4801         return iter;
4802
4803  fail:
4804         mutex_unlock(&trace_types_lock);
4805         kfree(iter->trace);
4806         kfree(iter->temp);
4807         kfree(iter->buffer_iter);
4808 release:
4809         seq_release_private(inode, file);
4810         return ERR_PTR(-ENOMEM);
4811 }
4812
4813 int tracing_open_generic(struct inode *inode, struct file *filp)
4814 {
4815         int ret;
4816
4817         ret = tracing_check_open_get_tr(NULL);
4818         if (ret)
4819                 return ret;
4820
4821         filp->private_data = inode->i_private;
4822         return 0;
4823 }
4824
4825 bool tracing_is_disabled(void)
4826 {
4827         return tracing_disabled;
4828 }
4829
4830 /*
4831  * Open and update trace_array ref count.
4832  * Must have the current trace_array passed to it.
4833  */
4834 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4835 {
4836         struct trace_array *tr = inode->i_private;
4837         int ret;
4838
4839         ret = tracing_check_open_get_tr(tr);
4840         if (ret)
4841                 return ret;
4842
4843         filp->private_data = inode->i_private;
4844
4845         return 0;
4846 }
4847
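/*
 * Release for the "trace" file. A write-only open has no iterator, so
 * just drop the trace_array reference. Otherwise tear down the per-CPU
 * ring buffer iterators, call the tracer's ->close(), re-enable tracing
 * if it was paused for the dump, and free everything that
 * __tracing_open() allocated.
 */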
4848 static int tracing_release(struct inode *inode, struct file *file)
4849 {
4850         struct trace_array *tr = inode->i_private;
4851         struct seq_file *m = file->private_data;
4852         struct trace_iterator *iter;
4853         int cpu;
4854
4855         if (!(file->f_mode & FMODE_READ)) {
4856                 trace_array_put(tr);
4857                 return 0;
4858         }
4859
4860         /* Writes do not use seq_file */
4861         iter = m->private;
4862         mutex_lock(&trace_types_lock);
4863
4864         for_each_tracing_cpu(cpu) {
4865                 if (iter->buffer_iter[cpu])
4866                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4867         }
4868
4869         if (iter->trace && iter->trace->close)
4870                 iter->trace->close(iter);
4871
4872         if (!iter->snapshot && tr->stop_count)
4873                 /* reenable tracing if it was previously enabled */
4874                 tracing_start_tr(tr);
4875
4876         __trace_array_put(tr);
4877
4878         mutex_unlock(&trace_types_lock);
4879
4880         mutex_destroy(&iter->mutex);
4881         free_cpumask_var(iter->started);
4882         kfree(iter->fmt);
4883         kfree(iter->temp);
4884         kfree(iter->trace);
4885         kfree(iter->buffer_iter);
4886         seq_release_private(inode, file);
4887
4888         return 0;
4889 }
4890
4891 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4892 {
4893         struct trace_array *tr = inode->i_private;
4894
4895         trace_array_put(tr);
4896         return 0;
4897 }
4898
4899 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4900 {
4901         struct trace_array *tr = inode->i_private;
4902
4903         trace_array_put(tr);
4904
4905         return single_release(inode, file);
4906 }
4907
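/*
 * Open handler for the "trace" file. An open for writing with O_TRUNC
 * clears the buffer (one CPU or all of them); an open for reading sets
 * up a full trace_iterator via __tracing_open().
 */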
4908 static int tracing_open(struct inode *inode, struct file *file)
4909 {
4910         struct trace_array *tr = inode->i_private;
4911         struct trace_iterator *iter;
4912         int ret;
4913
4914         ret = tracing_check_open_get_tr(tr);
4915         if (ret)
4916                 return ret;
4917
4918         /* If this file was open for write, then erase contents */
4919         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4920                 int cpu = tracing_get_cpu(inode);
4921                 struct array_buffer *trace_buf = &tr->array_buffer;
4922
4923 #ifdef CONFIG_TRACER_MAX_TRACE
4924                 if (tr->current_trace->print_max)
4925                         trace_buf = &tr->max_buffer;
4926 #endif
4927
4928                 if (cpu == RING_BUFFER_ALL_CPUS)
4929                         tracing_reset_online_cpus(trace_buf);
4930                 else
4931                         tracing_reset_cpu(trace_buf, cpu);
4932         }
4933
4934         if (file->f_mode & FMODE_READ) {
4935                 iter = __tracing_open(inode, file, false);
4936                 if (IS_ERR(iter))
4937                         ret = PTR_ERR(iter);
4938                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4939                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4940         }
4941
4942         if (ret < 0)
4943                 trace_array_put(tr);
4944
4945         return ret;
4946 }
4947
4948 /*
4949  * Some tracers are not suitable for instance buffers.
4950  * A tracer is always available for the global (top level) array,
4951  * and for an instance only if it explicitly states that it is.
4952  */
4953 static bool
4954 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4955 {
4956         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4957 }
4958
4959 /* Find the next tracer that this trace array may use */
4960 static struct tracer *
4961 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4962 {
4963         while (t && !trace_ok_for_array(t, tr))
4964                 t = t->next;
4965
4966         return t;
4967 }
4968
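/*
 * seq_file iteration callbacks for the "available_tracers" file. They
 * walk the global trace_types list under trace_types_lock, skipping
 * tracers that cannot be used by this trace array.
 */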
4969 static void *
4970 t_next(struct seq_file *m, void *v, loff_t *pos)
4971 {
4972         struct trace_array *tr = m->private;
4973         struct tracer *t = v;
4974
4975         (*pos)++;
4976
4977         if (t)
4978                 t = get_tracer_for_array(tr, t->next);
4979
4980         return t;
4981 }
4982
4983 static void *t_start(struct seq_file *m, loff_t *pos)
4984 {
4985         struct trace_array *tr = m->private;
4986         struct tracer *t;
4987         loff_t l = 0;
4988
4989         mutex_lock(&trace_types_lock);
4990
4991         t = get_tracer_for_array(tr, trace_types);
4992         for (; t && l < *pos; t = t_next(m, t, &l))
4993                         ;
4994
4995         return t;
4996 }
4997
4998 static void t_stop(struct seq_file *m, void *p)
4999 {
5000         mutex_unlock(&trace_types_lock);
5001 }
5002
5003 static int t_show(struct seq_file *m, void *v)
5004 {
5005         struct tracer *t = v;
5006
5007         if (!t)
5008                 return 0;
5009
5010         seq_puts(m, t->name);
5011         if (t->next)
5012                 seq_putc(m, ' ');
5013         else
5014                 seq_putc(m, '\n');
5015
5016         return 0;
5017 }
5018
5019 static const struct seq_operations show_traces_seq_ops = {
5020         .start          = t_start,
5021         .next           = t_next,
5022         .stop           = t_stop,
5023         .show           = t_show,
5024 };
5025
5026 static int show_traces_open(struct inode *inode, struct file *file)
5027 {
5028         struct trace_array *tr = inode->i_private;
5029         struct seq_file *m;
5030         int ret;
5031
5032         ret = tracing_check_open_get_tr(tr);
5033         if (ret)
5034                 return ret;
5035
5036         ret = seq_open(file, &show_traces_seq_ops);
5037         if (ret) {
5038                 trace_array_put(tr);
5039                 return ret;
5040         }
5041
5042         m = file->private_data;
5043         m->private = tr;
5044
5045         return 0;
5046 }
5047
5048 static int show_traces_release(struct inode *inode, struct file *file)
5049 {
5050         struct trace_array *tr = inode->i_private;
5051
5052         trace_array_put(tr);
5053         return seq_release(inode, file);
5054 }
5055
5056 static ssize_t
5057 tracing_write_stub(struct file *filp, const char __user *ubuf,
5058                    size_t count, loff_t *ppos)
5059 {
5060         return count;
5061 }
5062
5063 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5064 {
5065         int ret;
5066
5067         if (file->f_mode & FMODE_READ)
5068                 ret = seq_lseek(file, offset, whence);
5069         else
5070                 file->f_pos = ret = 0;
5071
5072         return ret;
5073 }
5074
5075 static const struct file_operations tracing_fops = {
5076         .open           = tracing_open,
5077         .read           = seq_read,
5078         .write          = tracing_write_stub,
5079         .llseek         = tracing_lseek,
5080         .release        = tracing_release,
5081 };
5082
5083 static const struct file_operations show_traces_fops = {
5084         .open           = show_traces_open,
5085         .read           = seq_read,
5086         .llseek         = seq_lseek,
5087         .release        = show_traces_release,
5088 };
5089
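/*
 * Read handler for "tracing_cpumask": format the trace array's CPU mask
 * as a hex bitmap string ("%*pb") and copy it to user space.
 */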
5090 static ssize_t
5091 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5092                      size_t count, loff_t *ppos)
5093 {
5094         struct trace_array *tr = file_inode(filp)->i_private;
5095         char *mask_str;
5096         int len;
5097
5098         len = snprintf(NULL, 0, "%*pb\n",
5099                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5100         mask_str = kmalloc(len, GFP_KERNEL);
5101         if (!mask_str)
5102                 return -ENOMEM;
5103
5104         len = snprintf(mask_str, len, "%*pb\n",
5105                        cpumask_pr_args(tr->tracing_cpumask));
5106         if (len >= count) {
5107                 count = -EINVAL;
5108                 goto out_err;
5109         }
5110         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5111
5112 out_err:
5113         kfree(mask_str);
5114
5115         return count;
5116 }
5117
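/*
 * Apply a new tracing cpumask to @tr. For each CPU whose bit changes,
 * the per-CPU "disabled" count is adjusted and ring buffer recording
 * for that CPU is enabled or disabled accordingly, all under
 * tr->max_lock with interrupts off, before the new mask is stored.
 */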
5118 int tracing_set_cpumask(struct trace_array *tr,
5119                         cpumask_var_t tracing_cpumask_new)
5120 {
5121         int cpu;
5122
5123         if (!tr)
5124                 return -EINVAL;
5125
5126         local_irq_disable();
5127         arch_spin_lock(&tr->max_lock);
5128         for_each_tracing_cpu(cpu) {
5129                 /*
5130                  * Increase/decrease the disabled counter if we are
5131                  * about to flip a bit in the cpumask:
5132                  */
5133                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5134                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5135                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5136                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5137                 }
5138                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5141                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5142                 }
5143         }
5144         arch_spin_unlock(&tr->max_lock);
5145         local_irq_enable();
5146
5147         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5148
5149         return 0;
5150 }
5151
5152 static ssize_t
5153 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5154                       size_t count, loff_t *ppos)
5155 {
5156         struct trace_array *tr = file_inode(filp)->i_private;
5157         cpumask_var_t tracing_cpumask_new;
5158         int err;
5159
5160         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5161                 return -ENOMEM;
5162
5163         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5164         if (err)
5165                 goto err_free;
5166
5167         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5168         if (err)
5169                 goto err_free;
5170
5171         free_cpumask_var(tracing_cpumask_new);
5172
5173         return count;
5174
5175 err_free:
5176         free_cpumask_var(tracing_cpumask_new);
5177
5178         return err;
5179 }
5180
5181 static const struct file_operations tracing_cpumask_fops = {
5182         .open           = tracing_open_generic_tr,
5183         .read           = tracing_cpumask_read,
5184         .write          = tracing_cpumask_write,
5185         .release        = tracing_release_generic_tr,
5186         .llseek         = generic_file_llseek,
5187 };
5188
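/*
 * Show handler for "trace_options": list every core trace flag and
 * every option of the current tracer, prefixing disabled ones with
 * "no".
 */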
5189 static int tracing_trace_options_show(struct seq_file *m, void *v)
5190 {
5191         struct tracer_opt *trace_opts;
5192         struct trace_array *tr = m->private;
5193         u32 tracer_flags;
5194         int i;
5195
5196         mutex_lock(&trace_types_lock);
5197         tracer_flags = tr->current_trace->flags->val;
5198         trace_opts = tr->current_trace->flags->opts;
5199
5200         for (i = 0; trace_options[i]; i++) {
5201                 if (tr->trace_flags & (1 << i))
5202                         seq_printf(m, "%s\n", trace_options[i]);
5203                 else
5204                         seq_printf(m, "no%s\n", trace_options[i]);
5205         }
5206
5207         for (i = 0; trace_opts[i].name; i++) {
5208                 if (tracer_flags & trace_opts[i].bit)
5209                         seq_printf(m, "%s\n", trace_opts[i].name);
5210                 else
5211                         seq_printf(m, "no%s\n", trace_opts[i].name);
5212         }
5213         mutex_unlock(&trace_types_lock);
5214
5215         return 0;
5216 }
5217
5218 static int __set_tracer_option(struct trace_array *tr,
5219                                struct tracer_flags *tracer_flags,
5220                                struct tracer_opt *opts, int neg)
5221 {
5222         struct tracer *trace = tracer_flags->trace;
5223         int ret;
5224
5225         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5226         if (ret)
5227                 return ret;
5228
5229         if (neg)
5230                 tracer_flags->val &= ~opts->bit;
5231         else
5232                 tracer_flags->val |= opts->bit;
5233         return 0;
5234 }
5235
5236 /* Try to assign a tracer specific option */
5237 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5238 {
5239         struct tracer *trace = tr->current_trace;
5240         struct tracer_flags *tracer_flags = trace->flags;
5241         struct tracer_opt *opts = NULL;
5242         int i;
5243
5244         for (i = 0; tracer_flags->opts[i].name; i++) {
5245                 opts = &tracer_flags->opts[i];
5246
5247                 if (strcmp(cmp, opts->name) == 0)
5248                         return __set_tracer_option(tr, trace->flags, opts, neg);
5249         }
5250
5251         return -EINVAL;
5252 }
5253
5254 /* Some tracers require overwrite to stay enabled */
5255 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5256 {
5257         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5258                 return -1;
5259
5260         return 0;
5261 }
5262
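/*
 * Set or clear a single TRACE_ITER_* flag on @tr. The current tracer
 * may veto the change; otherwise the flag is updated and any side
 * effects are applied (cmdline/tgid recording, fork following, ring
 * buffer overwrite mode, trace_printk enablement).
 */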
5263 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5264 {
5265         int *map;
5266
5267         if ((mask == TRACE_ITER_RECORD_TGID) ||
5268             (mask == TRACE_ITER_RECORD_CMD))
5269                 lockdep_assert_held(&event_mutex);
5270
5271         /* do nothing if flag is already set */
5272         if (!!(tr->trace_flags & mask) == !!enabled)
5273                 return 0;
5274
5275         /* Give the tracer a chance to approve the change */
5276         if (tr->current_trace->flag_changed)
5277                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5278                         return -EINVAL;
5279
5280         if (enabled)
5281                 tr->trace_flags |= mask;
5282         else
5283                 tr->trace_flags &= ~mask;
5284
5285         if (mask == TRACE_ITER_RECORD_CMD)
5286                 trace_event_enable_cmd_record(enabled);
5287
5288         if (mask == TRACE_ITER_RECORD_TGID) {
5289                 if (!tgid_map) {
5290                         tgid_map_max = pid_max;
5291                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5292                                        GFP_KERNEL);
5293
5294                         /*
5295                          * Pairs with smp_load_acquire() in
5296                          * trace_find_tgid_ptr() to ensure that if it observes
5297                          * the tgid_map we just allocated then it also observes
5298                          * the corresponding tgid_map_max value.
5299                          */
5300                         smp_store_release(&tgid_map, map);
5301                 }
5302                 if (!tgid_map) {
5303                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5304                         return -ENOMEM;
5305                 }
5306
5307                 trace_event_enable_tgid_record(enabled);
5308         }
5309
5310         if (mask == TRACE_ITER_EVENT_FORK)
5311                 trace_event_follow_fork(tr, enabled);
5312
5313         if (mask == TRACE_ITER_FUNC_FORK)
5314                 ftrace_pid_follow_fork(tr, enabled);
5315
5316         if (mask == TRACE_ITER_OVERWRITE) {
5317                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5318 #ifdef CONFIG_TRACER_MAX_TRACE
5319                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5320 #endif
5321         }
5322
5323         if (mask == TRACE_ITER_PRINTK) {
5324                 trace_printk_start_stop_comm(enabled);
5325                 trace_printk_control(enabled);
5326         }
5327
5328         return 0;
5329 }
5330
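/*
 * Parse a single option name (optionally prefixed with "no" to negate
 * it) and apply it, either as a core trace flag or as a tracer-specific
 * option.
 */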
5331 int trace_set_options(struct trace_array *tr, char *option)
5332 {
5333         char *cmp;
5334         int neg = 0;
5335         int ret;
5336         size_t orig_len = strlen(option);
5337         int len;
5338
5339         cmp = strstrip(option);
5340
5341         len = str_has_prefix(cmp, "no");
5342         if (len)
5343                 neg = 1;
5344
5345         cmp += len;
5346
5347         mutex_lock(&event_mutex);
5348         mutex_lock(&trace_types_lock);
5349
5350         ret = match_string(trace_options, -1, cmp);
5351         /* If no option could be set, test the specific tracer options */
5352         if (ret < 0)
5353                 ret = set_tracer_option(tr, cmp, neg);
5354         else
5355                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5356
5357         mutex_unlock(&trace_types_lock);
5358         mutex_unlock(&event_mutex);
5359
5360         /*
5361          * If the first trailing whitespace is replaced with '\0' by strstrip,
5362          * turn it back into a space.
5363          */
5364         if (orig_len > strlen(option))
5365                 option[strlen(option)] = ' ';
5366
5367         return ret;
5368 }
5369
5370 static void __init apply_trace_boot_options(void)
5371 {
5372         char *buf = trace_boot_options_buf;
5373         char *option;
5374
5375         while (true) {
5376                 option = strsep(&buf, ",");
5377
5378                 if (!option)
5379                         break;
5380
5381                 if (*option)
5382                         trace_set_options(&global_trace, option);
5383
5384                 /* Put back the comma to allow this to be called again */
5385                 if (buf)
5386                         *(buf - 1) = ',';
5387         }
5388 }
5389
5390 static ssize_t
5391 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5392                         size_t cnt, loff_t *ppos)
5393 {
5394         struct seq_file *m = filp->private_data;
5395         struct trace_array *tr = m->private;
5396         char buf[64];
5397         int ret;
5398
5399         if (cnt >= sizeof(buf))
5400                 return -EINVAL;
5401
5402         if (copy_from_user(buf, ubuf, cnt))
5403                 return -EFAULT;
5404
5405         buf[cnt] = 0;
5406
5407         ret = trace_set_options(tr, buf);
5408         if (ret < 0)
5409                 return ret;
5410
5411         *ppos += cnt;
5412
5413         return cnt;
5414 }
5415
5416 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5417 {
5418         struct trace_array *tr = inode->i_private;
5419         int ret;
5420
5421         ret = tracing_check_open_get_tr(tr);
5422         if (ret)
5423                 return ret;
5424
5425         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5426         if (ret < 0)
5427                 trace_array_put(tr);
5428
5429         return ret;
5430 }
5431
5432 static const struct file_operations tracing_iter_fops = {
5433         .open           = tracing_trace_options_open,
5434         .read           = seq_read,
5435         .llseek         = seq_lseek,
5436         .release        = tracing_single_release_tr,
5437         .write          = tracing_trace_options_write,
5438 };
5439
5440 static const char readme_msg[] =
5441         "tracing mini-HOWTO:\n\n"
5442         "# echo 0 > tracing_on : quick way to disable tracing\n"
5443         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5444         " Important files:\n"
5445         "  trace\t\t\t- The static contents of the buffer\n"
5446         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5447         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5448         "  current_tracer\t- function and latency tracers\n"
5449         "  available_tracers\t- list of configured tracers for current_tracer\n"
5450         "  error_log\t- error log for failed commands (that support it)\n"
5451         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5452         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5453         "  trace_clock\t\t- change the clock used to order events\n"
5454         "       local:   Per cpu clock but may not be synced across CPUs\n"
5455         "      global:   Synced across CPUs but slows tracing down.\n"
5456         "     counter:   Not a clock, but just an increment\n"
5457         "      uptime:   Jiffy counter from time of boot\n"
5458         "        perf:   Same clock that perf events use\n"
5459 #ifdef CONFIG_X86_64
5460         "     x86-tsc:   TSC cycle counter\n"
5461 #endif
5462         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5463         "       delta:   Delta difference against a buffer-wide timestamp\n"
5464         "    absolute:   Absolute (standalone) timestamp\n"
5465         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5466         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
5467         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5468         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5469         "\t\t\t  Remove sub-buffer with rmdir\n"
5470         "  trace_options\t\t- Set format or modify how tracing happens\n"
5471         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5472         "\t\t\t  option name\n"
5473         "  saved_cmdlines_size\t- echo the number of comm-pid entries to save in here\n"
5474 #ifdef CONFIG_DYNAMIC_FTRACE
5475         "\n  available_filter_functions - list of functions that can be filtered on\n"
5476         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5477         "\t\t\t  functions\n"
5478         "\t     accepts: func_full_name or glob-matching-pattern\n"
5479         "\t     modules: Can select a group via module\n"
5480         "\t      Format: :mod:<module-name>\n"
5481         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5482         "\t    triggers: a command to perform when function is hit\n"
5483         "\t      Format: <function>:<trigger>[:count]\n"
5484         "\t     trigger: traceon, traceoff\n"
5485         "\t\t      enable_event:<system>:<event>\n"
5486         "\t\t      disable_event:<system>:<event>\n"
5487 #ifdef CONFIG_STACKTRACE
5488         "\t\t      stacktrace\n"
5489 #endif
5490 #ifdef CONFIG_TRACER_SNAPSHOT
5491         "\t\t      snapshot\n"
5492 #endif
5493         "\t\t      dump\n"
5494         "\t\t      cpudump\n"
5495         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5496         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5497         "\t     The first one will disable tracing every time do_fault is hit\n"
5498         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5499         "\t       The first time do_trap is hit and it disables tracing, the\n"
5500         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5501         "\t       the counter will not decrement. It only decrements when the\n"
5502         "\t       trigger did work\n"
5503         "\t     To remove trigger without count:\n"
5504         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5505         "\t     To remove trigger with a count:\n"
5506         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5507         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5508         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5509         "\t    modules: Can select a group via module command :mod:\n"
5510         "\t    Does not accept triggers\n"
5511 #endif /* CONFIG_DYNAMIC_FTRACE */
5512 #ifdef CONFIG_FUNCTION_TRACER
5513         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5514         "\t\t    (function)\n"
5515         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5516         "\t\t    (function)\n"
5517 #endif
5518 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5519         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5520         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5521         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5522 #endif
5523 #ifdef CONFIG_TRACER_SNAPSHOT
5524         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5525         "\t\t\t  snapshot buffer. Read the contents for more\n"
5526         "\t\t\t  information\n"
5527 #endif
5528 #ifdef CONFIG_STACK_TRACER
5529         "  stack_trace\t\t- Shows the max stack trace when active\n"
5530         "  stack_max_size\t- Shows current max stack size that was traced\n"
5531         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5532         "\t\t\t  new trace)\n"
5533 #ifdef CONFIG_DYNAMIC_FTRACE
5534         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5535         "\t\t\t  traces\n"
5536 #endif
5537 #endif /* CONFIG_STACK_TRACER */
5538 #ifdef CONFIG_DYNAMIC_EVENTS
5539         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5540         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5541 #endif
5542 #ifdef CONFIG_KPROBE_EVENTS
5543         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5544         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5545 #endif
5546 #ifdef CONFIG_UPROBE_EVENTS
5547         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5548         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5549 #endif
5550 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5551         "\t  accepts: event-definitions (one definition per line)\n"
5552         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5553         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5556 #endif
5557         "\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5558         "\t           -:[<group>/]<event>\n"
5559 #ifdef CONFIG_KPROBE_EVENTS
5560         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5561   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5562 #endif
5563 #ifdef CONFIG_UPROBE_EVENTS
5564   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5565 #endif
5566         "\t     args: <name>=fetcharg[:type]\n"
5567         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5568 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5569         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5570 #else
5571         "\t           $stack<index>, $stack, $retval, $comm,\n"
5572 #endif
5573         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5574         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5575         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5576         "\t           <type>\\[<array-size>\\]\n"
5577 #ifdef CONFIG_HIST_TRIGGERS
5578         "\t    field: <stype> <name>;\n"
5579         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5580         "\t           [unsigned] char/int/long\n"
5581 #endif
5582         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5583         "\t            of the <attached-group>/<attached-event>.\n"
5584 #endif
5585         "  events/\t\t- Directory containing all trace event subsystems:\n"
5586         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5587         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5588         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5589         "\t\t\t  events\n"
5590         "      filter\t\t- If set, only events passing filter are traced\n"
5591         "  events/<system>/<event>/\t- Directory containing control files for\n"
5592         "\t\t\t  <event>:\n"
5593         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5594         "      filter\t\t- If set, only events passing filter are traced\n"
5595         "      trigger\t\t- If set, a command to perform when event is hit\n"
5596         "\t    Format: <trigger>[:count][if <filter>]\n"
5597         "\t   trigger: traceon, traceoff\n"
5598         "\t            enable_event:<system>:<event>\n"
5599         "\t            disable_event:<system>:<event>\n"
5600 #ifdef CONFIG_HIST_TRIGGERS
5601         "\t            enable_hist:<system>:<event>\n"
5602         "\t            disable_hist:<system>:<event>\n"
5603 #endif
5604 #ifdef CONFIG_STACKTRACE
5605         "\t\t    stacktrace\n"
5606 #endif
5607 #ifdef CONFIG_TRACER_SNAPSHOT
5608         "\t\t    snapshot\n"
5609 #endif
5610 #ifdef CONFIG_HIST_TRIGGERS
5611         "\t\t    hist (see below)\n"
5612 #endif
5613         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5614         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5615         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5616         "\t                  events/block/block_unplug/trigger\n"
5617         "\t   The first disables tracing every time block_unplug is hit.\n"
5618         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5619         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5620         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5621         "\t   Like function triggers, the counter is only decremented if it\n"
5622         "\t    enabled or disabled tracing.\n"
5623         "\t   To remove a trigger without a count:\n"
5624         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5625         "\t   To remove a trigger with a count:\n"
5626         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5627         "\t   Filters can be ignored when removing a trigger.\n"
5628 #ifdef CONFIG_HIST_TRIGGERS
5629         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5630         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5631         "\t            [:values=<field1[,field2,...]>]\n"
5632         "\t            [:sort=<field1[,field2,...]>]\n"
5633         "\t            [:size=#entries]\n"
5634         "\t            [:pause][:continue][:clear]\n"
5635         "\t            [:name=histname1]\n"
5636         "\t            [:<handler>.<action>]\n"
5637         "\t            [if <filter>]\n\n"
5638         "\t    Note, special fields can be used as well:\n"
5639         "\t            common_timestamp - to record current timestamp\n"
5640         "\t            common_cpu - to record the CPU the event happened on\n"
5641         "\n"
5642         "\t    When a matching event is hit, an entry is added to a hash\n"
5643         "\t    table using the key(s) and value(s) named, and the value of a\n"
5644         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5645         "\t    correspond to fields in the event's format description.  Keys\n"
5646         "\t    can be any field, or the special string 'stacktrace'.\n"
5647         "\t    Compound keys consisting of up to two fields can be specified\n"
5648         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5649         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5650         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5651         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5652         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5653         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5654         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5655         "\t    its histogram data will be shared with other triggers of the\n"
5656         "\t    same name, and trigger hits will update this common data.\n\n"
5657         "\t    Reading the 'hist' file for the event will dump the hash\n"
5658         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5659         "\t    triggers attached to an event, there will be a table for each\n"
5660         "\t    trigger in the output.  The table displayed for a named\n"
5661         "\t    trigger will be the same as any other instance having the\n"
5662         "\t    same name.  The default format used to display a given field\n"
5663         "\t    can be modified by appending any of the following modifiers\n"
5664         "\t    to the field name, as applicable:\n\n"
5665         "\t            .hex        display a number as a hex value\n"
5666         "\t            .sym        display an address as a symbol\n"
5667         "\t            .sym-offset display an address as a symbol and offset\n"
5668         "\t            .execname   display a common_pid as a program name\n"
5669         "\t            .syscall    display a syscall id as a syscall name\n"
5670         "\t            .log2       display log2 value rather than raw number\n"
5671         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5672         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5673         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5674         "\t    trigger or to start a hist trigger but not log any events\n"
5675         "\t    until told to do so.  'continue' can be used to start or\n"
5676         "\t    restart a paused hist trigger.\n\n"
5677         "\t    The 'clear' parameter will clear the contents of a running\n"
5678         "\t    hist trigger and leave its current paused/active state\n"
5679         "\t    unchanged.\n\n"
5680         "\t    The enable_hist and disable_hist triggers can be used to\n"
5681         "\t    have one event conditionally start and stop another event's\n"
5682         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5683         "\t    the enable_event and disable_event triggers.\n\n"
5684         "\t    Hist trigger handlers and actions are executed whenever a\n"
5685         "\t    histogram entry is added or updated.  They take the form:\n\n"
5686         "\t        <handler>.<action>\n\n"
5687         "\t    The available handlers are:\n\n"
5688         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5689         "\t        onmax(var)               - invoke if var exceeds current max\n"
5690         "\t        onchange(var)            - invoke action if var changes\n\n"
5691         "\t    The available actions are:\n\n"
5692         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5693         "\t        save(field,...)                      - save current event fields\n"
5694 #ifdef CONFIG_TRACER_SNAPSHOT
5695         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5696 #endif
5697 #ifdef CONFIG_SYNTH_EVENTS
5698         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5699         "\t  Write into this file to define/undefine new synthetic events.\n"
5700         "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5701 #endif
5702 #endif
5703 ;
5704
5705 static ssize_t
5706 tracing_readme_read(struct file *filp, char __user *ubuf,
5707                        size_t cnt, loff_t *ppos)
5708 {
5709         return simple_read_from_buffer(ubuf, cnt, ppos,
5710                                         readme_msg, strlen(readme_msg));
5711 }
5712
5713 static const struct file_operations tracing_readme_fops = {
5714         .open           = tracing_open_generic,
5715         .read           = tracing_readme_read,
5716         .llseek         = generic_file_llseek,
5717 };
5718
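/*
 * seq_file callbacks for "saved_tgids": walk the pid -> tgid map and
 * print one "pid tgid" pair for every recorded entry.
 */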
5719 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5720 {
5721         int pid = ++(*pos);
5722
5723         return trace_find_tgid_ptr(pid);
5724 }
5725
5726 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5727 {
5728         int pid = *pos;
5729
5730         return trace_find_tgid_ptr(pid);
5731 }
5732
5733 static void saved_tgids_stop(struct seq_file *m, void *v)
5734 {
5735 }
5736
5737 static int saved_tgids_show(struct seq_file *m, void *v)
5738 {
5739         int *entry = (int *)v;
5740         int pid = entry - tgid_map;
5741         int tgid = *entry;
5742
5743         if (tgid == 0)
5744                 return SEQ_SKIP;
5745
5746         seq_printf(m, "%d %d\n", pid, tgid);
5747         return 0;
5748 }
5749
5750 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5751         .start          = saved_tgids_start,
5752         .stop           = saved_tgids_stop,
5753         .next           = saved_tgids_next,
5754         .show           = saved_tgids_show,
5755 };
5756
5757 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5758 {
5759         int ret;
5760
5761         ret = tracing_check_open_get_tr(NULL);
5762         if (ret)
5763                 return ret;
5764
5765         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5766 }
5767
5768
5769 static const struct file_operations tracing_saved_tgids_fops = {
5770         .open           = tracing_saved_tgids_open,
5771         .read           = seq_read,
5772         .llseek         = seq_lseek,
5773         .release        = seq_release,
5774 };
5775
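/*
 * seq_file callbacks for "saved_cmdlines": iterate the cached pid ->
 * comm entries under trace_cmdline_lock and print one "pid comm" pair
 * per line.
 */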
5776 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5777 {
5778         unsigned int *ptr = v;
5779
5780         if (*pos || m->count)
5781                 ptr++;
5782
5783         (*pos)++;
5784
5785         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5786              ptr++) {
5787                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5788                         continue;
5789
5790                 return ptr;
5791         }
5792
5793         return NULL;
5794 }
5795
5796 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5797 {
5798         void *v;
5799         loff_t l = 0;
5800
5801         preempt_disable();
5802         arch_spin_lock(&trace_cmdline_lock);
5803
5804         v = &savedcmd->map_cmdline_to_pid[0];
5805         while (l <= *pos) {
5806                 v = saved_cmdlines_next(m, v, &l);
5807                 if (!v)
5808                         return NULL;
5809         }
5810
5811         return v;
5812 }
5813
5814 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5815 {
5816         arch_spin_unlock(&trace_cmdline_lock);
5817         preempt_enable();
5818 }
5819
5820 static int saved_cmdlines_show(struct seq_file *m, void *v)
5821 {
5822         char buf[TASK_COMM_LEN];
5823         unsigned int *pid = v;
5824
5825         __trace_find_cmdline(*pid, buf);
5826         seq_printf(m, "%d %s\n", *pid, buf);
5827         return 0;
5828 }
5829
5830 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5831         .start          = saved_cmdlines_start,
5832         .next           = saved_cmdlines_next,
5833         .stop           = saved_cmdlines_stop,
5834         .show           = saved_cmdlines_show,
5835 };
5836
5837 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5838 {
5839         int ret;
5840
5841         ret = tracing_check_open_get_tr(NULL);
5842         if (ret)
5843                 return ret;
5844
5845         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5846 }
5847
5848 static const struct file_operations tracing_saved_cmdlines_fops = {
5849         .open           = tracing_saved_cmdlines_open,
5850         .read           = seq_read,
5851         .llseek         = seq_lseek,
5852         .release        = seq_release,
5853 };
5854
5855 static ssize_t
5856 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5857                                  size_t cnt, loff_t *ppos)
5858 {
5859         char buf[64];
5860         int r;
5861
5862         arch_spin_lock(&trace_cmdline_lock);
5863         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5864         arch_spin_unlock(&trace_cmdline_lock);
5865
5866         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5867 }
5868
5869 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5870 {
5871         kfree(s->saved_cmdlines);
5872         kfree(s->map_cmdline_to_pid);
5873         kfree(s);
5874 }
5875
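/*
 * Replace the saved_cmdlines buffer with a newly allocated one that
 * holds @val entries; the old buffer is freed after the swap, outside
 * of trace_cmdline_lock.
 */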
5876 static int tracing_resize_saved_cmdlines(unsigned int val)
5877 {
5878         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5879
5880         s = kmalloc(sizeof(*s), GFP_KERNEL);
5881         if (!s)
5882                 return -ENOMEM;
5883
5884         if (allocate_cmdlines_buffer(val, s) < 0) {
5885                 kfree(s);
5886                 return -ENOMEM;
5887         }
5888
5889         arch_spin_lock(&trace_cmdline_lock);
5890         savedcmd_temp = savedcmd;
5891         savedcmd = s;
5892         arch_spin_unlock(&trace_cmdline_lock);
5893         free_saved_cmdlines_buffer(savedcmd_temp);
5894
5895         return 0;
5896 }
5897
5898 static ssize_t
5899 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5900                                   size_t cnt, loff_t *ppos)
5901 {
5902         unsigned long val;
5903         int ret;
5904
5905         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5906         if (ret)
5907                 return ret;
5908
5909         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5910         if (!val || val > PID_MAX_DEFAULT)
5911                 return -EINVAL;
5912
5913         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5914         if (ret < 0)
5915                 return ret;
5916
5917         *ppos += cnt;
5918
5919         return cnt;
5920 }
5921
5922 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5923         .open           = tracing_open_generic,
5924         .read           = tracing_saved_cmdlines_size_read,
5925         .write          = tracing_saved_cmdlines_size_write,
5926 };
5927
5928 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
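/*
 * seq_file callbacks for the "eval_map" file: walk the chained
 * trace_eval_map_item arrays (head item, map entries, tail link) and
 * print "name value (system)" for each map entry.
 */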
5929 static union trace_eval_map_item *
5930 update_eval_map(union trace_eval_map_item *ptr)
5931 {
5932         if (!ptr->map.eval_string) {
5933                 if (ptr->tail.next) {
5934                         ptr = ptr->tail.next;
5935                         /* Set ptr to the next real item (skip head) */
5936                         ptr++;
5937                 } else
5938                         return NULL;
5939         }
5940         return ptr;
5941 }
5942
5943 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5944 {
5945         union trace_eval_map_item *ptr = v;
5946
5947         /*
5948          * Paranoid! If ptr points to end, we don't want to increment past it.
5949          * This really should never happen.
5950          */
5951         (*pos)++;
5952         ptr = update_eval_map(ptr);
5953         if (WARN_ON_ONCE(!ptr))
5954                 return NULL;
5955
5956         ptr++;
5957         ptr = update_eval_map(ptr);
5958
5959         return ptr;
5960 }
5961
5962 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5963 {
5964         union trace_eval_map_item *v;
5965         loff_t l = 0;
5966
5967         mutex_lock(&trace_eval_mutex);
5968
5969         v = trace_eval_maps;
5970         if (v)
5971                 v++;
5972
5973         while (v && l < *pos) {
5974                 v = eval_map_next(m, v, &l);
5975         }
5976
5977         return v;
5978 }
5979
5980 static void eval_map_stop(struct seq_file *m, void *v)
5981 {
5982         mutex_unlock(&trace_eval_mutex);
5983 }
5984
5985 static int eval_map_show(struct seq_file *m, void *v)
5986 {
5987         union trace_eval_map_item *ptr = v;
5988
5989         seq_printf(m, "%s %ld (%s)\n",
5990                    ptr->map.eval_string, ptr->map.eval_value,
5991                    ptr->map.system);
5992
5993         return 0;
5994 }
5995
5996 static const struct seq_operations tracing_eval_map_seq_ops = {
5997         .start          = eval_map_start,
5998         .next           = eval_map_next,
5999         .stop           = eval_map_stop,
6000         .show           = eval_map_show,
6001 };
6002
6003 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6004 {
6005         int ret;
6006
6007         ret = tracing_check_open_get_tr(NULL);
6008         if (ret)
6009                 return ret;
6010
6011         return seq_open(filp, &tracing_eval_map_seq_ops);
6012 }
6013
6014 static const struct file_operations tracing_eval_map_fops = {
6015         .open           = tracing_eval_map_open,
6016         .read           = seq_read,
6017         .llseek         = seq_lseek,
6018         .release        = seq_release,
6019 };
6020
6021 static inline union trace_eval_map_item *
6022 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6023 {
6024         /* Return tail of array given the head */
6025         return ptr + ptr->head.length + 1;
6026 }
6027
6028 static void
6029 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6030                            int len)
6031 {
6032         struct trace_eval_map **stop;
6033         struct trace_eval_map **map;
6034         union trace_eval_map_item *map_array;
6035         union trace_eval_map_item *ptr;
6036
6037         stop = start + len;
6038
6039         /*
6040          * The trace_eval_maps contains the map plus a head and tail item,
6041          * where the head holds the module and length of array, and the
6042          * tail holds a pointer to the next list.
6043          */
6044         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6045         if (!map_array) {
6046                 pr_warn("Unable to allocate trace eval mapping\n");
6047                 return;
6048         }
6049
6050         mutex_lock(&trace_eval_mutex);
6051
6052         if (!trace_eval_maps)
6053                 trace_eval_maps = map_array;
6054         else {
6055                 ptr = trace_eval_maps;
6056                 for (;;) {
6057                         ptr = trace_eval_jmp_to_tail(ptr);
6058                         if (!ptr->tail.next)
6059                                 break;
6060                         ptr = ptr->tail.next;
6061
6062                 }
6063                 ptr->tail.next = map_array;
6064         }
6065         map_array->head.mod = mod;
6066         map_array->head.length = len;
6067         map_array++;
6068
6069         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6070                 map_array->map = **map;
6071                 map_array++;
6072         }
6073         memset(map_array, 0, sizeof(*map_array));
6074
6075         mutex_unlock(&trace_eval_mutex);
6076 }
6077
6078 static void trace_create_eval_file(struct dentry *d_tracer)
6079 {
6080         trace_create_file("eval_map", 0444, d_tracer,
6081                           NULL, &tracing_eval_map_fops);
6082 }
6083
6084 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6085 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6086 static inline void trace_insert_eval_map_file(struct module *mod,
6087                               struct trace_eval_map **start, int len) { }
6088 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6089
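/*
 * Update the eval (enum/sizeof) string maps for @mod's trace events
 * and, when CONFIG_TRACE_EVAL_MAP_FILE is enabled, also record them
 * for the "eval_map" file.
 */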
6090 static void trace_insert_eval_map(struct module *mod,
6091                                   struct trace_eval_map **start, int len)
6092 {
6093         struct trace_eval_map **map;
6094
6095         if (len <= 0)
6096                 return;
6097
6098         map = start;
6099
6100         trace_event_eval_update(map, len);
6101
6102         trace_insert_eval_map_file(mod, start, len);
6103 }
6104
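/* Read handler for "current_tracer": report the active tracer's name. */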
6105 static ssize_t
6106 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6107                        size_t cnt, loff_t *ppos)
6108 {
6109         struct trace_array *tr = filp->private_data;
6110         char buf[MAX_TRACER_SIZE+2];
6111         int r;
6112
6113         mutex_lock(&trace_types_lock);
6114         r = sprintf(buf, "%s\n", tr->current_trace->name);
6115         mutex_unlock(&trace_types_lock);
6116
6117         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118 }
6119
6120 int tracer_init(struct tracer *t, struct trace_array *tr)
6121 {
6122         tracing_reset_online_cpus(&tr->array_buffer);
6123         return t->init(tr);
6124 }
6125
6126 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6127 {
6128         int cpu;
6129
6130         for_each_tracing_cpu(cpu)
6131                 per_cpu_ptr(buf->data, cpu)->entries = val;
6132 }
6133
6134 #ifdef CONFIG_TRACER_MAX_TRACE
6135 /* resize @tr's buffer to the size of @size_tr's entries */
6136 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6137                                         struct array_buffer *size_buf, int cpu_id)
6138 {
6139         int cpu, ret = 0;
6140
6141         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6142                 for_each_tracing_cpu(cpu) {
6143                         ret = ring_buffer_resize(trace_buf->buffer,
6144                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6145                         if (ret < 0)
6146                                 break;
6147                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6148                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6149                 }
6150         } else {
6151                 ret = ring_buffer_resize(trace_buf->buffer,
6152                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6153                 if (ret == 0)
6154                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6155                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6156         }
6157
6158         return ret;
6159 }
6160 #endif /* CONFIG_TRACER_MAX_TRACE */
6161
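/*
 * Resize the main ring buffer for @cpu (or all CPUs when @cpu is
 * RING_BUFFER_ALL_CPUS) to @size bytes. For the global array with a
 * tracer that uses the max buffer, the max/snapshot buffer is resized
 * as well, and a failure there rolls the main buffer back.
 */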
6162 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6163                                         unsigned long size, int cpu)
6164 {
6165         int ret;
6166
6167         /*
6168          * If kernel or user changes the size of the ring buffer
6169          * we use the size that was given, and we can forget about
6170          * expanding it later.
6171          */
6172         ring_buffer_expanded = true;
6173
6174         /* May be called before buffers are initialized */
6175         if (!tr->array_buffer.buffer)
6176                 return 0;
6177
6178         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6179         if (ret < 0)
6180                 return ret;
6181
6182 #ifdef CONFIG_TRACER_MAX_TRACE
6183         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6184             !tr->current_trace->use_max_tr)
6185                 goto out;
6186
6187         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6188         if (ret < 0) {
6189                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6190                                                      &tr->array_buffer, cpu);
6191                 if (r < 0) {
6192                         /*
6193                          * AARGH! We are left with a max buffer of a
6194                          * different size!
6195                          * The max buffer is our "snapshot" buffer.
6196                          * When a tracer needs a snapshot (one of the
6197                          * latency tracers), it swaps the max buffer
6198                          * with the saved snapshot. We succeeded in
6199                          * updating the size of the main buffer, but failed
6200                          * to update the size of the max buffer. Then, when
6201                          * we tried to reset the main buffer to its original
6202                          * size, we failed there too. This is very unlikely
6203                          * to happen, but if it does, warn and kill all
6204                          * tracing.
6205                          */
6206                         WARN_ON(1);
6207                         tracing_disabled = 1;
6208                 }
6209                 return ret;
6210         }
6211
6212         if (cpu == RING_BUFFER_ALL_CPUS)
6213                 set_buffer_entries(&tr->max_buffer, size);
6214         else
6215                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6216
6217  out:
6218 #endif /* CONFIG_TRACER_MAX_TRACE */
6219
6220         if (cpu == RING_BUFFER_ALL_CPUS)
6221                 set_buffer_entries(&tr->array_buffer, size);
6222         else
6223                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6224
6225         return ret;
6226 }
6227
6228 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6229                                   unsigned long size, int cpu_id)
6230 {
6231         int ret;
6232
6233         mutex_lock(&trace_types_lock);
6234
6235         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6236                 /* make sure this cpu is enabled in the mask */
6237                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6238                         ret = -EINVAL;
6239                         goto out;
6240                 }
6241         }
6242
6243         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6244         if (ret < 0)
6245                 ret = -ENOMEM;
6246
6247 out:
6248         mutex_unlock(&trace_types_lock);
6249
6250         return ret;
6251 }
6252
6253
6254 /**
6255  * tracing_update_buffers - used by tracing facility to expand ring buffers
6256  *
6257  * To save memory when tracing is never used on a system that has it
6258  * configured in, the ring buffers are set to a minimum size.  Once a
6259  * user starts to use the tracing facility, they need to grow to their
6260  * default size.
6261  *
6262  * This function is to be called when a tracer is about to be used.
6263  */
6264 int tracing_update_buffers(void)
6265 {
6266         int ret = 0;
6267
6268         mutex_lock(&trace_types_lock);
6269         if (!ring_buffer_expanded)
6270                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6271                                                 RING_BUFFER_ALL_CPUS);
6272         mutex_unlock(&trace_types_lock);
6273
6274         return ret;
6275 }
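/*
 * Illustrative sketch only (not part of the build): a hypothetical helper
 * in this file that wants the ring buffers at full size before switching
 * tracers could combine these interfaces like this.  The helper name is
 * made up; tracing_update_buffers() and tracing_set_tracer() are the
 * functions defined in this file.
 *
 *	static int example_enable_nop_tracer(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		// Expand the global ring buffers from the boot-time
 *		// minimum to their default size, if not already done.
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		// Switch @tr to the "nop" tracer (takes trace_types_lock).
 *		return tracing_set_tracer(tr, "nop");
 *	}
 */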
6276
6277 struct trace_option_dentry;
6278
6279 static void
6280 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6281
6282 /*
6283  * Used to clear out the tracer before deletion of an instance.
6284  * Must have trace_types_lock held.
6285  */
6286 static void tracing_set_nop(struct trace_array *tr)
6287 {
6288         if (tr->current_trace == &nop_trace)
6289                 return;
6290
6291         tr->current_trace->enabled--;
6292
6293         if (tr->current_trace->reset)
6294                 tr->current_trace->reset(tr);
6295
6296         tr->current_trace = &nop_trace;
6297 }
6298
6299 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6300 {
6301         /* Only enable if the directory has been created already. */
6302         if (!tr->dir)
6303                 return;
6304
6305         create_trace_option_files(tr, t);
6306 }
6307
6308 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6309 {
6310         struct tracer *t;
6311 #ifdef CONFIG_TRACER_MAX_TRACE
6312         bool had_max_tr;
6313 #endif
6314         int ret = 0;
6315
6316         mutex_lock(&trace_types_lock);
6317
6318         if (!ring_buffer_expanded) {
6319                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6320                                                 RING_BUFFER_ALL_CPUS);
6321                 if (ret < 0)
6322                         goto out;
6323                 ret = 0;
6324         }
6325
6326         for (t = trace_types; t; t = t->next) {
6327                 if (strcmp(t->name, buf) == 0)
6328                         break;
6329         }
6330         if (!t) {
6331                 ret = -EINVAL;
6332                 goto out;
6333         }
6334         if (t == tr->current_trace)
6335                 goto out;
6336
6337 #ifdef CONFIG_TRACER_SNAPSHOT
6338         if (t->use_max_tr) {
6339                 arch_spin_lock(&tr->max_lock);
6340                 if (tr->cond_snapshot)
6341                         ret = -EBUSY;
6342                 arch_spin_unlock(&tr->max_lock);
6343                 if (ret)
6344                         goto out;
6345         }
6346 #endif
6347         /* Some tracers won't work on kernel command line */
6348         if (system_state < SYSTEM_RUNNING && t->noboot) {
6349                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6350                         t->name);
6351                 goto out;
6352         }
6353
6354         /* Some tracers are only allowed for the top level buffer */
6355         if (!trace_ok_for_array(t, tr)) {
6356                 ret = -EINVAL;
6357                 goto out;
6358         }
6359
6360         /* If trace pipe files are being read, we can't change the tracer */
6361         if (tr->trace_ref) {
6362                 ret = -EBUSY;
6363                 goto out;
6364         }
6365
6366         trace_branch_disable();
6367
6368         tr->current_trace->enabled--;
6369
6370         if (tr->current_trace->reset)
6371                 tr->current_trace->reset(tr);
6372
6373         /* Current trace needs to be nop_trace before synchronize_rcu */
6374         tr->current_trace = &nop_trace;
6375
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377         had_max_tr = tr->allocated_snapshot;
6378
6379         if (had_max_tr && !t->use_max_tr) {
6380                 /*
6381                  * We need to make sure that update_max_tr() sees that
6382                  * current_trace changed to nop_trace to keep it from
6383                  * swapping the buffers after we resize it.
6384                  * update_max_tr() is called with interrupts disabled,
6385                  * so a synchronize_rcu() is sufficient.
6386                  */
6387                 synchronize_rcu();
6388                 free_snapshot(tr);
6389         }
6390 #endif
6391
6392 #ifdef CONFIG_TRACER_MAX_TRACE
6393         if (t->use_max_tr && !had_max_tr) {
6394                 ret = tracing_alloc_snapshot_instance(tr);
6395                 if (ret < 0)
6396                         goto out;
6397         }
6398 #endif
6399
6400         if (t->init) {
6401                 ret = tracer_init(t, tr);
6402                 if (ret)
6403                         goto out;
6404         }
6405
6406         tr->current_trace = t;
6407         tr->current_trace->enabled++;
6408         trace_branch_enable(tr);
6409  out:
6410         mutex_unlock(&trace_types_lock);
6411
6412         return ret;
6413 }
6414
6415 static ssize_t
6416 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6417                         size_t cnt, loff_t *ppos)
6418 {
6419         struct trace_array *tr = filp->private_data;
6420         char buf[MAX_TRACER_SIZE+1];
6421         int i;
6422         size_t ret;
6423         int err;
6424
6425         ret = cnt;
6426
6427         if (cnt > MAX_TRACER_SIZE)
6428                 cnt = MAX_TRACER_SIZE;
6429
6430         if (copy_from_user(buf, ubuf, cnt))
6431                 return -EFAULT;
6432
6433         buf[cnt] = 0;
6434
6435         /* strip ending whitespace. */
6436         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6437                 buf[i] = 0;
6438
6439         err = tracing_set_tracer(tr, buf);
6440         if (err)
6441                 return err;
6442
6443         *ppos += ret;
6444
6445         return ret;
6446 }
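/*
 * Usage sketch from user space (illustrative only): the write handler
 * above backs the tracefs "current_trace" file, so selecting a tracer
 * from a program looks roughly like the following.  The mount point is
 * an assumption for the example; "nop" is the always-present no-op
 * tracer.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *name = "nop\n";
 *		int fd = open("/sys/kernel/tracing/current_trace", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// Trailing whitespace is stripped by tracing_set_trace_write().
 *		if (write(fd, name, strlen(name)) < 0) {
 *			close(fd);
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */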
6447
6448 static ssize_t
6449 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6450                    size_t cnt, loff_t *ppos)
6451 {
6452         char buf[64];
6453         int r;
6454
6455         r = snprintf(buf, sizeof(buf), "%ld\n",
6456                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6457         if (r > sizeof(buf))
6458                 r = sizeof(buf);
6459         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6460 }
6461
6462 static ssize_t
6463 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6464                     size_t cnt, loff_t *ppos)
6465 {
6466         unsigned long val;
6467         int ret;
6468
6469         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6470         if (ret)
6471                 return ret;
6472
6473         *ptr = val * 1000;
6474
6475         return cnt;
6476 }
6477
6478 static ssize_t
6479 tracing_thresh_read(struct file *filp, char __user *ubuf,
6480                     size_t cnt, loff_t *ppos)
6481 {
6482         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6483 }
6484
6485 static ssize_t
6486 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6487                      size_t cnt, loff_t *ppos)
6488 {
6489         struct trace_array *tr = filp->private_data;
6490         int ret;
6491
6492         mutex_lock(&trace_types_lock);
6493         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6494         if (ret < 0)
6495                 goto out;
6496
6497         if (tr->current_trace->update_thresh) {
6498                 ret = tr->current_trace->update_thresh(tr);
6499                 if (ret < 0)
6500                         goto out;
6501         }
6502
6503         ret = cnt;
6504 out:
6505         mutex_unlock(&trace_types_lock);
6506
6507         return ret;
6508 }
6509
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511
6512 static ssize_t
6513 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6514                      size_t cnt, loff_t *ppos)
6515 {
6516         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6517 }
6518
6519 static ssize_t
6520 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6521                       size_t cnt, loff_t *ppos)
6522 {
6523         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6524 }
6525
6526 #endif
6527
6528 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6529 {
6530         struct trace_array *tr = inode->i_private;
6531         struct trace_iterator *iter;
6532         int ret;
6533
6534         ret = tracing_check_open_get_tr(tr);
6535         if (ret)
6536                 return ret;
6537
6538         mutex_lock(&trace_types_lock);
6539
6540         /* create a buffer to store the information to pass to userspace */
6541         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6542         if (!iter) {
6543                 ret = -ENOMEM;
6544                 __trace_array_put(tr);
6545                 goto out;
6546         }
6547
6548         trace_seq_init(&iter->seq);
6549         iter->trace = tr->current_trace;
6550
6551         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6552                 ret = -ENOMEM;
6553                 goto fail;
6554         }
6555
6556         /* trace pipe does not show start of buffer */
6557         cpumask_setall(iter->started);
6558
6559         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6560                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6561
6562         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6563         if (trace_clocks[tr->clock_id].in_ns)
6564                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6565
6566         iter->tr = tr;
6567         iter->array_buffer = &tr->array_buffer;
6568         iter->cpu_file = tracing_get_cpu(inode);
6569         mutex_init(&iter->mutex);
6570         filp->private_data = iter;
6571
6572         if (iter->trace->pipe_open)
6573                 iter->trace->pipe_open(iter);
6574
6575         nonseekable_open(inode, filp);
6576
6577         tr->trace_ref++;
6578 out:
6579         mutex_unlock(&trace_types_lock);
6580         return ret;
6581
6582 fail:
6583         kfree(iter);
6584         __trace_array_put(tr);
6585         mutex_unlock(&trace_types_lock);
6586         return ret;
6587 }
6588
6589 static int tracing_release_pipe(struct inode *inode, struct file *file)
6590 {
6591         struct trace_iterator *iter = file->private_data;
6592         struct trace_array *tr = inode->i_private;
6593
6594         mutex_lock(&trace_types_lock);
6595
6596         tr->trace_ref--;
6597
6598         if (iter->trace->pipe_close)
6599                 iter->trace->pipe_close(iter);
6600
6601         mutex_unlock(&trace_types_lock);
6602
6603         free_cpumask_var(iter->started);
6604         mutex_destroy(&iter->mutex);
6605         kfree(iter);
6606
6607         trace_array_put(tr);
6608
6609         return 0;
6610 }
6611
6612 static __poll_t
6613 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6614 {
6615         struct trace_array *tr = iter->tr;
6616
6617         /* Iterators are static, they should be filled or empty */
6618         if (trace_buffer_iter(iter, iter->cpu_file))
6619                 return EPOLLIN | EPOLLRDNORM;
6620
6621         if (tr->trace_flags & TRACE_ITER_BLOCK)
6622                 /*
6623                  * Always select as readable when in blocking mode
6624                  */
6625                 return EPOLLIN | EPOLLRDNORM;
6626         else
6627                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6628                                              filp, poll_table);
6629 }
6630
6631 static __poll_t
6632 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6633 {
6634         struct trace_iterator *iter = filp->private_data;
6635
6636         return trace_poll(iter, filp, poll_table);
6637 }
6638
6639 /* Must be called with iter->mutex held. */
6640 static int tracing_wait_pipe(struct file *filp)
6641 {
6642         struct trace_iterator *iter = filp->private_data;
6643         int ret;
6644
6645         while (trace_empty(iter)) {
6646
6647                 if ((filp->f_flags & O_NONBLOCK)) {
6648                         return -EAGAIN;
6649                 }
6650
6651                 /*
6652                  * We block while the buffer is empty. If tracing is
6653                  * disabled but we have never read anything, we keep
6654                  * blocking; this allows a user to cat this file and
6655                  * then enable tracing. Once we have read something,
6656                  * we give an EOF when tracing is disabled again.
6657                  *
6658                  * iter->pos will be 0 if we haven't read anything.
6659                  */
6660                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6661                         break;
6662
6663                 mutex_unlock(&iter->mutex);
6664
6665                 ret = wait_on_pipe(iter, 0);
6666
6667                 mutex_lock(&iter->mutex);
6668
6669                 if (ret)
6670                         return ret;
6671         }
6672
6673         return 1;
6674 }
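/*
 * Usage sketch (illustrative only): the consuming "trace_pipe" reader
 * served by tracing_read_pipe() below relies on the blocking behaviour
 * described in the comment above.  A minimal user-space consumer could
 * look like this; the tracefs path is an assumption for the example.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// read() blocks until entries are available and consumes
 *		// them; it returns 0 (EOF) once something has been read
 *		// and tracing is turned off again.
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);
 *		close(fd);
 *		return 0;
 *	}
 */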
6675
6676 /*
6677  * Consumer reader.
6678  */
6679 static ssize_t
6680 tracing_read_pipe(struct file *filp, char __user *ubuf,
6681                   size_t cnt, loff_t *ppos)
6682 {
6683         struct trace_iterator *iter = filp->private_data;
6684         ssize_t sret;
6685
6686         /*
6687          * Avoid more than one consumer on a single file descriptor.
6688          * This is just a matter of trace coherency; the ring buffer itself
6689          * is protected.
6690          */
6691         mutex_lock(&iter->mutex);
6692
6693         /* return any leftover data */
6694         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6695         if (sret != -EBUSY)
6696                 goto out;
6697
6698         trace_seq_init(&iter->seq);
6699
6700         if (iter->trace->read) {
6701                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6702                 if (sret)
6703                         goto out;
6704         }
6705
6706 waitagain:
6707         sret = tracing_wait_pipe(filp);
6708         if (sret <= 0)
6709                 goto out;
6710
6711         /* stop when tracing is finished */
6712         if (trace_empty(iter)) {
6713                 sret = 0;
6714                 goto out;
6715         }
6716
6717         if (cnt >= PAGE_SIZE)
6718                 cnt = PAGE_SIZE - 1;
6719
6720         /* reset all but tr, trace, and overruns */
6721         memset(&iter->seq, 0,
6722                sizeof(struct trace_iterator) -
6723                offsetof(struct trace_iterator, seq));
6724         cpumask_clear(iter->started);
6725         trace_seq_init(&iter->seq);
6726         iter->pos = -1;
6727
6728         trace_event_read_lock();
6729         trace_access_lock(iter->cpu_file);
6730         while (trace_find_next_entry_inc(iter) != NULL) {
6731                 enum print_line_t ret;
6732                 int save_len = iter->seq.seq.len;
6733
6734                 ret = print_trace_line(iter);
6735                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6736                         /* don't print partial lines */
6737                         iter->seq.seq.len = save_len;
6738                         break;
6739                 }
6740                 if (ret != TRACE_TYPE_NO_CONSUME)
6741                         trace_consume(iter);
6742
6743                 if (trace_seq_used(&iter->seq) >= cnt)
6744                         break;
6745
6746                 /*
6747                  * Setting the full flag means we reached the trace_seq buffer
6748                  * size and should have left via the partial output condition
6749                  * above. If we get here, a trace_seq_* function was not used properly.
6750                  */
6751                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6752                           iter->ent->type);
6753         }
6754         trace_access_unlock(iter->cpu_file);
6755         trace_event_read_unlock();
6756
6757         /* Now copy what we have to the user */
6758         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6759         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6760                 trace_seq_init(&iter->seq);
6761
6762         /*
6763          * If there was nothing to send to user, in spite of consuming trace
6764          * entries, go back to wait for more entries.
6765          */
6766         if (sret == -EBUSY)
6767                 goto waitagain;
6768
6769 out:
6770         mutex_unlock(&iter->mutex);
6771
6772         return sret;
6773 }
6774
6775 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6776                                      unsigned int idx)
6777 {
6778         __free_page(spd->pages[idx]);
6779 }
6780
6781 static size_t
6782 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6783 {
6784         size_t count;
6785         int save_len;
6786         int ret;
6787
6788         /* Seq buffer is page-sized, exactly what we need. */
6789         for (;;) {
6790                 save_len = iter->seq.seq.len;
6791                 ret = print_trace_line(iter);
6792
6793                 if (trace_seq_has_overflowed(&iter->seq)) {
6794                         iter->seq.seq.len = save_len;
6795                         break;
6796                 }
6797
6798                 /*
6799                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6800                  * should only be returned if iter->seq overflowed, which is
6801                  * handled above. But check it anyway to be safe.
6802                  */
6803                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6804                         iter->seq.seq.len = save_len;
6805                         break;
6806                 }
6807
6808                 count = trace_seq_used(&iter->seq) - save_len;
6809                 if (rem < count) {
6810                         rem = 0;
6811                         iter->seq.seq.len = save_len;
6812                         break;
6813                 }
6814
6815                 if (ret != TRACE_TYPE_NO_CONSUME)
6816                         trace_consume(iter);
6817                 rem -= count;
6818                 if (!trace_find_next_entry_inc(iter))   {
6819                         rem = 0;
6820                         iter->ent = NULL;
6821                         break;
6822                 }
6823         }
6824
6825         return rem;
6826 }
6827
6828 static ssize_t tracing_splice_read_pipe(struct file *filp,
6829                                         loff_t *ppos,
6830                                         struct pipe_inode_info *pipe,
6831                                         size_t len,
6832                                         unsigned int flags)
6833 {
6834         struct page *pages_def[PIPE_DEF_BUFFERS];
6835         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6836         struct trace_iterator *iter = filp->private_data;
6837         struct splice_pipe_desc spd = {
6838                 .pages          = pages_def,
6839                 .partial        = partial_def,
6840                 .nr_pages       = 0, /* This gets updated below. */
6841                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6842                 .ops            = &default_pipe_buf_ops,
6843                 .spd_release    = tracing_spd_release_pipe,
6844         };
6845         ssize_t ret;
6846         size_t rem;
6847         unsigned int i;
6848
6849         if (splice_grow_spd(pipe, &spd))
6850                 return -ENOMEM;
6851
6852         mutex_lock(&iter->mutex);
6853
6854         if (iter->trace->splice_read) {
6855                 ret = iter->trace->splice_read(iter, filp,
6856                                                ppos, pipe, len, flags);
6857                 if (ret)
6858                         goto out_err;
6859         }
6860
6861         ret = tracing_wait_pipe(filp);
6862         if (ret <= 0)
6863                 goto out_err;
6864
6865         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6866                 ret = -EFAULT;
6867                 goto out_err;
6868         }
6869
6870         trace_event_read_lock();
6871         trace_access_lock(iter->cpu_file);
6872
6873         /* Fill as many pages as possible. */
6874         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6875                 spd.pages[i] = alloc_page(GFP_KERNEL);
6876                 if (!spd.pages[i])
6877                         break;
6878
6879                 rem = tracing_fill_pipe_page(rem, iter);
6880
6881                 /* Copy the data into the page, so we can start over. */
6882                 ret = trace_seq_to_buffer(&iter->seq,
6883                                           page_address(spd.pages[i]),
6884                                           trace_seq_used(&iter->seq));
6885                 if (ret < 0) {
6886                         __free_page(spd.pages[i]);
6887                         break;
6888                 }
6889                 spd.partial[i].offset = 0;
6890                 spd.partial[i].len = trace_seq_used(&iter->seq);
6891
6892                 trace_seq_init(&iter->seq);
6893         }
6894
6895         trace_access_unlock(iter->cpu_file);
6896         trace_event_read_unlock();
6897         mutex_unlock(&iter->mutex);
6898
6899         spd.nr_pages = i;
6900
6901         if (i)
6902                 ret = splice_to_pipe(pipe, &spd);
6903         else
6904                 ret = 0;
6905 out:
6906         splice_shrink_spd(&spd);
6907         return ret;
6908
6909 out_err:
6910         mutex_unlock(&iter->mutex);
6911         goto out;
6912 }
6913
6914 static ssize_t
6915 tracing_entries_read(struct file *filp, char __user *ubuf,
6916                      size_t cnt, loff_t *ppos)
6917 {
6918         struct inode *inode = file_inode(filp);
6919         struct trace_array *tr = inode->i_private;
6920         int cpu = tracing_get_cpu(inode);
6921         char buf[64];
6922         int r = 0;
6923         ssize_t ret;
6924
6925         mutex_lock(&trace_types_lock);
6926
6927         if (cpu == RING_BUFFER_ALL_CPUS) {
6928                 int cpu, buf_size_same;
6929                 unsigned long size;
6930
6931                 size = 0;
6932                 buf_size_same = 1;
6933                 /* check if all cpu sizes are same */
6934                 for_each_tracing_cpu(cpu) {
6935                         /* fill in the size from first enabled cpu */
6936                         if (size == 0)
6937                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6938                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6939                                 buf_size_same = 0;
6940                                 break;
6941                         }
6942                 }
6943
6944                 if (buf_size_same) {
6945                         if (!ring_buffer_expanded)
6946                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6947                                             size >> 10,
6948                                             trace_buf_size >> 10);
6949                         else
6950                                 r = sprintf(buf, "%lu\n", size >> 10);
6951                 } else
6952                         r = sprintf(buf, "X\n");
6953         } else
6954                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6955
6956         mutex_unlock(&trace_types_lock);
6957
6958         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6959         return ret;
6960 }
6961
6962 static ssize_t
6963 tracing_entries_write(struct file *filp, const char __user *ubuf,
6964                       size_t cnt, loff_t *ppos)
6965 {
6966         struct inode *inode = file_inode(filp);
6967         struct trace_array *tr = inode->i_private;
6968         unsigned long val;
6969         int ret;
6970
6971         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972         if (ret)
6973                 return ret;
6974
6975         /* must have at least 1 entry */
6976         if (!val)
6977                 return -EINVAL;
6978
6979         /* value is in KB */
6980         val <<= 10;
6981         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6982         if (ret < 0)
6983                 return ret;
6984
6985         *ppos += cnt;
6986
6987         return cnt;
6988 }
6989
6990 static ssize_t
6991 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6992                                 size_t cnt, loff_t *ppos)
6993 {
6994         struct trace_array *tr = filp->private_data;
6995         char buf[64];
6996         int r, cpu;
6997         unsigned long size = 0, expanded_size = 0;
6998
6999         mutex_lock(&trace_types_lock);
7000         for_each_tracing_cpu(cpu) {
7001                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7002                 if (!ring_buffer_expanded)
7003                         expanded_size += trace_buf_size >> 10;
7004         }
7005         if (ring_buffer_expanded)
7006                 r = sprintf(buf, "%lu\n", size);
7007         else
7008                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7009         mutex_unlock(&trace_types_lock);
7010
7011         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7012 }
7013
7014 static ssize_t
7015 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7016                           size_t cnt, loff_t *ppos)
7017 {
7018         /*
7019          * There is no need to read what the user has written; this function
7020          * only exists so that using "echo" on this file does not return an error.
7021          */
7022
7023         *ppos += cnt;
7024
7025         return cnt;
7026 }
7027
7028 static int
7029 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7030 {
7031         struct trace_array *tr = inode->i_private;
7032
7033         /* disable tracing ? */
7034         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7035                 tracer_tracing_off(tr);
7036         /* resize the ring buffer to 0 */
7037         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7038
7039         trace_array_put(tr);
7040
7041         return 0;
7042 }
7043
7044 static ssize_t
7045 tracing_mark_write(struct file *filp, const char __user *ubuf,
7046                                         size_t cnt, loff_t *fpos)
7047 {
7048         struct trace_array *tr = filp->private_data;
7049         struct ring_buffer_event *event;
7050         enum event_trigger_type tt = ETT_NONE;
7051         struct trace_buffer *buffer;
7052         struct print_entry *entry;
7053         ssize_t written;
7054         int size;
7055         int len;
7056
7057 /* Used in tracing_mark_raw_write() as well */
7058 #define FAULTED_STR "<faulted>"
7059 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7060
7061         if (tracing_disabled)
7062                 return -EINVAL;
7063
7064         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7065                 return -EINVAL;
7066
7067         if (cnt > TRACE_BUF_SIZE)
7068                 cnt = TRACE_BUF_SIZE;
7069
7070         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7071
7072         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7073
7074         /* If less than "<faulted>", then make sure we can still add that */
7075         if (cnt < FAULTED_SIZE)
7076                 size += FAULTED_SIZE - cnt;
7077
7078         buffer = tr->array_buffer.buffer;
7079         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7080                                             tracing_gen_ctx());
7081         if (unlikely(!event))
7082                 /* Ring buffer disabled, return as if not open for write */
7083                 return -EBADF;
7084
7085         entry = ring_buffer_event_data(event);
7086         entry->ip = _THIS_IP_;
7087
7088         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7089         if (len) {
7090                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7091                 cnt = FAULTED_SIZE;
7092                 written = -EFAULT;
7093         } else
7094                 written = cnt;
7095
7096         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7097                 /* do not add \n before testing triggers, but add \0 */
7098                 entry->buf[cnt] = '\0';
7099                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7100         }
7101
7102         if (entry->buf[cnt - 1] != '\n') {
7103                 entry->buf[cnt] = '\n';
7104                 entry->buf[cnt + 1] = '\0';
7105         } else
7106                 entry->buf[cnt] = '\0';
7107
7108         if (static_branch_unlikely(&trace_marker_exports_enabled))
7109                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7110         __buffer_unlock_commit(buffer, event);
7111
7112         if (tt)
7113                 event_triggers_post_call(tr->trace_marker_file, tt);
7114
7115         if (written > 0)
7116                 *fpos += written;
7117
7118         return written;
7119 }
7120
7121 /* Limit it for now to 3K (including tag) */
7122 #define RAW_DATA_MAX_SIZE (1024*3)
7123
7124 static ssize_t
7125 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7126                                         size_t cnt, loff_t *fpos)
7127 {
7128         struct trace_array *tr = filp->private_data;
7129         struct ring_buffer_event *event;
7130         struct trace_buffer *buffer;
7131         struct raw_data_entry *entry;
7132         ssize_t written;
7133         int size;
7134         int len;
7135
7136 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7137
7138         if (tracing_disabled)
7139                 return -EINVAL;
7140
7141         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7142                 return -EINVAL;
7143
7144         /* The marker must at least have a tag id */
7145         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7146                 return -EINVAL;
7147
7148         if (cnt > TRACE_BUF_SIZE)
7149                 cnt = TRACE_BUF_SIZE;
7150
7151         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7152
7153         size = sizeof(*entry) + cnt;
7154         if (cnt < FAULT_SIZE_ID)
7155                 size += FAULT_SIZE_ID - cnt;
7156
7157         buffer = tr->array_buffer.buffer;
7158         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7159                                             tracing_gen_ctx());
7160         if (!event)
7161                 /* Ring buffer disabled, return as if not open for write */
7162                 return -EBADF;
7163
7164         entry = ring_buffer_event_data(event);
7165
7166         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7167         if (len) {
7168                 entry->id = -1;
7169                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7170                 written = -EFAULT;
7171         } else
7172                 written = cnt;
7173
7174         __buffer_unlock_commit(buffer, event);
7175
7176         if (written > 0)
7177                 *fpos += written;
7178
7179         return written;
7180 }
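/*
 * Usage sketch (illustrative only): trace_marker takes plain text, while
 * the raw handler above expects a binary blob that starts with an
 * "unsigned int" tag id followed by the payload, matching the copy into
 * &entry->id.  A minimal writer could look like this; the tracefs path
 * and the tag value are assumptions for the example.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		unsigned char buf[sizeof(unsigned int) + 8];
 *		unsigned int id = 42;	// application-defined tag
 *		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		memcpy(buf, &id, sizeof(id));
 *		memcpy(buf + sizeof(id), "payload!", 8);
 *		if (write(fd, buf, sizeof(buf)) < 0) {
 *			close(fd);
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */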
7181
7182 static int tracing_clock_show(struct seq_file *m, void *v)
7183 {
7184         struct trace_array *tr = m->private;
7185         int i;
7186
7187         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7188                 seq_printf(m,
7189                         "%s%s%s%s", i ? " " : "",
7190                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7191                         i == tr->clock_id ? "]" : "");
7192         seq_putc(m, '\n');
7193
7194         return 0;
7195 }
7196
7197 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7198 {
7199         int i;
7200
7201         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7202                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7203                         break;
7204         }
7205         if (i == ARRAY_SIZE(trace_clocks))
7206                 return -EINVAL;
7207
7208         mutex_lock(&trace_types_lock);
7209
7210         tr->clock_id = i;
7211
7212         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7213
7214         /*
7215          * New clock may not be consistent with the previous clock.
7216          * Reset the buffer so that it doesn't have incomparable timestamps.
7217          */
7218         tracing_reset_online_cpus(&tr->array_buffer);
7219
7220 #ifdef CONFIG_TRACER_MAX_TRACE
7221         if (tr->max_buffer.buffer)
7222                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7223         tracing_reset_online_cpus(&tr->max_buffer);
7224 #endif
7225
7226         mutex_unlock(&trace_types_lock);
7227
7228         return 0;
7229 }
7230
7231 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7232                                    size_t cnt, loff_t *fpos)
7233 {
7234         struct seq_file *m = filp->private_data;
7235         struct trace_array *tr = m->private;
7236         char buf[64];
7237         const char *clockstr;
7238         int ret;
7239
7240         if (cnt >= sizeof(buf))
7241                 return -EINVAL;
7242
7243         if (copy_from_user(buf, ubuf, cnt))
7244                 return -EFAULT;
7245
7246         buf[cnt] = 0;
7247
7248         clockstr = strstrip(buf);
7249
7250         ret = tracing_set_clock(tr, clockstr);
7251         if (ret)
7252                 return ret;
7253
7254         *fpos += cnt;
7255
7256         return cnt;
7257 }
7258
7259 static int tracing_clock_open(struct inode *inode, struct file *file)
7260 {
7261         struct trace_array *tr = inode->i_private;
7262         int ret;
7263
7264         ret = tracing_check_open_get_tr(tr);
7265         if (ret)
7266                 return ret;
7267
7268         ret = single_open(file, tracing_clock_show, inode->i_private);
7269         if (ret < 0)
7270                 trace_array_put(tr);
7271
7272         return ret;
7273 }
7274
7275 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7276 {
7277         struct trace_array *tr = m->private;
7278
7279         mutex_lock(&trace_types_lock);
7280
7281         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7282                 seq_puts(m, "delta [absolute]\n");
7283         else
7284                 seq_puts(m, "[delta] absolute\n");
7285
7286         mutex_unlock(&trace_types_lock);
7287
7288         return 0;
7289 }
7290
7291 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7292 {
7293         struct trace_array *tr = inode->i_private;
7294         int ret;
7295
7296         ret = tracing_check_open_get_tr(tr);
7297         if (ret)
7298                 return ret;
7299
7300         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7301         if (ret < 0)
7302                 trace_array_put(tr);
7303
7304         return ret;
7305 }
7306
7307 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7308 {
7309         if (rbe == this_cpu_read(trace_buffered_event))
7310                 return ring_buffer_time_stamp(buffer);
7311
7312         return ring_buffer_event_time_stamp(buffer, rbe);
7313 }
7314
7315 /*
7316  * Set or disable using the per-CPU trace_buffered_event when possible.
7317  */
7318 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7319 {
7320         int ret = 0;
7321
7322         mutex_lock(&trace_types_lock);
7323
7324         if (set && tr->no_filter_buffering_ref++)
7325                 goto out;
7326
7327         if (!set) {
7328                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7329                         ret = -EINVAL;
7330                         goto out;
7331                 }
7332
7333                 --tr->no_filter_buffering_ref;
7334         }
7335  out:
7336         mutex_unlock(&trace_types_lock);
7337
7338         return ret;
7339 }
7340
7341 struct ftrace_buffer_info {
7342         struct trace_iterator   iter;
7343         void                    *spare;
7344         unsigned int            spare_cpu;
7345         unsigned int            read;
7346 };
7347
7348 #ifdef CONFIG_TRACER_SNAPSHOT
7349 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7350 {
7351         struct trace_array *tr = inode->i_private;
7352         struct trace_iterator *iter;
7353         struct seq_file *m;
7354         int ret;
7355
7356         ret = tracing_check_open_get_tr(tr);
7357         if (ret)
7358                 return ret;
7359
7360         if (file->f_mode & FMODE_READ) {
7361                 iter = __tracing_open(inode, file, true);
7362                 if (IS_ERR(iter))
7363                         ret = PTR_ERR(iter);
7364         } else {
7365                 /* Writes still need the seq_file to hold the private data */
7366                 ret = -ENOMEM;
7367                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7368                 if (!m)
7369                         goto out;
7370                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7371                 if (!iter) {
7372                         kfree(m);
7373                         goto out;
7374                 }
7375                 ret = 0;
7376
7377                 iter->tr = tr;
7378                 iter->array_buffer = &tr->max_buffer;
7379                 iter->cpu_file = tracing_get_cpu(inode);
7380                 m->private = iter;
7381                 file->private_data = m;
7382         }
7383 out:
7384         if (ret < 0)
7385                 trace_array_put(tr);
7386
7387         return ret;
7388 }
7389
7390 static ssize_t
7391 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7392                        loff_t *ppos)
7393 {
7394         struct seq_file *m = filp->private_data;
7395         struct trace_iterator *iter = m->private;
7396         struct trace_array *tr = iter->tr;
7397         unsigned long val;
7398         int ret;
7399
7400         ret = tracing_update_buffers();
7401         if (ret < 0)
7402                 return ret;
7403
7404         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7405         if (ret)
7406                 return ret;
7407
7408         mutex_lock(&trace_types_lock);
7409
7410         if (tr->current_trace->use_max_tr) {
7411                 ret = -EBUSY;
7412                 goto out;
7413         }
7414
7415         arch_spin_lock(&tr->max_lock);
7416         if (tr->cond_snapshot)
7417                 ret = -EBUSY;
7418         arch_spin_unlock(&tr->max_lock);
7419         if (ret)
7420                 goto out;
7421
7422         switch (val) {
7423         case 0:
7424                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7425                         ret = -EINVAL;
7426                         break;
7427                 }
7428                 if (tr->allocated_snapshot)
7429                         free_snapshot(tr);
7430                 break;
7431         case 1:
7432 /* Only allow per-cpu swap if the ring buffer supports it */
7433 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7434                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7435                         ret = -EINVAL;
7436                         break;
7437                 }
7438 #endif
7439                 if (tr->allocated_snapshot)
7440                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7441                                         &tr->array_buffer, iter->cpu_file);
7442                 else
7443                         ret = tracing_alloc_snapshot_instance(tr);
7444                 if (ret < 0)
7445                         break;
7446                 local_irq_disable();
7447                 /* Now, we're going to swap */
7448                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7449                         update_max_tr(tr, current, smp_processor_id(), NULL);
7450                 else
7451                         update_max_tr_single(tr, current, iter->cpu_file);
7452                 local_irq_enable();
7453                 break;
7454         default:
7455                 if (tr->allocated_snapshot) {
7456                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7457                                 tracing_reset_online_cpus(&tr->max_buffer);
7458                         else
7459                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7460                 }
7461                 break;
7462         }
7463
7464         if (ret >= 0) {
7465                 *ppos += cnt;
7466                 ret = cnt;
7467         }
7468 out:
7469         mutex_unlock(&trace_types_lock);
7470         return ret;
7471 }
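/*
 * Usage sketch (illustrative only): the tracefs "snapshot" file is driven
 * by the handler above: writing 0 frees the snapshot buffer, writing 1
 * allocates it (if needed) and swaps in the current trace, and any other
 * value clears it.  Taking and dumping a snapshot from user space could
 * look like this; the path is an assumption for the example.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);	// take a snapshot of the current trace
 *		close(fd);
 *
 *		fd = open("/sys/kernel/tracing/snapshot", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);
 *		close(fd);
 *		return 0;
 *	}
 */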
7472
7473 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7474 {
7475         struct seq_file *m = file->private_data;
7476         int ret;
7477
7478         ret = tracing_release(inode, file);
7479
7480         if (file->f_mode & FMODE_READ)
7481                 return ret;
7482
7483         /* If write only, the seq_file is just a stub */
7484         if (m)
7485                 kfree(m->private);
7486         kfree(m);
7487
7488         return 0;
7489 }
7490
7491 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7492 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7493                                     size_t count, loff_t *ppos);
7494 static int tracing_buffers_release(struct inode *inode, struct file *file);
7495 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7496                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7497
7498 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7499 {
7500         struct ftrace_buffer_info *info;
7501         int ret;
7502
7503         /* The following checks for tracefs lockdown */
7504         ret = tracing_buffers_open(inode, filp);
7505         if (ret < 0)
7506                 return ret;
7507
7508         info = filp->private_data;
7509
7510         if (info->iter.trace->use_max_tr) {
7511                 tracing_buffers_release(inode, filp);
7512                 return -EBUSY;
7513         }
7514
7515         info->iter.snapshot = true;
7516         info->iter.array_buffer = &info->iter.tr->max_buffer;
7517
7518         return ret;
7519 }
7520
7521 #endif /* CONFIG_TRACER_SNAPSHOT */
7522
7523
7524 static const struct file_operations tracing_thresh_fops = {
7525         .open           = tracing_open_generic,
7526         .read           = tracing_thresh_read,
7527         .write          = tracing_thresh_write,
7528         .llseek         = generic_file_llseek,
7529 };
7530
7531 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7532 static const struct file_operations tracing_max_lat_fops = {
7533         .open           = tracing_open_generic,
7534         .read           = tracing_max_lat_read,
7535         .write          = tracing_max_lat_write,
7536         .llseek         = generic_file_llseek,
7537 };
7538 #endif
7539
7540 static const struct file_operations set_tracer_fops = {
7541         .open           = tracing_open_generic,
7542         .read           = tracing_set_trace_read,
7543         .write          = tracing_set_trace_write,
7544         .llseek         = generic_file_llseek,
7545 };
7546
7547 static const struct file_operations tracing_pipe_fops = {
7548         .open           = tracing_open_pipe,
7549         .poll           = tracing_poll_pipe,
7550         .read           = tracing_read_pipe,
7551         .splice_read    = tracing_splice_read_pipe,
7552         .release        = tracing_release_pipe,
7553         .llseek         = no_llseek,
7554 };
7555
7556 static const struct file_operations tracing_entries_fops = {
7557         .open           = tracing_open_generic_tr,
7558         .read           = tracing_entries_read,
7559         .write          = tracing_entries_write,
7560         .llseek         = generic_file_llseek,
7561         .release        = tracing_release_generic_tr,
7562 };
7563
7564 static const struct file_operations tracing_total_entries_fops = {
7565         .open           = tracing_open_generic_tr,
7566         .read           = tracing_total_entries_read,
7567         .llseek         = generic_file_llseek,
7568         .release        = tracing_release_generic_tr,
7569 };
7570
7571 static const struct file_operations tracing_free_buffer_fops = {
7572         .open           = tracing_open_generic_tr,
7573         .write          = tracing_free_buffer_write,
7574         .release        = tracing_free_buffer_release,
7575 };
7576
7577 static const struct file_operations tracing_mark_fops = {
7578         .open           = tracing_open_generic_tr,
7579         .write          = tracing_mark_write,
7580         .llseek         = generic_file_llseek,
7581         .release        = tracing_release_generic_tr,
7582 };
7583
7584 static const struct file_operations tracing_mark_raw_fops = {
7585         .open           = tracing_open_generic_tr,
7586         .write          = tracing_mark_raw_write,
7587         .llseek         = generic_file_llseek,
7588         .release        = tracing_release_generic_tr,
7589 };
7590
7591 static const struct file_operations trace_clock_fops = {
7592         .open           = tracing_clock_open,
7593         .read           = seq_read,
7594         .llseek         = seq_lseek,
7595         .release        = tracing_single_release_tr,
7596         .write          = tracing_clock_write,
7597 };
7598
7599 static const struct file_operations trace_time_stamp_mode_fops = {
7600         .open           = tracing_time_stamp_mode_open,
7601         .read           = seq_read,
7602         .llseek         = seq_lseek,
7603         .release        = tracing_single_release_tr,
7604 };
7605
7606 #ifdef CONFIG_TRACER_SNAPSHOT
7607 static const struct file_operations snapshot_fops = {
7608         .open           = tracing_snapshot_open,
7609         .read           = seq_read,
7610         .write          = tracing_snapshot_write,
7611         .llseek         = tracing_lseek,
7612         .release        = tracing_snapshot_release,
7613 };
7614
7615 static const struct file_operations snapshot_raw_fops = {
7616         .open           = snapshot_raw_open,
7617         .read           = tracing_buffers_read,
7618         .release        = tracing_buffers_release,
7619         .splice_read    = tracing_buffers_splice_read,
7620         .llseek         = no_llseek,
7621 };
7622
7623 #endif /* CONFIG_TRACER_SNAPSHOT */
7624
7625 /*
7626  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7627  * @filp: The active open file structure
7628  * @ubuf: The user space buffer holding the value to write
7629  * @cnt: The number of bytes to write
7630  * @ppos: The current "file" position
7631  *
7632  * This function implements the write interface for a struct trace_min_max_param.
7633  * The filp->private_data must point to a trace_min_max_param structure that
7634  * defines where to write the value, the min and the max acceptable values,
7635  * and a lock to protect the write.
7636  */
7637 static ssize_t
7638 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7639 {
7640         struct trace_min_max_param *param = filp->private_data;
7641         u64 val;
7642         int err;
7643
7644         if (!param)
7645                 return -EFAULT;
7646
7647         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7648         if (err)
7649                 return err;
7650
7651         if (param->lock)
7652                 mutex_lock(param->lock);
7653
7654         if (param->min && val < *param->min)
7655                 err = -EINVAL;
7656
7657         if (param->max && val > *param->max)
7658                 err = -EINVAL;
7659
7660         if (!err)
7661                 *param->val = val;
7662
7663         if (param->lock)
7664                 mutex_unlock(param->lock);
7665
7666         if (err)
7667                 return err;
7668
7669         return cnt;
7670 }
7671
7672 /*
7673  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7674  * @filp: The active open file structure
7675  * @ubuf: The userspace provided buffer to read value into
7676  * @cnt: The maximum number of bytes to read
7677  * @ppos: The current "file" position
7678  *
7679  * This function implements the read interface for a struct trace_min_max_param.
7680  * The filp->private_data must point to a trace_min_max_param struct with valid
7681  * data.
7682  */
7683 static ssize_t
7684 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7685 {
7686         struct trace_min_max_param *param = filp->private_data;
7687         char buf[U64_STR_SIZE];
7688         int len;
7689         u64 val;
7690
7691         if (!param)
7692                 return -EFAULT;
7693
7694         val = *param->val;
7695
7696         if (cnt > sizeof(buf))
7697                 cnt = sizeof(buf);
7698
7699         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7700
7701         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7702 }
7703
7704 const struct file_operations trace_min_max_fops = {
7705         .open           = tracing_open_generic,
7706         .read           = trace_min_max_read,
7707         .write          = trace_min_max_write,
7708 };
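/*
 * Wiring sketch (illustrative only): judging by the accesses above, a
 * struct trace_min_max_param carries pointers to the value, to its
 * optional bounds and to an optional lock.  A hypothetical tunable
 * exposed through tracefs could be hooked up roughly like this; the
 * example_* names, the limits and the file name are made up, and
 * tracefs_create_file() is the usual tracefs helper.
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	// In some init path, with @parent being a tracefs directory:
 *	//	tracefs_create_file("example_percent", 0640, parent,
 *	//			    &example_param, &trace_min_max_fops);
 */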
7709
7710 #define TRACING_LOG_ERRS_MAX    8
7711 #define TRACING_LOG_LOC_MAX     128
7712
7713 #define CMD_PREFIX "  Command: "
7714
7715 struct err_info {
7716         const char      **errs; /* ptr to loc-specific array of err strings */
7717         u8              type;   /* index into errs -> specific err string */
7718         u8              pos;    /* MAX_FILTER_STR_VAL = 256 */
7719         u64             ts;
7720 };
7721
7722 struct tracing_log_err {
7723         struct list_head        list;
7724         struct err_info         info;
7725         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7726         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7727 };
7728
7729 static DEFINE_MUTEX(tracing_err_log_lock);
7730
7731 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7732 {
7733         struct tracing_log_err *err;
7734
7735         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7736                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7737                 if (!err)
7738                         return ERR_PTR(-ENOMEM);
7739                 tr->n_err_log_entries++;
7740
7741                 return err;
7742         }
7743
7744         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7745         list_del(&err->list);
7746
7747         return err;
7748 }
7749
7750 /**
7751  * err_pos - find the position of a string within a command for error careting
7752  * @cmd: The tracing command that caused the error
7753  * @str: The string to position the caret at within @cmd
7754  *
7755  * Finds the position of the first occurrence of @str within @cmd.  The
7756  * return value can be passed to tracing_log_err() for caret placement
7757  * within @cmd.
7758  *
7759  * Returns the index within @cmd of the first occurrence of @str or 0
7760  * if @str was not found.
7761  */
7762 unsigned int err_pos(char *cmd, const char *str)
7763 {
7764         char *found;
7765
7766         if (WARN_ON(!strlen(cmd)))
7767                 return 0;
7768
7769         found = strstr(cmd, str);
7770         if (found)
7771                 return found - cmd;
7772
7773         return 0;
7774 }
7775
7776 /**
7777  * tracing_log_err - write an error to the tracing error log
7778  * @tr: The associated trace array for the error (NULL for top level array)
7779  * @loc: A string describing where the error occurred
7780  * @cmd: The tracing command that caused the error
7781  * @errs: The array of loc-specific static error strings
7782  * @type: The index into errs[], which produces the specific static err string
7783  * @pos: The position the caret should be placed in the cmd
7784  *
7785  * Writes an error into tracing/error_log of the form:
7786  *
7787  * <loc>: error: <text>
7788  *   Command: <cmd>
7789  *              ^
7790  *
7791  * tracing/error_log is a small log file containing the last
7792  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7793  * unless there has been a tracing error, and the error log can be
7794  * cleared and have its memory freed by writing the empty string to it
7795  * in truncation mode, i.e. "echo > tracing/error_log".
7796  *
7797  * NOTE: the @errs array along with the @type param are used to
7798  * produce a static error string - this string is not copied and saved
7799  * when the error is logged - only a pointer to it is saved.  See
7800  * existing callers for examples of how static strings are typically
7801  * defined for use with tracing_log_err().
7802  */
7803 void tracing_log_err(struct trace_array *tr,
7804                      const char *loc, const char *cmd,
7805                      const char **errs, u8 type, u8 pos)
7806 {
7807         struct tracing_log_err *err;
7808
7809         if (!tr)
7810                 tr = &global_trace;
7811
7812         mutex_lock(&tracing_err_log_lock);
7813         err = get_tracing_log_err(tr);
7814         if (PTR_ERR(err) == -ENOMEM) {
7815                 mutex_unlock(&tracing_err_log_lock);
7816                 return;
7817         }
7818
7819         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7820         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7821
7822         err->info.errs = errs;
7823         err->info.type = type;
7824         err->info.pos = pos;
7825         err->info.ts = local_clock();
7826
7827         list_add_tail(&err->list, &tr->err_log);
7828         mutex_unlock(&tracing_err_log_lock);
7829 }
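
/*
 * Illustrative sketch only (the names below are hypothetical, not from
 * this file): a caller typically keeps a static table of error strings
 * and reports a parse error roughly like this:
 *
 *	static const char *foo_errs[] = { "Duplicate field", "Missing field" };
 *
 *	tracing_log_err(tr, "foo: parse", cmd, foo_errs,
 *			FOO_ERR_DUP_FIELD, err_pos(cmd, field_name));
 *
 * Only a pointer into the errs[] table is stored, so the strings must
 * stay valid for as long as the log entry can be displayed.
 */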
7830
7831 static void clear_tracing_err_log(struct trace_array *tr)
7832 {
7833         struct tracing_log_err *err, *next;
7834
7835         mutex_lock(&tracing_err_log_lock);
7836         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7837                 list_del(&err->list);
7838                 kfree(err);
7839         }
7840
7841         tr->n_err_log_entries = 0;
7842         mutex_unlock(&tracing_err_log_lock);
7843 }
7844
7845 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7846 {
7847         struct trace_array *tr = m->private;
7848
7849         mutex_lock(&tracing_err_log_lock);
7850
7851         return seq_list_start(&tr->err_log, *pos);
7852 }
7853
7854 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7855 {
7856         struct trace_array *tr = m->private;
7857
7858         return seq_list_next(v, &tr->err_log, pos);
7859 }
7860
7861 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7862 {
7863         mutex_unlock(&tracing_err_log_lock);
7864 }
7865
7866 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7867 {
7868         u8 i;
7869
7870         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7871                 seq_putc(m, ' ');
7872         for (i = 0; i < pos; i++)
7873                 seq_putc(m, ' ');
7874         seq_puts(m, "^\n");
7875 }
7876
7877 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7878 {
7879         struct tracing_log_err *err = v;
7880
7881         if (err) {
7882                 const char *err_text = err->info.errs[err->info.type];
7883                 u64 sec = err->info.ts;
7884                 u32 nsec;
7885
7886                 nsec = do_div(sec, NSEC_PER_SEC);
7887                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7888                            err->loc, err_text);
7889                 seq_printf(m, "%s", err->cmd);
7890                 tracing_err_log_show_pos(m, err->info.pos);
7891         }
7892
7893         return 0;
7894 }
7895
7896 static const struct seq_operations tracing_err_log_seq_ops = {
7897         .start  = tracing_err_log_seq_start,
7898         .next   = tracing_err_log_seq_next,
7899         .stop   = tracing_err_log_seq_stop,
7900         .show   = tracing_err_log_seq_show
7901 };
7902
7903 static int tracing_err_log_open(struct inode *inode, struct file *file)
7904 {
7905         struct trace_array *tr = inode->i_private;
7906         int ret = 0;
7907
7908         ret = tracing_check_open_get_tr(tr);
7909         if (ret)
7910                 return ret;
7911
7912         /* If this file was opened for write, then erase contents */
7913         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7914                 clear_tracing_err_log(tr);
7915
7916         if (file->f_mode & FMODE_READ) {
7917                 ret = seq_open(file, &tracing_err_log_seq_ops);
7918                 if (!ret) {
7919                         struct seq_file *m = file->private_data;
7920                         m->private = tr;
7921                 } else {
7922                         trace_array_put(tr);
7923                 }
7924         }
7925         return ret;
7926 }
7927
7928 static ssize_t tracing_err_log_write(struct file *file,
7929                                      const char __user *buffer,
7930                                      size_t count, loff_t *ppos)
7931 {
7932         return count;
7933 }
7934
7935 static int tracing_err_log_release(struct inode *inode, struct file *file)
7936 {
7937         struct trace_array *tr = inode->i_private;
7938
7939         trace_array_put(tr);
7940
7941         if (file->f_mode & FMODE_READ)
7942                 seq_release(inode, file);
7943
7944         return 0;
7945 }
7946
7947 static const struct file_operations tracing_err_log_fops = {
7948         .open           = tracing_err_log_open,
7949         .write          = tracing_err_log_write,
7950         .read           = seq_read,
7951         .llseek         = seq_lseek,
7952         .release        = tracing_err_log_release,
7953 };
7954
7955 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7956 {
7957         struct trace_array *tr = inode->i_private;
7958         struct ftrace_buffer_info *info;
7959         int ret;
7960
7961         ret = tracing_check_open_get_tr(tr);
7962         if (ret)
7963                 return ret;
7964
7965         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7966         if (!info) {
7967                 trace_array_put(tr);
7968                 return -ENOMEM;
7969         }
7970
7971         mutex_lock(&trace_types_lock);
7972
7973         info->iter.tr           = tr;
7974         info->iter.cpu_file     = tracing_get_cpu(inode);
7975         info->iter.trace        = tr->current_trace;
7976         info->iter.array_buffer = &tr->array_buffer;
7977         info->spare             = NULL;
7978         /* Force reading ring buffer for first read */
7979         info->read              = (unsigned int)-1;
7980
7981         filp->private_data = info;
7982
7983         tr->trace_ref++;
7984
7985         mutex_unlock(&trace_types_lock);
7986
7987         ret = nonseekable_open(inode, filp);
7988         if (ret < 0)
7989                 trace_array_put(tr);
7990
7991         return ret;
7992 }
7993
7994 static __poll_t
7995 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7996 {
7997         struct ftrace_buffer_info *info = filp->private_data;
7998         struct trace_iterator *iter = &info->iter;
7999
8000         return trace_poll(iter, filp, poll_table);
8001 }
8002
8003 static ssize_t
8004 tracing_buffers_read(struct file *filp, char __user *ubuf,
8005                      size_t count, loff_t *ppos)
8006 {
8007         struct ftrace_buffer_info *info = filp->private_data;
8008         struct trace_iterator *iter = &info->iter;
8009         ssize_t ret = 0;
8010         ssize_t size;
8011
8012         if (!count)
8013                 return 0;
8014
8015 #ifdef CONFIG_TRACER_MAX_TRACE
8016         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8017                 return -EBUSY;
8018 #endif
8019
8020         if (!info->spare) {
8021                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8022                                                           iter->cpu_file);
8023                 if (IS_ERR(info->spare)) {
8024                         ret = PTR_ERR(info->spare);
8025                         info->spare = NULL;
8026                 } else {
8027                         info->spare_cpu = iter->cpu_file;
8028                 }
8029         }
8030         if (!info->spare)
8031                 return ret;
8032
8033         /* Do we have previous read data to read? */
8034         if (info->read < PAGE_SIZE)
8035                 goto read;
8036
8037  again:
8038         trace_access_lock(iter->cpu_file);
8039         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8040                                     &info->spare,
8041                                     count,
8042                                     iter->cpu_file, 0);
8043         trace_access_unlock(iter->cpu_file);
8044
8045         if (ret < 0) {
8046                 if (trace_empty(iter)) {
8047                         if (filp->f_flags & O_NONBLOCK)
8048                                 return -EAGAIN;
8049
8050                         ret = wait_on_pipe(iter, 0);
8051                         if (ret)
8052                                 return ret;
8053
8054                         goto again;
8055                 }
8056                 return 0;
8057         }
8058
8059         info->read = 0;
8060  read:
8061         size = PAGE_SIZE - info->read;
8062         if (size > count)
8063                 size = count;
8064
8065         ret = copy_to_user(ubuf, info->spare + info->read, size);
8066         if (ret == size)
8067                 return -EFAULT;
8068
8069         size -= ret;
8070
8071         *ppos += size;
8072         info->read += size;
8073
8074         return size;
8075 }
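
/*
 * Rough user-space usage sketch (illustrative, assuming 4K pages): the
 * per-cpu trace_pipe_raw file hands out data a ring-buffer page at a
 * time, so readers such as trace-cmd pull page-sized chunks and parse
 * the binary sub-buffer format:
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	char page[4096];
 *	ssize_t n = read(fd, page, sizeof(page));
 *
 * A short read just means fewer events were available on that CPU.
 */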
8076
8077 static int tracing_buffers_release(struct inode *inode, struct file *file)
8078 {
8079         struct ftrace_buffer_info *info = file->private_data;
8080         struct trace_iterator *iter = &info->iter;
8081
8082         mutex_lock(&trace_types_lock);
8083
8084         iter->tr->trace_ref--;
8085
8086         __trace_array_put(iter->tr);
8087
8088         if (info->spare)
8089                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8090                                            info->spare_cpu, info->spare);
8091         kvfree(info);
8092
8093         mutex_unlock(&trace_types_lock);
8094
8095         return 0;
8096 }
8097
8098 struct buffer_ref {
8099         struct trace_buffer     *buffer;
8100         void                    *page;
8101         int                     cpu;
8102         refcount_t              refcount;
8103 };
8104
8105 static void buffer_ref_release(struct buffer_ref *ref)
8106 {
8107         if (!refcount_dec_and_test(&ref->refcount))
8108                 return;
8109         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8110         kfree(ref);
8111 }
8112
8113 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8114                                     struct pipe_buffer *buf)
8115 {
8116         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8117
8118         buffer_ref_release(ref);
8119         buf->private = 0;
8120 }
8121
8122 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8123                                 struct pipe_buffer *buf)
8124 {
8125         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8126
8127         if (refcount_read(&ref->refcount) > INT_MAX/2)
8128                 return false;
8129
8130         refcount_inc(&ref->refcount);
8131         return true;
8132 }
8133
8134 /* Pipe buffer operations for a buffer. */
8135 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8136         .release                = buffer_pipe_buf_release,
8137         .get                    = buffer_pipe_buf_get,
8138 };
8139
8140 /*
8141  * Callback from splice_to_pipe(): release any pages left in the spd
8142  * if we errored out while filling the pipe.
8143  */
8144 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8145 {
8146         struct buffer_ref *ref =
8147                 (struct buffer_ref *)spd->partial[i].private;
8148
8149         buffer_ref_release(ref);
8150         spd->partial[i].private = 0;
8151 }
8152
8153 static ssize_t
8154 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8155                             struct pipe_inode_info *pipe, size_t len,
8156                             unsigned int flags)
8157 {
8158         struct ftrace_buffer_info *info = file->private_data;
8159         struct trace_iterator *iter = &info->iter;
8160         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8161         struct page *pages_def[PIPE_DEF_BUFFERS];
8162         struct splice_pipe_desc spd = {
8163                 .pages          = pages_def,
8164                 .partial        = partial_def,
8165                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8166                 .ops            = &buffer_pipe_buf_ops,
8167                 .spd_release    = buffer_spd_release,
8168         };
8169         struct buffer_ref *ref;
8170         int entries, i;
8171         ssize_t ret = 0;
8172
8173 #ifdef CONFIG_TRACER_MAX_TRACE
8174         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8175                 return -EBUSY;
8176 #endif
8177
8178         if (*ppos & (PAGE_SIZE - 1))
8179                 return -EINVAL;
8180
8181         if (len & (PAGE_SIZE - 1)) {
8182                 if (len < PAGE_SIZE)
8183                         return -EINVAL;
8184                 len &= PAGE_MASK;
8185         }
8186
8187         if (splice_grow_spd(pipe, &spd))
8188                 return -ENOMEM;
8189
8190  again:
8191         trace_access_lock(iter->cpu_file);
8192         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8193
8194         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8195                 struct page *page;
8196                 int r;
8197
8198                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8199                 if (!ref) {
8200                         ret = -ENOMEM;
8201                         break;
8202                 }
8203
8204                 refcount_set(&ref->refcount, 1);
8205                 ref->buffer = iter->array_buffer->buffer;
8206                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8207                 if (IS_ERR(ref->page)) {
8208                         ret = PTR_ERR(ref->page);
8209                         ref->page = NULL;
8210                         kfree(ref);
8211                         break;
8212                 }
8213                 ref->cpu = iter->cpu_file;
8214
8215                 r = ring_buffer_read_page(ref->buffer, &ref->page,
8216                                           len, iter->cpu_file, 1);
8217                 if (r < 0) {
8218                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8219                                                    ref->page);
8220                         kfree(ref);
8221                         break;
8222                 }
8223
8224                 page = virt_to_page(ref->page);
8225
8226                 spd.pages[i] = page;
8227                 spd.partial[i].len = PAGE_SIZE;
8228                 spd.partial[i].offset = 0;
8229                 spd.partial[i].private = (unsigned long)ref;
8230                 spd.nr_pages++;
8231                 *ppos += PAGE_SIZE;
8232
8233                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8234         }
8235
8236         trace_access_unlock(iter->cpu_file);
8237         spd.nr_pages = i;
8238
8239         /* did we read anything? */
8240         if (!spd.nr_pages) {
8241                 if (ret)
8242                         goto out;
8243
8244                 ret = -EAGAIN;
8245                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8246                         goto out;
8247
8248                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8249                 if (ret)
8250                         goto out;
8251
8252                 goto again;
8253         }
8254
8255         ret = splice_to_pipe(pipe, &spd);
8256 out:
8257         splice_shrink_spd(&spd);
8258
8259         return ret;
8260 }
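
/*
 * Illustrative only: because splice(2) needs a pipe on one side, user
 * space usually bounces the raw pages through a pipe to copy them to a
 * file without touching the data (error handling omitted):
 *
 *	int raw = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	int out = open("cpu0.raw", O_WRONLY | O_CREAT, 0644);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(raw, NULL, p[1], NULL, 4096, 0);
 *	splice(p[0], NULL, out, NULL, 4096, 0);
 *
 * The file offset must be page aligned and the length is rounded down
 * to whole pages, as checked at the top of the function above.
 */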
8261
8262 static const struct file_operations tracing_buffers_fops = {
8263         .open           = tracing_buffers_open,
8264         .read           = tracing_buffers_read,
8265         .poll           = tracing_buffers_poll,
8266         .release        = tracing_buffers_release,
8267         .splice_read    = tracing_buffers_splice_read,
8268         .llseek         = no_llseek,
8269 };
8270
8271 static ssize_t
8272 tracing_stats_read(struct file *filp, char __user *ubuf,
8273                    size_t count, loff_t *ppos)
8274 {
8275         struct inode *inode = file_inode(filp);
8276         struct trace_array *tr = inode->i_private;
8277         struct array_buffer *trace_buf = &tr->array_buffer;
8278         int cpu = tracing_get_cpu(inode);
8279         struct trace_seq *s;
8280         unsigned long cnt;
8281         unsigned long long t;
8282         unsigned long usec_rem;
8283
8284         s = kmalloc(sizeof(*s), GFP_KERNEL);
8285         if (!s)
8286                 return -ENOMEM;
8287
8288         trace_seq_init(s);
8289
8290         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8291         trace_seq_printf(s, "entries: %ld\n", cnt);
8292
8293         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8294         trace_seq_printf(s, "overrun: %ld\n", cnt);
8295
8296         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8297         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8298
8299         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8300         trace_seq_printf(s, "bytes: %ld\n", cnt);
8301
8302         if (trace_clocks[tr->clock_id].in_ns) {
8303                 /* local or global for trace_clock */
8304                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8305                 usec_rem = do_div(t, USEC_PER_SEC);
8306                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8307                                                                 t, usec_rem);
8308
8309                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8310                 usec_rem = do_div(t, USEC_PER_SEC);
8311                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8312         } else {
8313                 /* counter or tsc mode for trace_clock */
8314                 trace_seq_printf(s, "oldest event ts: %llu\n",
8315                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8316
8317                 trace_seq_printf(s, "now ts: %llu\n",
8318                                 ring_buffer_time_stamp(trace_buf->buffer));
8319         }
8320
8321         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8322         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8323
8324         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8325         trace_seq_printf(s, "read events: %ld\n", cnt);
8326
8327         count = simple_read_from_buffer(ubuf, count, ppos,
8328                                         s->buffer, trace_seq_used(s));
8329
8330         kfree(s);
8331
8332         return count;
8333 }
8334
8335 static const struct file_operations tracing_stats_fops = {
8336         .open           = tracing_open_generic_tr,
8337         .read           = tracing_stats_read,
8338         .llseek         = generic_file_llseek,
8339         .release        = tracing_release_generic_tr,
8340 };
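
/*
 * The per_cpu/cpuN/stats file produced above reads roughly as follows
 * (values are examples only):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7212
 *	oldest event ts:    63.511133
 *	now ts:    63.522140
 *	dropped events: 0
 *	read events: 0
 */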
8341
8342 #ifdef CONFIG_DYNAMIC_FTRACE
8343
8344 static ssize_t
8345 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8346                   size_t cnt, loff_t *ppos)
8347 {
8348         ssize_t ret;
8349         char *buf;
8350         int r;
8351
8352         /* 256 should be plenty to hold the amount needed */
8353         buf = kmalloc(256, GFP_KERNEL);
8354         if (!buf)
8355                 return -ENOMEM;
8356
8357         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8358                       ftrace_update_tot_cnt,
8359                       ftrace_number_of_pages,
8360                       ftrace_number_of_groups);
8361
8362         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8363         kfree(buf);
8364         return ret;
8365 }
8366
8367 static const struct file_operations tracing_dyn_info_fops = {
8368         .open           = tracing_open_generic,
8369         .read           = tracing_read_dyn_info,
8370         .llseek         = generic_file_llseek,
8371 };
8372 #endif /* CONFIG_DYNAMIC_FTRACE */
8373
8374 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8375 static void
8376 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8377                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8378                 void *data)
8379 {
8380         tracing_snapshot_instance(tr);
8381 }
8382
8383 static void
8384 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8385                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8386                       void *data)
8387 {
8388         struct ftrace_func_mapper *mapper = data;
8389         long *count = NULL;
8390
8391         if (mapper)
8392                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8393
8394         if (count) {
8395
8396                 if (*count <= 0)
8397                         return;
8398
8399                 (*count)--;
8400         }
8401
8402         tracing_snapshot_instance(tr);
8403 }
8404
8405 static int
8406 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8407                       struct ftrace_probe_ops *ops, void *data)
8408 {
8409         struct ftrace_func_mapper *mapper = data;
8410         long *count = NULL;
8411
8412         seq_printf(m, "%ps:", (void *)ip);
8413
8414         seq_puts(m, "snapshot");
8415
8416         if (mapper)
8417                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8418
8419         if (count)
8420                 seq_printf(m, ":count=%ld\n", *count);
8421         else
8422                 seq_puts(m, ":unlimited\n");
8423
8424         return 0;
8425 }
8426
8427 static int
8428 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8429                      unsigned long ip, void *init_data, void **data)
8430 {
8431         struct ftrace_func_mapper *mapper = *data;
8432
8433         if (!mapper) {
8434                 mapper = allocate_ftrace_func_mapper();
8435                 if (!mapper)
8436                         return -ENOMEM;
8437                 *data = mapper;
8438         }
8439
8440         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8441 }
8442
8443 static void
8444 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8445                      unsigned long ip, void *data)
8446 {
8447         struct ftrace_func_mapper *mapper = data;
8448
8449         if (!ip) {
8450                 if (!mapper)
8451                         return;
8452                 free_ftrace_func_mapper(mapper, NULL);
8453                 return;
8454         }
8455
8456         ftrace_func_mapper_remove_ip(mapper, ip);
8457 }
8458
8459 static struct ftrace_probe_ops snapshot_probe_ops = {
8460         .func                   = ftrace_snapshot,
8461         .print                  = ftrace_snapshot_print,
8462 };
8463
8464 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8465         .func                   = ftrace_count_snapshot,
8466         .print                  = ftrace_snapshot_print,
8467         .init                   = ftrace_snapshot_init,
8468         .free                   = ftrace_snapshot_free,
8469 };
8470
8471 static int
8472 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8473                                char *glob, char *cmd, char *param, int enable)
8474 {
8475         struct ftrace_probe_ops *ops;
8476         void *count = (void *)-1;
8477         char *number;
8478         int ret;
8479
8480         if (!tr)
8481                 return -ENODEV;
8482
8483         /* hash funcs only work with set_ftrace_filter */
8484         if (!enable)
8485                 return -EINVAL;
8486
8487         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8488
8489         if (glob[0] == '!')
8490                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8491
8492         if (!param)
8493                 goto out_reg;
8494
8495         number = strsep(&param, ":");
8496
8497         if (!strlen(number))
8498                 goto out_reg;
8499
8500         /*
8501          * We use the callback data field (which is a pointer)
8502          * as our counter.
8503          */
8504         ret = kstrtoul(number, 0, (unsigned long *)&count);
8505         if (ret)
8506                 return ret;
8507
8508  out_reg:
8509         ret = tracing_alloc_snapshot_instance(tr);
8510         if (ret < 0)
8511                 goto out;
8512
8513         ret = register_ftrace_function_probe(glob, tr, ops, count);
8514
8515  out:
8516         return ret < 0 ? ret : 0;
8517 }
8518
8519 static struct ftrace_func_command ftrace_snapshot_cmd = {
8520         .name                   = "snapshot",
8521         .func                   = ftrace_trace_snapshot_callback,
8522 };
8523
8524 static __init int register_snapshot_cmd(void)
8525 {
8526         return register_ftrace_command(&ftrace_snapshot_cmd);
8527 }
8528 #else
8529 static inline __init int register_snapshot_cmd(void) { return 0; }
8530 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
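
/*
 * Illustrative usage of the "snapshot" command registered above, via
 * set_ftrace_filter (see Documentation/trace/ftrace.rst for details):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter    # snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter   # remove the probe
 */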
8531
8532 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8533 {
8534         if (WARN_ON(!tr->dir))
8535                 return ERR_PTR(-ENODEV);
8536
8537         /* Top directory uses NULL as the parent */
8538         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8539                 return NULL;
8540
8541         /* All sub buffers have a descriptor */
8542         return tr->dir;
8543 }
8544
8545 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8546 {
8547         struct dentry *d_tracer;
8548
8549         if (tr->percpu_dir)
8550                 return tr->percpu_dir;
8551
8552         d_tracer = tracing_get_dentry(tr);
8553         if (IS_ERR(d_tracer))
8554                 return NULL;
8555
8556         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8557
8558         MEM_FAIL(!tr->percpu_dir,
8559                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8560
8561         return tr->percpu_dir;
8562 }
8563
8564 static struct dentry *
8565 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8566                       void *data, long cpu, const struct file_operations *fops)
8567 {
8568         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8569
8570         if (ret) /* See tracing_get_cpu() */
8571                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8572         return ret;
8573 }
8574
8575 static void
8576 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8577 {
8578         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8579         struct dentry *d_cpu;
8580         char cpu_dir[30]; /* 30 characters should be more than enough */
8581
8582         if (!d_percpu)
8583                 return;
8584
8585         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8586         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8587         if (!d_cpu) {
8588                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8589                 return;
8590         }
8591
8592         /* per cpu trace_pipe */
8593         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8594                                 tr, cpu, &tracing_pipe_fops);
8595
8596         /* per cpu trace */
8597         trace_create_cpu_file("trace", 0644, d_cpu,
8598                                 tr, cpu, &tracing_fops);
8599
8600         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8601                                 tr, cpu, &tracing_buffers_fops);
8602
8603         trace_create_cpu_file("stats", 0444, d_cpu,
8604                                 tr, cpu, &tracing_stats_fops);
8605
8606         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8607                                 tr, cpu, &tracing_entries_fops);
8608
8609 #ifdef CONFIG_TRACER_SNAPSHOT
8610         trace_create_cpu_file("snapshot", 0644, d_cpu,
8611                                 tr, cpu, &snapshot_fops);
8612
8613         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8614                                 tr, cpu, &snapshot_raw_fops);
8615 #endif
8616 }
8617
8618 #ifdef CONFIG_FTRACE_SELFTEST
8619 /* Let selftest have access to static functions in this file */
8620 #include "trace_selftest.c"
8621 #endif
8622
8623 static ssize_t
8624 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8625                         loff_t *ppos)
8626 {
8627         struct trace_option_dentry *topt = filp->private_data;
8628         char *buf;
8629
8630         if (topt->flags->val & topt->opt->bit)
8631                 buf = "1\n";
8632         else
8633                 buf = "0\n";
8634
8635         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8636 }
8637
8638 static ssize_t
8639 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8640                          loff_t *ppos)
8641 {
8642         struct trace_option_dentry *topt = filp->private_data;
8643         unsigned long val;
8644         int ret;
8645
8646         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8647         if (ret)
8648                 return ret;
8649
8650         if (val != 0 && val != 1)
8651                 return -EINVAL;
8652
8653         if (!!(topt->flags->val & topt->opt->bit) != val) {
8654                 mutex_lock(&trace_types_lock);
8655                 ret = __set_tracer_option(topt->tr, topt->flags,
8656                                           topt->opt, !val);
8657                 mutex_unlock(&trace_types_lock);
8658                 if (ret)
8659                         return ret;
8660         }
8661
8662         *ppos += cnt;
8663
8664         return cnt;
8665 }
8666
8667
8668 static const struct file_operations trace_options_fops = {
8669         .open = tracing_open_generic,
8670         .read = trace_options_read,
8671         .write = trace_options_write,
8672         .llseek = generic_file_llseek,
8673 };
8674
8675 /*
8676  * In order to pass in both the trace_array descriptor as well as the index
8677  * to the flag that the trace option file represents, the trace_array
8678  * has a character array of trace_flags_index[], which holds the index
8679  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8680  * The address of this character array is passed to the flag option file
8681  * read/write callbacks.
8682  *
8683  * In order to extract both the index and the trace_array descriptor,
8684  * get_tr_index() uses the following algorithm.
8685  *
8686  *   idx = *ptr;
8687  *
8688  * Since the pointer points at the array element whose value is its own
8689  * index (remember index[1] == 1), dereferencing it yields the index.
8690  *
8691  * Then, to get the trace_array descriptor, subtract that index from
8692  * the pointer to arrive at the start of the index array itself:
8693  *
8694  *   ptr - idx == &index[0]
8695  *
8696  * Then a simple container_of() from that pointer gets us to the
8697  * trace_array descriptor.
8698  */
8699 static void get_tr_index(void *data, struct trace_array **ptr,
8700                          unsigned int *pindex)
8701 {
8702         *pindex = *(unsigned char *)data;
8703
8704         *ptr = container_of(data - *pindex, struct trace_array,
8705                             trace_flags_index);
8706 }
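
/*
 * Worked example (illustrative): if &tr->trace_flags_index[3] was stored
 * as the file's private data, then *data == 3, data - 3 points back at
 * trace_flags_index[0], and container_of() recovers the trace_array.
 */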
8707
8708 static ssize_t
8709 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8710                         loff_t *ppos)
8711 {
8712         void *tr_index = filp->private_data;
8713         struct trace_array *tr;
8714         unsigned int index;
8715         char *buf;
8716
8717         get_tr_index(tr_index, &tr, &index);
8718
8719         if (tr->trace_flags & (1 << index))
8720                 buf = "1\n";
8721         else
8722                 buf = "0\n";
8723
8724         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8725 }
8726
8727 static ssize_t
8728 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8729                          loff_t *ppos)
8730 {
8731         void *tr_index = filp->private_data;
8732         struct trace_array *tr;
8733         unsigned int index;
8734         unsigned long val;
8735         int ret;
8736
8737         get_tr_index(tr_index, &tr, &index);
8738
8739         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8740         if (ret)
8741                 return ret;
8742
8743         if (val != 0 && val != 1)
8744                 return -EINVAL;
8745
8746         mutex_lock(&event_mutex);
8747         mutex_lock(&trace_types_lock);
8748         ret = set_tracer_flag(tr, 1 << index, val);
8749         mutex_unlock(&trace_types_lock);
8750         mutex_unlock(&event_mutex);
8751
8752         if (ret < 0)
8753                 return ret;
8754
8755         *ppos += cnt;
8756
8757         return cnt;
8758 }
8759
8760 static const struct file_operations trace_options_core_fops = {
8761         .open = tracing_open_generic,
8762         .read = trace_options_core_read,
8763         .write = trace_options_core_write,
8764         .llseek = generic_file_llseek,
8765 };
8766
8767 struct dentry *trace_create_file(const char *name,
8768                                  umode_t mode,
8769                                  struct dentry *parent,
8770                                  void *data,
8771                                  const struct file_operations *fops)
8772 {
8773         struct dentry *ret;
8774
8775         ret = tracefs_create_file(name, mode, parent, data, fops);
8776         if (!ret)
8777                 pr_warn("Could not create tracefs '%s' entry\n", name);
8778
8779         return ret;
8780 }
8781
8782
8783 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8784 {
8785         struct dentry *d_tracer;
8786
8787         if (tr->options)
8788                 return tr->options;
8789
8790         d_tracer = tracing_get_dentry(tr);
8791         if (IS_ERR(d_tracer))
8792                 return NULL;
8793
8794         tr->options = tracefs_create_dir("options", d_tracer);
8795         if (!tr->options) {
8796                 pr_warn("Could not create tracefs directory 'options'\n");
8797                 return NULL;
8798         }
8799
8800         return tr->options;
8801 }
8802
8803 static void
8804 create_trace_option_file(struct trace_array *tr,
8805                          struct trace_option_dentry *topt,
8806                          struct tracer_flags *flags,
8807                          struct tracer_opt *opt)
8808 {
8809         struct dentry *t_options;
8810
8811         t_options = trace_options_init_dentry(tr);
8812         if (!t_options)
8813                 return;
8814
8815         topt->flags = flags;
8816         topt->opt = opt;
8817         topt->tr = tr;
8818
8819         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8820                                     &trace_options_fops);
8821
8822 }
8823
8824 static void
8825 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8826 {
8827         struct trace_option_dentry *topts;
8828         struct trace_options *tr_topts;
8829         struct tracer_flags *flags;
8830         struct tracer_opt *opts;
8831         int cnt;
8832         int i;
8833
8834         if (!tracer)
8835                 return;
8836
8837         flags = tracer->flags;
8838
8839         if (!flags || !flags->opts)
8840                 return;
8841
8842         /*
8843          * If this is an instance, only create flags for tracers
8844          * the instance may have.
8845          */
8846         if (!trace_ok_for_array(tracer, tr))
8847                 return;
8848
8849         for (i = 0; i < tr->nr_topts; i++) {
8850                 /* Make sure there are no duplicate flags. */
8851                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8852                         return;
8853         }
8854
8855         opts = flags->opts;
8856
8857         for (cnt = 0; opts[cnt].name; cnt++)
8858                 ;
8859
8860         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8861         if (!topts)
8862                 return;
8863
8864         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8865                             GFP_KERNEL);
8866         if (!tr_topts) {
8867                 kfree(topts);
8868                 return;
8869         }
8870
8871         tr->topts = tr_topts;
8872         tr->topts[tr->nr_topts].tracer = tracer;
8873         tr->topts[tr->nr_topts].topts = topts;
8874         tr->nr_topts++;
8875
8876         for (cnt = 0; opts[cnt].name; cnt++) {
8877                 create_trace_option_file(tr, &topts[cnt], flags,
8878                                          &opts[cnt]);
8879                 MEM_FAIL(topts[cnt].entry == NULL,
8880                           "Failed to create trace option: %s",
8881                           opts[cnt].name);
8882         }
8883 }
8884
8885 static struct dentry *
8886 create_trace_option_core_file(struct trace_array *tr,
8887                               const char *option, long index)
8888 {
8889         struct dentry *t_options;
8890
8891         t_options = trace_options_init_dentry(tr);
8892         if (!t_options)
8893                 return NULL;
8894
8895         return trace_create_file(option, 0644, t_options,
8896                                  (void *)&tr->trace_flags_index[index],
8897                                  &trace_options_core_fops);
8898 }
8899
8900 static void create_trace_options_dir(struct trace_array *tr)
8901 {
8902         struct dentry *t_options;
8903         bool top_level = tr == &global_trace;
8904         int i;
8905
8906         t_options = trace_options_init_dentry(tr);
8907         if (!t_options)
8908                 return;
8909
8910         for (i = 0; trace_options[i]; i++) {
8911                 if (top_level ||
8912                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8913                         create_trace_option_core_file(tr, trace_options[i], i);
8914         }
8915 }
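
/*
 * The end result is a set of boolean files under <tracefs>/options/ in
 * each instance, e.g. (illustrative; tracer-specific options such as
 * the function tracer's func_stack_trace only appear while a tracer
 * that provides them is in use):
 *
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 *	echo 0 > /sys/kernel/tracing/options/sym-offset
 */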
8916
8917 static ssize_t
8918 rb_simple_read(struct file *filp, char __user *ubuf,
8919                size_t cnt, loff_t *ppos)
8920 {
8921         struct trace_array *tr = filp->private_data;
8922         char buf[64];
8923         int r;
8924
8925         r = tracer_tracing_is_on(tr);
8926         r = sprintf(buf, "%d\n", r);
8927
8928         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8929 }
8930
8931 static ssize_t
8932 rb_simple_write(struct file *filp, const char __user *ubuf,
8933                 size_t cnt, loff_t *ppos)
8934 {
8935         struct trace_array *tr = filp->private_data;
8936         struct trace_buffer *buffer = tr->array_buffer.buffer;
8937         unsigned long val;
8938         int ret;
8939
8940         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8941         if (ret)
8942                 return ret;
8943
8944         if (buffer) {
8945                 mutex_lock(&trace_types_lock);
8946                 if (!!val == tracer_tracing_is_on(tr)) {
8947                         val = 0; /* do nothing */
8948                 } else if (val) {
8949                         tracer_tracing_on(tr);
8950                         if (tr->current_trace->start)
8951                                 tr->current_trace->start(tr);
8952                 } else {
8953                         tracer_tracing_off(tr);
8954                         if (tr->current_trace->stop)
8955                                 tr->current_trace->stop(tr);
8956                 }
8957                 mutex_unlock(&trace_types_lock);
8958         }
8959
8960         (*ppos)++;
8961
8962         return cnt;
8963 }
8964
8965 static const struct file_operations rb_simple_fops = {
8966         .open           = tracing_open_generic_tr,
8967         .read           = rb_simple_read,
8968         .write          = rb_simple_write,
8969         .release        = tracing_release_generic_tr,
8970         .llseek         = default_llseek,
8971 };
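
/*
 * These ops back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below, e.g. (illustrative):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *	echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *
 * Writing the value the buffer already has is treated as a no-op.
 */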
8972
8973 static ssize_t
8974 buffer_percent_read(struct file *filp, char __user *ubuf,
8975                     size_t cnt, loff_t *ppos)
8976 {
8977         struct trace_array *tr = filp->private_data;
8978         char buf[64];
8979         int r;
8980
8981         r = tr->buffer_percent;
8982         r = sprintf(buf, "%d\n", r);
8983
8984         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8985 }
8986
8987 static ssize_t
8988 buffer_percent_write(struct file *filp, const char __user *ubuf,
8989                      size_t cnt, loff_t *ppos)
8990 {
8991         struct trace_array *tr = filp->private_data;
8992         unsigned long val;
8993         int ret;
8994
8995         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8996         if (ret)
8997                 return ret;
8998
8999         if (val > 100)
9000                 return -EINVAL;
9001
9002         if (!val)
9003                 val = 1;
9004
9005         tr->buffer_percent = val;
9006
9007         (*ppos)++;
9008
9009         return cnt;
9010 }
9011
9012 static const struct file_operations buffer_percent_fops = {
9013         .open           = tracing_open_generic_tr,
9014         .read           = buffer_percent_read,
9015         .write          = buffer_percent_write,
9016         .release        = tracing_release_generic_tr,
9017         .llseek         = default_llseek,
9018 };
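
/*
 * "buffer_percent" (default 50, set in init_tracer_tracefs() below)
 * controls how full a per-cpu buffer must be before blocked readers of
 * trace_pipe_raw are woken, via the wait_on_pipe() call in
 * tracing_buffers_splice_read(); e.g. (illustrative):
 *
 *	echo 100 > /sys/kernel/tracing/buffer_percent  # wait for a full buffer
 *	echo 1 > /sys/kernel/tracing/buffer_percent    # wake on almost any data
 */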
9019
9020 static struct dentry *trace_instance_dir;
9021
9022 static void
9023 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9024
9025 static int
9026 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9027 {
9028         enum ring_buffer_flags rb_flags;
9029
9030         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9031
9032         buf->tr = tr;
9033
9034         buf->buffer = ring_buffer_alloc(size, rb_flags);
9035         if (!buf->buffer)
9036                 return -ENOMEM;
9037
9038         buf->data = alloc_percpu(struct trace_array_cpu);
9039         if (!buf->data) {
9040                 ring_buffer_free(buf->buffer);
9041                 buf->buffer = NULL;
9042                 return -ENOMEM;
9043         }
9044
9045         /* Allocate the first page for all buffers */
9046         set_buffer_entries(&tr->array_buffer,
9047                            ring_buffer_size(tr->array_buffer.buffer, 0));
9048
9049         return 0;
9050 }
9051
9052 static int allocate_trace_buffers(struct trace_array *tr, int size)
9053 {
9054         int ret;
9055
9056         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9057         if (ret)
9058                 return ret;
9059
9060 #ifdef CONFIG_TRACER_MAX_TRACE
9061         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9062                                     allocate_snapshot ? size : 1);
9063         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9064                 ring_buffer_free(tr->array_buffer.buffer);
9065                 tr->array_buffer.buffer = NULL;
9066                 free_percpu(tr->array_buffer.data);
9067                 tr->array_buffer.data = NULL;
9068                 return -ENOMEM;
9069         }
9070         tr->allocated_snapshot = allocate_snapshot;
9071
9072         /*
9073          * Only the top level trace array gets its snapshot allocated
9074          * from the kernel command line.
9075          */
9076         allocate_snapshot = false;
9077 #endif
9078
9079         return 0;
9080 }
9081
9082 static void free_trace_buffer(struct array_buffer *buf)
9083 {
9084         if (buf->buffer) {
9085                 ring_buffer_free(buf->buffer);
9086                 buf->buffer = NULL;
9087                 free_percpu(buf->data);
9088                 buf->data = NULL;
9089         }
9090 }
9091
9092 static void free_trace_buffers(struct trace_array *tr)
9093 {
9094         if (!tr)
9095                 return;
9096
9097         free_trace_buffer(&tr->array_buffer);
9098
9099 #ifdef CONFIG_TRACER_MAX_TRACE
9100         free_trace_buffer(&tr->max_buffer);
9101 #endif
9102 }
9103
9104 static void init_trace_flags_index(struct trace_array *tr)
9105 {
9106         int i;
9107
9108         /* Used by the trace options files */
9109         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9110                 tr->trace_flags_index[i] = i;
9111 }
9112
9113 static void __update_tracer_options(struct trace_array *tr)
9114 {
9115         struct tracer *t;
9116
9117         for (t = trace_types; t; t = t->next)
9118                 add_tracer_options(tr, t);
9119 }
9120
9121 static void update_tracer_options(struct trace_array *tr)
9122 {
9123         mutex_lock(&trace_types_lock);
9124         __update_tracer_options(tr);
9125         mutex_unlock(&trace_types_lock);
9126 }
9127
9128 /* Must have trace_types_lock held */
9129 struct trace_array *trace_array_find(const char *instance)
9130 {
9131         struct trace_array *tr, *found = NULL;
9132
9133         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9134                 if (tr->name && strcmp(tr->name, instance) == 0) {
9135                         found = tr;
9136                         break;
9137                 }
9138         }
9139
9140         return found;
9141 }
9142
9143 struct trace_array *trace_array_find_get(const char *instance)
9144 {
9145         struct trace_array *tr;
9146
9147         mutex_lock(&trace_types_lock);
9148         tr = trace_array_find(instance);
9149         if (tr)
9150                 tr->ref++;
9151         mutex_unlock(&trace_types_lock);
9152
9153         return tr;
9154 }
9155
9156 static int trace_array_create_dir(struct trace_array *tr)
9157 {
9158         int ret;
9159
9160         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9161         if (!tr->dir)
9162                 return -EINVAL;
9163
9164         ret = event_trace_add_tracer(tr->dir, tr);
9165         if (ret) {
9166                 tracefs_remove(tr->dir);
9167                 return ret;
9168         }
9169
9170         init_tracer_tracefs(tr, tr->dir);
9171         __update_tracer_options(tr);
9172
9173         return ret;
9174 }
9175
9176 static struct trace_array *trace_array_create(const char *name)
9177 {
9178         struct trace_array *tr;
9179         int ret;
9180
9181         ret = -ENOMEM;
9182         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9183         if (!tr)
9184                 return ERR_PTR(ret);
9185
9186         tr->name = kstrdup(name, GFP_KERNEL);
9187         if (!tr->name)
9188                 goto out_free_tr;
9189
9190         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9191                 goto out_free_tr;
9192
9193         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9194
9195         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9196
9197         raw_spin_lock_init(&tr->start_lock);
9198
9199         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9200
9201         tr->current_trace = &nop_trace;
9202
9203         INIT_LIST_HEAD(&tr->systems);
9204         INIT_LIST_HEAD(&tr->events);
9205         INIT_LIST_HEAD(&tr->hist_vars);
9206         INIT_LIST_HEAD(&tr->err_log);
9207
9208         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9209                 goto out_free_tr;
9210
9211         if (ftrace_allocate_ftrace_ops(tr) < 0)
9212                 goto out_free_tr;
9213
9214         ftrace_init_trace_array(tr);
9215
9216         init_trace_flags_index(tr);
9217
9218         if (trace_instance_dir) {
9219                 ret = trace_array_create_dir(tr);
9220                 if (ret)
9221                         goto out_free_tr;
9222         } else
9223                 __trace_early_add_events(tr);
9224
9225         list_add(&tr->list, &ftrace_trace_arrays);
9226
9227         tr->ref++;
9228
9229         return tr;
9230
9231  out_free_tr:
9232         ftrace_free_ftrace_ops(tr);
9233         free_trace_buffers(tr);
9234         free_cpumask_var(tr->tracing_cpumask);
9235         kfree(tr->name);
9236         kfree(tr);
9237
9238         return ERR_PTR(ret);
9239 }
9240
9241 static int instance_mkdir(const char *name)
9242 {
9243         struct trace_array *tr;
9244         int ret;
9245
9246         mutex_lock(&event_mutex);
9247         mutex_lock(&trace_types_lock);
9248
9249         ret = -EEXIST;
9250         if (trace_array_find(name))
9251                 goto out_unlock;
9252
9253         tr = trace_array_create(name);
9254
9255         ret = PTR_ERR_OR_ZERO(tr);
9256
9257 out_unlock:
9258         mutex_unlock(&trace_types_lock);
9259         mutex_unlock(&event_mutex);
9260         return ret;
9261 }
9262
9263 /**
9264  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9265  * @name: The name of the trace array to be looked up/created.
9266  *
9267  * Returns a pointer to the trace array with the given name, or
9268  * NULL if it cannot be found or created.
9269  *
9270  * NOTE: This function increments the reference counter associated with the
9271  * trace array returned. This makes sure it cannot be freed while in use.
9272  * Use trace_array_put() once the trace array is no longer needed.
9273  * If the trace_array is to be freed, trace_array_destroy() needs to
9274  * be called after the trace_array_put(), or simply let user space delete
9275  * it from the tracefs instances directory. But until the
9276  * trace_array_put() is called, user space can not delete it.
9277  *
9278  */
9279 struct trace_array *trace_array_get_by_name(const char *name)
9280 {
9281         struct trace_array *tr;
9282
9283         mutex_lock(&event_mutex);
9284         mutex_lock(&trace_types_lock);
9285
9286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9287                 if (tr->name && strcmp(tr->name, name) == 0)
9288                         goto out_unlock;
9289         }
9290
9291         tr = trace_array_create(name);
9292
9293         if (IS_ERR(tr))
9294                 tr = NULL;
9295 out_unlock:
9296         if (tr)
9297                 tr->ref++;
9298
9299         mutex_unlock(&trace_types_lock);
9300         mutex_unlock(&event_mutex);
9301         return tr;
9302 }
9303 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
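
/*
 * Minimal in-kernel usage sketch (illustrative; the event helper below
 * is part of the wider instance API, declared outside this file - see
 * samples/ftrace/sample-trace-array.c for a complete module example):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr) {
 *		trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *		...
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);	// only if removing the instance
 *	}
 */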
9304
9305 static int __remove_instance(struct trace_array *tr)
9306 {
9307         int i;
9308
9309         /* Reference counter for a newly created trace array = 1. */
9310         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9311                 return -EBUSY;
9312
9313         list_del(&tr->list);
9314
9315         /* Disable all the flags that were enabled coming in */
9316         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9317                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9318                         set_tracer_flag(tr, 1 << i, 0);
9319         }
9320
9321         tracing_set_nop(tr);
9322         clear_ftrace_function_probes(tr);
9323         event_trace_del_tracer(tr);
9324         ftrace_clear_pids(tr);
9325         ftrace_destroy_function_files(tr);
9326         tracefs_remove(tr->dir);
9327         free_percpu(tr->last_func_repeats);
9328         free_trace_buffers(tr);
9329
9330         for (i = 0; i < tr->nr_topts; i++) {
9331                 kfree(tr->topts[i].topts);
9332         }
9333         kfree(tr->topts);
9334
9335         free_cpumask_var(tr->tracing_cpumask);
9336         kfree(tr->name);
9337         kfree(tr);
9338
9339         return 0;
9340 }
9341
9342 int trace_array_destroy(struct trace_array *this_tr)
9343 {
9344         struct trace_array *tr;
9345         int ret;
9346
9347         if (!this_tr)
9348                 return -EINVAL;
9349
9350         mutex_lock(&event_mutex);
9351         mutex_lock(&trace_types_lock);
9352
9353         ret = -ENODEV;
9354
9355         /* Make sure the trace array exists before destroying it. */
9356         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9357                 if (tr == this_tr) {
9358                         ret = __remove_instance(tr);
9359                         break;
9360                 }
9361         }
9362
9363         mutex_unlock(&trace_types_lock);
9364         mutex_unlock(&event_mutex);
9365
9366         return ret;
9367 }
9368 EXPORT_SYMBOL_GPL(trace_array_destroy);
9369
9370 static int instance_rmdir(const char *name)
9371 {
9372         struct trace_array *tr;
9373         int ret;
9374
9375         mutex_lock(&event_mutex);
9376         mutex_lock(&trace_types_lock);
9377
9378         ret = -ENODEV;
9379         tr = trace_array_find(name);
9380         if (tr)
9381                 ret = __remove_instance(tr);
9382
9383         mutex_unlock(&trace_types_lock);
9384         mutex_unlock(&event_mutex);
9385
9386         return ret;
9387 }
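
/*
 * From user space the two callbacks above are reached through the
 * instances directory, e.g. (illustrative):
 *
 *	mkdir /sys/kernel/tracing/instances/foo    # instance_mkdir("foo")
 *	rmdir /sys/kernel/tracing/instances/foo    # instance_rmdir("foo")
 *
 * rmdir fails with EBUSY while the instance still holds references.
 */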
9388
9389 static __init void create_trace_instances(struct dentry *d_tracer)
9390 {
9391         struct trace_array *tr;
9392
9393         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9394                                                          instance_mkdir,
9395                                                          instance_rmdir);
9396         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9397                 return;
9398
9399         mutex_lock(&event_mutex);
9400         mutex_lock(&trace_types_lock);
9401
9402         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9403                 if (!tr->name)
9404                         continue;
9405                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9406                              "Failed to create instance directory\n"))
9407                         break;
9408         }
9409
9410         mutex_unlock(&trace_types_lock);
9411         mutex_unlock(&event_mutex);
9412 }
9413
9414 static void
9415 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9416 {
9417         struct trace_event_file *file;
9418         int cpu;
9419
9420         trace_create_file("available_tracers", 0444, d_tracer,
9421                         tr, &show_traces_fops);
9422
9423         trace_create_file("current_tracer", 0644, d_tracer,
9424                         tr, &set_tracer_fops);
9425
9426         trace_create_file("tracing_cpumask", 0644, d_tracer,
9427                           tr, &tracing_cpumask_fops);
9428
9429         trace_create_file("trace_options", 0644, d_tracer,
9430                           tr, &tracing_iter_fops);
9431
9432         trace_create_file("trace", 0644, d_tracer,
9433                           tr, &tracing_fops);
9434
9435         trace_create_file("trace_pipe", 0444, d_tracer,
9436                           tr, &tracing_pipe_fops);
9437
9438         trace_create_file("buffer_size_kb", 0644, d_tracer,
9439                           tr, &tracing_entries_fops);
9440
9441         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9442                           tr, &tracing_total_entries_fops);
9443
9444         trace_create_file("free_buffer", 0200, d_tracer,
9445                           tr, &tracing_free_buffer_fops);
9446
9447         trace_create_file("trace_marker", 0220, d_tracer,
9448                           tr, &tracing_mark_fops);
9449
9450         file = __find_event_file(tr, "ftrace", "print");
9451         if (file && file->dir)
9452                 trace_create_file("trigger", 0644, file->dir, file,
9453                                   &event_trigger_fops);
9454         tr->trace_marker_file = file;
9455
9456         trace_create_file("trace_marker_raw", 0220, d_tracer,
9457                           tr, &tracing_mark_raw_fops);
9458
9459         trace_create_file("trace_clock", 0644, d_tracer, tr,
9460                           &trace_clock_fops);
9461
9462         trace_create_file("tracing_on", 0644, d_tracer,
9463                           tr, &rb_simple_fops);
9464
9465         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9466                           &trace_time_stamp_mode_fops);
9467
9468         tr->buffer_percent = 50;
9469
9470         trace_create_file("buffer_percent", 0444, d_tracer,
9471                         tr, &buffer_percent_fops);
9472
9473         create_trace_options_dir(tr);
9474
9475         trace_create_maxlat_file(tr, d_tracer);
9476
9477         if (ftrace_create_function_files(tr, d_tracer))
9478                 MEM_FAIL(1, "Could not allocate function filter files");
9479
9480 #ifdef CONFIG_TRACER_SNAPSHOT
9481         trace_create_file("snapshot", 0644, d_tracer,
9482                           tr, &snapshot_fops);
9483 #endif
9484
9485         trace_create_file("error_log", 0644, d_tracer,
9486                           tr, &tracing_err_log_fops);
9487
9488         for_each_tracing_cpu(cpu)
9489                 tracing_init_tracefs_percpu(tr, cpu);
9490
9491         ftrace_init_tracefs(tr, d_tracer);
9492 }
9493
9494 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9495 {
9496         struct vfsmount *mnt;
9497         struct file_system_type *type;
9498
9499         /*
9500          * To maintain backward compatibility for tools that mount
9501          * debugfs to get to the tracing facility, tracefs is automatically
9502          * mounted to the debugfs/tracing directory.
9503          */
9504         type = get_fs_type("tracefs");
9505         if (!type)
9506                 return NULL;
9507         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9508         put_filesystem(type);
9509         if (IS_ERR(mnt))
9510                 return NULL;
9511         mntget(mnt);
9512
9513         return mnt;
9514 }
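
/*
 * Illustrative example of the compatibility path: a tool that still mounts
 * debugfs sees tracefs automounted the first time it steps into
 * debugfs/tracing:
 *
 *   # mount -t debugfs nodev /sys/kernel/debug
 *   # ls /sys/kernel/debug/tracing     <- crossing the automount point
 *                                         invokes trace_automount()
 *
 * Newer tools should use the dedicated mount at /sys/kernel/tracing.
 */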
9515
9516 /**
9517  * tracing_init_dentry - initialize top level trace array
9518  *
9519  * This is called when creating files or directories in the tracing
9520  * directory. It is called via fs_initcall() by any of the boot up code;
9521  * it returns 0 on success or a negative error code on failure.
9522  */
9523 int tracing_init_dentry(void)
9524 {
9525         struct trace_array *tr = &global_trace;
9526
9527         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9528                 pr_warn("Tracing disabled due to lockdown\n");
9529                 return -EPERM;
9530         }
9531
9532         /* The top level trace array uses NULL as parent */
9533         if (tr->dir)
9534                 return 0;
9535
9536         if (WARN_ON(!tracefs_initialized()))
9537                 return -ENODEV;
9538
9539         /*
9540          * As there may still be users that expect the tracing
9541          * files to exist in debugfs/tracing, we must automount
9542          * the tracefs file system there, so older tools still
9543          * work with the newer kernel.
9544          */
9545         tr->dir = debugfs_create_automount("tracing", NULL,
9546                                            trace_automount, NULL);
9547
9548         return 0;
9549 }
9550
9551 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9552 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9553
9554 static struct workqueue_struct *eval_map_wq __initdata;
9555 static struct work_struct eval_map_work __initdata;
9556
9557 static void __init eval_map_work_func(struct work_struct *work)
9558 {
9559         int len;
9560
9561         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9562         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9563 }
9564
9565 static int __init trace_eval_init(void)
9566 {
9567         INIT_WORK(&eval_map_work, eval_map_work_func);
9568
9569         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9570         if (!eval_map_wq) {
9571                 pr_err("Unable to allocate eval_map_wq\n");
9572                 /* No workqueue available; do the work synchronously */
9573                 eval_map_work_func(&eval_map_work);
9574                 return -ENOMEM;
9575         }
9576
9577         queue_work(eval_map_wq, &eval_map_work);
9578         return 0;
9579 }
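
/*
 * A minimal sketch of where the built-in eval maps come from (illustrative,
 * not code from this file): a trace event header can export an enum value
 * so the "print fmt" strings show its name instead of a raw number:
 *
 *   TRACE_DEFINE_ENUM(MY_STATE_RUNNING);        // MY_STATE_RUNNING is a
 *                                               // made-up example enum
 *   TP_printk("state=%s",
 *             __print_symbolic(__entry->state,
 *                              { MY_STATE_RUNNING, "RUNNING" }))
 *
 * Each TRACE_DEFINE_ENUM() adds an entry to the section bounded by
 * __start_ftrace_eval_maps/__stop_ftrace_eval_maps, which the work
 * function above hands to trace_insert_eval_map().
 */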
9580
9581 static int __init trace_eval_sync(void)
9582 {
9583         /* Make sure the eval map updates are finished */
9584         if (eval_map_wq)
9585                 destroy_workqueue(eval_map_wq);
9586         return 0;
9587 }
9588
9589 late_initcall_sync(trace_eval_sync);
9590
9591
9592 #ifdef CONFIG_MODULES
9593 static void trace_module_add_evals(struct module *mod)
9594 {
9595         if (!mod->num_trace_evals)
9596                 return;
9597
9598         /*
9599          * Modules with bad taint do not have events created; do
9600          * not bother with enums either.
9601          */
9602         if (trace_module_has_bad_taint(mod))
9603                 return;
9604
9605         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9606 }
9607
9608 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9609 static void trace_module_remove_evals(struct module *mod)
9610 {
9611         union trace_eval_map_item *map;
9612         union trace_eval_map_item **last = &trace_eval_maps;
9613
9614         if (!mod->num_trace_evals)
9615                 return;
9616
9617         mutex_lock(&trace_eval_mutex);
9618
9619         map = trace_eval_maps;
9620
9621         while (map) {
9622                 if (map->head.mod == mod)
9623                         break;
9624                 map = trace_eval_jmp_to_tail(map);
9625                 last = &map->tail.next;
9626                 map = map->tail.next;
9627         }
9628         if (!map)
9629                 goto out;
9630
9631         *last = trace_eval_jmp_to_tail(map)->tail.next;
9632         kfree(map);
9633  out:
9634         mutex_unlock(&trace_eval_mutex);
9635 }
9636 #else
9637 static inline void trace_module_remove_evals(struct module *mod) { }
9638 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9639
9640 static int trace_module_notify(struct notifier_block *self,
9641                                unsigned long val, void *data)
9642 {
9643         struct module *mod = data;
9644
9645         switch (val) {
9646         case MODULE_STATE_COMING:
9647                 trace_module_add_evals(mod);
9648                 break;
9649         case MODULE_STATE_GOING:
9650                 trace_module_remove_evals(mod);
9651                 break;
9652         }
9653
9654         return NOTIFY_OK;
9655 }
9656
9657 static struct notifier_block trace_module_nb = {
9658         .notifier_call = trace_module_notify,
9659         .priority = 0,
9660 };
9661 #endif /* CONFIG_MODULES */
9662
9663 static __init int tracer_init_tracefs(void)
9664 {
9665         int ret;
9666
9667         trace_access_lock_init();
9668
9669         ret = tracing_init_dentry();
9670         if (ret)
9671                 return 0;
9672
9673         event_trace_init();
9674
9675         init_tracer_tracefs(&global_trace, NULL);
9676         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9677
9678         trace_create_file("tracing_thresh", 0644, NULL,
9679                         &global_trace, &tracing_thresh_fops);
9680
9681         trace_create_file("README", 0444, NULL,
9682                         NULL, &tracing_readme_fops);
9683
9684         trace_create_file("saved_cmdlines", 0444, NULL,
9685                         NULL, &tracing_saved_cmdlines_fops);
9686
9687         trace_create_file("saved_cmdlines_size", 0644, NULL,
9688                           NULL, &tracing_saved_cmdlines_size_fops);
9689
9690         trace_create_file("saved_tgids", 0444, NULL,
9691                         NULL, &tracing_saved_tgids_fops);
9692
9693         trace_eval_init();
9694
9695         trace_create_eval_file(NULL);
9696
9697 #ifdef CONFIG_MODULES
9698         register_module_notifier(&trace_module_nb);
9699 #endif
9700
9701 #ifdef CONFIG_DYNAMIC_FTRACE
9702         trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9703                         NULL, &tracing_dyn_info_fops);
9704 #endif
9705
9706         create_trace_instances(NULL);
9707
9708         update_tracer_options(&global_trace);
9709
9710         return 0;
9711 }
9712
9713 fs_initcall(tracer_init_tracefs);
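
/*
 * Illustrative only: unlike the per-instance files, the files created in
 * tracer_init_tracefs() exist only at the top level of the tracefs mount,
 * e.g.:
 *
 *   # head -3 /sys/kernel/tracing/README
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *   # mkdir /sys/kernel/tracing/instances/foo   <- new instances are made
 *                                                  under the "instances"
 *                                                  directory created by
 *                                                  create_trace_instances()
 */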
9714
9715 static int trace_panic_handler(struct notifier_block *this,
9716                                unsigned long event, void *unused)
9717 {
9718         if (ftrace_dump_on_oops)
9719                 ftrace_dump(ftrace_dump_on_oops);
9720         return NOTIFY_OK;
9721 }
9722
9723 static struct notifier_block trace_panic_notifier = {
9724         .notifier_call  = trace_panic_handler,
9725         .next           = NULL,
9726         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9727 };
9728
9729 static int trace_die_handler(struct notifier_block *self,
9730                              unsigned long val,
9731                              void *data)
9732 {
9733         switch (val) {
9734         case DIE_OOPS:
9735                 if (ftrace_dump_on_oops)
9736                         ftrace_dump(ftrace_dump_on_oops);
9737                 break;
9738         default:
9739                 break;
9740         }
9741         return NOTIFY_OK;
9742 }
9743
9744 static struct notifier_block trace_die_notifier = {
9745         .notifier_call = trace_die_handler,
9746         .priority = 200
9747 };
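
/*
 * Usage note (illustrative): the panic/die notifiers above only dump the
 * ring buffer when ftrace_dump_on_oops is set, e.g.:
 *
 *   ftrace_dump_on_oops            (kernel command line, dump all CPUs)
 *   ftrace_dump_on_oops=orig_cpu   (dump only the CPU that triggered the oops)
 *   # echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */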
9748
9749 /*
9750  * printk is limited to a maximum of 1024 characters; we really don't
9751  * need it that big. Nothing should be printing 1000 characters anyway.
9752  */
9753 #define TRACE_MAX_PRINT         1000
9754
9755 /*
9756  * Define here KERN_TRACE so that we have one place to modify
9757  * it if we decide to change what log level the ftrace dump
9758  * should be at.
9759  */
9760 #define KERN_TRACE              KERN_EMERG
9761
9762 void
9763 trace_printk_seq(struct trace_seq *s)
9764 {
9765         /* Probably should print a warning here. */
9766         if (s->seq.len >= TRACE_MAX_PRINT)
9767                 s->seq.len = TRACE_MAX_PRINT;
9768
9769         /*
9770          * More paranoid code. Although the buffer size is set to
9771          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9772          * an extra layer of protection.
9773          */
9774         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9775                 s->seq.len = s->seq.size - 1;
9776
9777         /* should already be NUL terminated, but we are paranoid. */
9778         s->buffer[s->seq.len] = 0;
9779
9780         printk(KERN_TRACE "%s", s->buffer);
9781
9782         trace_seq_init(s);
9783 }
9784
9785 void trace_init_global_iter(struct trace_iterator *iter)
9786 {
9787         iter->tr = &global_trace;
9788         iter->trace = iter->tr->current_trace;
9789         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9790         iter->array_buffer = &global_trace.array_buffer;
9791
9792         if (iter->trace && iter->trace->open)
9793                 iter->trace->open(iter);
9794
9795         /* Annotate start of buffers if we had overruns */
9796         if (ring_buffer_overruns(iter->array_buffer->buffer))
9797                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9798
9799         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9800         if (trace_clocks[iter->tr->clock_id].in_ns)
9801                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9802 }
9803
9804 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9805 {
9806         /* use static because iter can be a bit big for the stack */
9807         static struct trace_iterator iter;
9808         static atomic_t dump_running;
9809         struct trace_array *tr = &global_trace;
9810         unsigned int old_userobj;
9811         unsigned long flags;
9812         int cnt = 0, cpu;
9813
9814         /* Only allow one dump user at a time. */
9815         if (atomic_inc_return(&dump_running) != 1) {
9816                 atomic_dec(&dump_running);
9817                 return;
9818         }
9819
9820         /*
9821          * Always turn off tracing when we dump.
9822          * We don't need to show trace output of what happens
9823          * between multiple crashes.
9824          *
9825          * If the user does a sysrq-z, then they can re-enable
9826          * tracing with echo 1 > tracing_on.
9827          */
9828         tracing_off();
9829
9830         local_irq_save(flags);
9831
9832         /* Simulate the iterator */
9833         trace_init_global_iter(&iter);
9834         /* Can not use kmalloc for iter.temp and iter.fmt */
9835         iter.temp = static_temp_buf;
9836         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9837         iter.fmt = static_fmt_buf;
9838         iter.fmt_size = STATIC_FMT_BUF_SIZE;
9839
9840         for_each_tracing_cpu(cpu) {
9841                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9842         }
9843
9844         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9845
9846         /* don't look at user memory in panic mode */
9847         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9848
9849         switch (oops_dump_mode) {
9850         case DUMP_ALL:
9851                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9852                 break;
9853         case DUMP_ORIG:
9854                 iter.cpu_file = raw_smp_processor_id();
9855                 break;
9856         case DUMP_NONE:
9857                 goto out_enable;
9858         default:
9859                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9860                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9861         }
9862
9863         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9864
9865         /* Did function tracer already get disabled? */
9866         if (ftrace_is_dead()) {
9867                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9868                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9869         }
9870
9871         /*
9872          * We need to stop all tracing on all CPUs to read
9873          * the next buffer. This is a bit expensive, but is
9874          * not done often. We print everything we can read,
9875          * and then release the locks again.
9876          */
9877
9878         while (!trace_empty(&iter)) {
9879
9880                 if (!cnt)
9881                         printk(KERN_TRACE "---------------------------------\n");
9882
9883                 cnt++;
9884
9885                 trace_iterator_reset(&iter);
9886                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9887
9888                 if (trace_find_next_entry_inc(&iter) != NULL) {
9889                         int ret;
9890
9891                         ret = print_trace_line(&iter);
9892                         if (ret != TRACE_TYPE_NO_CONSUME)
9893                                 trace_consume(&iter);
9894                 }
9895                 touch_nmi_watchdog();
9896
9897                 trace_printk_seq(&iter.seq);
9898         }
9899
9900         if (!cnt)
9901                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9902         else
9903                 printk(KERN_TRACE "---------------------------------\n");
9904
9905  out_enable:
9906         tr->trace_flags |= old_userobj;
9907
9908         for_each_tracing_cpu(cpu) {
9909                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9910         }
9911         atomic_dec(&dump_running);
9912         local_irq_restore(flags);
9913 }
9914 EXPORT_SYMBOL_GPL(ftrace_dump);
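
/*
 * Minimal sketch of a direct caller (illustrative; most users should rely
 * on ftrace_dump_on_oops or sysrq-z instead):
 *
 *   if (WARN_ON(in_a_very_bad_state))    // hypothetical condition
 *           ftrace_dump(DUMP_ALL);       // spill every CPU's buffer
 *
 * DUMP_ORIG would instead dump only the buffer of the calling CPU.
 */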
9915
9916 #define WRITE_BUFSIZE  4096
9917
9918 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9919                                 size_t count, loff_t *ppos,
9920                                 int (*createfn)(const char *))
9921 {
9922         char *kbuf, *buf, *tmp;
9923         int ret = 0;
9924         size_t done = 0;
9925         size_t size;
9926
9927         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9928         if (!kbuf)
9929                 return -ENOMEM;
9930
9931         while (done < count) {
9932                 size = count - done;
9933
9934                 if (size >= WRITE_BUFSIZE)
9935                         size = WRITE_BUFSIZE - 1;
9936
9937                 if (copy_from_user(kbuf, buffer + done, size)) {
9938                         ret = -EFAULT;
9939                         goto out;
9940                 }
9941                 kbuf[size] = '\0';
9942                 buf = kbuf;
9943                 do {
9944                         tmp = strchr(buf, '\n');
9945                         if (tmp) {
9946                                 *tmp = '\0';
9947                                 size = tmp - buf + 1;
9948                         } else {
9949                                 size = strlen(buf);
9950                                 if (done + size < count) {
9951                                         if (buf != kbuf)
9952                                                 break;
9953                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9954                                         pr_warn("Line length is too long: Should be less than %d\n",
9955                                                 WRITE_BUFSIZE - 2);
9956                                         ret = -EINVAL;
9957                                         goto out;
9958                                 }
9959                         }
9960                         done += size;
9961
9962                         /* Remove comments */
9963                         tmp = strchr(buf, '#');
9964
9965                         if (tmp)
9966                                 *tmp = '\0';
9967
9968                         ret = createfn(buf);
9969                         if (ret)
9970                                 goto out;
9971                         buf += size;
9972
9973                 } while (done < count);
9974         }
9975         ret = done;
9976
9977 out:
9978         kfree(kbuf);
9979
9980         return ret;
9981 }
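
/*
 * Illustrative sketch of how this parser is used: dynamic event files pass
 * each '\n'-terminated line to createfn with anything after '#' stripped,
 * so one write can carry several commands:
 *
 *   # printf 'p:my_read vfs_read  # comment is dropped\np:my_write vfs_write\n' \
 *         >> /sys/kernel/tracing/kprobe_events
 *
 * Here createfn() is the kprobe_events create callback and it is invoked
 * once per line ("p:my_read vfs_read" and "p:my_write vfs_write").
 */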
9982
9983 __init static int tracer_alloc_buffers(void)
9984 {
9985         int ring_buf_size;
9986         int ret = -ENOMEM;
9987
9988
9989         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9990                 pr_warn("Tracing disabled due to lockdown\n");
9991                 return -EPERM;
9992         }
9993
9994         /*
9995          * Make sure we don't accidentally add more trace options
9996          * than we have bits for.
9997          */
9998         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9999
10000         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10001                 goto out;
10002
10003         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10004                 goto out_free_buffer_mask;
10005
10006         /* Only allocate trace_printk buffers if a trace_printk exists */
10007         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10008                 /* Must be called before global_trace.buffer is allocated */
10009                 trace_printk_init_buffers();
10010
10011         /* To save memory, keep the ring buffer size to its minimum */
10012         if (ring_buffer_expanded)
10013                 ring_buf_size = trace_buf_size;
10014         else
10015                 ring_buf_size = 1;
10016
10017         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10018         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10019
10020         raw_spin_lock_init(&global_trace.start_lock);
10021
10022         /*
10023          * The prepare callback allocates some memory for the ring buffer. We
10024          * don't free the buffer if the CPU goes down. If we were to free
10025          * the buffer, then the user would lose any trace that was in the
10026          * buffer. The memory will be removed once the "instance" is removed.
10027          */
10028         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10029                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10030                                       NULL);
10031         if (ret < 0)
10032                 goto out_free_cpumask;
10033         /* Used for event triggers */
10034         ret = -ENOMEM;
10035         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10036         if (!temp_buffer)
10037                 goto out_rm_hp_state;
10038
10039         if (trace_create_savedcmd() < 0)
10040                 goto out_free_temp_buffer;
10041
10042         /* TODO: make the number of buffers hot pluggable with CPUs */
10043         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10044                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10045                 goto out_free_savedcmd;
10046         }
10047
10048         if (global_trace.buffer_disabled)
10049                 tracing_off();
10050
10051         if (trace_boot_clock) {
10052                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10053                 if (ret < 0)
10054                         pr_warn("Trace clock %s not defined, going back to default\n",
10055                                 trace_boot_clock);
10056         }
10057
10058         /*
10059          * register_tracer() might reference current_trace, so it
10060          * needs to be set before we register anything. This is
10061          * just a bootstrap of current_trace anyway.
10062          */
10063         global_trace.current_trace = &nop_trace;
10064
10065         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10066
10067         ftrace_init_global_array_ops(&global_trace);
10068
10069         init_trace_flags_index(&global_trace);
10070
10071         register_tracer(&nop_trace);
10072
10073         /* Function tracing may start here (via kernel command line) */
10074         init_function_trace();
10075
10076         /* All seems OK, enable tracing */
10077         tracing_disabled = 0;
10078
10079         atomic_notifier_chain_register(&panic_notifier_list,
10080                                        &trace_panic_notifier);
10081
10082         register_die_notifier(&trace_die_notifier);
10083
10084         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10085
10086         INIT_LIST_HEAD(&global_trace.systems);
10087         INIT_LIST_HEAD(&global_trace.events);
10088         INIT_LIST_HEAD(&global_trace.hist_vars);
10089         INIT_LIST_HEAD(&global_trace.err_log);
10090         list_add(&global_trace.list, &ftrace_trace_arrays);
10091
10092         apply_trace_boot_options();
10093
10094         register_snapshot_cmd();
10095
10096         test_can_verify();
10097
10098         return 0;
10099
10100 out_free_savedcmd:
10101         free_saved_cmdlines_buffer(savedcmd);
10102 out_free_temp_buffer:
10103         ring_buffer_free(temp_buffer);
10104 out_rm_hp_state:
10105         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10106 out_free_cpumask:
10107         free_cpumask_var(global_trace.tracing_cpumask);
10108 out_free_buffer_mask:
10109         free_cpumask_var(tracing_buffer_mask);
10110 out:
10111         return ret;
10112 }
10113
10114 void __init early_trace_init(void)
10115 {
10116         if (tracepoint_printk) {
10117                 tracepoint_print_iter =
10118                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10119                 if (MEM_FAIL(!tracepoint_print_iter,
10120                              "Failed to allocate trace iterator\n"))
10121                         tracepoint_printk = 0;
10122                 else
10123                         static_key_enable(&tracepoint_printk_key.key);
10124         }
10125         tracer_alloc_buffers();
10126 }
10127
10128 void __init trace_init(void)
10129 {
10130         trace_event_init();
10131 }
10132
10133 __init static void clear_boot_tracer(void)
10134 {
10135         /*
10136          * The default boot-up tracer name lives in an init section buffer.
10137          * This function is called from a late initcall. If we did not
10138          * find the boot tracer, then clear it out, to prevent
10139          * later registration from accessing the buffer that is
10140          * about to be freed.
10141          */
10142         if (!default_bootup_tracer)
10143                 return;
10144
10145         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10146                default_bootup_tracer);
10147         default_bootup_tracer = NULL;
10148 }
10149
10150 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10151 __init static void tracing_set_default_clock(void)
10152 {
10153         /* sched_clock_stable() is determined in late_initcall */
10154         if (!trace_boot_clock && !sched_clock_stable()) {
10155                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10156                         pr_warn("Can not set tracing clock due to lockdown\n");
10157                         return;
10158                 }
10159
10160                 printk(KERN_WARNING
10161                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10162                        "If you want to keep using the local clock, then add:\n"
10163                        "  \"trace_clock=local\"\n"
10164                        "on the kernel command line\n");
10165                 tracing_set_clock(&global_trace, "global");
10166         }
10167 }
10168 #else
10169 static inline void tracing_set_default_clock(void) { }
10170 #endif
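
/*
 * Illustrative check (not part of the build): the clock selected here can
 * be inspected or overridden at run time through tracefs, where the
 * bracketed entry is the one in use:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw boot
 *   # echo global > /sys/kernel/tracing/trace_clock
 */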
10171
10172 __init static int late_trace_init(void)
10173 {
10174         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10175                 static_key_disable(&tracepoint_printk_key.key);
10176                 tracepoint_printk = 0;
10177         }
10178
10179         tracing_set_default_clock();
10180         clear_boot_tracer();
10181         return 0;
10182 }
10183
10184 late_initcall_sync(late_trace_init);