tracing: Add trace_export support for trace_marker
kernel/trace/trace.c (platform/kernel/linux-starfive.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include "trace.h"
53 #include "trace_output.h"
54
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk, could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69
70 /*
71  * If a tracer is running, we do not want to run SELFTEST.
72  */
73 bool __read_mostly tracing_selftest_disabled;
74
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
79
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
82         { }
83 };
84
85 static int
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
87 {
88         return 0;
89 }
90
91 /*
92  * To prevent the comm cache from being overwritten when no
93  * tracing is active, only save the comm when a trace event
94  * occurred.
95  */
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
97
98 /*
99  * Kill all tracing for good (never come back).
100  * It is initialized to 1 but will be set to zero if the initialization
101  * of the tracer is successful. That is the only place that sets
102  * it back to zero.
103  */
104 static int tracing_disabled = 1;
105
106 cpumask_var_t __read_mostly     tracing_buffer_mask;
107
108 /*
109  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
110  *
111  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112  * is set, then ftrace_dump is called. This will output the contents
113  * of the ftrace buffers to the console.  This is very useful for
114  * capturing traces that lead to crashes and outputting them to a
115  * serial console.
116  *
117  * It is off by default, but you can enable it either by specifying
118  * "ftrace_dump_on_oops" on the kernel command line, or by setting
119  * /proc/sys/kernel/ftrace_dump_on_oops.
120  * Set it to 1 to dump the buffers of all CPUs.
121  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
122  */
123
124 enum ftrace_dump_mode ftrace_dump_on_oops;
125
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
128
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
132         struct module                   *mod;
133         unsigned long                   length;
134 };
135
136 union trace_eval_map_item;
137
138 struct trace_eval_map_tail {
139         /*
140          * "end" is first and points to NULL as it must be different
141          * than "mod" or "eval_string"
142          */
143         union trace_eval_map_item       *next;
144         const char                      *end;   /* points to NULL */
145 };
146
147 static DEFINE_MUTEX(trace_eval_mutex);
148
149 /*
150  * The trace_eval_maps are saved in an array with two extra elements,
151  * one at the beginning, and one at the end. The beginning item contains
152  * the count of the saved maps (head.length), and the module they
153  * belong to if not built in (head.mod). The ending item contains a
154  * pointer to the next array of saved eval_map items.
155  */
156 union trace_eval_map_item {
157         struct trace_eval_map           map;
158         struct trace_eval_map_head      head;
159         struct trace_eval_map_tail      tail;
160 };
161
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
164
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167                                    unsigned long flags, int pc);
168
169 #define MAX_TRACER_SIZE         100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
172
173 static bool allocate_snapshot;
174
175 static int __init set_cmdline_ftrace(char *str)
176 {
177         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178         default_bootup_tracer = bootup_tracer_buf;
179         /* We are using ftrace early, expand it */
180         ring_buffer_expanded = true;
181         return 1;
182 }
183 __setup("ftrace=", set_cmdline_ftrace);
184
185 static int __init set_ftrace_dump_on_oops(char *str)
186 {
187         if (*str++ != '=' || !*str) {
188                 ftrace_dump_on_oops = DUMP_ALL;
189                 return 1;
190         }
191
192         if (!strcmp("orig_cpu", str)) {
193                 ftrace_dump_on_oops = DUMP_ORIG;
194                 return 1;
195         }
196
197         return 0;
198 }
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
200
201 static int __init stop_trace_on_warning(char *str)
202 {
203         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204                 __disable_trace_on_warning = 1;
205         return 1;
206 }
207 __setup("traceoff_on_warning", stop_trace_on_warning);
208
209 static int __init boot_alloc_snapshot(char *str)
210 {
211         allocate_snapshot = true;
212         /* We also need the main ring buffer expanded */
213         ring_buffer_expanded = true;
214         return 1;
215 }
216 __setup("alloc_snapshot", boot_alloc_snapshot);
217
218
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
220
221 static int __init set_trace_boot_options(char *str)
222 {
223         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
224         return 0;
225 }
226 __setup("trace_options=", set_trace_boot_options);
227
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
230
231 static int __init set_trace_boot_clock(char *str)
232 {
233         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234         trace_boot_clock = trace_boot_clock_buf;
235         return 0;
236 }
237 __setup("trace_clock=", set_trace_boot_clock);
238
239 static int __init set_tracepoint_printk(char *str)
240 {
241         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242                 tracepoint_printk = 1;
243         return 1;
244 }
245 __setup("tp_printk", set_tracepoint_printk);
246
247 unsigned long long ns2usecs(u64 nsec)
248 {
249         nsec += 500;
250         do_div(nsec, 1000);
251         return nsec;
252 }
253
254 static void
255 trace_process_export(struct trace_export *export,
256                struct ring_buffer_event *event, int flag)
257 {
258         struct trace_entry *entry;
259         unsigned int size = 0;
260
261         if (export->flags & flag) {
262                 entry = ring_buffer_event_data(event);
263                 size = ring_buffer_event_length(event);
264                 export->write(export, entry, size);
265         }
266 }
267
268 static DEFINE_MUTEX(ftrace_export_lock);
269
270 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
271
272 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
273 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
274 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
275
276 static inline void ftrace_exports_enable(struct trace_export *export)
277 {
278         if (export->flags & TRACE_EXPORT_FUNCTION)
279                 static_branch_inc(&trace_function_exports_enabled);
280
281         if (export->flags & TRACE_EXPORT_EVENT)
282                 static_branch_inc(&trace_event_exports_enabled);
283
284         if (export->flags & TRACE_EXPORT_MARKER)
285                 static_branch_inc(&trace_marker_exports_enabled);
286 }
287
288 static inline void ftrace_exports_disable(struct trace_export *export)
289 {
290         if (export->flags & TRACE_EXPORT_FUNCTION)
291                 static_branch_dec(&trace_function_exports_enabled);
292
293         if (export->flags & TRACE_EXPORT_EVENT)
294                 static_branch_dec(&trace_event_exports_enabled);
295
296         if (export->flags & TRACE_EXPORT_MARKER)
297                 static_branch_dec(&trace_marker_exports_enabled);
298 }
299
300 static void ftrace_exports(struct ring_buffer_event *event, int flag)
301 {
302         struct trace_export *export;
303
304         preempt_disable_notrace();
305
306         export = rcu_dereference_raw_check(ftrace_exports_list);
307         while (export) {
308                 trace_process_export(export, event, flag);
309                 export = rcu_dereference_raw_check(export->next);
310         }
311
312         preempt_enable_notrace();
313 }
314
315 static inline void
316 add_trace_export(struct trace_export **list, struct trace_export *export)
317 {
318         rcu_assign_pointer(export->next, *list);
319         /*
320          * We are adding export to the list, but another
321          * CPU might be walking that list. We need to make sure
322          * the export->next pointer is valid before another CPU sees
323          * the export pointer included in the list.
324          */
325         rcu_assign_pointer(*list, export);
326 }
327
328 static inline int
329 rm_trace_export(struct trace_export **list, struct trace_export *export)
330 {
331         struct trace_export **p;
332
333         for (p = list; *p != NULL; p = &(*p)->next)
334                 if (*p == export)
335                         break;
336
337         if (*p != export)
338                 return -1;
339
340         rcu_assign_pointer(*p, (*p)->next);
341
342         return 0;
343 }
344
345 static inline void
346 add_ftrace_export(struct trace_export **list, struct trace_export *export)
347 {
348         ftrace_exports_enable(export);
349
350         add_trace_export(list, export);
351 }
352
353 static inline int
354 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
355 {
356         int ret;
357
358         ret = rm_trace_export(list, export);
359         ftrace_exports_disable(export);
360
361         return ret;
362 }
363
364 int register_ftrace_export(struct trace_export *export)
365 {
366         if (WARN_ON_ONCE(!export->write))
367                 return -1;
368
369         mutex_lock(&ftrace_export_lock);
370
371         add_ftrace_export(&ftrace_exports_list, export);
372
373         mutex_unlock(&ftrace_export_lock);
374
375         return 0;
376 }
377 EXPORT_SYMBOL_GPL(register_ftrace_export);
378
379 int unregister_ftrace_export(struct trace_export *export)
380 {
381         int ret;
382
383         mutex_lock(&ftrace_export_lock);
384
385         ret = rm_ftrace_export(&ftrace_exports_list, export);
386
387         mutex_unlock(&ftrace_export_lock);
388
389         return ret;
390 }
391 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
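/*
 * Usage sketch (illustrative, not part of this file): a module can hook one
 * of the export points above by filling in a struct trace_export (declared
 * in include/linux/trace.h) and registering it. The names my_export and
 * my_export_write below are hypothetical:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to an external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_MARKER,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 *
 * With TRACE_EXPORT_MARKER in .flags, writes to the trace_marker file are
 * also handed to my_export_write(); TRACE_EXPORT_FUNCTION and
 * TRACE_EXPORT_EVENT select function and event records instead.
 */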
392
393 /* trace_flags holds trace_options default values */
394 #define TRACE_DEFAULT_FLAGS                                             \
395         (FUNCTION_DEFAULT_FLAGS |                                       \
396          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
397          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
398          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
399          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
400
401 /* trace_options that are only supported by global_trace */
402 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
403                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
404
405 /* trace_flags that are default zero for instances */
406 #define ZEROED_TRACE_FLAGS \
407         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
408
409 /*
410  * The global_trace is the descriptor that holds the top-level tracing
411  * buffers for the live tracing.
412  */
413 static struct trace_array global_trace = {
414         .trace_flags = TRACE_DEFAULT_FLAGS,
415 };
416
417 LIST_HEAD(ftrace_trace_arrays);
418
419 int trace_array_get(struct trace_array *this_tr)
420 {
421         struct trace_array *tr;
422         int ret = -ENODEV;
423
424         mutex_lock(&trace_types_lock);
425         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
426                 if (tr == this_tr) {
427                         tr->ref++;
428                         ret = 0;
429                         break;
430                 }
431         }
432         mutex_unlock(&trace_types_lock);
433
434         return ret;
435 }
436
437 static void __trace_array_put(struct trace_array *this_tr)
438 {
439         WARN_ON(!this_tr->ref);
440         this_tr->ref--;
441 }
442
443 /**
444  * trace_array_put - Decrement the reference counter for this trace array.
445  *
446  * NOTE: Use this when we no longer need the trace array returned by
447  * trace_array_get_by_name(). This ensures the trace array can be later
448  * destroyed.
449  *
450  */
451 void trace_array_put(struct trace_array *this_tr)
452 {
453         if (!this_tr)
454                 return;
455
456         mutex_lock(&trace_types_lock);
457         __trace_array_put(this_tr);
458         mutex_unlock(&trace_types_lock);
459 }
460 EXPORT_SYMBOL_GPL(trace_array_put);
461
462 int tracing_check_open_get_tr(struct trace_array *tr)
463 {
464         int ret;
465
466         ret = security_locked_down(LOCKDOWN_TRACEFS);
467         if (ret)
468                 return ret;
469
470         if (tracing_disabled)
471                 return -ENODEV;
472
473         if (tr && trace_array_get(tr) < 0)
474                 return -ENODEV;
475
476         return 0;
477 }
478
479 int call_filter_check_discard(struct trace_event_call *call, void *rec,
480                               struct trace_buffer *buffer,
481                               struct ring_buffer_event *event)
482 {
483         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
484             !filter_match_preds(call->filter, rec)) {
485                 __trace_event_discard_commit(buffer, event);
486                 return 1;
487         }
488
489         return 0;
490 }
491
492 void trace_free_pid_list(struct trace_pid_list *pid_list)
493 {
494         vfree(pid_list->pids);
495         kfree(pid_list);
496 }
497
498 /**
499  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
500  * @filtered_pids: The list of pids to check
501  * @search_pid: The PID to find in @filtered_pids
502  *
503  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
504  */
505 bool
506 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
507 {
508         /*
509          * If pid_max changed after filtered_pids was created, we
510          * by default ignore all pids greater than the previous pid_max.
511          */
512         if (search_pid >= filtered_pids->pid_max)
513                 return false;
514
515         return test_bit(search_pid, filtered_pids->pids);
516 }
517
518 /**
519  * trace_ignore_this_task - should a task be ignored for tracing
520  * @filtered_pids: The list of pids to check
521  * @task: The task that should be ignored if not filtered
522  *
523  * Checks if @task should be traced or not from @filtered_pids.
524  * Returns true if @task should *NOT* be traced.
525  * Returns false if @task should be traced.
526  */
527 bool
528 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
529                        struct trace_pid_list *filtered_no_pids,
530                        struct task_struct *task)
531 {
532         /*
533          * If filtered_no_pids is not empty, and the task's pid is listed
534          * in filtered_no_pids, then return true.
535          * Otherwise, if filtered_pids is empty, that means we can
536          * trace all tasks. If it has content, then only trace pids
537          * within filtered_pids.
538          */
539
540         return (filtered_pids &&
541                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
542                 (filtered_no_pids &&
543                  trace_find_filtered_pid(filtered_no_pids, task->pid));
544 }
545
546 /**
547  * trace_filter_add_remove_task - Add or remove a task from a pid_list
548  * @pid_list: The list to modify
549  * @self: The current task for fork or NULL for exit
550  * @task: The task to add or remove
551  *
552  * If adding a task, if @self is defined, the task is only added if @self
553  * is also included in @pid_list. This happens on fork and tasks should
554  * only be added when the parent is listed. If @self is NULL, then the
555  * @task pid will be removed from the list, which would happen on exit
556  * of a task.
557  */
558 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
559                                   struct task_struct *self,
560                                   struct task_struct *task)
561 {
562         if (!pid_list)
563                 return;
564
565         /* For forks, we only add if the forking task is listed */
566         if (self) {
567                 if (!trace_find_filtered_pid(pid_list, self->pid))
568                         return;
569         }
570
571         /* Sorry, but we don't support pid_max changing after setting */
572         if (task->pid >= pid_list->pid_max)
573                 return;
574
575         /* "self" is set for forks, and NULL for exits */
576         if (self)
577                 set_bit(task->pid, pid_list->pids);
578         else
579                 clear_bit(task->pid, pid_list->pids);
580 }
581
582 /**
583  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
584  * @pid_list: The pid list to show
585  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
586  * @pos: The position of the file
587  *
588  * This is used by the seq_file "next" operation to iterate the pids
589  * listed in a trace_pid_list structure.
590  *
591  * Returns the pid+1 as we want to display pid of zero, but NULL would
592  * stop the iteration.
593  */
594 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
595 {
596         unsigned long pid = (unsigned long)v;
597
598         (*pos)++;
599
600         /* pid already is +1 of the actual previous bit */
601         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
602
603         /* Return pid + 1 to allow zero to be represented */
604         if (pid < pid_list->pid_max)
605                 return (void *)(pid + 1);
606
607         return NULL;
608 }
609
610 /**
611  * trace_pid_start - Used for seq_file to start reading pid lists
612  * @pid_list: The pid list to show
613  * @pos: The position of the file
614  *
615  * This is used by seq_file "start" operation to start the iteration
616  * of listing pids.
617  *
618  * Returns the pid+1 as we want to display pid of zero, but NULL would
619  * stop the iteration.
620  */
621 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
622 {
623         unsigned long pid;
624         loff_t l = 0;
625
626         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
627         if (pid >= pid_list->pid_max)
628                 return NULL;
629
630         /* Return pid + 1 so that zero can be the exit value */
631         for (pid++; pid && l < *pos;
632              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
633                 ;
634         return (void *)pid;
635 }
636
637 /**
638  * trace_pid_show - show the current pid in seq_file processing
639  * @m: The seq_file structure to write into
640  * @v: A void pointer of the pid (+1) value to display
641  *
642  * Can be directly used by seq_file operations to display the current
643  * pid value.
644  */
645 int trace_pid_show(struct seq_file *m, void *v)
646 {
647         unsigned long pid = (unsigned long)v - 1;
648
649         seq_printf(m, "%lu\n", pid);
650         return 0;
651 }
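/*
 * Usage sketch (illustrative, not part of this file): the pid helpers above
 * are meant to be wired into a seq_file; my_pid_list and the my_pid_*
 * functions below are hypothetical:
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,	// typically drops any lock taken in start
 *		.show	= trace_pid_show,
 *	};
 */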
652
653 /* 128 should be much more than enough */
654 #define PID_BUF_SIZE            127
655
656 int trace_pid_write(struct trace_pid_list *filtered_pids,
657                     struct trace_pid_list **new_pid_list,
658                     const char __user *ubuf, size_t cnt)
659 {
660         struct trace_pid_list *pid_list;
661         struct trace_parser parser;
662         unsigned long val;
663         int nr_pids = 0;
664         ssize_t read = 0;
665         ssize_t ret = 0;
666         loff_t pos;
667         pid_t pid;
668
669         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
670                 return -ENOMEM;
671
672         /*
673          * The write is an all or nothing operation: always create a
674          * new array when the user adds new pids. If the operation
675          * fails, then the current list is left unmodified and the
676          * new array is freed.
677          */
678         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
679         if (!pid_list) {
680                 trace_parser_put(&parser);
681                 return -ENOMEM;
682         }
683
684         pid_list->pid_max = READ_ONCE(pid_max);
685
686         /* Only truncating will shrink pid_max */
687         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
688                 pid_list->pid_max = filtered_pids->pid_max;
689
690         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
691         if (!pid_list->pids) {
692                 trace_parser_put(&parser);
693                 kfree(pid_list);
694                 return -ENOMEM;
695         }
696
697         if (filtered_pids) {
698                 /* copy the current bits to the new max */
699                 for_each_set_bit(pid, filtered_pids->pids,
700                                  filtered_pids->pid_max) {
701                         set_bit(pid, pid_list->pids);
702                         nr_pids++;
703                 }
704         }
705
706         while (cnt > 0) {
707
708                 pos = 0;
709
710                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
711                 if (ret < 0 || !trace_parser_loaded(&parser))
712                         break;
713
714                 read += ret;
715                 ubuf += ret;
716                 cnt -= ret;
717
718                 ret = -EINVAL;
719                 if (kstrtoul(parser.buffer, 0, &val))
720                         break;
721                 if (val >= pid_list->pid_max)
722                         break;
723
724                 pid = (pid_t)val;
725
726                 set_bit(pid, pid_list->pids);
727                 nr_pids++;
728
729                 trace_parser_clear(&parser);
730                 ret = 0;
731         }
732         trace_parser_put(&parser);
733
734         if (ret < 0) {
735                 trace_free_pid_list(pid_list);
736                 return ret;
737         }
738
739         if (!nr_pids) {
740                 /* Cleared the list of pids */
741                 trace_free_pid_list(pid_list);
742                 read = ret;
743                 pid_list = NULL;
744         }
745
746         *new_pid_list = pid_list;
747
748         return read;
749 }
750
751 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
752 {
753         u64 ts;
754
755         /* Early boot up does not have a buffer yet */
756         if (!buf->buffer)
757                 return trace_clock_local();
758
759         ts = ring_buffer_time_stamp(buf->buffer, cpu);
760         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
761
762         return ts;
763 }
764
765 u64 ftrace_now(int cpu)
766 {
767         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
768 }
769
770 /**
771  * tracing_is_enabled - Show if global_trace has been disabled
772  *
773  * Shows if the global trace has been enabled or not. It uses the
774  * mirror flag "buffer_disabled" so it can be used in fast paths such as
775  * the irqsoff tracer. But it may be inaccurate due to races. If you
776  * need to know the accurate state, use tracing_is_on() which is a little
777  * slower, but accurate.
778  */
779 int tracing_is_enabled(void)
780 {
781         /*
782          * For quick access (irqsoff uses this in fast path), just
783          * return the mirror variable of the state of the ring buffer.
784          * It's a little racy, but we don't really care.
785          */
786         smp_rmb();
787         return !global_trace.buffer_disabled;
788 }
789
790 /*
791  * trace_buf_size is the size in bytes that is allocated
792  * for a buffer. Note that the number of bytes is always rounded
793  * to the page size.
794  *
795  * This number is purposely set to a low value of 16384.
796  * If the dump on oops happens, it will be much appreciated
797  * to not have to wait for all that output. Anyway, this is
798  * configurable at both boot time and run time.
799  */
800 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
801
802 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
803
804 /* trace_types holds a link list of available tracers. */
805 static struct tracer            *trace_types __read_mostly;
806
807 /*
808  * trace_types_lock is used to protect the trace_types list.
809  */
810 DEFINE_MUTEX(trace_types_lock);
811
812 /*
813  * Serialize access to the ring buffer.
814  *
815  * The ring buffer serializes readers, but that is only low level protection.
816  * The validity of the events (returned by ring_buffer_peek() etc.)
817  * is not protected by the ring buffer.
818  *
819  * The content of events may become garbage if we allow another process to
820  * consume these events concurrently:
821  *   A) the page of the consumed events may become a normal page
822  *      (not a reader page) in the ring buffer, and this page will be
823  *      rewritten by the event producer.
824  *   B) the page of the consumed events may become a page for splice_read,
825  *      and this page will be returned to the system.
826  *
827  * These primitives allow multiple processes to access different cpu ring
828  * buffers concurrently.
829  *
830  * These primitives don't distinguish read-only and read-consume access.
831  * Multiple read-only accesses are also serialized.
832  */
833
834 #ifdef CONFIG_SMP
835 static DECLARE_RWSEM(all_cpu_access_lock);
836 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
837
838 static inline void trace_access_lock(int cpu)
839 {
840         if (cpu == RING_BUFFER_ALL_CPUS) {
841                 /* gain it for accessing the whole ring buffer. */
842                 down_write(&all_cpu_access_lock);
843         } else {
844                 /* gain it for accessing a cpu ring buffer. */
845
846                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
847                 down_read(&all_cpu_access_lock);
848
849                 /* Secondly block other access to this @cpu ring buffer. */
850                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
851         }
852 }
853
854 static inline void trace_access_unlock(int cpu)
855 {
856         if (cpu == RING_BUFFER_ALL_CPUS) {
857                 up_write(&all_cpu_access_lock);
858         } else {
859                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
860                 up_read(&all_cpu_access_lock);
861         }
862 }
863
864 static inline void trace_access_lock_init(void)
865 {
866         int cpu;
867
868         for_each_possible_cpu(cpu)
869                 mutex_init(&per_cpu(cpu_access_lock, cpu));
870 }
871
872 #else
873
874 static DEFINE_MUTEX(access_lock);
875
876 static inline void trace_access_lock(int cpu)
877 {
878         (void)cpu;
879         mutex_lock(&access_lock);
880 }
881
882 static inline void trace_access_unlock(int cpu)
883 {
884         (void)cpu;
885         mutex_unlock(&access_lock);
886 }
887
888 static inline void trace_access_lock_init(void)
889 {
890 }
891
892 #endif
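/*
 * Usage sketch (illustrative, not part of this file): a consuming reader
 * brackets its buffer accesses with the primitives above, e.g.:
 *
 *	trace_access_lock(cpu);
 *	// consume events from the per-cpu buffer
 *	trace_access_unlock(cpu);
 *
 * Passing cpu == RING_BUFFER_ALL_CPUS takes exclusive access to all of the
 * per-cpu buffers at once.
 */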
893
894 #ifdef CONFIG_STACKTRACE
895 static void __ftrace_trace_stack(struct trace_buffer *buffer,
896                                  unsigned long flags,
897                                  int skip, int pc, struct pt_regs *regs);
898 static inline void ftrace_trace_stack(struct trace_array *tr,
899                                       struct trace_buffer *buffer,
900                                       unsigned long flags,
901                                       int skip, int pc, struct pt_regs *regs);
902
903 #else
904 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
905                                         unsigned long flags,
906                                         int skip, int pc, struct pt_regs *regs)
907 {
908 }
909 static inline void ftrace_trace_stack(struct trace_array *tr,
910                                       struct trace_buffer *buffer,
911                                       unsigned long flags,
912                                       int skip, int pc, struct pt_regs *regs)
913 {
914 }
915
916 #endif
917
918 static __always_inline void
919 trace_event_setup(struct ring_buffer_event *event,
920                   int type, unsigned long flags, int pc)
921 {
922         struct trace_entry *ent = ring_buffer_event_data(event);
923
924         tracing_generic_entry_update(ent, type, flags, pc);
925 }
926
927 static __always_inline struct ring_buffer_event *
928 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
929                           int type,
930                           unsigned long len,
931                           unsigned long flags, int pc)
932 {
933         struct ring_buffer_event *event;
934
935         event = ring_buffer_lock_reserve(buffer, len);
936         if (event != NULL)
937                 trace_event_setup(event, type, flags, pc);
938
939         return event;
940 }
941
942 void tracer_tracing_on(struct trace_array *tr)
943 {
944         if (tr->array_buffer.buffer)
945                 ring_buffer_record_on(tr->array_buffer.buffer);
946         /*
947          * This flag is looked at when buffers haven't been allocated
948          * yet, or by some tracers (like irqsoff) that just want to
949          * know if the ring buffer has been disabled, but it can handle
950          * races where it gets disabled while we still do a record.
951          * As the check is in the fast path of the tracers, it is more
952          * important to be fast than accurate.
953          */
954         tr->buffer_disabled = 0;
955         /* Make the flag seen by readers */
956         smp_wmb();
957 }
958
959 /**
960  * tracing_on - enable tracing buffers
961  *
962  * This function enables tracing buffers that may have been
963  * disabled with tracing_off.
964  */
965 void tracing_on(void)
966 {
967         tracer_tracing_on(&global_trace);
968 }
969 EXPORT_SYMBOL_GPL(tracing_on);
970
971
972 static __always_inline void
973 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
974 {
975         __this_cpu_write(trace_taskinfo_save, true);
976
977         /* If this is the temp buffer, we need to commit fully */
978         if (this_cpu_read(trace_buffered_event) == event) {
979                 /* Length is in event->array[0] */
980                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
981                 /* Release the temp buffer */
982                 this_cpu_dec(trace_buffered_event_cnt);
983         } else
984                 ring_buffer_unlock_commit(buffer, event);
985 }
986
987 /**
988  * __trace_puts - write a constant string into the trace buffer.
989  * @ip:    The address of the caller
990  * @str:   The constant string to write
991  * @size:  The size of the string.
992  */
993 int __trace_puts(unsigned long ip, const char *str, int size)
994 {
995         struct ring_buffer_event *event;
996         struct trace_buffer *buffer;
997         struct print_entry *entry;
998         unsigned long irq_flags;
999         int alloc;
1000         int pc;
1001
1002         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1003                 return 0;
1004
1005         pc = preempt_count();
1006
1007         if (unlikely(tracing_selftest_running || tracing_disabled))
1008                 return 0;
1009
1010         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011
1012         local_save_flags(irq_flags);
1013         buffer = global_trace.array_buffer.buffer;
1014         ring_buffer_nest_start(buffer);
1015         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
1016                                             irq_flags, pc);
1017         if (!event) {
1018                 size = 0;
1019                 goto out;
1020         }
1021
1022         entry = ring_buffer_event_data(event);
1023         entry->ip = ip;
1024
1025         memcpy(&entry->buf, str, size);
1026
1027         /* Add a newline if necessary */
1028         if (entry->buf[size - 1] != '\n') {
1029                 entry->buf[size] = '\n';
1030                 entry->buf[size + 1] = '\0';
1031         } else
1032                 entry->buf[size] = '\0';
1033
1034         __buffer_unlock_commit(buffer, event);
1035         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1036  out:
1037         ring_buffer_nest_end(buffer);
1038         return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_puts);
1041
1042 /**
1043  * __trace_bputs - write the pointer to a constant string into trace buffer
1044  * @ip:    The address of the caller
1045  * @str:   The constant string to write to the buffer to
1046  */
1047 int __trace_bputs(unsigned long ip, const char *str)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct bputs_entry *entry;
1052         unsigned long irq_flags;
1053         int size = sizeof(struct bputs_entry);
1054         int ret = 0;
1055         int pc;
1056
1057         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1058                 return 0;
1059
1060         pc = preempt_count();
1061
1062         if (unlikely(tracing_selftest_running || tracing_disabled))
1063                 return 0;
1064
1065         local_save_flags(irq_flags);
1066         buffer = global_trace.array_buffer.buffer;
1067
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1070                                             irq_flags, pc);
1071         if (!event)
1072                 goto out;
1073
1074         entry = ring_buffer_event_data(event);
1075         entry->ip                       = ip;
1076         entry->str                      = str;
1077
1078         __buffer_unlock_commit(buffer, event);
1079         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1080
1081         ret = 1;
1082  out:
1083         ring_buffer_nest_end(buffer);
1084         return ret;
1085 }
1086 EXPORT_SYMBOL_GPL(__trace_bputs);
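/*
 * These two exports back the trace_puts() macro (declared next to
 * trace_printk()), which picks __trace_bputs() when the string is a
 * compile-time constant and falls back to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */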
1087
1088 #ifdef CONFIG_TRACER_SNAPSHOT
1089 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1090                                            void *cond_data)
1091 {
1092         struct tracer *tracer = tr->current_trace;
1093         unsigned long flags;
1094
1095         if (in_nmi()) {
1096                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1097                 internal_trace_puts("*** snapshot is being ignored        ***\n");
1098                 return;
1099         }
1100
1101         if (!tr->allocated_snapshot) {
1102                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1103                 internal_trace_puts("*** stopping trace here!   ***\n");
1104                 tracing_off();
1105                 return;
1106         }
1107
1108         /* Note, snapshot can not be used when the tracer uses it */
1109         if (tracer->use_max_tr) {
1110                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1111                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1112                 return;
1113         }
1114
1115         local_irq_save(flags);
1116         update_max_tr(tr, current, smp_processor_id(), cond_data);
1117         local_irq_restore(flags);
1118 }
1119
1120 void tracing_snapshot_instance(struct trace_array *tr)
1121 {
1122         tracing_snapshot_instance_cond(tr, NULL);
1123 }
1124
1125 /**
1126  * tracing_snapshot - take a snapshot of the current buffer.
1127  *
1128  * This causes a swap between the snapshot buffer and the current live
1129  * tracing buffer. You can use this to take snapshots of the live
1130  * trace when some condition is triggered, but continue to trace.
1131  *
1132  * Note, make sure to allocate the snapshot with either
1133  * a tracing_snapshot_alloc(), or by doing it manually
1134  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1135  *
1136  * If the snapshot buffer is not allocated, it will stop tracing.
1137  * Basically making a permanent snapshot.
1138  */
1139 void tracing_snapshot(void)
1140 {
1141         struct trace_array *tr = &global_trace;
1142
1143         tracing_snapshot_instance(tr);
1144 }
1145 EXPORT_SYMBOL_GPL(tracing_snapshot);
1146
1147 /**
1148  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1149  * @tr:         The tracing instance to snapshot
1150  * @cond_data:  The data to be tested conditionally, and possibly saved
1151  *
1152  * This is the same as tracing_snapshot() except that the snapshot is
1153  * conditional - the snapshot will only happen if the
1154  * cond_snapshot.update() implementation receiving the cond_data
1155  * returns true, which means that the trace array's cond_snapshot
1156  * update() operation used the cond_data to determine whether the
1157  * snapshot should be taken, and if it was, presumably saved it along
1158  * with the snapshot.
1159  */
1160 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1161 {
1162         tracing_snapshot_instance_cond(tr, cond_data);
1163 }
1164 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1165
1166 /**
1167  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1168  * @tr:         The tracing instance
1169  *
1170  * When the user enables a conditional snapshot using
1171  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1172  * with the snapshot.  This accessor is used to retrieve it.
1173  *
1174  * Should not be called from cond_snapshot.update(), since it takes
1175  * the tr->max_lock lock, which the code calling
1176  * cond_snapshot.update() has already done.
1177  *
1178  * Returns the cond_data associated with the trace array's snapshot.
1179  */
1180 void *tracing_cond_snapshot_data(struct trace_array *tr)
1181 {
1182         void *cond_data = NULL;
1183
1184         arch_spin_lock(&tr->max_lock);
1185
1186         if (tr->cond_snapshot)
1187                 cond_data = tr->cond_snapshot->cond_data;
1188
1189         arch_spin_unlock(&tr->max_lock);
1190
1191         return cond_data;
1192 }
1193 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1194
1195 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1196                                         struct array_buffer *size_buf, int cpu_id);
1197 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1198
1199 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1200 {
1201         int ret;
1202
1203         if (!tr->allocated_snapshot) {
1204
1205                 /* allocate spare buffer */
1206                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1207                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1208                 if (ret < 0)
1209                         return ret;
1210
1211                 tr->allocated_snapshot = true;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static void free_snapshot(struct trace_array *tr)
1218 {
1219         /*
1220          * We don't free the ring buffer; instead, we resize it because
1221          * the max_tr ring buffer has some state (e.g. ring->clock) and
1222          * we want to preserve it.
1223          */
1224         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1225         set_buffer_entries(&tr->max_buffer, 1);
1226         tracing_reset_online_cpus(&tr->max_buffer);
1227         tr->allocated_snapshot = false;
1228 }
1229
1230 /**
1231  * tracing_alloc_snapshot - allocate snapshot buffer.
1232  *
1233  * This only allocates the snapshot buffer if it isn't already
1234  * allocated - it doesn't also take a snapshot.
1235  *
1236  * This is meant to be used in cases where the snapshot buffer needs
1237  * to be set up for events that can't sleep but need to be able to
1238  * trigger a snapshot.
1239  */
1240 int tracing_alloc_snapshot(void)
1241 {
1242         struct trace_array *tr = &global_trace;
1243         int ret;
1244
1245         ret = tracing_alloc_snapshot_instance(tr);
1246         WARN_ON(ret < 0);
1247
1248         return ret;
1249 }
1250 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1251
1252 /**
1253  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1254  *
1255  * This is similar to tracing_snapshot(), but it will allocate the
1256  * snapshot buffer if it isn't already allocated. Use this only
1257  * where it is safe to sleep, as the allocation may sleep.
1258  *
1259  * This causes a swap between the snapshot buffer and the current live
1260  * tracing buffer. You can use this to take snapshots of the live
1261  * trace when some condition is triggered, but continue to trace.
1262  */
1263 void tracing_snapshot_alloc(void)
1264 {
1265         int ret;
1266
1267         ret = tracing_alloc_snapshot();
1268         if (ret < 0)
1269                 return;
1270
1271         tracing_snapshot();
1272 }
1273 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1274
1275 /**
1276  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1277  * @tr:         The tracing instance
1278  * @cond_data:  User data to associate with the snapshot
1279  * @update:     Implementation of the cond_snapshot update function
1280  *
1281  * Check whether the conditional snapshot for the given instance has
1282  * already been enabled, or if the current tracer is already using a
1283  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1284  * save the cond_data and update function inside.
1285  *
1286  * Returns 0 if successful, error otherwise.
1287  */
1288 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1289                                  cond_update_fn_t update)
1290 {
1291         struct cond_snapshot *cond_snapshot;
1292         int ret = 0;
1293
1294         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1295         if (!cond_snapshot)
1296                 return -ENOMEM;
1297
1298         cond_snapshot->cond_data = cond_data;
1299         cond_snapshot->update = update;
1300
1301         mutex_lock(&trace_types_lock);
1302
1303         ret = tracing_alloc_snapshot_instance(tr);
1304         if (ret)
1305                 goto fail_unlock;
1306
1307         if (tr->current_trace->use_max_tr) {
1308                 ret = -EBUSY;
1309                 goto fail_unlock;
1310         }
1311
1312         /*
1313          * The cond_snapshot can only change to NULL without the
1314          * trace_types_lock. We don't care if we race with it going
1315          * to NULL, but we want to make sure that it's not set to
1316          * something other than NULL when we get here, which we can
1317          * do safely with only holding the trace_types_lock and not
1318          * having to take the max_lock.
1319          */
1320         if (tr->cond_snapshot) {
1321                 ret = -EBUSY;
1322                 goto fail_unlock;
1323         }
1324
1325         arch_spin_lock(&tr->max_lock);
1326         tr->cond_snapshot = cond_snapshot;
1327         arch_spin_unlock(&tr->max_lock);
1328
1329         mutex_unlock(&trace_types_lock);
1330
1331         return ret;
1332
1333  fail_unlock:
1334         mutex_unlock(&trace_types_lock);
1335         kfree(cond_snapshot);
1336         return ret;
1337 }
1338 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
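/*
 * Usage sketch (illustrative, not part of this file): a caller enables a
 * conditional snapshot with an update callback and later triggers it via
 * tracing_snapshot_cond(); my_update, my_value and my_threshold below are
 * hypothetical:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return *(unsigned long *)cond_data > my_threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_value, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_value);	// swaps only if my_update() returns true
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */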
1339
1340 /**
1341  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1342  * @tr:         The tracing instance
1343  *
1344  * Check whether the conditional snapshot for the given instance is
1345  * enabled; if so, free the cond_snapshot associated with it,
1346  * otherwise return -EINVAL.
1347  *
1348  * Returns 0 if successful, error otherwise.
1349  */
1350 int tracing_snapshot_cond_disable(struct trace_array *tr)
1351 {
1352         int ret = 0;
1353
1354         arch_spin_lock(&tr->max_lock);
1355
1356         if (!tr->cond_snapshot)
1357                 ret = -EINVAL;
1358         else {
1359                 kfree(tr->cond_snapshot);
1360                 tr->cond_snapshot = NULL;
1361         }
1362
1363         arch_spin_unlock(&tr->max_lock);
1364
1365         return ret;
1366 }
1367 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1368 #else
1369 void tracing_snapshot(void)
1370 {
1371         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1372 }
1373 EXPORT_SYMBOL_GPL(tracing_snapshot);
1374 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1375 {
1376         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1377 }
1378 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1379 int tracing_alloc_snapshot(void)
1380 {
1381         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1382         return -ENODEV;
1383 }
1384 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1385 void tracing_snapshot_alloc(void)
1386 {
1387         /* Give warning */
1388         tracing_snapshot();
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1391 void *tracing_cond_snapshot_data(struct trace_array *tr)
1392 {
1393         return NULL;
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1396 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1397 {
1398         return -ENODEV;
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1401 int tracing_snapshot_cond_disable(struct trace_array *tr)
1402 {
1403         return false;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1406 #endif /* CONFIG_TRACER_SNAPSHOT */
1407
1408 void tracer_tracing_off(struct trace_array *tr)
1409 {
1410         if (tr->array_buffer.buffer)
1411                 ring_buffer_record_off(tr->array_buffer.buffer);
1412         /*
1413          * This flag is looked at when buffers haven't been allocated
1414          * yet, or by some tracers (like irqsoff) that just want to
1415          * know if the ring buffer has been disabled, but it can handle
1416          * races where it gets disabled while we still do a record.
1417          * As the check is in the fast path of the tracers, it is more
1418          * important to be fast than accurate.
1419          */
1420         tr->buffer_disabled = 1;
1421         /* Make the flag seen by readers */
1422         smp_wmb();
1423 }
1424
1425 /**
1426  * tracing_off - turn off tracing buffers
1427  *
1428  * This function stops the tracing buffers from recording data.
1429  * It does not disable any overhead the tracers themselves may
1430  * be causing. This function simply causes all recording to
1431  * the ring buffers to fail.
1432  */
1433 void tracing_off(void)
1434 {
1435         tracer_tracing_off(&global_trace);
1436 }
1437 EXPORT_SYMBOL_GPL(tracing_off);
1438
1439 void disable_trace_on_warning(void)
1440 {
1441         if (__disable_trace_on_warning) {
1442                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1443                         "Disabling tracing due to warning\n");
1444                 tracing_off();
1445         }
1446 }
1447
1448 /**
1449  * tracer_tracing_is_on - show real state of ring buffer enabled
1450  * @tr : the trace array to know if ring buffer is enabled
1451  *
1452  * Shows real state of the ring buffer if it is enabled or not.
1453  */
1454 bool tracer_tracing_is_on(struct trace_array *tr)
1455 {
1456         if (tr->array_buffer.buffer)
1457                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1458         return !tr->buffer_disabled;
1459 }
1460
1461 /**
1462  * tracing_is_on - show state of ring buffers enabled
1463  */
1464 int tracing_is_on(void)
1465 {
1466         return tracer_tracing_is_on(&global_trace);
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_is_on);
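/*
 * The tracefs "tracing_on" file is the user-space counterpart of the helpers
 * above; writing 0 or 1 to it ends up calling tracer_tracing_off() or
 * tracer_tracing_on() for the instance, e.g.:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on
 */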
1469
1470 static int __init set_buf_size(char *str)
1471 {
1472         unsigned long buf_size;
1473
1474         if (!str)
1475                 return 0;
1476         buf_size = memparse(str, &str);
1477         /* nr_entries can not be zero */
1478         if (buf_size == 0)
1479                 return 0;
1480         trace_buf_size = buf_size;
1481         return 1;
1482 }
1483 __setup("trace_buf_size=", set_buf_size);
1484
1485 static int __init set_tracing_thresh(char *str)
1486 {
1487         unsigned long threshold;
1488         int ret;
1489
1490         if (!str)
1491                 return 0;
1492         ret = kstrtoul(str, 0, &threshold);
1493         if (ret < 0)
1494                 return 0;
1495         tracing_thresh = threshold * 1000;
1496         return 1;
1497 }
1498 __setup("tracing_thresh=", set_tracing_thresh);
1499
1500 unsigned long nsecs_to_usecs(unsigned long nsecs)
1501 {
1502         return nsecs / 1000;
1503 }
1504
1505 /*
1506  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1507  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1508  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1509  * of strings in the order that the evals (enum) were defined.
1510  */
1511 #undef C
1512 #define C(a, b) b
1513
1514 /* These must match the bit positions in trace_iterator_flags */
1515 static const char *trace_options[] = {
1516         TRACE_FLAGS
1517         NULL
1518 };
1519
1520 static struct {
1521         u64 (*func)(void);
1522         const char *name;
1523         int in_ns;              /* is this clock in nanoseconds? */
1524 } trace_clocks[] = {
1525         { trace_clock_local,            "local",        1 },
1526         { trace_clock_global,           "global",       1 },
1527         { trace_clock_counter,          "counter",      0 },
1528         { trace_clock_jiffies,          "uptime",       0 },
1529         { trace_clock,                  "perf",         1 },
1530         { ktime_get_mono_fast_ns,       "mono",         1 },
1531         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1532         { ktime_get_boot_fast_ns,       "boot",         1 },
1533         ARCH_TRACE_CLOCKS
1534 };
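/*
 * The "name" strings above are what user space writes to the tracefs
 * trace_clock file to select a clock, e.g.:
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 */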
1535
1536 bool trace_clock_in_ns(struct trace_array *tr)
1537 {
1538         if (trace_clocks[tr->clock_id].in_ns)
1539                 return true;
1540
1541         return false;
1542 }
1543
1544 /*
1545  * trace_parser_get_init - gets the buffer for trace parser
1546  */
1547 int trace_parser_get_init(struct trace_parser *parser, int size)
1548 {
1549         memset(parser, 0, sizeof(*parser));
1550
1551         parser->buffer = kmalloc(size, GFP_KERNEL);
1552         if (!parser->buffer)
1553                 return 1;
1554
1555         parser->size = size;
1556         return 0;
1557 }
1558
1559 /*
1560  * trace_parser_put - frees the buffer for trace parser
1561  */
1562 void trace_parser_put(struct trace_parser *parser)
1563 {
1564         kfree(parser->buffer);
1565         parser->buffer = NULL;
1566 }
1567
1568 /*
1569  * trace_get_user - reads the user input string separated by space
1570  * (matched by isspace(ch))
1571  *
1572  * For each string found, the 'struct trace_parser' is updated,
1573  * and the function returns.
1574  *
1575  * Returns number of bytes read.
1576  *
1577  * See kernel/trace/trace.h for 'struct trace_parser' details.
1578  */
1579 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1580         size_t cnt, loff_t *ppos)
1581 {
1582         char ch;
1583         size_t read = 0;
1584         ssize_t ret;
1585
1586         if (!*ppos)
1587                 trace_parser_clear(parser);
1588
1589         ret = get_user(ch, ubuf++);
1590         if (ret)
1591                 goto out;
1592
1593         read++;
1594         cnt--;
1595
1596         /*
1597          * The parser is not finished with the last write,
1598          * continue reading the user input without skipping spaces.
1599          */
1600         if (!parser->cont) {
1601                 /* skip white space */
1602                 while (cnt && isspace(ch)) {
1603                         ret = get_user(ch, ubuf++);
1604                         if (ret)
1605                                 goto out;
1606                         read++;
1607                         cnt--;
1608                 }
1609
1610                 parser->idx = 0;
1611
1612                 /* only spaces were written */
1613                 if (isspace(ch) || !ch) {
1614                         *ppos += read;
1615                         ret = read;
1616                         goto out;
1617                 }
1618         }
1619
1620         /* read the non-space input */
1621         while (cnt && !isspace(ch) && ch) {
1622                 if (parser->idx < parser->size - 1)
1623                         parser->buffer[parser->idx++] = ch;
1624                 else {
1625                         ret = -EINVAL;
1626                         goto out;
1627                 }
1628                 ret = get_user(ch, ubuf++);
1629                 if (ret)
1630                         goto out;
1631                 read++;
1632                 cnt--;
1633         }
1634
1635         /* We either got finished input or we have to wait for another call. */
1636         if (isspace(ch) || !ch) {
1637                 parser->buffer[parser->idx] = 0;
1638                 parser->cont = false;
1639         } else if (parser->idx < parser->size - 1) {
1640                 parser->cont = true;
1641                 parser->buffer[parser->idx++] = ch;
1642                 /* Make sure the parsed string always terminates with '\0'. */
1643                 parser->buffer[parser->idx] = 0;
1644         } else {
1645                 ret = -EINVAL;
1646                 goto out;
1647         }
1648
1649         *ppos += read;
1650         ret = read;
1651
1652 out:
1653         return ret;
1654 }
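/*
 * For an in-file example of the full parser cycle, see trace_pid_write()
 * above: trace_parser_get_init(), repeated trace_get_user() calls with
 * kstrtoul() on parser.buffer, then trace_parser_put().
 */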
1655
1656 /* TODO add a seq_buf_to_buffer() */
1657 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1658 {
1659         int len;
1660
1661         if (trace_seq_used(s) <= s->seq.readpos)
1662                 return -EBUSY;
1663
1664         len = trace_seq_used(s) - s->seq.readpos;
1665         if (cnt > len)
1666                 cnt = len;
1667         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1668
1669         s->seq.readpos += cnt;
1670         return cnt;
1671 }
1672
1673 unsigned long __read_mostly     tracing_thresh;
1674 static const struct file_operations tracing_max_lat_fops;
1675
1676 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1677         defined(CONFIG_FSNOTIFY)
1678
1679 static struct workqueue_struct *fsnotify_wq;
1680
1681 static void latency_fsnotify_workfn(struct work_struct *work)
1682 {
1683         struct trace_array *tr = container_of(work, struct trace_array,
1684                                               fsnotify_work);
1685         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1686 }
1687
1688 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1689 {
1690         struct trace_array *tr = container_of(iwork, struct trace_array,
1691                                               fsnotify_irqwork);
1692         queue_work(fsnotify_wq, &tr->fsnotify_work);
1693 }
1694
1695 static void trace_create_maxlat_file(struct trace_array *tr,
1696                                      struct dentry *d_tracer)
1697 {
1698         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1699         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1700         tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1701                                               d_tracer, &tr->max_latency,
1702                                               &tracing_max_lat_fops);
1703 }
1704
1705 __init static int latency_fsnotify_init(void)
1706 {
1707         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1708                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1709         if (!fsnotify_wq) {
1710                 pr_err("Unable to allocate tr_max_lat_wq\n");
1711                 return -ENOMEM;
1712         }
1713         return 0;
1714 }
1715
1716 late_initcall_sync(latency_fsnotify_init);
1717
1718 void latency_fsnotify(struct trace_array *tr)
1719 {
1720         if (!fsnotify_wq)
1721                 return;
1722         /*
1723          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1724          * possible that we are called from __schedule() or do_idle(), which
1725          * could cause a deadlock.
1726          */
1727         irq_work_queue(&tr->fsnotify_irqwork);
1728 }
1729
1730 /*
1731  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1732  *  defined(CONFIG_FSNOTIFY)
1733  */
1734 #else
1735
1736 #define trace_create_maxlat_file(tr, d_tracer)                          \
1737         trace_create_file("tracing_max_latency", 0644, d_tracer,        \
1738                           &tr->max_latency, &tracing_max_lat_fops)
1739
1740 #endif
1741
1742 #ifdef CONFIG_TRACER_MAX_TRACE
1743 /*
1744  * Copy the new maximum trace into the separate maximum-trace
1745  * structure. (this way the maximum trace is permanently saved,
1746  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1747  */
1748 static void
1749 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1750 {
1751         struct array_buffer *trace_buf = &tr->array_buffer;
1752         struct array_buffer *max_buf = &tr->max_buffer;
1753         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1754         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1755
1756         max_buf->cpu = cpu;
1757         max_buf->time_start = data->preempt_timestamp;
1758
1759         max_data->saved_latency = tr->max_latency;
1760         max_data->critical_start = data->critical_start;
1761         max_data->critical_end = data->critical_end;
1762
1763         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1764         max_data->pid = tsk->pid;
1765         /*
1766          * If tsk == current, then use current_uid(), as that does not use
1767          * RCU. The irq tracer can be called out of RCU scope.
1768          */
1769         if (tsk == current)
1770                 max_data->uid = current_uid();
1771         else
1772                 max_data->uid = task_uid(tsk);
1773
1774         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1775         max_data->policy = tsk->policy;
1776         max_data->rt_priority = tsk->rt_priority;
1777
1778         /* record this tasks comm */
1779         tracing_record_cmdline(tsk);
1780         latency_fsnotify(tr);
1781 }
1782
1783 /**
1784  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1785  * @tr: tracer
1786  * @tsk: the task with the latency
1787  * @cpu: The cpu that initiated the trace.
1788  * @cond_data: User data associated with a conditional snapshot
1789  *
1790  * Flip the buffers between the @tr and the max_tr and record information
1791  * about which task was the cause of this latency.
1792  */
1793 void
1794 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1795               void *cond_data)
1796 {
1797         if (tr->stop_count)
1798                 return;
1799
1800         WARN_ON_ONCE(!irqs_disabled());
1801
1802         if (!tr->allocated_snapshot) {
1803                 /* Only the nop tracer should hit this when disabling */
1804                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1805                 return;
1806         }
1807
1808         arch_spin_lock(&tr->max_lock);
1809
1810         /* Inherit the recordable setting from array_buffer */
1811         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1812                 ring_buffer_record_on(tr->max_buffer.buffer);
1813         else
1814                 ring_buffer_record_off(tr->max_buffer.buffer);
1815
1816 #ifdef CONFIG_TRACER_SNAPSHOT
1817         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1818                 goto out_unlock;
1819 #endif
1820         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1821
1822         __update_max_tr(tr, tsk, cpu);
1823
1824  out_unlock:
1825         arch_spin_unlock(&tr->max_lock);
1826 }
1827
1828 /**
1829  * update_max_tr_single - only copy one trace over, and reset the rest
1830  * @tr: tracer
1831  * @tsk: task with the latency
1832  * @cpu: the cpu of the buffer to copy.
1833  *
1834  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1835  */
1836 void
1837 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1838 {
1839         int ret;
1840
1841         if (tr->stop_count)
1842                 return;
1843
1844         WARN_ON_ONCE(!irqs_disabled());
1845         if (!tr->allocated_snapshot) {
1846                 /* Only the nop tracer should hit this when disabling */
1847                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1848                 return;
1849         }
1850
1851         arch_spin_lock(&tr->max_lock);
1852
1853         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1854
1855         if (ret == -EBUSY) {
1856                 /*
1857                  * We failed to swap the buffer due to a commit taking
1858                  * place on this CPU. We fail to record, but we reset
1859                  * the max trace buffer (no one writes directly to it)
1860                  * and flag that it failed.
1861                  */
1862                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1863                         "Failed to swap buffers due to commit in progress\n");
1864         }
1865
1866         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1867
1868         __update_max_tr(tr, tsk, cpu);
1869         arch_spin_unlock(&tr->max_lock);
1870 }
1871 #endif /* CONFIG_TRACER_MAX_TRACE */
1872
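/*
 * Block until the ring buffer behind the iterator has data for
 * iter->cpu_file; 'full' controls how much data must be available before
 * the waiter is woken. Buffer iterators are never blocked on.
 */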
1873 static int wait_on_pipe(struct trace_iterator *iter, int full)
1874 {
1875         /* Iterators are static, they should be filled or empty */
1876         if (trace_buffer_iter(iter, iter->cpu_file))
1877                 return 0;
1878
1879         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1880                                 full);
1881 }
1882
1883 #ifdef CONFIG_FTRACE_STARTUP_TEST
1884 static bool selftests_can_run;
1885
1886 struct trace_selftests {
1887         struct list_head                list;
1888         struct tracer                   *type;
1889 };
1890
1891 static LIST_HEAD(postponed_selftests);
1892
1893 static int save_selftest(struct tracer *type)
1894 {
1895         struct trace_selftests *selftest;
1896
1897         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1898         if (!selftest)
1899                 return -ENOMEM;
1900
1901         selftest->type = type;
1902         list_add(&selftest->list, &postponed_selftests);
1903         return 0;
1904 }
1905
1906 static int run_tracer_selftest(struct tracer *type)
1907 {
1908         struct trace_array *tr = &global_trace;
1909         struct tracer *saved_tracer = tr->current_trace;
1910         int ret;
1911
1912         if (!type->selftest || tracing_selftest_disabled)
1913                 return 0;
1914
1915         /*
1916          * If a tracer registers early in boot up (before scheduling is
1917          * initialized and such), then do not run its selftests yet.
1918          * Instead, run it a little later in the boot process.
1919          */
1920         if (!selftests_can_run)
1921                 return save_selftest(type);
1922
1923         /*
1924          * Run a selftest on this tracer.
1925          * Here we reset the trace buffer, and set the current
1926          * tracer to be this tracer. The tracer can then run some
1927          * internal tracing to verify that everything is in order.
1928          * If we fail, we do not register this tracer.
1929          */
1930         tracing_reset_online_cpus(&tr->array_buffer);
1931
1932         tr->current_trace = type;
1933
1934 #ifdef CONFIG_TRACER_MAX_TRACE
1935         if (type->use_max_tr) {
1936                 /* If we expanded the buffers, make sure the max is expanded too */
1937                 if (ring_buffer_expanded)
1938                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1939                                            RING_BUFFER_ALL_CPUS);
1940                 tr->allocated_snapshot = true;
1941         }
1942 #endif
1943
1944         /* the test is responsible for initializing and enabling */
1945         pr_info("Testing tracer %s: ", type->name);
1946         ret = type->selftest(type, tr);
1947         /* the test is responsible for resetting too */
1948         tr->current_trace = saved_tracer;
1949         if (ret) {
1950                 printk(KERN_CONT "FAILED!\n");
1951                 /* Add the warning after printing 'FAILED' */
1952                 WARN_ON(1);
1953                 return -1;
1954         }
1955         /* Only reset on passing, to avoid touching corrupted buffers */
1956         tracing_reset_online_cpus(&tr->array_buffer);
1957
1958 #ifdef CONFIG_TRACER_MAX_TRACE
1959         if (type->use_max_tr) {
1960                 tr->allocated_snapshot = false;
1961
1962                 /* Shrink the max buffer again */
1963                 if (ring_buffer_expanded)
1964                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1965                                            RING_BUFFER_ALL_CPUS);
1966         }
1967 #endif
1968
1969         printk(KERN_CONT "PASSED\n");
1970         return 0;
1971 }
1972
1973 static __init int init_trace_selftests(void)
1974 {
1975         struct trace_selftests *p, *n;
1976         struct tracer *t, **last;
1977         int ret;
1978
1979         selftests_can_run = true;
1980
1981         mutex_lock(&trace_types_lock);
1982
1983         if (list_empty(&postponed_selftests))
1984                 goto out;
1985
1986         pr_info("Running postponed tracer tests:\n");
1987
1988         tracing_selftest_running = true;
1989         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1990                 /* This loop can take minutes when sanitizers are enabled, so
1991          * let's make sure we allow RCU processing.
1992                  */
1993                 cond_resched();
1994                 ret = run_tracer_selftest(p->type);
1995                 /* If the test fails, then warn and remove from available_tracers */
1996                 if (ret < 0) {
1997                         WARN(1, "tracer: %s failed selftest, disabling\n",
1998                              p->type->name);
1999                         last = &trace_types;
2000                         for (t = trace_types; t; t = t->next) {
2001                                 if (t == p->type) {
2002                                         *last = t->next;
2003                                         break;
2004                                 }
2005                                 last = &t->next;
2006                         }
2007                 }
2008                 list_del(&p->list);
2009                 kfree(p);
2010         }
2011         tracing_selftest_running = false;
2012
2013  out:
2014         mutex_unlock(&trace_types_lock);
2015
2016         return 0;
2017 }
2018 core_initcall(init_trace_selftests);
2019 #else
2020 static inline int run_tracer_selftest(struct tracer *type)
2021 {
2022         return 0;
2023 }
2024 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2025
2026 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2027
2028 static void __init apply_trace_boot_options(void);
2029
2030 /**
2031  * register_tracer - register a tracer with the ftrace system.
2032  * @type: the plugin for the tracer
2033  *
2034  * Register a new plugin tracer.
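 *
 * A minimal registration looks roughly like the sketch below. The
 * "my_tracer" tracer and its callbacks are hypothetical; the struct
 * tracer fields and the register_tracer() call are real:
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);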
2035  */
2036 int __init register_tracer(struct tracer *type)
2037 {
2038         struct tracer *t;
2039         int ret = 0;
2040
2041         if (!type->name) {
2042                 pr_info("Tracer must have a name\n");
2043                 return -1;
2044         }
2045
2046         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2047                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2048                 return -1;
2049         }
2050
2051         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2052                 pr_warn("Can not register tracer %s due to lockdown\n",
2053                            type->name);
2054                 return -EPERM;
2055         }
2056
2057         mutex_lock(&trace_types_lock);
2058
2059         tracing_selftest_running = true;
2060
2061         for (t = trace_types; t; t = t->next) {
2062                 if (strcmp(type->name, t->name) == 0) {
2063                         /* already found */
2064                         pr_info("Tracer %s already registered\n",
2065                                 type->name);
2066                         ret = -1;
2067                         goto out;
2068                 }
2069         }
2070
2071         if (!type->set_flag)
2072                 type->set_flag = &dummy_set_flag;
2073         if (!type->flags) {
2074                 /* allocate a dummy tracer_flags */
2075                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2076                 if (!type->flags) {
2077                         ret = -ENOMEM;
2078                         goto out;
2079                 }
2080                 type->flags->val = 0;
2081                 type->flags->opts = dummy_tracer_opt;
2082         } else
2083                 if (!type->flags->opts)
2084                         type->flags->opts = dummy_tracer_opt;
2085
2086         /* store the tracer for __set_tracer_option */
2087         type->flags->trace = type;
2088
2089         ret = run_tracer_selftest(type);
2090         if (ret < 0)
2091                 goto out;
2092
2093         type->next = trace_types;
2094         trace_types = type;
2095         add_tracer_options(&global_trace, type);
2096
2097  out:
2098         tracing_selftest_running = false;
2099         mutex_unlock(&trace_types_lock);
2100
2101         if (ret || !default_bootup_tracer)
2102                 goto out_unlock;
2103
2104         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2105                 goto out_unlock;
2106
2107         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2108         /* Do we want this tracer to start on bootup? */
2109         tracing_set_tracer(&global_trace, type->name);
2110         default_bootup_tracer = NULL;
2111
2112         apply_trace_boot_options();
2113
2114         /* disable other selftests, since this will break them. */
2115         tracing_selftest_disabled = true;
2116 #ifdef CONFIG_FTRACE_STARTUP_TEST
2117         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
2118                type->name);
2119 #endif
2120
2121  out_unlock:
2122         return ret;
2123 }
2124
2125 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2126 {
2127         struct trace_buffer *buffer = buf->buffer;
2128
2129         if (!buffer)
2130                 return;
2131
2132         ring_buffer_record_disable(buffer);
2133
2134         /* Make sure all commits have finished */
2135         synchronize_rcu();
2136         ring_buffer_reset_cpu(buffer, cpu);
2137
2138         ring_buffer_record_enable(buffer);
2139 }
2140
2141 void tracing_reset_online_cpus(struct array_buffer *buf)
2142 {
2143         struct trace_buffer *buffer = buf->buffer;
2144
2145         if (!buffer)
2146                 return;
2147
2148         ring_buffer_record_disable(buffer);
2149
2150         /* Make sure all commits have finished */
2151         synchronize_rcu();
2152
2153         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2154
2155         ring_buffer_reset_online_cpus(buffer);
2156
2157         ring_buffer_record_enable(buffer);
2158 }
2159
2160 /* Must have trace_types_lock held */
2161 void tracing_reset_all_online_cpus(void)
2162 {
2163         struct trace_array *tr;
2164
2165         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2166                 if (!tr->clear_trace)
2167                         continue;
2168                 tr->clear_trace = false;
2169                 tracing_reset_online_cpus(&tr->array_buffer);
2170 #ifdef CONFIG_TRACER_MAX_TRACE
2171                 tracing_reset_online_cpus(&tr->max_buffer);
2172 #endif
2173         }
2174 }
2175
2176 static int *tgid_map;
2177
2178 #define SAVED_CMDLINES_DEFAULT 128
2179 #define NO_CMDLINE_MAP UINT_MAX
2180 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
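/*
 * Map pids to their most recently recorded comm and back again, so the
 * comm of a recently traced task can be resolved at read time without
 * holding a task reference. Entries are recycled through cmdline_idx as
 * a simple ring once cmdline_num slots are in use.
 */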
2181 struct saved_cmdlines_buffer {
2182         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2183         unsigned *map_cmdline_to_pid;
2184         unsigned cmdline_num;
2185         int cmdline_idx;
2186         char *saved_cmdlines;
2187 };
2188 static struct saved_cmdlines_buffer *savedcmd;
2189
2190 /* temporary disable recording */
2191 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2192
2193 static inline char *get_saved_cmdlines(int idx)
2194 {
2195         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2196 }
2197
2198 static inline void set_cmdline(int idx, const char *cmdline)
2199 {
2200         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2201 }
2202
2203 static int allocate_cmdlines_buffer(unsigned int val,
2204                                     struct saved_cmdlines_buffer *s)
2205 {
2206         s->map_cmdline_to_pid = kmalloc_array(val,
2207                                               sizeof(*s->map_cmdline_to_pid),
2208                                               GFP_KERNEL);
2209         if (!s->map_cmdline_to_pid)
2210                 return -ENOMEM;
2211
2212         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2213         if (!s->saved_cmdlines) {
2214                 kfree(s->map_cmdline_to_pid);
2215                 return -ENOMEM;
2216         }
2217
2218         s->cmdline_idx = 0;
2219         s->cmdline_num = val;
2220         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2221                sizeof(s->map_pid_to_cmdline));
2222         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2223                val * sizeof(*s->map_cmdline_to_pid));
2224
2225         return 0;
2226 }
2227
2228 static int trace_create_savedcmd(void)
2229 {
2230         int ret;
2231
2232         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2233         if (!savedcmd)
2234                 return -ENOMEM;
2235
2236         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2237         if (ret < 0) {
2238                 kfree(savedcmd);
2239                 savedcmd = NULL;
2240                 return -ENOMEM;
2241         }
2242
2243         return 0;
2244 }
2245
2246 int is_tracing_stopped(void)
2247 {
2248         return global_trace.stop_count;
2249 }
2250
2251 /**
2252  * tracing_start - quick start of the tracer
2253  *
2254  * If tracing is enabled but was stopped by tracing_stop,
2255  * this will start the tracer back up.
2256  */
2257 void tracing_start(void)
2258 {
2259         struct trace_buffer *buffer;
2260         unsigned long flags;
2261
2262         if (tracing_disabled)
2263                 return;
2264
2265         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2266         if (--global_trace.stop_count) {
2267                 if (global_trace.stop_count < 0) {
2268                         /* Someone screwed up their debugging */
2269                         WARN_ON_ONCE(1);
2270                         global_trace.stop_count = 0;
2271                 }
2272                 goto out;
2273         }
2274
2275         /* Prevent the buffers from switching */
2276         arch_spin_lock(&global_trace.max_lock);
2277
2278         buffer = global_trace.array_buffer.buffer;
2279         if (buffer)
2280                 ring_buffer_record_enable(buffer);
2281
2282 #ifdef CONFIG_TRACER_MAX_TRACE
2283         buffer = global_trace.max_buffer.buffer;
2284         if (buffer)
2285                 ring_buffer_record_enable(buffer);
2286 #endif
2287
2288         arch_spin_unlock(&global_trace.max_lock);
2289
2290  out:
2291         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2292 }
2293
2294 static void tracing_start_tr(struct trace_array *tr)
2295 {
2296         struct trace_buffer *buffer;
2297         unsigned long flags;
2298
2299         if (tracing_disabled)
2300                 return;
2301
2302         /* If global, we need to also start the max tracer */
2303         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2304                 return tracing_start();
2305
2306         raw_spin_lock_irqsave(&tr->start_lock, flags);
2307
2308         if (--tr->stop_count) {
2309                 if (tr->stop_count < 0) {
2310                         /* Someone screwed up their debugging */
2311                         WARN_ON_ONCE(1);
2312                         tr->stop_count = 0;
2313                 }
2314                 goto out;
2315         }
2316
2317         buffer = tr->array_buffer.buffer;
2318         if (buffer)
2319                 ring_buffer_record_enable(buffer);
2320
2321  out:
2322         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2323 }
2324
2325 /**
2326  * tracing_stop - quick stop of the tracer
2327  *
2328  * Light weight way to stop tracing. Use in conjunction with
2329  * tracing_start.
2330  */
2331 void tracing_stop(void)
2332 {
2333         struct trace_buffer *buffer;
2334         unsigned long flags;
2335
2336         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2337         if (global_trace.stop_count++)
2338                 goto out;
2339
2340         /* Prevent the buffers from switching */
2341         arch_spin_lock(&global_trace.max_lock);
2342
2343         buffer = global_trace.array_buffer.buffer;
2344         if (buffer)
2345                 ring_buffer_record_disable(buffer);
2346
2347 #ifdef CONFIG_TRACER_MAX_TRACE
2348         buffer = global_trace.max_buffer.buffer;
2349         if (buffer)
2350                 ring_buffer_record_disable(buffer);
2351 #endif
2352
2353         arch_spin_unlock(&global_trace.max_lock);
2354
2355  out:
2356         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2357 }
2358
2359 static void tracing_stop_tr(struct trace_array *tr)
2360 {
2361         struct trace_buffer *buffer;
2362         unsigned long flags;
2363
2364         /* If global, we need to also stop the max tracer */
2365         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366                 return tracing_stop();
2367
2368         raw_spin_lock_irqsave(&tr->start_lock, flags);
2369         if (tr->stop_count++)
2370                 goto out;
2371
2372         buffer = tr->array_buffer.buffer;
2373         if (buffer)
2374                 ring_buffer_record_disable(buffer);
2375
2376  out:
2377         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2378 }
2379
2380 static int trace_save_cmdline(struct task_struct *tsk)
2381 {
2382         unsigned pid, idx;
2383
2384         /* treat recording of idle task as a success */
2385         if (!tsk->pid)
2386                 return 1;
2387
2388         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2389                 return 0;
2390
2391         /*
2392          * It's not the end of the world if we don't get
2393          * the lock, but we also don't want to spin
2394          * nor do we want to disable interrupts,
2395          * so if we miss here, then better luck next time.
2396          */
2397         if (!arch_spin_trylock(&trace_cmdline_lock))
2398                 return 0;
2399
2400         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2401         if (idx == NO_CMDLINE_MAP) {
2402                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2403
2404                 /*
2405                  * Check whether the cmdline buffer at idx has a pid
2406                  * mapped. We are going to overwrite that entry so we
2407                  * need to clear the map_pid_to_cmdline. Otherwise we
2408                  * would read the new comm for the old pid.
2409                  */
2410                 pid = savedcmd->map_cmdline_to_pid[idx];
2411                 if (pid != NO_CMDLINE_MAP)
2412                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2413
2414                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2415                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2416
2417                 savedcmd->cmdline_idx = idx;
2418         }
2419
2420         set_cmdline(idx, tsk->comm);
2421
2422         arch_spin_unlock(&trace_cmdline_lock);
2423
2424         return 1;
2425 }
2426
2427 static void __trace_find_cmdline(int pid, char comm[])
2428 {
2429         unsigned map;
2430
2431         if (!pid) {
2432                 strcpy(comm, "<idle>");
2433                 return;
2434         }
2435
2436         if (WARN_ON_ONCE(pid < 0)) {
2437                 strcpy(comm, "<XXX>");
2438                 return;
2439         }
2440
2441         if (pid > PID_MAX_DEFAULT) {
2442                 strcpy(comm, "<...>");
2443                 return;
2444         }
2445
2446         map = savedcmd->map_pid_to_cmdline[pid];
2447         if (map != NO_CMDLINE_MAP)
2448                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2449         else
2450                 strcpy(comm, "<...>");
2451 }
2452
2453 void trace_find_cmdline(int pid, char comm[])
2454 {
2455         preempt_disable();
2456         arch_spin_lock(&trace_cmdline_lock);
2457
2458         __trace_find_cmdline(pid, comm);
2459
2460         arch_spin_unlock(&trace_cmdline_lock);
2461         preempt_enable();
2462 }
2463
2464 int trace_find_tgid(int pid)
2465 {
2466         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2467                 return 0;
2468
2469         return tgid_map[pid];
2470 }
2471
2472 static int trace_save_tgid(struct task_struct *tsk)
2473 {
2474         /* treat recording of idle task as a success */
2475         if (!tsk->pid)
2476                 return 1;
2477
2478         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2479                 return 0;
2480
2481         tgid_map[tsk->pid] = tsk->tgid;
2482         return 1;
2483 }
2484
2485 static bool tracing_record_taskinfo_skip(int flags)
2486 {
2487         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2488                 return true;
2489         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2490                 return true;
2491         if (!__this_cpu_read(trace_taskinfo_save))
2492                 return true;
2493         return false;
2494 }
2495
2496 /**
2497  * tracing_record_taskinfo - record the task info of a task
2498  *
2499  * @task:  task to record
2500  * @flags: TRACE_RECORD_CMDLINE for recording comm
2501  *         TRACE_RECORD_TGID for recording tgid
2502  */
2503 void tracing_record_taskinfo(struct task_struct *task, int flags)
2504 {
2505         bool done;
2506
2507         if (tracing_record_taskinfo_skip(flags))
2508                 return;
2509
2510         /*
2511          * Record as much task information as possible. If some fail, continue
2512          * to try to record the others.
2513          */
2514         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2515         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2516
2517         /* If recording any information failed, retry again soon. */
2518         if (!done)
2519                 return;
2520
2521         __this_cpu_write(trace_taskinfo_save, false);
2522 }
2523
2524 /**
2525  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2526  *
2527  * @prev: previous task during sched_switch
2528  * @next: next task during sched_switch
2529  * @flags: TRACE_RECORD_CMDLINE for recording comm
2530  *         TRACE_RECORD_TGID for recording tgid
2531  */
2532 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2533                                           struct task_struct *next, int flags)
2534 {
2535         bool done;
2536
2537         if (tracing_record_taskinfo_skip(flags))
2538                 return;
2539
2540         /*
2541          * Record as much task information as possible. If some fail, continue
2542          * to try to record the others.
2543          */
2544         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2545         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2546         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2547         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2548
2549         /* If recording any information failed, retry again soon. */
2550         if (!done)
2551                 return;
2552
2553         __this_cpu_write(trace_taskinfo_save, false);
2554 }
2555
2556 /* Helpers to record specific task information */
2557 void tracing_record_cmdline(struct task_struct *task)
2558 {
2559         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2560 }
2561
2562 void tracing_record_tgid(struct task_struct *task)
2563 {
2564         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2565 }
2566
2567 /*
2568  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570  * simplifies those functions and keeps them in sync.
2571  */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574         return trace_seq_has_overflowed(s) ?
2575                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
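/*
 * A trace_event output callback typically ends with trace_handle_return().
 * An illustrative sketch (the callback name and message are hypothetical):
 *
 *	static enum print_line_t
 *	my_event_output(struct trace_iterator *iter, int flags,
 *			struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */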
2578
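/*
 * Fill in the fields common to every trace entry: the recording task's
 * pid, the entry type, and the irq/softirq/NMI, preemption and
 * need-resched state derived from 'flags' and the preempt count 'pc'.
 */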
2579 void
2580 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2581                              unsigned long flags, int pc)
2582 {
2583         struct task_struct *tsk = current;
2584
2585         entry->preempt_count            = pc & 0xff;
2586         entry->pid                      = (tsk) ? tsk->pid : 0;
2587         entry->type                     = type;
2588         entry->flags =
2589 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2590                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2591 #else
2592                 TRACE_FLAG_IRQS_NOSUPPORT |
2593 #endif
2594                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2595                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2596                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2597                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2598                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2599 }
2600 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2601
2602 struct ring_buffer_event *
2603 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2604                           int type,
2605                           unsigned long len,
2606                           unsigned long flags, int pc)
2607 {
2608         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2609 }
2610
2611 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2612 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2613 static int trace_buffered_event_ref;
2614
2615 /**
2616  * trace_buffered_event_enable - enable buffering events
2617  *
2618  * When events are being filtered, it is quicker to use a temporary
2619  * buffer to write the event data into if there's a likely chance
2620  * that it will not be committed. The discard of the ring buffer
2621  * is not as fast as committing, and is much slower than copying
2622  * a commit.
2623  *
2624  * When an event is to be filtered, allocate per cpu buffers to
2625  * write the event data into, and if the event is filtered and discarded
2626  * it is simply dropped, otherwise, the entire data is to be committed
2627  * in one shot.
2628  */
2629 void trace_buffered_event_enable(void)
2630 {
2631         struct ring_buffer_event *event;
2632         struct page *page;
2633         int cpu;
2634
2635         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2636
2637         if (trace_buffered_event_ref++)
2638                 return;
2639
2640         for_each_tracing_cpu(cpu) {
2641                 page = alloc_pages_node(cpu_to_node(cpu),
2642                                         GFP_KERNEL | __GFP_NORETRY, 0);
2643                 if (!page)
2644                         goto failed;
2645
2646                 event = page_address(page);
2647                 memset(event, 0, sizeof(*event));
2648
2649                 per_cpu(trace_buffered_event, cpu) = event;
2650
2651                 preempt_disable();
2652                 if (cpu == smp_processor_id() &&
2653                     this_cpu_read(trace_buffered_event) !=
2654                     per_cpu(trace_buffered_event, cpu))
2655                         WARN_ON_ONCE(1);
2656                 preempt_enable();
2657         }
2658
2659         return;
2660  failed:
2661         trace_buffered_event_disable();
2662 }
2663
2664 static void enable_trace_buffered_event(void *data)
2665 {
2666         /* Probably not needed, but do it anyway */
2667         smp_rmb();
2668         this_cpu_dec(trace_buffered_event_cnt);
2669 }
2670
2671 static void disable_trace_buffered_event(void *data)
2672 {
2673         this_cpu_inc(trace_buffered_event_cnt);
2674 }
2675
2676 /**
2677  * trace_buffered_event_disable - disable buffering events
2678  *
2679  * When a filter is removed, it is faster to not use the buffered
2680  * events, and to commit directly into the ring buffer. Free up
2681  * the temp buffers when there are no more users. This requires
2682  * special synchronization with current events.
2683  */
2684 void trace_buffered_event_disable(void)
2685 {
2686         int cpu;
2687
2688         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2689
2690         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2691                 return;
2692
2693         if (--trace_buffered_event_ref)
2694                 return;
2695
2696         preempt_disable();
2697         /* For each CPU, set the buffer as used. */
2698         smp_call_function_many(tracing_buffer_mask,
2699                                disable_trace_buffered_event, NULL, 1);
2700         preempt_enable();
2701
2702         /* Wait for all current users to finish */
2703         synchronize_rcu();
2704
2705         for_each_tracing_cpu(cpu) {
2706                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2707                 per_cpu(trace_buffered_event, cpu) = NULL;
2708         }
2709         /*
2710          * Make sure trace_buffered_event is NULL before clearing
2711          * trace_buffered_event_cnt.
2712          */
2713         smp_wmb();
2714
2715         preempt_disable();
2716         /* Do the work on each cpu */
2717         smp_call_function_many(tracing_buffer_mask,
2718                                enable_trace_buffered_event, NULL, 1);
2719         preempt_enable();
2720 }
2721
2722 static struct trace_buffer *temp_buffer;
2723
2724 struct ring_buffer_event *
2725 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2726                           struct trace_event_file *trace_file,
2727                           int type, unsigned long len,
2728                           unsigned long flags, int pc)
2729 {
2730         struct ring_buffer_event *entry;
2731         int val;
2732
2733         *current_rb = trace_file->tr->array_buffer.buffer;
2734
2735         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2736              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2737             (entry = this_cpu_read(trace_buffered_event))) {
2738                 /* Try to use the per cpu buffer first */
2739                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2740                 if (val == 1) {
2741                         trace_event_setup(entry, type, flags, pc);
2742                         entry->array[0] = len;
2743                         return entry;
2744                 }
2745                 this_cpu_dec(trace_buffered_event_cnt);
2746         }
2747
2748         entry = __trace_buffer_lock_reserve(*current_rb,
2749                                             type, len, flags, pc);
2750         /*
2751          * If tracing is off, but we have triggers enabled
2752          * we still need to look at the event data. Use the temp_buffer
2753          * to store the trace event for the trigger to use. It's recursion
2754          * safe and will not be recorded anywhere.
2755          */
2756         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757                 *current_rb = temp_buffer;
2758                 entry = __trace_buffer_lock_reserve(*current_rb,
2759                                                     type, len, flags, pc);
2760         }
2761         return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764
2765 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767
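/*
 * Print a single trace event straight to the kernel log. This is the
 * slow path used when the tracepoint_printk option is set; it honours
 * the soft-disabled and filter state of the event file.
 */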
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770         struct trace_event_call *event_call;
2771         struct trace_event_file *file;
2772         struct trace_event *event;
2773         unsigned long flags;
2774         struct trace_iterator *iter = tracepoint_print_iter;
2775
2776         /* We should never get here if iter is NULL */
2777         if (WARN_ON_ONCE(!iter))
2778                 return;
2779
2780         event_call = fbuffer->trace_file->event_call;
2781         if (!event_call || !event_call->event.funcs ||
2782             !event_call->event.funcs->trace)
2783                 return;
2784
2785         file = fbuffer->trace_file;
2786         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788              !filter_match_preds(file->filter, fbuffer->entry)))
2789                 return;
2790
2791         event = &fbuffer->trace_file->event_call->event;
2792
2793         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794         trace_seq_init(&iter->seq);
2795         iter->ent = fbuffer->entry;
2796         event_call->event.funcs->trace(iter, 0, event);
2797         trace_seq_putc(&iter->seq, 0);
2798         printk("%s", iter->seq.buffer);
2799
2800         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802
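/*
 * sysctl handler for kernel.tracepoint_printk: flip the static key that
 * routes trace events through output_printk() in addition to the ring
 * buffer.
 */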
2803 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2804                              void *buffer, size_t *lenp,
2805                              loff_t *ppos)
2806 {
2807         int save_tracepoint_printk;
2808         int ret;
2809
2810         mutex_lock(&tracepoint_printk_mutex);
2811         save_tracepoint_printk = tracepoint_printk;
2812
2813         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814
2815         /*
2816          * This will force exiting early, as tracepoint_printk
2817          * is always zero when tracepoint_print_iter is not allocated
2818          */
2819         if (!tracepoint_print_iter)
2820                 tracepoint_printk = 0;
2821
2822         if (save_tracepoint_printk == tracepoint_printk)
2823                 goto out;
2824
2825         if (tracepoint_printk)
2826                 static_key_enable(&tracepoint_printk_key.key);
2827         else
2828                 static_key_disable(&tracepoint_printk_key.key);
2829
2830  out:
2831         mutex_unlock(&tracepoint_printk_mutex);
2832
2833         return ret;
2834 }
2835
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838         if (static_key_false(&tracepoint_printk_key.key))
2839                 output_printk(fbuffer);
2840
2841         if (static_branch_unlikely(&trace_event_exports_enabled))
2842                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2843         event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2844                                     fbuffer->event, fbuffer->entry,
2845                                     fbuffer->flags, fbuffer->pc, fbuffer->regs);
2846 }
2847 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2848
2849 /*
2850  * Skip 3:
2851  *
2852  *   trace_buffer_unlock_commit_regs()
2853  *   trace_event_buffer_commit()
2854  *   trace_event_raw_event_xxx()
2855  */
2856 # define STACK_SKIP 3
2857
2858 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2859                                      struct trace_buffer *buffer,
2860                                      struct ring_buffer_event *event,
2861                                      unsigned long flags, int pc,
2862                                      struct pt_regs *regs)
2863 {
2864         __buffer_unlock_commit(buffer, event);
2865
2866         /*
2867          * If regs is not set, then skip the necessary functions.
2868          * Note, we can still get here via blktrace, wakeup tracer
2869          * and mmiotrace, but that's ok if they lose a function or
2870          * two. They are not that meaningful.
2871          */
2872         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2873         ftrace_trace_userstack(buffer, flags, pc);
2874 }
2875
2876 /*
2877  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2878  */
2879 void
2880 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2881                                    struct ring_buffer_event *event)
2882 {
2883         __buffer_unlock_commit(buffer, event);
2884 }
2885
2886 void
2887 trace_function(struct trace_array *tr,
2888                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2889                int pc)
2890 {
2891         struct trace_event_call *call = &event_function;
2892         struct trace_buffer *buffer = tr->array_buffer.buffer;
2893         struct ring_buffer_event *event;
2894         struct ftrace_entry *entry;
2895
2896         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2897                                             flags, pc);
2898         if (!event)
2899                 return;
2900         entry   = ring_buffer_event_data(event);
2901         entry->ip                       = ip;
2902         entry->parent_ip                = parent_ip;
2903
2904         if (!call_filter_check_discard(call, entry, buffer, event)) {
2905                 if (static_branch_unlikely(&trace_function_exports_enabled))
2906                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2907                 __buffer_unlock_commit(buffer, event);
2908         }
2909 }
2910
2911 #ifdef CONFIG_STACKTRACE
2912
2913 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2914 #define FTRACE_KSTACK_NESTING   4
2915
2916 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2917
2918 struct ftrace_stack {
2919         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2920 };
2921
2922
2923 struct ftrace_stacks {
2924         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2925 };
2926
2927 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2928 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2929
2930 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2931                                  unsigned long flags,
2932                                  int skip, int pc, struct pt_regs *regs)
2933 {
2934         struct trace_event_call *call = &event_kernel_stack;
2935         struct ring_buffer_event *event;
2936         unsigned int size, nr_entries;
2937         struct ftrace_stack *fstack;
2938         struct stack_entry *entry;
2939         int stackidx;
2940
2941         /*
2942          * Add one, for this function and the call to stack_trace_save().
2943          * If regs is set, then these functions will not be in the way.
2944          */
2945 #ifndef CONFIG_UNWINDER_ORC
2946         if (!regs)
2947                 skip++;
2948 #endif
2949
2950         preempt_disable_notrace();
2951
2952         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2953
2954         /* This should never happen. If it does, yell once and skip */
2955         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2956                 goto out;
2957
2958         /*
2959          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2960          * interrupt will either see the value pre increment or post
2961          * increment. If the interrupt happens pre increment it will have
2962          * restored the counter when it returns.  We just need a barrier to
2963          * keep gcc from moving things around.
2964          */
2965         barrier();
2966
2967         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2968         size = ARRAY_SIZE(fstack->calls);
2969
2970         if (regs) {
2971                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2972                                                    size, skip);
2973         } else {
2974                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2975         }
2976
2977         size = nr_entries * sizeof(unsigned long);
2978         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2979                                             sizeof(*entry) + size, flags, pc);
2980         if (!event)
2981                 goto out;
2982         entry = ring_buffer_event_data(event);
2983
2984         memcpy(&entry->caller, fstack->calls, size);
2985         entry->size = nr_entries;
2986
2987         if (!call_filter_check_discard(call, entry, buffer, event))
2988                 __buffer_unlock_commit(buffer, event);
2989
2990  out:
2991         /* Again, don't let gcc optimize things here */
2992         barrier();
2993         __this_cpu_dec(ftrace_stack_reserve);
2994         preempt_enable_notrace();
2995
2996 }
2997
2998 static inline void ftrace_trace_stack(struct trace_array *tr,
2999                                       struct trace_buffer *buffer,
3000                                       unsigned long flags,
3001                                       int skip, int pc, struct pt_regs *regs)
3002 {
3003         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3004                 return;
3005
3006         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3007 }
3008
3009 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3010                    int pc)
3011 {
3012         struct trace_buffer *buffer = tr->array_buffer.buffer;
3013
3014         if (rcu_is_watching()) {
3015                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3016                 return;
3017         }
3018
3019         /*
3020          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3021          * but if the above rcu_is_watching() failed, then the NMI
3022          * triggered someplace critical, and rcu_irq_enter() should
3023          * not be called from NMI.
3024          */
3025         if (unlikely(in_nmi()))
3026                 return;
3027
3028         rcu_irq_enter_irqson();
3029         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3030         rcu_irq_exit_irqson();
3031 }
3032
3033 /**
3034  * trace_dump_stack - record a stack back trace in the trace buffer
3035  * @skip: Number of functions to skip (helper handlers)
3036  */
3037 void trace_dump_stack(int skip)
3038 {
3039         unsigned long flags;
3040
3041         if (tracing_disabled || tracing_selftest_running)
3042                 return;
3043
3044         local_save_flags(flags);
3045
3046 #ifndef CONFIG_UNWINDER_ORC
3047         /* Skip 1 to skip this function. */
3048         skip++;
3049 #endif
3050         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3051                              flags, skip, preempt_count(), NULL);
3052 }
3053 EXPORT_SYMBOL_GPL(trace_dump_stack);
3054
3055 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3056 static DEFINE_PER_CPU(int, user_stack_count);
3057
3058 static void
3059 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3060 {
3061         struct trace_event_call *call = &event_user_stack;
3062         struct ring_buffer_event *event;
3063         struct userstack_entry *entry;
3064
3065         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3066                 return;
3067
3068         /*
3069          * NMIs cannot handle page faults, even with fixups.
3070          * Saving the user stack can (and often does) fault.
3071          */
3072         if (unlikely(in_nmi()))
3073                 return;
3074
3075         /*
3076          * prevent recursion, since the user stack tracing may
3077          * trigger other kernel events.
3078          */
3079         preempt_disable();
3080         if (__this_cpu_read(user_stack_count))
3081                 goto out;
3082
3083         __this_cpu_inc(user_stack_count);
3084
3085         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3086                                             sizeof(*entry), flags, pc);
3087         if (!event)
3088                 goto out_drop_count;
3089         entry   = ring_buffer_event_data(event);
3090
3091         entry->tgid             = current->tgid;
3092         memset(&entry->caller, 0, sizeof(entry->caller));
3093
3094         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3095         if (!call_filter_check_discard(call, entry, buffer, event))
3096                 __buffer_unlock_commit(buffer, event);
3097
3098  out_drop_count:
3099         __this_cpu_dec(user_stack_count);
3100  out:
3101         preempt_enable();
3102 }
3103 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3104 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3105                                    unsigned long flags, int pc)
3106 {
3107 }
3108 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3109
3110 #endif /* CONFIG_STACKTRACE */
3111
3112 /* created for use with alloc_percpu */
3113 struct trace_buffer_struct {
3114         int nesting;
3115         char buffer[4][TRACE_BUF_SIZE];
3116 };
3117
3118 static struct trace_buffer_struct *trace_percpu_buffer;
3119
3120 /*
3121  * This allows for lockless recording. If we're nested too deeply (more
3122  * than four contexts), this returns NULL.
3123  */
3124 static char *get_trace_buf(void)
3125 {
3126         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3127
3128         if (!buffer || buffer->nesting >= 4)
3129                 return NULL;
3130
3131         buffer->nesting++;
3132
3133         /* Interrupts must see nesting incremented before we use the buffer */
3134         barrier();
3135         return &buffer->buffer[buffer->nesting][0];
3136 }
3137
3138 static void put_trace_buf(void)
3139 {
3140         /* Don't let the decrement of nesting leak before this */
3141         barrier();
3142         this_cpu_dec(trace_percpu_buffer->nesting);
3143 }
3144
3145 static int alloc_percpu_trace_buffer(void)
3146 {
3147         struct trace_buffer_struct *buffers;
3148
3149         if (trace_percpu_buffer)
3150                 return 0;
3151
3152         buffers = alloc_percpu(struct trace_buffer_struct);
3153         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3154                 return -ENOMEM;
3155
3156         trace_percpu_buffer = buffers;
3157         return 0;
3158 }
3159
3160 static int buffers_allocated;
3161
3162 void trace_printk_init_buffers(void)
3163 {
3164         if (buffers_allocated)
3165                 return;
3166
3167         if (alloc_percpu_trace_buffer())
3168                 return;
3169
3170         /* trace_printk() is for debug use only. Don't use it in production. */
3171
3172         pr_warn("\n");
3173         pr_warn("**********************************************************\n");
3174         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3175         pr_warn("**                                                      **\n");
3176         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3177         pr_warn("**                                                      **\n");
3178         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3179         pr_warn("** unsafe for production use.                           **\n");
3180         pr_warn("**                                                      **\n");
3181         pr_warn("** If you see this message and you are not debugging    **\n");
3182         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3183         pr_warn("**                                                      **\n");
3184         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3185         pr_warn("**********************************************************\n");
3186
3187         /* Expand the buffers to set size */
3188         tracing_update_buffers();
3189
3190         buffers_allocated = 1;
3191
3192         /*
3193          * trace_printk_init_buffers() can be called by modules.
3194          * If that happens, then we need to start cmdline recording
3195          * directly here. If the global_trace.buffer is already
3196          * allocated here, then this was called by module code.
3197          */
3198         if (global_trace.array_buffer.buffer)
3199                 tracing_start_cmdline_record();
3200 }
3201 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3202
3203 void trace_printk_start_comm(void)
3204 {
3205         /* Start tracing comms if trace printk is set */
3206         if (!buffers_allocated)
3207                 return;
3208         tracing_start_cmdline_record();
3209 }
3210
3211 static void trace_printk_start_stop_comm(int enabled)
3212 {
3213         if (!buffers_allocated)
3214                 return;
3215
3216         if (enabled)
3217                 tracing_start_cmdline_record();
3218         else
3219                 tracing_stop_cmdline_record();
3220 }
3221
3222 /**
3223  * trace_vbprintk - write binary msg to tracing buffer
3224  * @ip:    The address of the caller
3225  * @fmt:   The string format to write to the buffer
3226  * @args:  Arguments for @fmt
3227  */
3228 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3229 {
3230         struct trace_event_call *call = &event_bprint;
3231         struct ring_buffer_event *event;
3232         struct trace_buffer *buffer;
3233         struct trace_array *tr = &global_trace;
3234         struct bprint_entry *entry;
3235         unsigned long flags;
3236         char *tbuffer;
3237         int len = 0, size, pc;
3238
3239         if (unlikely(tracing_selftest_running || tracing_disabled))
3240                 return 0;
3241
3242         /* Don't pollute graph traces with trace_vprintk internals */
3243         pause_graph_tracing();
3244
3245         pc = preempt_count();
3246         preempt_disable_notrace();
3247
3248         tbuffer = get_trace_buf();
3249         if (!tbuffer) {
3250                 len = 0;
3251                 goto out_nobuffer;
3252         }
3253
3254         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3255
3256         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3257                 goto out_put;
3258
3259         local_save_flags(flags);
3260         size = sizeof(*entry) + sizeof(u32) * len;
3261         buffer = tr->array_buffer.buffer;
3262         ring_buffer_nest_start(buffer);
3263         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3264                                             flags, pc);
3265         if (!event)
3266                 goto out;
3267         entry = ring_buffer_event_data(event);
3268         entry->ip                       = ip;
3269         entry->fmt                      = fmt;
3270
3271         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3272         if (!call_filter_check_discard(call, entry, buffer, event)) {
3273                 __buffer_unlock_commit(buffer, event);
3274                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3275         }
3276
3277 out:
3278         ring_buffer_nest_end(buffer);
3279 out_put:
3280         put_trace_buf();
3281
3282 out_nobuffer:
3283         preempt_enable_notrace();
3284         unpause_graph_tracing();
3285
3286         return len;
3287 }
3288 EXPORT_SYMBOL_GPL(trace_vbprintk);
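
/*
 * Illustrative sketch (not referenced by the code here; the identifiers in
 * the example are hypothetical): a trace_printk() call with a compile-time
 * constant format string and arguments is typically recorded through this
 * binary path, storing only the format pointer plus the binary arguments:
 *
 *	trace_printk("read %d bytes from %s\n", ret, name);
 *
 * A non-constant format instead falls back to trace_vprintk() below, which
 * copies the fully formatted string into the ring buffer.
 */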
3289
3290 __printf(3, 0)
3291 static int
3292 __trace_array_vprintk(struct trace_buffer *buffer,
3293                       unsigned long ip, const char *fmt, va_list args)
3294 {
3295         struct trace_event_call *call = &event_print;
3296         struct ring_buffer_event *event;
3297         int len = 0, size, pc;
3298         struct print_entry *entry;
3299         unsigned long flags;
3300         char *tbuffer;
3301
3302         if (tracing_disabled || tracing_selftest_running)
3303                 return 0;
3304
3305         /* Don't pollute graph traces with trace_vprintk internals */
3306         pause_graph_tracing();
3307
3308         pc = preempt_count();
3309         preempt_disable_notrace();
3310
3311
3312         tbuffer = get_trace_buf();
3313         if (!tbuffer) {
3314                 len = 0;
3315                 goto out_nobuffer;
3316         }
3317
3318         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3319
3320         local_save_flags(flags);
3321         size = sizeof(*entry) + len + 1;
3322         ring_buffer_nest_start(buffer);
3323         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3324                                             flags, pc);
3325         if (!event)
3326                 goto out;
3327         entry = ring_buffer_event_data(event);
3328         entry->ip = ip;
3329
3330         memcpy(&entry->buf, tbuffer, len + 1);
3331         if (!call_filter_check_discard(call, entry, buffer, event)) {
3332                 __buffer_unlock_commit(buffer, event);
3333                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3334         }
3335
3336 out:
3337         ring_buffer_nest_end(buffer);
3338         put_trace_buf();
3339
3340 out_nobuffer:
3341         preempt_enable_notrace();
3342         unpause_graph_tracing();
3343
3344         return len;
3345 }
3346
3347 __printf(3, 0)
3348 int trace_array_vprintk(struct trace_array *tr,
3349                         unsigned long ip, const char *fmt, va_list args)
3350 {
3351         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3352 }
3353
3354 /**
3355  * trace_array_printk - Print a message to a specific instance
3356  * @tr: The instance trace_array descriptor
3357  * @ip: The instruction pointer that this is called from.
3358  * @fmt: The format to print (printf format)
3359  *
3360  * If a subsystem sets up its own instance, it has the right to
3361  * printk strings into its tracing instance buffer using this
3362  * function. Note, this function will not write into the top level
3363  * buffer (use trace_printk() for that), as the top level buffer
3364  * should only hold events that can be individually disabled.
3365  * trace_printk() is only used for debugging a kernel, and should
3366  * never be incorporated into normal use.
3367  *
3368  * trace_array_printk() can be used, as it will not add noise to the
3369  * top level tracing buffer.
3370  *
3371  * Note, trace_array_init_printk() must be called on @tr before this
3372  * can be used.
3373  */
3374 __printf(3, 0)
3375 int trace_array_printk(struct trace_array *tr,
3376                        unsigned long ip, const char *fmt, ...)
3377 {
3378         int ret;
3379         va_list ap;
3380
3381         if (!tr)
3382                 return -ENOENT;
3383
3384         /* This is only allowed for created instances */
3385         if (tr == &global_trace)
3386                 return 0;
3387
3388         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3389                 return 0;
3390
3391         va_start(ap, fmt);
3392         ret = trace_array_vprintk(tr, ip, fmt, ap);
3393         va_end(ap);
3394         return ret;
3395 }
3396 EXPORT_SYMBOL_GPL(trace_array_printk);
3397
3398 /**
3399  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3400  * @tr: The trace array to initialize the buffers for
3401  *
3402  * As trace_array_printk() only writes into instances, it is OK to
3403  * have in the kernel (unlike trace_printk()). This needs to be called
3404  * before trace_array_printk() can be used on a trace_array.
3405  */
3406 int trace_array_init_printk(struct trace_array *tr)
3407 {
3408         if (!tr)
3409                 return -ENOENT;
3410
3411         /* This is only allowed for created instances */
3412         if (tr == &global_trace)
3413                 return -EINVAL;
3414
3415         return alloc_percpu_trace_buffer();
3416 }
3417 EXPORT_SYMBOL_GPL(trace_array_init_printk);
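
/*
 * Illustrative sketch (the instance name and variables are hypothetical):
 * a module that owns its own tracing instance would typically pair the
 * two calls above like this:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "queue depth %d\n", depth);
 *
 * and drop its reference with trace_array_put(tr) when it is done.
 */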
3418
3419 __printf(3, 4)
3420 int trace_array_printk_buf(struct trace_buffer *buffer,
3421                            unsigned long ip, const char *fmt, ...)
3422 {
3423         int ret;
3424         va_list ap;
3425
3426         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3427                 return 0;
3428
3429         va_start(ap, fmt);
3430         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3431         va_end(ap);
3432         return ret;
3433 }
3434
3435 __printf(2, 0)
3436 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3437 {
3438         return trace_array_vprintk(&global_trace, ip, fmt, args);
3439 }
3440 EXPORT_SYMBOL_GPL(trace_vprintk);
3441
3442 static void trace_iterator_increment(struct trace_iterator *iter)
3443 {
3444         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3445
3446         iter->idx++;
3447         if (buf_iter)
3448                 ring_buffer_iter_advance(buf_iter);
3449 }
3450
3451 static struct trace_entry *
3452 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3453                 unsigned long *lost_events)
3454 {
3455         struct ring_buffer_event *event;
3456         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3457
3458         if (buf_iter) {
3459                 event = ring_buffer_iter_peek(buf_iter, ts);
3460                 if (lost_events)
3461                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3462                                 (unsigned long)-1 : 0;
3463         } else {
3464                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3465                                          lost_events);
3466         }
3467
3468         if (event) {
3469                 iter->ent_size = ring_buffer_event_length(event);
3470                 return ring_buffer_event_data(event);
3471         }
3472         iter->ent_size = 0;
3473         return NULL;
3474 }
3475
3476 static struct trace_entry *
3477 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3478                   unsigned long *missing_events, u64 *ent_ts)
3479 {
3480         struct trace_buffer *buffer = iter->array_buffer->buffer;
3481         struct trace_entry *ent, *next = NULL;
3482         unsigned long lost_events = 0, next_lost = 0;
3483         int cpu_file = iter->cpu_file;
3484         u64 next_ts = 0, ts;
3485         int next_cpu = -1;
3486         int next_size = 0;
3487         int cpu;
3488
3489         /*
3490          * If we are in a per_cpu trace file, don't bother iterating over
3491          * all CPUs; just peek at that CPU directly.
3492          */
3493         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3494                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3495                         return NULL;
3496                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3497                 if (ent_cpu)
3498                         *ent_cpu = cpu_file;
3499
3500                 return ent;
3501         }
3502
3503         for_each_tracing_cpu(cpu) {
3504
3505                 if (ring_buffer_empty_cpu(buffer, cpu))
3506                         continue;
3507
3508                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3509
3510                 /*
3511                  * Pick the entry with the smallest timestamp:
3512                  */
3513                 if (ent && (!next || ts < next_ts)) {
3514                         next = ent;
3515                         next_cpu = cpu;
3516                         next_ts = ts;
3517                         next_lost = lost_events;
3518                         next_size = iter->ent_size;
3519                 }
3520         }
3521
3522         iter->ent_size = next_size;
3523
3524         if (ent_cpu)
3525                 *ent_cpu = next_cpu;
3526
3527         if (ent_ts)
3528                 *ent_ts = next_ts;
3529
3530         if (missing_events)
3531                 *missing_events = next_lost;
3532
3533         return next;
3534 }
3535
3536 #define STATIC_TEMP_BUF_SIZE    128
3537 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3538
3539 /* Find the next real entry, without updating the iterator itself */
3540 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3541                                           int *ent_cpu, u64 *ent_ts)
3542 {
3543         /* __find_next_entry will reset ent_size */
3544         int ent_size = iter->ent_size;
3545         struct trace_entry *entry;
3546
3547         /*
3548          * If called from ftrace_dump(), then the iter->temp buffer
3549          * will be the static_temp_buf and not created from kmalloc.
3550          * If the entry size is greater than the buffer, we cannot
3551          * save it. Just return NULL in that case. This is only
3552          * used to add markers when two consecutive events' time
3553          * stamps have a large delta. See trace_print_lat_context().
3554          */
3555         if (iter->temp == static_temp_buf &&
3556             STATIC_TEMP_BUF_SIZE < ent_size)
3557                 return NULL;
3558
3559         /*
3560          * __find_next_entry() may call peek_next_entry(), which may
3561          * call ring_buffer_peek(), and that can leave the contents of
3562          * iter->ent undefined. Copy iter->ent now.
3563          */
3564         if (iter->ent && iter->ent != iter->temp) {
3565                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3566                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3567                         kfree(iter->temp);
3568                         iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3569                         if (!iter->temp)
3570                                 return NULL;
3571                 }
3572                 memcpy(iter->temp, iter->ent, iter->ent_size);
3573                 iter->temp_size = iter->ent_size;
3574                 iter->ent = iter->temp;
3575         }
3576         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3577         /* Put back the original ent_size */
3578         iter->ent_size = ent_size;
3579
3580         return entry;
3581 }
3582
3583 /* Find the next real entry, and increment the iterator to the next entry */
3584 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3585 {
3586         iter->ent = __find_next_entry(iter, &iter->cpu,
3587                                       &iter->lost_events, &iter->ts);
3588
3589         if (iter->ent)
3590                 trace_iterator_increment(iter);
3591
3592         return iter->ent ? iter : NULL;
3593 }
3594
3595 static void trace_consume(struct trace_iterator *iter)
3596 {
3597         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3598                             &iter->lost_events);
3599 }
3600
3601 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3602 {
3603         struct trace_iterator *iter = m->private;
3604         int i = (int)*pos;
3605         void *ent;
3606
3607         WARN_ON_ONCE(iter->leftover);
3608
3609         (*pos)++;
3610
3611         /* can't go backwards */
3612         if (iter->idx > i)
3613                 return NULL;
3614
3615         if (iter->idx < 0)
3616                 ent = trace_find_next_entry_inc(iter);
3617         else
3618                 ent = iter;
3619
3620         while (ent && iter->idx < i)
3621                 ent = trace_find_next_entry_inc(iter);
3622
3623         iter->pos = *pos;
3624
3625         return ent;
3626 }
3627
3628 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3629 {
3630         struct ring_buffer_iter *buf_iter;
3631         unsigned long entries = 0;
3632         u64 ts;
3633
3634         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3635
3636         buf_iter = trace_buffer_iter(iter, cpu);
3637         if (!buf_iter)
3638                 return;
3639
3640         ring_buffer_iter_reset(buf_iter);
3641
3642         /*
3643          * With the max latency tracers, it is possible that a reset
3644          * never took place on a CPU. This is evident from the
3645          * timestamp being before the start of the buffer.
3646          */
3647         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3648                 if (ts >= iter->array_buffer->time_start)
3649                         break;
3650                 entries++;
3651                 ring_buffer_iter_advance(buf_iter);
3652         }
3653
3654         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3655 }
3656
3657 /*
3658  * The current tracer is copied to avoid taking a global lock
3659  * all around.
3660  */
3661 static void *s_start(struct seq_file *m, loff_t *pos)
3662 {
3663         struct trace_iterator *iter = m->private;
3664         struct trace_array *tr = iter->tr;
3665         int cpu_file = iter->cpu_file;
3666         void *p = NULL;
3667         loff_t l = 0;
3668         int cpu;
3669
3670         /*
3671          * copy the tracer to avoid using a global lock all around.
3672          * iter->trace is a copy of current_trace; the pointer to the
3673          * name may be used instead of a strcmp(), as iter->trace->name
3674          * will point to the same string as current_trace->name.
3675          */
3676         mutex_lock(&trace_types_lock);
3677         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3678                 *iter->trace = *tr->current_trace;
3679         mutex_unlock(&trace_types_lock);
3680
3681 #ifdef CONFIG_TRACER_MAX_TRACE
3682         if (iter->snapshot && iter->trace->use_max_tr)
3683                 return ERR_PTR(-EBUSY);
3684 #endif
3685
3686         if (!iter->snapshot)
3687                 atomic_inc(&trace_record_taskinfo_disabled);
3688
3689         if (*pos != iter->pos) {
3690                 iter->ent = NULL;
3691                 iter->cpu = 0;
3692                 iter->idx = -1;
3693
3694                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3695                         for_each_tracing_cpu(cpu)
3696                                 tracing_iter_reset(iter, cpu);
3697                 } else
3698                         tracing_iter_reset(iter, cpu_file);
3699
3700                 iter->leftover = 0;
3701                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3702                         ;
3703
3704         } else {
3705                 /*
3706                  * If we overflowed the seq_file before, then we want
3707                  * to just reuse the trace_seq buffer again.
3708                  */
3709                 if (iter->leftover)
3710                         p = iter;
3711                 else {
3712                         l = *pos - 1;
3713                         p = s_next(m, p, &l);
3714                 }
3715         }
3716
3717         trace_event_read_lock();
3718         trace_access_lock(cpu_file);
3719         return p;
3720 }
3721
3722 static void s_stop(struct seq_file *m, void *p)
3723 {
3724         struct trace_iterator *iter = m->private;
3725
3726 #ifdef CONFIG_TRACER_MAX_TRACE
3727         if (iter->snapshot && iter->trace->use_max_tr)
3728                 return;
3729 #endif
3730
3731         if (!iter->snapshot)
3732                 atomic_dec(&trace_record_taskinfo_disabled);
3733
3734         trace_access_unlock(iter->cpu_file);
3735         trace_event_read_unlock();
3736 }
3737
3738 static void
3739 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3740                       unsigned long *entries, int cpu)
3741 {
3742         unsigned long count;
3743
3744         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3745         /*
3746          * If this buffer has skipped entries, then we hold all
3747          * entries for the trace and we need to ignore the
3748          * ones before the time stamp.
3749          */
3750         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3751                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3752                 /* total is the same as the entries */
3753                 *total = count;
3754         } else
3755                 *total = count +
3756                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3757         *entries = count;
3758 }
3759
3760 static void
3761 get_total_entries(struct array_buffer *buf,
3762                   unsigned long *total, unsigned long *entries)
3763 {
3764         unsigned long t, e;
3765         int cpu;
3766
3767         *total = 0;
3768         *entries = 0;
3769
3770         for_each_tracing_cpu(cpu) {
3771                 get_total_entries_cpu(buf, &t, &e, cpu);
3772                 *total += t;
3773                 *entries += e;
3774         }
3775 }
3776
3777 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3778 {
3779         unsigned long total, entries;
3780
3781         if (!tr)
3782                 tr = &global_trace;
3783
3784         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3785
3786         return entries;
3787 }
3788
3789 unsigned long trace_total_entries(struct trace_array *tr)
3790 {
3791         unsigned long total, entries;
3792
3793         if (!tr)
3794                 tr = &global_trace;
3795
3796         get_total_entries(&tr->array_buffer, &total, &entries);
3797
3798         return entries;
3799 }
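
/*
 * Minimal usage sketch: callers that just want an event count can do
 * (passing NULL selects the top level global_trace, as handled above):
 *
 *	unsigned long all  = trace_total_entries(NULL);
 *	unsigned long cpu0 = trace_total_entries_cpu(NULL, 0);
 */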
3800
3801 static void print_lat_help_header(struct seq_file *m)
3802 {
3803         seq_puts(m, "#                  _------=> CPU#            \n"
3804                     "#                 / _-----=> irqs-off        \n"
3805                     "#                | / _----=> need-resched    \n"
3806                     "#                || / _---=> hardirq/softirq \n"
3807                     "#                ||| / _--=> preempt-depth   \n"
3808                     "#                |||| /     delay            \n"
3809                     "#  cmd     pid   ||||| time  |   caller      \n"
3810                     "#     \\   /      |||||  \\    |   /         \n");
3811 }
3812
3813 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3814 {
3815         unsigned long total;
3816         unsigned long entries;
3817
3818         get_total_entries(buf, &total, &entries);
3819         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3820                    entries, total, num_online_cpus());
3821         seq_puts(m, "#\n");
3822 }
3823
3824 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3825                                    unsigned int flags)
3826 {
3827         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3828
3829         print_event_info(buf, m);
3830
3831         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3832         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3833 }
3834
3835 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3836                                        unsigned int flags)
3837 {
3838         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3839         const char *space = "          ";
3840         int prec = tgid ? 10 : 2;
3841
3842         print_event_info(buf, m);
3843
3844         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3845         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3846         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3847         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3848         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3849         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3850         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3851 }
3852
3853 void
3854 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3855 {
3856         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3857         struct array_buffer *buf = iter->array_buffer;
3858         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3859         struct tracer *type = iter->trace;
3860         unsigned long entries;
3861         unsigned long total;
3862         const char *name = "preemption";
3863
3864         name = type->name;
3865
3866         get_total_entries(buf, &total, &entries);
3867
3868         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3869                    name, UTS_RELEASE);
3870         seq_puts(m, "# -----------------------------------"
3871                  "---------------------------------\n");
3872         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3873                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3874                    nsecs_to_usecs(data->saved_latency),
3875                    entries,
3876                    total,
3877                    buf->cpu,
3878 #if defined(CONFIG_PREEMPT_NONE)
3879                    "server",
3880 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3881                    "desktop",
3882 #elif defined(CONFIG_PREEMPT)
3883                    "preempt",
3884 #elif defined(CONFIG_PREEMPT_RT)
3885                    "preempt_rt",
3886 #else
3887                    "unknown",
3888 #endif
3889                    /* These are reserved for later use */
3890                    0, 0, 0, 0);
3891 #ifdef CONFIG_SMP
3892         seq_printf(m, " #P:%d)\n", num_online_cpus());
3893 #else
3894         seq_puts(m, ")\n");
3895 #endif
3896         seq_puts(m, "#    -----------------\n");
3897         seq_printf(m, "#    | task: %.16s-%d "
3898                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3899                    data->comm, data->pid,
3900                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3901                    data->policy, data->rt_priority);
3902         seq_puts(m, "#    -----------------\n");
3903
3904         if (data->critical_start) {
3905                 seq_puts(m, "#  => started at: ");
3906                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3907                 trace_print_seq(m, &iter->seq);
3908                 seq_puts(m, "\n#  => ended at:   ");
3909                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3910                 trace_print_seq(m, &iter->seq);
3911                 seq_puts(m, "\n#\n");
3912         }
3913
3914         seq_puts(m, "#\n");
3915 }
3916
3917 static void test_cpu_buff_start(struct trace_iterator *iter)
3918 {
3919         struct trace_seq *s = &iter->seq;
3920         struct trace_array *tr = iter->tr;
3921
3922         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3923                 return;
3924
3925         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3926                 return;
3927
3928         if (cpumask_available(iter->started) &&
3929             cpumask_test_cpu(iter->cpu, iter->started))
3930                 return;
3931
3932         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3933                 return;
3934
3935         if (cpumask_available(iter->started))
3936                 cpumask_set_cpu(iter->cpu, iter->started);
3937
3938         /* Don't print started cpu buffer for the first entry of the trace */
3939         if (iter->idx > 1)
3940                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3941                                 iter->cpu);
3942 }
3943
3944 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3945 {
3946         struct trace_array *tr = iter->tr;
3947         struct trace_seq *s = &iter->seq;
3948         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3949         struct trace_entry *entry;
3950         struct trace_event *event;
3951
3952         entry = iter->ent;
3953
3954         test_cpu_buff_start(iter);
3955
3956         event = ftrace_find_event(entry->type);
3957
3958         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3959                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3960                         trace_print_lat_context(iter);
3961                 else
3962                         trace_print_context(iter);
3963         }
3964
3965         if (trace_seq_has_overflowed(s))
3966                 return TRACE_TYPE_PARTIAL_LINE;
3967
3968         if (event)
3969                 return event->funcs->trace(iter, sym_flags, event);
3970
3971         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3972
3973         return trace_handle_return(s);
3974 }
3975
3976 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3977 {
3978         struct trace_array *tr = iter->tr;
3979         struct trace_seq *s = &iter->seq;
3980         struct trace_entry *entry;
3981         struct trace_event *event;
3982
3983         entry = iter->ent;
3984
3985         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3986                 trace_seq_printf(s, "%d %d %llu ",
3987                                  entry->pid, iter->cpu, iter->ts);
3988
3989         if (trace_seq_has_overflowed(s))
3990                 return TRACE_TYPE_PARTIAL_LINE;
3991
3992         event = ftrace_find_event(entry->type);
3993         if (event)
3994                 return event->funcs->raw(iter, 0, event);
3995
3996         trace_seq_printf(s, "%d ?\n", entry->type);
3997
3998         return trace_handle_return(s);
3999 }
4000
4001 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4002 {
4003         struct trace_array *tr = iter->tr;
4004         struct trace_seq *s = &iter->seq;
4005         unsigned char newline = '\n';
4006         struct trace_entry *entry;
4007         struct trace_event *event;
4008
4009         entry = iter->ent;
4010
4011         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4012                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4013                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4014                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4015                 if (trace_seq_has_overflowed(s))
4016                         return TRACE_TYPE_PARTIAL_LINE;
4017         }
4018
4019         event = ftrace_find_event(entry->type);
4020         if (event) {
4021                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4022                 if (ret != TRACE_TYPE_HANDLED)
4023                         return ret;
4024         }
4025
4026         SEQ_PUT_FIELD(s, newline);
4027
4028         return trace_handle_return(s);
4029 }
4030
4031 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4032 {
4033         struct trace_array *tr = iter->tr;
4034         struct trace_seq *s = &iter->seq;
4035         struct trace_entry *entry;
4036         struct trace_event *event;
4037
4038         entry = iter->ent;
4039
4040         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4041                 SEQ_PUT_FIELD(s, entry->pid);
4042                 SEQ_PUT_FIELD(s, iter->cpu);
4043                 SEQ_PUT_FIELD(s, iter->ts);
4044                 if (trace_seq_has_overflowed(s))
4045                         return TRACE_TYPE_PARTIAL_LINE;
4046         }
4047
4048         event = ftrace_find_event(entry->type);
4049         return event ? event->funcs->binary(iter, 0, event) :
4050                 TRACE_TYPE_HANDLED;
4051 }
4052
4053 int trace_empty(struct trace_iterator *iter)
4054 {
4055         struct ring_buffer_iter *buf_iter;
4056         int cpu;
4057
4058         /* If we are looking at one CPU buffer, only check that one */
4059         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4060                 cpu = iter->cpu_file;
4061                 buf_iter = trace_buffer_iter(iter, cpu);
4062                 if (buf_iter) {
4063                         if (!ring_buffer_iter_empty(buf_iter))
4064                                 return 0;
4065                 } else {
4066                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4067                                 return 0;
4068                 }
4069                 return 1;
4070         }
4071
4072         for_each_tracing_cpu(cpu) {
4073                 buf_iter = trace_buffer_iter(iter, cpu);
4074                 if (buf_iter) {
4075                         if (!ring_buffer_iter_empty(buf_iter))
4076                                 return 0;
4077                 } else {
4078                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4079                                 return 0;
4080                 }
4081         }
4082
4083         return 1;
4084 }
4085
4086 /*  Called with trace_event_read_lock() held. */
4087 enum print_line_t print_trace_line(struct trace_iterator *iter)
4088 {
4089         struct trace_array *tr = iter->tr;
4090         unsigned long trace_flags = tr->trace_flags;
4091         enum print_line_t ret;
4092
4093         if (iter->lost_events) {
4094                 if (iter->lost_events == (unsigned long)-1)
4095                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4096                                          iter->cpu);
4097                 else
4098                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4099                                          iter->cpu, iter->lost_events);
4100                 if (trace_seq_has_overflowed(&iter->seq))
4101                         return TRACE_TYPE_PARTIAL_LINE;
4102         }
4103
4104         if (iter->trace && iter->trace->print_line) {
4105                 ret = iter->trace->print_line(iter);
4106                 if (ret != TRACE_TYPE_UNHANDLED)
4107                         return ret;
4108         }
4109
4110         if (iter->ent->type == TRACE_BPUTS &&
4111                         trace_flags & TRACE_ITER_PRINTK &&
4112                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4113                 return trace_print_bputs_msg_only(iter);
4114
4115         if (iter->ent->type == TRACE_BPRINT &&
4116                         trace_flags & TRACE_ITER_PRINTK &&
4117                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4118                 return trace_print_bprintk_msg_only(iter);
4119
4120         if (iter->ent->type == TRACE_PRINT &&
4121                         trace_flags & TRACE_ITER_PRINTK &&
4122                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4123                 return trace_print_printk_msg_only(iter);
4124
4125         if (trace_flags & TRACE_ITER_BIN)
4126                 return print_bin_fmt(iter);
4127
4128         if (trace_flags & TRACE_ITER_HEX)
4129                 return print_hex_fmt(iter);
4130
4131         if (trace_flags & TRACE_ITER_RAW)
4132                 return print_raw_fmt(iter);
4133
4134         return print_trace_fmt(iter);
4135 }
4136
4137 void trace_latency_header(struct seq_file *m)
4138 {
4139         struct trace_iterator *iter = m->private;
4140         struct trace_array *tr = iter->tr;
4141
4142         /* print nothing if the buffers are empty */
4143         if (trace_empty(iter))
4144                 return;
4145
4146         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4147                 print_trace_header(m, iter);
4148
4149         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4150                 print_lat_help_header(m);
4151 }
4152
4153 void trace_default_header(struct seq_file *m)
4154 {
4155         struct trace_iterator *iter = m->private;
4156         struct trace_array *tr = iter->tr;
4157         unsigned long trace_flags = tr->trace_flags;
4158
4159         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4160                 return;
4161
4162         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4163                 /* print nothing if the buffers are empty */
4164                 if (trace_empty(iter))
4165                         return;
4166                 print_trace_header(m, iter);
4167                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4168                         print_lat_help_header(m);
4169         } else {
4170                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4171                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4172                                 print_func_help_header_irq(iter->array_buffer,
4173                                                            m, trace_flags);
4174                         else
4175                                 print_func_help_header(iter->array_buffer, m,
4176                                                        trace_flags);
4177                 }
4178         }
4179 }
4180
4181 static void test_ftrace_alive(struct seq_file *m)
4182 {
4183         if (!ftrace_is_dead())
4184                 return;
4185         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4186                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4187 }
4188
4189 #ifdef CONFIG_TRACER_MAX_TRACE
4190 static void show_snapshot_main_help(struct seq_file *m)
4191 {
4192         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4193                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4194                     "#                      Takes a snapshot of the main buffer.\n"
4195                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4196                     "#                      (Doesn't have to be '2'; works with any number that\n"
4197                     "#                       is not a '0' or '1')\n");
4198 }
4199
4200 static void show_snapshot_percpu_help(struct seq_file *m)
4201 {
4202         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4203 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4204         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4205                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4206 #else
4207         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4208                     "#                     Must use main snapshot file to allocate.\n");
4209 #endif
4210         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4211                     "#                      (Doesn't have to be '2'; works with any number that\n"
4212                     "#                       is not a '0' or '1')\n");
4213 }
4214
4215 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4216 {
4217         if (iter->tr->allocated_snapshot)
4218                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4219         else
4220                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4221
4222         seq_puts(m, "# Snapshot commands:\n");
4223         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4224                 show_snapshot_main_help(m);
4225         else
4226                 show_snapshot_percpu_help(m);
4227 }
4228 #else
4229 /* Should never be called */
4230 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4231 #endif
4232
4233 static int s_show(struct seq_file *m, void *v)
4234 {
4235         struct trace_iterator *iter = v;
4236         int ret;
4237
4238         if (iter->ent == NULL) {
4239                 if (iter->tr) {
4240                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4241                         seq_puts(m, "#\n");
4242                         test_ftrace_alive(m);
4243                 }
4244                 if (iter->snapshot && trace_empty(iter))
4245                         print_snapshot_help(m, iter);
4246                 else if (iter->trace && iter->trace->print_header)
4247                         iter->trace->print_header(m);
4248                 else
4249                         trace_default_header(m);
4250
4251         } else if (iter->leftover) {
4252                 /*
4253                  * If we filled the seq_file buffer earlier, we
4254                  * want to just show it now.
4255                  */
4256                 ret = trace_print_seq(m, &iter->seq);
4257
4258                 /* ret should this time be zero, but you never know */
4259                 iter->leftover = ret;
4260
4261         } else {
4262                 print_trace_line(iter);
4263                 ret = trace_print_seq(m, &iter->seq);
4264                 /*
4265                  * If we overflow the seq_file buffer, then it will
4266                  * ask us for this data again at start up.
4267                  * Use that instead.
4268                  *  ret is 0 if seq_file write succeeded.
4269                  *        -1 otherwise.
4270                  */
4271                 iter->leftover = ret;
4272         }
4273
4274         return 0;
4275 }
4276
4277 /*
4278  * Should be used after trace_array_get(); trace_types_lock
4279  * ensures that i_cdev was already initialized.
4280  */
4281 static inline int tracing_get_cpu(struct inode *inode)
4282 {
4283         if (inode->i_cdev) /* See trace_create_cpu_file(); it stores cpu + 1 */
4284                 return (long)inode->i_cdev - 1;
4285         return RING_BUFFER_ALL_CPUS;
4286 }
4287
4288 static const struct seq_operations tracer_seq_ops = {
4289         .start          = s_start,
4290         .next           = s_next,
4291         .stop           = s_stop,
4292         .show           = s_show,
4293 };
4294
4295 static struct trace_iterator *
4296 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4297 {
4298         struct trace_array *tr = inode->i_private;
4299         struct trace_iterator *iter;
4300         int cpu;
4301
4302         if (tracing_disabled)
4303                 return ERR_PTR(-ENODEV);
4304
4305         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4306         if (!iter)
4307                 return ERR_PTR(-ENOMEM);
4308
4309         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4310                                     GFP_KERNEL);
4311         if (!iter->buffer_iter)
4312                 goto release;
4313
4314         /*
4315          * trace_find_next_entry() may need to save off iter->ent.
4316          * It will place it into the iter->temp buffer. As most
4317          * events are less than 128, allocate a buffer of that size.
4318          * events are less than 128 bytes, allocate a buffer of that size.
4319          * allocate a new buffer to adjust for the bigger iter->ent.
4320          * It's not critical if it fails to get allocated here.
4321          */
4322         iter->temp = kmalloc(128, GFP_KERNEL);
4323         if (iter->temp)
4324                 iter->temp_size = 128;
4325
4326         /*
4327          * We make a copy of the current tracer to avoid concurrent
4328          * changes on it while we are reading.
4329          */
4330         mutex_lock(&trace_types_lock);
4331         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4332         if (!iter->trace)
4333                 goto fail;
4334
4335         *iter->trace = *tr->current_trace;
4336
4337         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4338                 goto fail;
4339
4340         iter->tr = tr;
4341
4342 #ifdef CONFIG_TRACER_MAX_TRACE
4343         /* Currently only the top directory has a snapshot */
4344         if (tr->current_trace->print_max || snapshot)
4345                 iter->array_buffer = &tr->max_buffer;
4346         else
4347 #endif
4348                 iter->array_buffer = &tr->array_buffer;
4349         iter->snapshot = snapshot;
4350         iter->pos = -1;
4351         iter->cpu_file = tracing_get_cpu(inode);
4352         mutex_init(&iter->mutex);
4353
4354         /* Notify the tracer early; before we stop tracing. */
4355         if (iter->trace->open)
4356                 iter->trace->open(iter);
4357
4358         /* Annotate start of buffers if we had overruns */
4359         if (ring_buffer_overruns(iter->array_buffer->buffer))
4360                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4361
4362         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4363         if (trace_clocks[tr->clock_id].in_ns)
4364                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4365
4366         /*
4367          * If pause-on-trace is enabled, then stop the trace while
4368          * dumping, unless this is the "snapshot" file
4369          */
4370         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4371                 tracing_stop_tr(tr);
4372
4373         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4374                 for_each_tracing_cpu(cpu) {
4375                         iter->buffer_iter[cpu] =
4376                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4377                                                          cpu, GFP_KERNEL);
4378                 }
4379                 ring_buffer_read_prepare_sync();
4380                 for_each_tracing_cpu(cpu) {
4381                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4382                         tracing_iter_reset(iter, cpu);
4383                 }
4384         } else {
4385                 cpu = iter->cpu_file;
4386                 iter->buffer_iter[cpu] =
4387                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4388                                                  cpu, GFP_KERNEL);
4389                 ring_buffer_read_prepare_sync();
4390                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4391                 tracing_iter_reset(iter, cpu);
4392         }
4393
4394         mutex_unlock(&trace_types_lock);
4395
4396         return iter;
4397
4398  fail:
4399         mutex_unlock(&trace_types_lock);
4400         kfree(iter->trace);
4401         kfree(iter->temp);
4402         kfree(iter->buffer_iter);
4403 release:
4404         seq_release_private(inode, file);
4405         return ERR_PTR(-ENOMEM);
4406 }
4407
4408 int tracing_open_generic(struct inode *inode, struct file *filp)
4409 {
4410         int ret;
4411
4412         ret = tracing_check_open_get_tr(NULL);
4413         if (ret)
4414                 return ret;
4415
4416         filp->private_data = inode->i_private;
4417         return 0;
4418 }
4419
4420 bool tracing_is_disabled(void)
4421 {
4422         return tracing_disabled ? true : false;
4423 }
4424
4425 /*
4426  * Open and update trace_array ref count.
4427  * Must have the current trace_array passed to it.
4428  */
4429 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4430 {
4431         struct trace_array *tr = inode->i_private;
4432         int ret;
4433
4434         ret = tracing_check_open_get_tr(tr);
4435         if (ret)
4436                 return ret;
4437
4438         filp->private_data = inode->i_private;
4439
4440         return 0;
4441 }
4442
4443 static int tracing_release(struct inode *inode, struct file *file)
4444 {
4445         struct trace_array *tr = inode->i_private;
4446         struct seq_file *m = file->private_data;
4447         struct trace_iterator *iter;
4448         int cpu;
4449
4450         if (!(file->f_mode & FMODE_READ)) {
4451                 trace_array_put(tr);
4452                 return 0;
4453         }
4454
4455         /* Writes do not use seq_file */
4456         iter = m->private;
4457         mutex_lock(&trace_types_lock);
4458
4459         for_each_tracing_cpu(cpu) {
4460                 if (iter->buffer_iter[cpu])
4461                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4462         }
4463
4464         if (iter->trace && iter->trace->close)
4465                 iter->trace->close(iter);
4466
4467         if (!iter->snapshot && tr->stop_count)
4468                 /* reenable tracing if it was previously enabled */
4469                 tracing_start_tr(tr);
4470
4471         __trace_array_put(tr);
4472
4473         mutex_unlock(&trace_types_lock);
4474
4475         mutex_destroy(&iter->mutex);
4476         free_cpumask_var(iter->started);
4477         kfree(iter->temp);
4478         kfree(iter->trace);
4479         kfree(iter->buffer_iter);
4480         seq_release_private(inode, file);
4481
4482         return 0;
4483 }
4484
4485 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4486 {
4487         struct trace_array *tr = inode->i_private;
4488
4489         trace_array_put(tr);
4490         return 0;
4491 }
4492
4493 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4494 {
4495         struct trace_array *tr = inode->i_private;
4496
4497         trace_array_put(tr);
4498
4499         return single_release(inode, file);
4500 }
4501
4502 static int tracing_open(struct inode *inode, struct file *file)
4503 {
4504         struct trace_array *tr = inode->i_private;
4505         struct trace_iterator *iter;
4506         int ret;
4507
4508         ret = tracing_check_open_get_tr(tr);
4509         if (ret)
4510                 return ret;
4511
4512         /* If this file was open for write, then erase contents */
4513         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4514                 int cpu = tracing_get_cpu(inode);
4515                 struct array_buffer *trace_buf = &tr->array_buffer;
4516
4517 #ifdef CONFIG_TRACER_MAX_TRACE
4518                 if (tr->current_trace->print_max)
4519                         trace_buf = &tr->max_buffer;
4520 #endif
4521
4522                 if (cpu == RING_BUFFER_ALL_CPUS)
4523                         tracing_reset_online_cpus(trace_buf);
4524                 else
4525                         tracing_reset_cpu(trace_buf, cpu);
4526         }
4527
4528         if (file->f_mode & FMODE_READ) {
4529                 iter = __tracing_open(inode, file, false);
4530                 if (IS_ERR(iter))
4531                         ret = PTR_ERR(iter);
4532                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4533                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4534         }
4535
4536         if (ret < 0)
4537                 trace_array_put(tr);
4538
4539         return ret;
4540 }
4541
4542 /*
4543  * Some tracers are not suitable for instance buffers.
4544  * A tracer is always available for the global array (toplevel),
4545  * or for an instance if it explicitly states that it allows instances.
4546  */
4547 static bool
4548 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4549 {
4550         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4551 }
4552
4553 /* Find the next tracer that this trace array may use */
4554 static struct tracer *
4555 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4556 {
4557         while (t && !trace_ok_for_array(t, tr))
4558                 t = t->next;
4559
4560         return t;
4561 }
4562
4563 static void *
4564 t_next(struct seq_file *m, void *v, loff_t *pos)
4565 {
4566         struct trace_array *tr = m->private;
4567         struct tracer *t = v;
4568
4569         (*pos)++;
4570
4571         if (t)
4572                 t = get_tracer_for_array(tr, t->next);
4573
4574         return t;
4575 }
4576
4577 static void *t_start(struct seq_file *m, loff_t *pos)
4578 {
4579         struct trace_array *tr = m->private;
4580         struct tracer *t;
4581         loff_t l = 0;
4582
4583         mutex_lock(&trace_types_lock);
4584
4585         t = get_tracer_for_array(tr, trace_types);
4586         for (; t && l < *pos; t = t_next(m, t, &l))
4587                 ;
4588
4589         return t;
4590 }
4591
4592 static void t_stop(struct seq_file *m, void *p)
4593 {
4594         mutex_unlock(&trace_types_lock);
4595 }
4596
4597 static int t_show(struct seq_file *m, void *v)
4598 {
4599         struct tracer *t = v;
4600
4601         if (!t)
4602                 return 0;
4603
4604         seq_puts(m, t->name);
4605         if (t->next)
4606                 seq_putc(m, ' ');
4607         else
4608                 seq_putc(m, '\n');
4609
4610         return 0;
4611 }
4612
4613 static const struct seq_operations show_traces_seq_ops = {
4614         .start          = t_start,
4615         .next           = t_next,
4616         .stop           = t_stop,
4617         .show           = t_show,
4618 };
4619
4620 static int show_traces_open(struct inode *inode, struct file *file)
4621 {
4622         struct trace_array *tr = inode->i_private;
4623         struct seq_file *m;
4624         int ret;
4625
4626         ret = tracing_check_open_get_tr(tr);
4627         if (ret)
4628                 return ret;
4629
4630         ret = seq_open(file, &show_traces_seq_ops);
4631         if (ret) {
4632                 trace_array_put(tr);
4633                 return ret;
4634         }
4635
4636         m = file->private_data;
4637         m->private = tr;
4638
4639         return 0;
4640 }
4641
4642 static int show_traces_release(struct inode *inode, struct file *file)
4643 {
4644         struct trace_array *tr = inode->i_private;
4645
4646         trace_array_put(tr);
4647         return seq_release(inode, file);
4648 }
4649
4650 static ssize_t
4651 tracing_write_stub(struct file *filp, const char __user *ubuf,
4652                    size_t count, loff_t *ppos)
4653 {
4654         return count;
4655 }
4656
4657 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4658 {
4659         int ret;
4660
4661         if (file->f_mode & FMODE_READ)
4662                 ret = seq_lseek(file, offset, whence);
4663         else
4664                 file->f_pos = ret = 0;
4665
4666         return ret;
4667 }
4668
4669 static const struct file_operations tracing_fops = {
4670         .open           = tracing_open,
4671         .read           = seq_read,
4672         .write          = tracing_write_stub,
4673         .llseek         = tracing_lseek,
4674         .release        = tracing_release,
4675 };
4676
4677 static const struct file_operations show_traces_fops = {
4678         .open           = show_traces_open,
4679         .read           = seq_read,
4680         .llseek         = seq_lseek,
4681         .release        = show_traces_release,
4682 };
4683
4684 static ssize_t
4685 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4686                      size_t count, loff_t *ppos)
4687 {
4688         struct trace_array *tr = file_inode(filp)->i_private;
4689         char *mask_str;
4690         int len;
4691
4692         len = snprintf(NULL, 0, "%*pb\n",
4693                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4694         mask_str = kmalloc(len, GFP_KERNEL);
4695         if (!mask_str)
4696                 return -ENOMEM;
4697
4698         len = snprintf(mask_str, len, "%*pb\n",
4699                        cpumask_pr_args(tr->tracing_cpumask));
4700         if (len >= count) {
4701                 count = -EINVAL;
4702                 goto out_err;
4703         }
4704         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4705
4706 out_err:
4707         kfree(mask_str);
4708
4709         return count;
4710 }
4711
4712 int tracing_set_cpumask(struct trace_array *tr,
4713                         cpumask_var_t tracing_cpumask_new)
4714 {
4715         int cpu;
4716
4717         if (!tr)
4718                 return -EINVAL;
4719
4720         local_irq_disable();
4721         arch_spin_lock(&tr->max_lock);
4722         for_each_tracing_cpu(cpu) {
4723                 /*
4724                  * Increase/decrease the disabled counter if we are
4725                  * about to flip a bit in the cpumask:
4726                  */
4727                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4728                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4729                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4730                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4731                 }
4732                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4733                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4734                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4735                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4736                 }
4737         }
4738         arch_spin_unlock(&tr->max_lock);
4739         local_irq_enable();
4740
4741         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4742
4743         return 0;
4744 }
4745
4746 static ssize_t
4747 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4748                       size_t count, loff_t *ppos)
4749 {
4750         struct trace_array *tr = file_inode(filp)->i_private;
4751         cpumask_var_t tracing_cpumask_new;
4752         int err;
4753
4754         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4755                 return -ENOMEM;
4756
4757         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4758         if (err)
4759                 goto err_free;
4760
4761         err = tracing_set_cpumask(tr, tracing_cpumask_new);
4762         if (err)
4763                 goto err_free;
4764
4765         free_cpumask_var(tracing_cpumask_new);
4766
4767         return count;
4768
4769 err_free:
4770         free_cpumask_var(tracing_cpumask_new);
4771
4772         return err;
4773 }
4774
4775 static const struct file_operations tracing_cpumask_fops = {
4776         .open           = tracing_open_generic_tr,
4777         .read           = tracing_cpumask_read,
4778         .write          = tracing_cpumask_write,
4779         .release        = tracing_release_generic_tr,
4780         .llseek         = generic_file_llseek,
4781 };
4782
4783 static int tracing_trace_options_show(struct seq_file *m, void *v)
4784 {
4785         struct tracer_opt *trace_opts;
4786         struct trace_array *tr = m->private;
4787         u32 tracer_flags;
4788         int i;
4789
4790         mutex_lock(&trace_types_lock);
4791         tracer_flags = tr->current_trace->flags->val;
4792         trace_opts = tr->current_trace->flags->opts;
4793
4794         for (i = 0; trace_options[i]; i++) {
4795                 if (tr->trace_flags & (1 << i))
4796                         seq_printf(m, "%s\n", trace_options[i]);
4797                 else
4798                         seq_printf(m, "no%s\n", trace_options[i]);
4799         }
4800
4801         for (i = 0; trace_opts[i].name; i++) {
4802                 if (tracer_flags & trace_opts[i].bit)
4803                         seq_printf(m, "%s\n", trace_opts[i].name);
4804                 else
4805                         seq_printf(m, "no%s\n", trace_opts[i].name);
4806         }
4807         mutex_unlock(&trace_types_lock);
4808
4809         return 0;
4810 }
4811
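/*
 * Set or clear a single tracer-specific option bit.  The tracer is
 * consulted first through ->set_flag() and may reject the change;
 * only on success is the bit updated in @tracer_flags.
 */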
4812 static int __set_tracer_option(struct trace_array *tr,
4813                                struct tracer_flags *tracer_flags,
4814                                struct tracer_opt *opts, int neg)
4815 {
4816         struct tracer *trace = tracer_flags->trace;
4817         int ret;
4818
4819         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4820         if (ret)
4821                 return ret;
4822
4823         if (neg)
4824                 tracer_flags->val &= ~opts->bit;
4825         else
4826                 tracer_flags->val |= opts->bit;
4827         return 0;
4828 }
4829
4830 /* Try to assign a tracer specific option */
4831 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4832 {
4833         struct tracer *trace = tr->current_trace;
4834         struct tracer_flags *tracer_flags = trace->flags;
4835         struct tracer_opt *opts = NULL;
4836         int i;
4837
4838         for (i = 0; tracer_flags->opts[i].name; i++) {
4839                 opts = &tracer_flags->opts[i];
4840
4841                 if (strcmp(cmp, opts->name) == 0)
4842                         return __set_tracer_option(tr, trace->flags, opts, neg);
4843         }
4844
4845         return -EINVAL;
4846 }
4847
4848 /* Some tracers require overwrite to stay enabled */
4849 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4850 {
4851         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4852                 return -1;
4853
4854         return 0;
4855 }
4856
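/*
 * set_tracer_flag - set or clear one of the core trace_options flags
 * @tr: the trace array to modify
 * @mask: a single TRACE_ITER_* flag bit
 * @enabled: non-zero to set the flag, zero to clear it
 *
 * The current tracer may veto the change via ->flag_changed().  Flags
 * that carry side effects (RECORD_CMD, RECORD_TGID, EVENT_FORK,
 * FUNC_FORK, OVERWRITE and PRINTK) have those side effects applied
 * here as well.
 */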
4857 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4858 {
4859         if ((mask == TRACE_ITER_RECORD_TGID) ||
4860             (mask == TRACE_ITER_RECORD_CMD))
4861                 lockdep_assert_held(&event_mutex);
4862
4863         /* do nothing if flag is already set */
4864         if (!!(tr->trace_flags & mask) == !!enabled)
4865                 return 0;
4866
4867         /* Give the tracer a chance to approve the change */
4868         if (tr->current_trace->flag_changed)
4869                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4870                         return -EINVAL;
4871
4872         if (enabled)
4873                 tr->trace_flags |= mask;
4874         else
4875                 tr->trace_flags &= ~mask;
4876
4877         if (mask == TRACE_ITER_RECORD_CMD)
4878                 trace_event_enable_cmd_record(enabled);
4879
4880         if (mask == TRACE_ITER_RECORD_TGID) {
4881                 if (!tgid_map)
4882                         tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4883                                            sizeof(*tgid_map),
4884                                            GFP_KERNEL);
4885                 if (!tgid_map) {
4886                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4887                         return -ENOMEM;
4888                 }
4889
4890                 trace_event_enable_tgid_record(enabled);
4891         }
4892
4893         if (mask == TRACE_ITER_EVENT_FORK)
4894                 trace_event_follow_fork(tr, enabled);
4895
4896         if (mask == TRACE_ITER_FUNC_FORK)
4897                 ftrace_pid_follow_fork(tr, enabled);
4898
4899         if (mask == TRACE_ITER_OVERWRITE) {
4900                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4901 #ifdef CONFIG_TRACER_MAX_TRACE
4902                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4903 #endif
4904         }
4905
4906         if (mask == TRACE_ITER_PRINTK) {
4907                 trace_printk_start_stop_comm(enabled);
4908                 trace_printk_control(enabled);
4909         }
4910
4911         return 0;
4912 }
4913
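/*
 * trace_set_options - parse and apply a single option string
 * @tr: the trace array to modify
 * @option: option name, optionally prefixed with "no" to clear it
 *
 * The name is matched against the core trace_options[] strings first
 * and then against the current tracer's private options.  For example
 * (illustrative only), trace_set_options(tr, "nooverwrite") clears
 * TRACE_ITER_OVERWRITE, just like "echo nooverwrite > trace_options".
 */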
4914 int trace_set_options(struct trace_array *tr, char *option)
4915 {
4916         char *cmp;
4917         int neg = 0;
4918         int ret;
4919         size_t orig_len = strlen(option);
4920         int len;
4921
4922         cmp = strstrip(option);
4923
4924         len = str_has_prefix(cmp, "no");
4925         if (len)
4926                 neg = 1;
4927
4928         cmp += len;
4929
4930         mutex_lock(&event_mutex);
4931         mutex_lock(&trace_types_lock);
4932
4933         ret = match_string(trace_options, -1, cmp);
4934         /* If no option could be set, test the specific tracer options */
4935         if (ret < 0)
4936                 ret = set_tracer_option(tr, cmp, neg);
4937         else
4938                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4939
4940         mutex_unlock(&trace_types_lock);
4941         mutex_unlock(&event_mutex);
4942
4943         /*
4944          * If the first trailing whitespace is replaced with '\0' by strstrip,
4945          * turn it back into a space.
4946          */
4947         if (orig_len > strlen(option))
4948                 option[strlen(option)] = ' ';
4949
4950         return ret;
4951 }
4952
4953 static void __init apply_trace_boot_options(void)
4954 {
4955         char *buf = trace_boot_options_buf;
4956         char *option;
4957
4958         while (true) {
4959                 option = strsep(&buf, ",");
4960
4961                 if (!option)
4962                         break;
4963
4964                 if (*option)
4965                         trace_set_options(&global_trace, option);
4966
4967                 /* Put back the comma to allow this to be called again */
4968                 if (buf)
4969                         *(buf - 1) = ',';
4970         }
4971 }
4972
4973 static ssize_t
4974 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4975                         size_t cnt, loff_t *ppos)
4976 {
4977         struct seq_file *m = filp->private_data;
4978         struct trace_array *tr = m->private;
4979         char buf[64];
4980         int ret;
4981
4982         if (cnt >= sizeof(buf))
4983                 return -EINVAL;
4984
4985         if (copy_from_user(buf, ubuf, cnt))
4986                 return -EFAULT;
4987
4988         buf[cnt] = 0;
4989
4990         ret = trace_set_options(tr, buf);
4991         if (ret < 0)
4992                 return ret;
4993
4994         *ppos += cnt;
4995
4996         return cnt;
4997 }
4998
4999 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5000 {
5001         struct trace_array *tr = inode->i_private;
5002         int ret;
5003
5004         ret = tracing_check_open_get_tr(tr);
5005         if (ret)
5006                 return ret;
5007
5008         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5009         if (ret < 0)
5010                 trace_array_put(tr);
5011
5012         return ret;
5013 }
5014
5015 static const struct file_operations tracing_iter_fops = {
5016         .open           = tracing_trace_options_open,
5017         .read           = seq_read,
5018         .llseek         = seq_lseek,
5019         .release        = tracing_single_release_tr,
5020         .write          = tracing_trace_options_write,
5021 };
5022
5023 static const char readme_msg[] =
5024         "tracing mini-HOWTO:\n\n"
5025         "# echo 0 > tracing_on : quick way to disable tracing\n"
5026         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5027         " Important files:\n"
5028         "  trace\t\t\t- The static contents of the buffer\n"
5029         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5030         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5031         "  current_tracer\t- function and latency tracers\n"
5032         "  available_tracers\t- list of configured tracers for current_tracer\n"
5033         "  error_log\t- error log for failed commands (that support it)\n"
5034         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5035         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5036         "  trace_clock\t\t- change the clock used to order events\n"
5037         "       local:   Per cpu clock but may not be synced across CPUs\n"
5038         "      global:   Synced across CPUs but slows tracing down.\n"
5039         "     counter:   Not a clock, but just an increment\n"
5040         "      uptime:   Jiffy counter from time of boot\n"
5041         "        perf:   Same clock that perf events use\n"
5042 #ifdef CONFIG_X86_64
5043         "     x86-tsc:   TSC cycle counter\n"
5044 #endif
5045         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5046         "       delta:   Delta difference against a buffer-wide timestamp\n"
5047         "    absolute:   Absolute (standalone) timestamp\n"
5048         "\n  trace_marker\t\t- Writes into this file write into the kernel buffer\n"
5049         "\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5050         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5051         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5052         "\t\t\t  Remove sub-buffer with rmdir\n"
5053         "  trace_options\t\t- Set format or modify how tracing happens\n"
5054         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5055         "\t\t\t  option name\n"
5056         "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5057 #ifdef CONFIG_DYNAMIC_FTRACE
5058         "\n  available_filter_functions - list of functions that can be filtered on\n"
5059         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5060         "\t\t\t  functions\n"
5061         "\t     accepts: func_full_name or glob-matching-pattern\n"
5062         "\t     modules: Can select a group via module\n"
5063         "\t      Format: :mod:<module-name>\n"
5064         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5065         "\t    triggers: a command to perform when function is hit\n"
5066         "\t      Format: <function>:<trigger>[:count]\n"
5067         "\t     trigger: traceon, traceoff\n"
5068         "\t\t      enable_event:<system>:<event>\n"
5069         "\t\t      disable_event:<system>:<event>\n"
5070 #ifdef CONFIG_STACKTRACE
5071         "\t\t      stacktrace\n"
5072 #endif
5073 #ifdef CONFIG_TRACER_SNAPSHOT
5074         "\t\t      snapshot\n"
5075 #endif
5076         "\t\t      dump\n"
5077         "\t\t      cpudump\n"
5078         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5079         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5080         "\t     The first one will disable tracing every time do_fault is hit\n"
5081         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5082         "\t       The first time do_trap is hit and it disables tracing, the\n"
5083         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5084         "\t       the counter will not decrement. It only decrements when the\n"
5085         "\t       trigger did work\n"
5086         "\t     To remove trigger without count:\n"
5087         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5088         "\t     To remove trigger with a count:\n"
5089         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5090         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5091         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5092         "\t    modules: Can select a group via module command :mod:\n"
5093         "\t    Does not accept triggers\n"
5094 #endif /* CONFIG_DYNAMIC_FTRACE */
5095 #ifdef CONFIG_FUNCTION_TRACER
5096         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5097         "\t\t    (function)\n"
5098         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5099         "\t\t    (function)\n"
5100 #endif
5101 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5102         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5103         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5104         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5105 #endif
5106 #ifdef CONFIG_TRACER_SNAPSHOT
5107         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5108         "\t\t\t  snapshot buffer. Read the contents for more\n"
5109         "\t\t\t  information\n"
5110 #endif
5111 #ifdef CONFIG_STACK_TRACER
5112         "  stack_trace\t\t- Shows the max stack trace when active\n"
5113         "  stack_max_size\t- Shows current max stack size that was traced\n"
5114         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5115         "\t\t\t  new trace)\n"
5116 #ifdef CONFIG_DYNAMIC_FTRACE
5117         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5118         "\t\t\t  traces\n"
5119 #endif
5120 #endif /* CONFIG_STACK_TRACER */
5121 #ifdef CONFIG_DYNAMIC_EVENTS
5122         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5123         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5124 #endif
5125 #ifdef CONFIG_KPROBE_EVENTS
5126         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5127         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5128 #endif
5129 #ifdef CONFIG_UPROBE_EVENTS
5130         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5131         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5132 #endif
5133 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5134         "\t  accepts: event-definitions (one definition per line)\n"
5135         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5136         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5137 #ifdef CONFIG_HIST_TRIGGERS
5138         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5139 #endif
5140         "\t           -:[<group>/]<event>\n"
5141 #ifdef CONFIG_KPROBE_EVENTS
5142         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5143         "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5144 #endif
5145 #ifdef CONFIG_UPROBE_EVENTS
5146         "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5147 #endif
5148         "\t     args: <name>=fetcharg[:type]\n"
5149         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5150 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5151         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5152 #else
5153         "\t           $stack<index>, $stack, $retval, $comm,\n"
5154 #endif
5155         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5156         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5157         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5158         "\t           <type>\\[<array-size>\\]\n"
5159 #ifdef CONFIG_HIST_TRIGGERS
5160         "\t    field: <stype> <name>;\n"
5161         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5162         "\t           [unsigned] char/int/long\n"
5163 #endif
5164 #endif
5165         "  events/\t\t- Directory containing all trace event subsystems:\n"
5166         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5167         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5168         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5169         "\t\t\t  events\n"
5170         "      filter\t\t- If set, only events passing filter are traced\n"
5171         "  events/<system>/<event>/\t- Directory containing control files for\n"
5172         "\t\t\t  <event>:\n"
5173         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5174         "      filter\t\t- If set, only events passing filter are traced\n"
5175         "      trigger\t\t- If set, a command to perform when event is hit\n"
5176         "\t    Format: <trigger>[:count][if <filter>]\n"
5177         "\t   trigger: traceon, traceoff\n"
5178         "\t            enable_event:<system>:<event>\n"
5179         "\t            disable_event:<system>:<event>\n"
5180 #ifdef CONFIG_HIST_TRIGGERS
5181         "\t            enable_hist:<system>:<event>\n"
5182         "\t            disable_hist:<system>:<event>\n"
5183 #endif
5184 #ifdef CONFIG_STACKTRACE
5185         "\t\t    stacktrace\n"
5186 #endif
5187 #ifdef CONFIG_TRACER_SNAPSHOT
5188         "\t\t    snapshot\n"
5189 #endif
5190 #ifdef CONFIG_HIST_TRIGGERS
5191         "\t\t    hist (see below)\n"
5192 #endif
5193         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5194         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5195         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5196         "\t                  events/block/block_unplug/trigger\n"
5197         "\t   The first disables tracing every time block_unplug is hit.\n"
5198         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5199         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5200         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5201         "\t   Like function triggers, the counter is only decremented if it\n"
5202         "\t    enabled or disabled tracing.\n"
5203         "\t   To remove a trigger without a count:\n"
5204         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5205         "\t   To remove a trigger with a count:\n"
5206         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5207         "\t   Filters can be ignored when removing a trigger.\n"
5208 #ifdef CONFIG_HIST_TRIGGERS
5209         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5210         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5211         "\t            [:values=<field1[,field2,...]>]\n"
5212         "\t            [:sort=<field1[,field2,...]>]\n"
5213         "\t            [:size=#entries]\n"
5214         "\t            [:pause][:continue][:clear]\n"
5215         "\t            [:name=histname1]\n"
5216         "\t            [:<handler>.<action>]\n"
5217         "\t            [if <filter>]\n\n"
5218         "\t    When a matching event is hit, an entry is added to a hash\n"
5219         "\t    table using the key(s) and value(s) named, and the value of a\n"
5220         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5221         "\t    correspond to fields in the event's format description.  Keys\n"
5222         "\t    can be any field, or the special string 'stacktrace'.\n"
5223         "\t    Compound keys consisting of up to two fields can be specified\n"
5224         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5225         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5226         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5227         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5228         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5229         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5230         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5231         "\t    its histogram data will be shared with other triggers of the\n"
5232         "\t    same name, and trigger hits will update this common data.\n\n"
5233         "\t    Reading the 'hist' file for the event will dump the hash\n"
5234         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5235         "\t    triggers attached to an event, there will be a table for each\n"
5236         "\t    trigger in the output.  The table displayed for a named\n"
5237         "\t    trigger will be the same as any other instance having the\n"
5238         "\t    same name.  The default format used to display a given field\n"
5239         "\t    can be modified by appending any of the following modifiers\n"
5240         "\t    to the field name, as applicable:\n\n"
5241         "\t            .hex        display a number as a hex value\n"
5242         "\t            .sym        display an address as a symbol\n"
5243         "\t            .sym-offset display an address as a symbol and offset\n"
5244         "\t            .execname   display a common_pid as a program name\n"
5245         "\t            .syscall    display a syscall id as a syscall name\n"
5246         "\t            .log2       display log2 value rather than raw number\n"
5247         "\t            .usecs      display a common_timestamp in microseconds\n\n"
5248         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5249         "\t    trigger or to start a hist trigger but not log any events\n"
5250         "\t    until told to do so.  'continue' can be used to start or\n"
5251         "\t    restart a paused hist trigger.\n\n"
5252         "\t    The 'clear' parameter will clear the contents of a running\n"
5253         "\t    hist trigger and leave its current paused/active state\n"
5254         "\t    unchanged.\n\n"
5255         "\t    The enable_hist and disable_hist triggers can be used to\n"
5256         "\t    have one event conditionally start and stop another event's\n"
5257         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5258         "\t    the enable_event and disable_event triggers.\n\n"
5259         "\t    Hist trigger handlers and actions are executed whenever a\n"
5260         "\t    histogram entry is added or updated.  They take the form:\n\n"
5261         "\t        <handler>.<action>\n\n"
5262         "\t    The available handlers are:\n\n"
5263         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5264         "\t        onmax(var)               - invoke if var exceeds current max\n"
5265         "\t        onchange(var)            - invoke action if var changes\n\n"
5266         "\t    The available actions are:\n\n"
5267         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5268         "\t        save(field,...)                      - save current event fields\n"
5269 #ifdef CONFIG_TRACER_SNAPSHOT
5270         "\t        snapshot()                           - snapshot the trace buffer\n"
5271 #endif
5272 #endif
5273 ;
5274
5275 static ssize_t
5276 tracing_readme_read(struct file *filp, char __user *ubuf,
5277                        size_t cnt, loff_t *ppos)
5278 {
5279         return simple_read_from_buffer(ubuf, cnt, ppos,
5280                                         readme_msg, strlen(readme_msg));
5281 }
5282
5283 static const struct file_operations tracing_readme_fops = {
5284         .open           = tracing_open_generic,
5285         .read           = tracing_readme_read,
5286         .llseek         = generic_file_llseek,
5287 };
5288
5289 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5290 {
5291         int *ptr = v;
5292
5293         if (*pos || m->count)
5294                 ptr++;
5295
5296         (*pos)++;
5297
5298         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5299                 if (trace_find_tgid(*ptr))
5300                         return ptr;
5301         }
5302
5303         return NULL;
5304 }
5305
5306 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5307 {
5308         void *v;
5309         loff_t l = 0;
5310
5311         if (!tgid_map)
5312                 return NULL;
5313
5314         v = &tgid_map[0];
5315         while (l <= *pos) {
5316                 v = saved_tgids_next(m, v, &l);
5317                 if (!v)
5318                         return NULL;
5319         }
5320
5321         return v;
5322 }
5323
5324 static void saved_tgids_stop(struct seq_file *m, void *v)
5325 {
5326 }
5327
5328 static int saved_tgids_show(struct seq_file *m, void *v)
5329 {
5330         int pid = (int *)v - tgid_map;
5331
5332         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5333         return 0;
5334 }
5335
5336 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5337         .start          = saved_tgids_start,
5338         .stop           = saved_tgids_stop,
5339         .next           = saved_tgids_next,
5340         .show           = saved_tgids_show,
5341 };
5342
5343 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5344 {
5345         int ret;
5346
5347         ret = tracing_check_open_get_tr(NULL);
5348         if (ret)
5349                 return ret;
5350
5351         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5352 }
5353
5354
5355 static const struct file_operations tracing_saved_tgids_fops = {
5356         .open           = tracing_saved_tgids_open,
5357         .read           = seq_read,
5358         .llseek         = seq_lseek,
5359         .release        = seq_release,
5360 };
5361
5362 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5363 {
5364         unsigned int *ptr = v;
5365
5366         if (*pos || m->count)
5367                 ptr++;
5368
5369         (*pos)++;
5370
5371         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5372              ptr++) {
5373                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5374                         continue;
5375
5376                 return ptr;
5377         }
5378
5379         return NULL;
5380 }
5381
5382 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5383 {
5384         void *v;
5385         loff_t l = 0;
5386
5387         preempt_disable();
5388         arch_spin_lock(&trace_cmdline_lock);
5389
5390         v = &savedcmd->map_cmdline_to_pid[0];
5391         while (l <= *pos) {
5392                 v = saved_cmdlines_next(m, v, &l);
5393                 if (!v)
5394                         return NULL;
5395         }
5396
5397         return v;
5398 }
5399
5400 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5401 {
5402         arch_spin_unlock(&trace_cmdline_lock);
5403         preempt_enable();
5404 }
5405
5406 static int saved_cmdlines_show(struct seq_file *m, void *v)
5407 {
5408         char buf[TASK_COMM_LEN];
5409         unsigned int *pid = v;
5410
5411         __trace_find_cmdline(*pid, buf);
5412         seq_printf(m, "%d %s\n", *pid, buf);
5413         return 0;
5414 }
5415
5416 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5417         .start          = saved_cmdlines_start,
5418         .next           = saved_cmdlines_next,
5419         .stop           = saved_cmdlines_stop,
5420         .show           = saved_cmdlines_show,
5421 };
5422
5423 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5424 {
5425         int ret;
5426
5427         ret = tracing_check_open_get_tr(NULL);
5428         if (ret)
5429                 return ret;
5430
5431         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5432 }
5433
5434 static const struct file_operations tracing_saved_cmdlines_fops = {
5435         .open           = tracing_saved_cmdlines_open,
5436         .read           = seq_read,
5437         .llseek         = seq_lseek,
5438         .release        = seq_release,
5439 };
5440
5441 static ssize_t
5442 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5443                                  size_t cnt, loff_t *ppos)
5444 {
5445         char buf[64];
5446         int r;
5447
5448         arch_spin_lock(&trace_cmdline_lock);
5449         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5450         arch_spin_unlock(&trace_cmdline_lock);
5451
5452         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5453 }
5454
5455 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5456 {
5457         kfree(s->saved_cmdlines);
5458         kfree(s->map_cmdline_to_pid);
5459         kfree(s);
5460 }
5461
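/*
 * Resize the saved-cmdlines map: allocate a new buffer with room for
 * @val entries, swap it in under trace_cmdline_lock, and free the old
 * buffer only after the lock is dropped.
 */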
5462 static int tracing_resize_saved_cmdlines(unsigned int val)
5463 {
5464         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5465
5466         s = kmalloc(sizeof(*s), GFP_KERNEL);
5467         if (!s)
5468                 return -ENOMEM;
5469
5470         if (allocate_cmdlines_buffer(val, s) < 0) {
5471                 kfree(s);
5472                 return -ENOMEM;
5473         }
5474
5475         arch_spin_lock(&trace_cmdline_lock);
5476         savedcmd_temp = savedcmd;
5477         savedcmd = s;
5478         arch_spin_unlock(&trace_cmdline_lock);
5479         free_saved_cmdlines_buffer(savedcmd_temp);
5480
5481         return 0;
5482 }
5483
5484 static ssize_t
5485 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5486                                   size_t cnt, loff_t *ppos)
5487 {
5488         unsigned long val;
5489         int ret;
5490
5491         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5492         if (ret)
5493                 return ret;
5494
5495         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5496         if (!val || val > PID_MAX_DEFAULT)
5497                 return -EINVAL;
5498
5499         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5500         if (ret < 0)
5501                 return ret;
5502
5503         *ppos += cnt;
5504
5505         return cnt;
5506 }
5507
5508 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5509         .open           = tracing_open_generic,
5510         .read           = tracing_saved_cmdlines_size_read,
5511         .write          = tracing_saved_cmdlines_size_write,
5512 };
5513
5514 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
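/*
 * Helper for the eval_map seq_file iterator: if @ptr has landed on a
 * tail marker (no eval_string), follow tail.next to the next block and
 * step past that block's head, or return NULL at the end of the list.
 */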
5515 static union trace_eval_map_item *
5516 update_eval_map(union trace_eval_map_item *ptr)
5517 {
5518         if (!ptr->map.eval_string) {
5519                 if (ptr->tail.next) {
5520                         ptr = ptr->tail.next;
5521                         /* Set ptr to the next real item (skip head) */
5522                         ptr++;
5523                 } else
5524                         return NULL;
5525         }
5526         return ptr;
5527 }
5528
5529 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5530 {
5531         union trace_eval_map_item *ptr = v;
5532
5533         /*
5534          * Paranoid! If ptr points to end, we don't want to increment past it.
5535          * This really should never happen.
5536          */
5537         (*pos)++;
5538         ptr = update_eval_map(ptr);
5539         if (WARN_ON_ONCE(!ptr))
5540                 return NULL;
5541
5542         ptr++;
5543         ptr = update_eval_map(ptr);
5544
5545         return ptr;
5546 }
5547
5548 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5549 {
5550         union trace_eval_map_item *v;
5551         loff_t l = 0;
5552
5553         mutex_lock(&trace_eval_mutex);
5554
5555         v = trace_eval_maps;
5556         if (v)
5557                 v++;
5558
5559         while (v && l < *pos) {
5560                 v = eval_map_next(m, v, &l);
5561         }
5562
5563         return v;
5564 }
5565
5566 static void eval_map_stop(struct seq_file *m, void *v)
5567 {
5568         mutex_unlock(&trace_eval_mutex);
5569 }
5570
5571 static int eval_map_show(struct seq_file *m, void *v)
5572 {
5573         union trace_eval_map_item *ptr = v;
5574
5575         seq_printf(m, "%s %ld (%s)\n",
5576                    ptr->map.eval_string, ptr->map.eval_value,
5577                    ptr->map.system);
5578
5579         return 0;
5580 }
5581
5582 static const struct seq_operations tracing_eval_map_seq_ops = {
5583         .start          = eval_map_start,
5584         .next           = eval_map_next,
5585         .stop           = eval_map_stop,
5586         .show           = eval_map_show,
5587 };
5588
5589 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5590 {
5591         int ret;
5592
5593         ret = tracing_check_open_get_tr(NULL);
5594         if (ret)
5595                 return ret;
5596
5597         return seq_open(filp, &tracing_eval_map_seq_ops);
5598 }
5599
5600 static const struct file_operations tracing_eval_map_fops = {
5601         .open           = tracing_eval_map_open,
5602         .read           = seq_read,
5603         .llseek         = seq_lseek,
5604         .release        = seq_release,
5605 };
5606
5607 static inline union trace_eval_map_item *
5608 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5609 {
5610         /* Return tail of array given the head */
5611         return ptr + ptr->head.length + 1;
5612 }
5613
5614 static void
5615 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5616                            int len)
5617 {
5618         struct trace_eval_map **stop;
5619         struct trace_eval_map **map;
5620         union trace_eval_map_item *map_array;
5621         union trace_eval_map_item *ptr;
5622
5623         stop = start + len;
5624
5625         /*
5626          * The trace_eval_maps contains the map plus a head and tail item,
5627          * where the head holds the module and length of array, and the
5628          * tail holds a pointer to the next list.
5629          */
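        /*
         * Resulting layout of this module's block (N == len):
         *
         *   [ head | map 0 | map 1 | ... | map N-1 | tail ]
         *
         * The final element is cleared below and acts as the tail; its
         * tail.next stays NULL until another module's block is chained on.
         */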
5630         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5631         if (!map_array) {
5632                 pr_warn("Unable to allocate trace eval mapping\n");
5633                 return;
5634         }
5635
5636         mutex_lock(&trace_eval_mutex);
5637
5638         if (!trace_eval_maps)
5639                 trace_eval_maps = map_array;
5640         else {
5641                 ptr = trace_eval_maps;
5642                 for (;;) {
5643                         ptr = trace_eval_jmp_to_tail(ptr);
5644                         if (!ptr->tail.next)
5645                                 break;
5646                         ptr = ptr->tail.next;
5647
5648                 }
5649                 ptr->tail.next = map_array;
5650         }
5651         map_array->head.mod = mod;
5652         map_array->head.length = len;
5653         map_array++;
5654
5655         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5656                 map_array->map = **map;
5657                 map_array++;
5658         }
5659         memset(map_array, 0, sizeof(*map_array));
5660
5661         mutex_unlock(&trace_eval_mutex);
5662 }
5663
5664 static void trace_create_eval_file(struct dentry *d_tracer)
5665 {
5666         trace_create_file("eval_map", 0444, d_tracer,
5667                           NULL, &tracing_eval_map_fops);
5668 }
5669
5670 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5671 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5672 static inline void trace_insert_eval_map_file(struct module *mod,
5673                               struct trace_eval_map **start, int len) { }
5674 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5675
5676 static void trace_insert_eval_map(struct module *mod,
5677                                   struct trace_eval_map **start, int len)
5678 {
5679         struct trace_eval_map **map;
5680
5681         if (len <= 0)
5682                 return;
5683
5684         map = start;
5685
5686         trace_event_eval_update(map, len);
5687
5688         trace_insert_eval_map_file(mod, start, len);
5689 }
5690
5691 static ssize_t
5692 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5693                        size_t cnt, loff_t *ppos)
5694 {
5695         struct trace_array *tr = filp->private_data;
5696         char buf[MAX_TRACER_SIZE+2];
5697         int r;
5698
5699         mutex_lock(&trace_types_lock);
5700         r = sprintf(buf, "%s\n", tr->current_trace->name);
5701         mutex_unlock(&trace_types_lock);
5702
5703         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5704 }
5705
5706 int tracer_init(struct tracer *t, struct trace_array *tr)
5707 {
5708         tracing_reset_online_cpus(&tr->array_buffer);
5709         return t->init(tr);
5710 }
5711
5712 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5713 {
5714         int cpu;
5715
5716         for_each_tracing_cpu(cpu)
5717                 per_cpu_ptr(buf->data, cpu)->entries = val;
5718 }
5719
5720 #ifdef CONFIG_TRACER_MAX_TRACE
5721 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5722 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5723                                         struct array_buffer *size_buf, int cpu_id)
5724 {
5725         int cpu, ret = 0;
5726
5727         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5728                 for_each_tracing_cpu(cpu) {
5729                         ret = ring_buffer_resize(trace_buf->buffer,
5730                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5731                         if (ret < 0)
5732                                 break;
5733                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5734                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5735                 }
5736         } else {
5737                 ret = ring_buffer_resize(trace_buf->buffer,
5738                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5739                 if (ret == 0)
5740                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5741                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5742         }
5743
5744         return ret;
5745 }
5746 #endif /* CONFIG_TRACER_MAX_TRACE */
5747
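/*
 * Resize @tr's main ring buffer (one CPU or all of them) to @size.
 * If the current tracer uses the max/snapshot buffer, that buffer is
 * resized to match; should that fail, the main buffer is put back to
 * its previous size so the two never diverge.
 */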
5748 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5749                                         unsigned long size, int cpu)
5750 {
5751         int ret;
5752
5753         /*
5754          * If kernel or user changes the size of the ring buffer
5755          * we use the size that was given, and we can forget about
5756          * expanding it later.
5757          */
5758         ring_buffer_expanded = true;
5759
5760         /* May be called before buffers are initialized */
5761         if (!tr->array_buffer.buffer)
5762                 return 0;
5763
5764         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5765         if (ret < 0)
5766                 return ret;
5767
5768 #ifdef CONFIG_TRACER_MAX_TRACE
5769         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5770             !tr->current_trace->use_max_tr)
5771                 goto out;
5772
5773         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5774         if (ret < 0) {
5775                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5776                                                      &tr->array_buffer, cpu);
5777                 if (r < 0) {
5778                         /*
5779                          * AARGH! We are left with different
5780                          * size max buffer!!!!
5781                          * The max buffer is our "snapshot" buffer.
5782                          * When a tracer needs a snapshot (one of the
5783                          * latency tracers), it swaps the max buffer
5784                          * with the saved snapshot. We succeeded in
5785                          * updating the size of the main buffer, but failed to
5786                          * update the size of the max buffer. But when we tried
5787                          * to reset the main buffer to the original size, we
5788                          * failed there too. This is very unlikely to
5789                          * happen, but if it does, warn and kill all
5790                          * tracing.
5791                          */
5792                         WARN_ON(1);
5793                         tracing_disabled = 1;
5794                 }
5795                 return ret;
5796         }
5797
5798         if (cpu == RING_BUFFER_ALL_CPUS)
5799                 set_buffer_entries(&tr->max_buffer, size);
5800         else
5801                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5802
5803  out:
5804 #endif /* CONFIG_TRACER_MAX_TRACE */
5805
5806         if (cpu == RING_BUFFER_ALL_CPUS)
5807                 set_buffer_entries(&tr->array_buffer, size);
5808         else
5809                 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5810
5811         return ret;
5812 }
5813
5814 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5815                                   unsigned long size, int cpu_id)
5816 {
5817         int ret = size;
5818
5819         mutex_lock(&trace_types_lock);
5820
5821         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5822                 /* make sure this cpu is enabled in the mask */
5823                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5824                         ret = -EINVAL;
5825                         goto out;
5826                 }
5827         }
5828
5829         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5830         if (ret < 0)
5831                 ret = -ENOMEM;
5832
5833 out:
5834         mutex_unlock(&trace_types_lock);
5835
5836         return ret;
5837 }
5838
5839
5840 /**
5841  * tracing_update_buffers - used by tracing facility to expand ring buffers
5842  *
5843  * To save memory when tracing is never used on a system that has it
5844  * configured in, the ring buffers are set to a minimum size. But once
5845  * a user starts to use the tracing facility, they need to grow
5846  * to their default size.
5847  *
5848  * This function is to be called when a tracer is about to be used.
5849  */
5850 int tracing_update_buffers(void)
5851 {
5852         int ret = 0;
5853
5854         mutex_lock(&trace_types_lock);
5855         if (!ring_buffer_expanded)
5856                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5857                                                 RING_BUFFER_ALL_CPUS);
5858         mutex_unlock(&trace_types_lock);
5859
5860         return ret;
5861 }
5862
5863 struct trace_option_dentry;
5864
5865 static void
5866 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5867
5868 /*
5869  * Used to clear out the tracer before deletion of an instance.
5870  * Must have trace_types_lock held.
5871  */
5872 static void tracing_set_nop(struct trace_array *tr)
5873 {
5874         if (tr->current_trace == &nop_trace)
5875                 return;
5876
5877         tr->current_trace->enabled--;
5878
5879         if (tr->current_trace->reset)
5880                 tr->current_trace->reset(tr);
5881
5882         tr->current_trace = &nop_trace;
5883 }
5884
5885 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5886 {
5887         /* Only enable if the directory has been created already. */
5888         if (!tr->dir)
5889                 return;
5890
5891         create_trace_option_files(tr, t);
5892 }
5893
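/*
 * tracing_set_tracer - switch @tr to the tracer named @buf
 *
 * Expands the ring buffer if it is still at its boot-time minimum,
 * refuses the switch while trace_pipe readers hold a reference, tears
 * down the old tracer, and allocates or frees the snapshot buffer as
 * the new tracer requires before calling its ->init().
 */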
5894 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5895 {
5896         struct tracer *t;
5897 #ifdef CONFIG_TRACER_MAX_TRACE
5898         bool had_max_tr;
5899 #endif
5900         int ret = 0;
5901
5902         mutex_lock(&trace_types_lock);
5903
5904         if (!ring_buffer_expanded) {
5905                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5906                                                 RING_BUFFER_ALL_CPUS);
5907                 if (ret < 0)
5908                         goto out;
5909                 ret = 0;
5910         }
5911
5912         for (t = trace_types; t; t = t->next) {
5913                 if (strcmp(t->name, buf) == 0)
5914                         break;
5915         }
5916         if (!t) {
5917                 ret = -EINVAL;
5918                 goto out;
5919         }
5920         if (t == tr->current_trace)
5921                 goto out;
5922
5923 #ifdef CONFIG_TRACER_SNAPSHOT
5924         if (t->use_max_tr) {
5925                 arch_spin_lock(&tr->max_lock);
5926                 if (tr->cond_snapshot)
5927                         ret = -EBUSY;
5928                 arch_spin_unlock(&tr->max_lock);
5929                 if (ret)
5930                         goto out;
5931         }
5932 #endif
5933         /* Some tracers won't work if set from the kernel command line */
5934         if (system_state < SYSTEM_RUNNING && t->noboot) {
5935                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5936                         t->name);
5937                 goto out;
5938         }
5939
5940         /* Some tracers are only allowed for the top level buffer */
5941         if (!trace_ok_for_array(t, tr)) {
5942                 ret = -EINVAL;
5943                 goto out;
5944         }
5945
5946         /* If trace pipe files are being read, we can't change the tracer */
5947         if (tr->trace_ref) {
5948                 ret = -EBUSY;
5949                 goto out;
5950         }
5951
5952         trace_branch_disable();
5953
5954         tr->current_trace->enabled--;
5955
5956         if (tr->current_trace->reset)
5957                 tr->current_trace->reset(tr);
5958
5959         /* Current trace needs to be nop_trace before synchronize_rcu */
5960         tr->current_trace = &nop_trace;
5961
5962 #ifdef CONFIG_TRACER_MAX_TRACE
5963         had_max_tr = tr->allocated_snapshot;
5964
5965         if (had_max_tr && !t->use_max_tr) {
5966                 /*
5967                  * We need to make sure that the update_max_tr sees that
5968                  * current_trace changed to nop_trace to keep it from
5969                  * swapping the buffers after we resize it.
5970                  * The update_max_tr is called with interrupts disabled
5971                  * so a synchronize_rcu() is sufficient.
5972                  */
5973                 synchronize_rcu();
5974                 free_snapshot(tr);
5975         }
5976 #endif
5977
5978 #ifdef CONFIG_TRACER_MAX_TRACE
5979         if (t->use_max_tr && !had_max_tr) {
5980                 ret = tracing_alloc_snapshot_instance(tr);
5981                 if (ret < 0)
5982                         goto out;
5983         }
5984 #endif
5985
5986         if (t->init) {
5987                 ret = tracer_init(t, tr);
5988                 if (ret)
5989                         goto out;
5990         }
5991
5992         tr->current_trace = t;
5993         tr->current_trace->enabled++;
5994         trace_branch_enable(tr);
5995  out:
5996         mutex_unlock(&trace_types_lock);
5997
5998         return ret;
5999 }
6000
6001 static ssize_t
6002 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6003                         size_t cnt, loff_t *ppos)
6004 {
6005         struct trace_array *tr = filp->private_data;
6006         char buf[MAX_TRACER_SIZE+1];
6007         int i;
6008         size_t ret;
6009         int err;
6010
6011         ret = cnt;
6012
6013         if (cnt > MAX_TRACER_SIZE)
6014                 cnt = MAX_TRACER_SIZE;
6015
6016         if (copy_from_user(buf, ubuf, cnt))
6017                 return -EFAULT;
6018
6019         buf[cnt] = 0;
6020
6021         /* strip trailing whitespace. */
6022         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6023                 buf[i] = 0;
6024
6025         err = tracing_set_tracer(tr, buf);
6026         if (err)
6027                 return err;
6028
6029         *ppos += ret;
6030
6031         return ret;
6032 }
6033
6034 static ssize_t
6035 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6036                    size_t cnt, loff_t *ppos)
6037 {
6038         char buf[64];
6039         int r;
6040
6041         r = snprintf(buf, sizeof(buf), "%ld\n",
6042                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6043         if (r > sizeof(buf))
6044                 r = sizeof(buf);
6045         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6046 }
6047
6048 static ssize_t
6049 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6050                     size_t cnt, loff_t *ppos)
6051 {
6052         unsigned long val;
6053         int ret;
6054
6055         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6056         if (ret)
6057                 return ret;
6058
6059         *ptr = val * 1000;
6060
6061         return cnt;
6062 }
6063
6064 static ssize_t
6065 tracing_thresh_read(struct file *filp, char __user *ubuf,
6066                     size_t cnt, loff_t *ppos)
6067 {
6068         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6069 }
6070
6071 static ssize_t
6072 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6073                      size_t cnt, loff_t *ppos)
6074 {
6075         struct trace_array *tr = filp->private_data;
6076         int ret;
6077
6078         mutex_lock(&trace_types_lock);
6079         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6080         if (ret < 0)
6081                 goto out;
6082
6083         if (tr->current_trace->update_thresh) {
6084                 ret = tr->current_trace->update_thresh(tr);
6085                 if (ret < 0)
6086                         goto out;
6087         }
6088
6089         ret = cnt;
6090 out:
6091         mutex_unlock(&trace_types_lock);
6092
6093         return ret;
6094 }
6095
6096 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6097
6098 static ssize_t
6099 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6100                      size_t cnt, loff_t *ppos)
6101 {
6102         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6103 }
6104
6105 static ssize_t
6106 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6107                       size_t cnt, loff_t *ppos)
6108 {
6109         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6110 }
6111
6112 #endif
6113
6114 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6115 {
6116         struct trace_array *tr = inode->i_private;
6117         struct trace_iterator *iter;
6118         int ret;
6119
6120         ret = tracing_check_open_get_tr(tr);
6121         if (ret)
6122                 return ret;
6123
6124         mutex_lock(&trace_types_lock);
6125
6126         /* create a buffer to store the information to pass to userspace */
6127         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6128         if (!iter) {
6129                 ret = -ENOMEM;
6130                 __trace_array_put(tr);
6131                 goto out;
6132         }
6133
6134         trace_seq_init(&iter->seq);
6135         iter->trace = tr->current_trace;
6136
6137         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6138                 ret = -ENOMEM;
6139                 goto fail;
6140         }
6141
6142         /* trace pipe does not show start of buffer */
6143         cpumask_setall(iter->started);
6144
6145         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6146                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6147
6148         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6149         if (trace_clocks[tr->clock_id].in_ns)
6150                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6151
6152         iter->tr = tr;
6153         iter->array_buffer = &tr->array_buffer;
6154         iter->cpu_file = tracing_get_cpu(inode);
6155         mutex_init(&iter->mutex);
6156         filp->private_data = iter;
6157
6158         if (iter->trace->pipe_open)
6159                 iter->trace->pipe_open(iter);
6160
6161         nonseekable_open(inode, filp);
6162
6163         tr->trace_ref++;
6164 out:
6165         mutex_unlock(&trace_types_lock);
6166         return ret;
6167
6168 fail:
6169         kfree(iter);
6170         __trace_array_put(tr);
6171         mutex_unlock(&trace_types_lock);
6172         return ret;
6173 }
6174
6175 static int tracing_release_pipe(struct inode *inode, struct file *file)
6176 {
6177         struct trace_iterator *iter = file->private_data;
6178         struct trace_array *tr = inode->i_private;
6179
6180         mutex_lock(&trace_types_lock);
6181
6182         tr->trace_ref--;
6183
6184         if (iter->trace->pipe_close)
6185                 iter->trace->pipe_close(iter);
6186
6187         mutex_unlock(&trace_types_lock);
6188
6189         free_cpumask_var(iter->started);
6190         mutex_destroy(&iter->mutex);
6191         kfree(iter);
6192
6193         trace_array_put(tr);
6194
6195         return 0;
6196 }
6197
6198 static __poll_t
6199 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6200 {
6201         struct trace_array *tr = iter->tr;
6202
6203         /* Iterators are static, they should be filled or empty */
6204         if (trace_buffer_iter(iter, iter->cpu_file))
6205                 return EPOLLIN | EPOLLRDNORM;
6206
6207         if (tr->trace_flags & TRACE_ITER_BLOCK)
6208                 /*
6209                  * Always select as readable when in blocking mode
6210                  */
6211                 return EPOLLIN | EPOLLRDNORM;
6212         else
6213                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6214                                              filp, poll_table);
6215 }
6216
6217 static __poll_t
6218 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6219 {
6220         struct trace_iterator *iter = filp->private_data;
6221
6222         return trace_poll(iter, filp, poll_table);
6223 }
6224
6225 /* Must be called with iter->mutex held. */
6226 static int tracing_wait_pipe(struct file *filp)
6227 {
6228         struct trace_iterator *iter = filp->private_data;
6229         int ret;
6230
6231         while (trace_empty(iter)) {
6232
6233                 if ((filp->f_flags & O_NONBLOCK)) {
6234                         return -EAGAIN;
6235                 }
6236
6237                 /*
6238                  * We block until we read something and tracing is disabled.
6239                  * We still block if tracing is disabled but we have not
6240                  * yet read anything. This allows a user to cat this file, and
6241                  * then enable tracing. But after we have read something,
6242                  * we give an EOF when tracing is again disabled.
6243                  *
6244                  * iter->pos will be 0 if we haven't read anything.
6245                  */
6246                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6247                         break;
6248
6249                 mutex_unlock(&iter->mutex);
6250
6251                 ret = wait_on_pipe(iter, 0);
6252
6253                 mutex_lock(&iter->mutex);
6254
6255                 if (ret)
6256                         return ret;
6257         }
6258
6259         return 1;
6260 }
6261
6262 /*
6263  * Consumer reader.
6264  */
6265 static ssize_t
6266 tracing_read_pipe(struct file *filp, char __user *ubuf,
6267                   size_t cnt, loff_t *ppos)
6268 {
6269         struct trace_iterator *iter = filp->private_data;
6270         ssize_t sret;
6271
6272         /*
6273          * Avoid more than one consumer on a single file descriptor.
6274          * This is just a matter of trace coherency; the ring buffer itself
6275          * is protected.
6276          */
6277         mutex_lock(&iter->mutex);
6278
6279         /* return any leftover data */
6280         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6281         if (sret != -EBUSY)
6282                 goto out;
6283
6284         trace_seq_init(&iter->seq);
6285
6286         if (iter->trace->read) {
6287                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6288                 if (sret)
6289                         goto out;
6290         }
6291
6292 waitagain:
6293         sret = tracing_wait_pipe(filp);
6294         if (sret <= 0)
6295                 goto out;
6296
6297         /* stop when tracing is finished */
6298         if (trace_empty(iter)) {
6299                 sret = 0;
6300                 goto out;
6301         }
6302
6303         if (cnt >= PAGE_SIZE)
6304                 cnt = PAGE_SIZE - 1;
6305
6306         /* reset all but tr, trace, and overruns */
6307         memset(&iter->seq, 0,
6308                sizeof(struct trace_iterator) -
6309                offsetof(struct trace_iterator, seq));
6310         cpumask_clear(iter->started);
6311         trace_seq_init(&iter->seq);
6312         iter->pos = -1;
6313
6314         trace_event_read_lock();
6315         trace_access_lock(iter->cpu_file);
6316         while (trace_find_next_entry_inc(iter) != NULL) {
6317                 enum print_line_t ret;
6318                 int save_len = iter->seq.seq.len;
6319
6320                 ret = print_trace_line(iter);
6321                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6322                         /* don't print partial lines */
6323                         iter->seq.seq.len = save_len;
6324                         break;
6325                 }
6326                 if (ret != TRACE_TYPE_NO_CONSUME)
6327                         trace_consume(iter);
6328
6329                 if (trace_seq_used(&iter->seq) >= cnt)
6330                         break;
6331
6332                 /*
6333                  * If the full flag is set, we reached the end of the trace_seq
6334                  * buffer and should have exited via the partial-line check above.
6335                  * One of the trace_seq_* functions is not being used properly.
6336                  */
6337                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6338                           iter->ent->type);
6339         }
6340         trace_access_unlock(iter->cpu_file);
6341         trace_event_read_unlock();
6342
6343         /* Now copy what we have to the user */
6344         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6345         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6346                 trace_seq_init(&iter->seq);
6347
6348         /*
6349          * If there was nothing to send to user, in spite of consuming trace
6350          * entries, go back to wait for more entries.
6351          */
6352         if (sret == -EBUSY)
6353                 goto waitagain;
6354
6355 out:
6356         mutex_unlock(&iter->mutex);
6357
6358         return sret;
6359 }
6360
6361 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6362                                      unsigned int idx)
6363 {
6364         __free_page(spd->pages[idx]);
6365 }
6366
6367 static size_t
6368 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6369 {
6370         size_t count;
6371         int save_len;
6372         int ret;
6373
6374         /* Seq buffer is page-sized, exactly what we need. */
6375         for (;;) {
6376                 save_len = iter->seq.seq.len;
6377                 ret = print_trace_line(iter);
6378
6379                 if (trace_seq_has_overflowed(&iter->seq)) {
6380                         iter->seq.seq.len = save_len;
6381                         break;
6382                 }
6383
6384                 /*
6385                  * This should not be hit, because it should only
6386                  * be set if the iter->seq overflowed. But check it
6387                  * anyway to be safe.
6388                  */
6389                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6390                         iter->seq.seq.len = save_len;
6391                         break;
6392                 }
6393
6394                 count = trace_seq_used(&iter->seq) - save_len;
6395                 if (rem < count) {
6396                         rem = 0;
6397                         iter->seq.seq.len = save_len;
6398                         break;
6399                 }
6400
6401                 if (ret != TRACE_TYPE_NO_CONSUME)
6402                         trace_consume(iter);
6403                 rem -= count;
6404                 if (!trace_find_next_entry_inc(iter)) {
6405                         rem = 0;
6406                         iter->ent = NULL;
6407                         break;
6408                 }
6409         }
6410
6411         return rem;
6412 }
6413
6414 static ssize_t tracing_splice_read_pipe(struct file *filp,
6415                                         loff_t *ppos,
6416                                         struct pipe_inode_info *pipe,
6417                                         size_t len,
6418                                         unsigned int flags)
6419 {
6420         struct page *pages_def[PIPE_DEF_BUFFERS];
6421         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6422         struct trace_iterator *iter = filp->private_data;
6423         struct splice_pipe_desc spd = {
6424                 .pages          = pages_def,
6425                 .partial        = partial_def,
6426                 .nr_pages       = 0, /* This gets updated below. */
6427                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6428                 .ops            = &default_pipe_buf_ops,
6429                 .spd_release    = tracing_spd_release_pipe,
6430         };
6431         ssize_t ret;
6432         size_t rem;
6433         unsigned int i;
6434
6435         if (splice_grow_spd(pipe, &spd))
6436                 return -ENOMEM;
6437
6438         mutex_lock(&iter->mutex);
6439
6440         if (iter->trace->splice_read) {
6441                 ret = iter->trace->splice_read(iter, filp,
6442                                                ppos, pipe, len, flags);
6443                 if (ret)
6444                         goto out_err;
6445         }
6446
6447         ret = tracing_wait_pipe(filp);
6448         if (ret <= 0)
6449                 goto out_err;
6450
6451         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6452                 ret = -EFAULT;
6453                 goto out_err;
6454         }
6455
6456         trace_event_read_lock();
6457         trace_access_lock(iter->cpu_file);
6458
6459         /* Fill as many pages as possible. */
6460         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6461                 spd.pages[i] = alloc_page(GFP_KERNEL);
6462                 if (!spd.pages[i])
6463                         break;
6464
6465                 rem = tracing_fill_pipe_page(rem, iter);
6466
6467                 /* Copy the data into the page, so we can start over. */
6468                 ret = trace_seq_to_buffer(&iter->seq,
6469                                           page_address(spd.pages[i]),
6470                                           trace_seq_used(&iter->seq));
6471                 if (ret < 0) {
6472                         __free_page(spd.pages[i]);
6473                         break;
6474                 }
6475                 spd.partial[i].offset = 0;
6476                 spd.partial[i].len = trace_seq_used(&iter->seq);
6477
6478                 trace_seq_init(&iter->seq);
6479         }
6480
6481         trace_access_unlock(iter->cpu_file);
6482         trace_event_read_unlock();
6483         mutex_unlock(&iter->mutex);
6484
6485         spd.nr_pages = i;
6486
6487         if (i)
6488                 ret = splice_to_pipe(pipe, &spd);
6489         else
6490                 ret = 0;
6491 out:
6492         splice_shrink_spd(&spd);
6493         return ret;
6494
6495 out_err:
6496         mutex_unlock(&iter->mutex);
6497         goto out;
6498 }
6499
6500 static ssize_t
6501 tracing_entries_read(struct file *filp, char __user *ubuf,
6502                      size_t cnt, loff_t *ppos)
6503 {
6504         struct inode *inode = file_inode(filp);
6505         struct trace_array *tr = inode->i_private;
6506         int cpu = tracing_get_cpu(inode);
6507         char buf[64];
6508         int r = 0;
6509         ssize_t ret;
6510
6511         mutex_lock(&trace_types_lock);
6512
6513         if (cpu == RING_BUFFER_ALL_CPUS) {
6514                 int cpu, buf_size_same;
6515                 unsigned long size;
6516
6517                 size = 0;
6518                 buf_size_same = 1;
6519                 /* check if all CPU buffer sizes are the same */
6520                 for_each_tracing_cpu(cpu) {
6521                         /* fill in the size from the first enabled CPU */
6522                         if (size == 0)
6523                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6524                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6525                                 buf_size_same = 0;
6526                                 break;
6527                         }
6528                 }
6529
6530                 if (buf_size_same) {
6531                         if (!ring_buffer_expanded)
6532                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6533                                             size >> 10,
6534                                             trace_buf_size >> 10);
6535                         else
6536                                 r = sprintf(buf, "%lu\n", size >> 10);
6537                 } else
6538                         r = sprintf(buf, "X\n");
6539         } else
6540                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6541
6542         mutex_unlock(&trace_types_lock);
6543
6544         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6545         return ret;
6546 }
6547
6548 static ssize_t
6549 tracing_entries_write(struct file *filp, const char __user *ubuf,
6550                       size_t cnt, loff_t *ppos)
6551 {
6552         struct inode *inode = file_inode(filp);
6553         struct trace_array *tr = inode->i_private;
6554         unsigned long val;
6555         int ret;
6556
6557         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6558         if (ret)
6559                 return ret;
6560
6561         /* must have at least 1 entry */
6562         if (!val)
6563                 return -EINVAL;
6564
6565         /* value is in KB */
6566         val <<= 10;
6567         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6568         if (ret < 0)
6569                 return ret;
6570
6571         *ppos += cnt;
6572
6573         return cnt;
6574 }
6575
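/*
 * Usage sketch (illustrative only, not compiled): the number written to
 * buffer_size_kb above is parsed as decimal kilobytes (val <<= 10) and must
 * be non-zero. The path and the helper name below are assumptions for the
 * example; per-CPU sizes can be set via per_cpu/cpuN/buffer_size_kb instead.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_buffer_size_kb(const char *kbytes)
 *	{
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		// e.g. kbytes = "1408" resizes each per-CPU buffer to 1408 KiB
 *		ret = write(fd, kbytes, strlen(kbytes));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */
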
6576 static ssize_t
6577 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6578                                 size_t cnt, loff_t *ppos)
6579 {
6580         struct trace_array *tr = filp->private_data;
6581         char buf[64];
6582         int r, cpu;
6583         unsigned long size = 0, expanded_size = 0;
6584
6585         mutex_lock(&trace_types_lock);
6586         for_each_tracing_cpu(cpu) {
6587                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6588                 if (!ring_buffer_expanded)
6589                         expanded_size += trace_buf_size >> 10;
6590         }
6591         if (ring_buffer_expanded)
6592                 r = sprintf(buf, "%lu\n", size);
6593         else
6594                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6595         mutex_unlock(&trace_types_lock);
6596
6597         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6598 }
6599
6600 static ssize_t
6601 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6602                           size_t cnt, loff_t *ppos)
6603 {
6604         /*
6605          * There is no need to read what the user has written; this function
6606          * only exists so that using "echo" on this file does not return an error.
6607          */
6608
6609         *ppos += cnt;
6610
6611         return cnt;
6612 }
6613
6614 static int
6615 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6616 {
6617         struct trace_array *tr = inode->i_private;
6618
6619         /* disable tracing? */
6620         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6621                 tracer_tracing_off(tr);
6622         /* resize the ring buffer to 0 */
6623         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6624
6625         trace_array_put(tr);
6626
6627         return 0;
6628 }
6629
6630 static ssize_t
6631 tracing_mark_write(struct file *filp, const char __user *ubuf,
6632                                         size_t cnt, loff_t *fpos)
6633 {
6634         struct trace_array *tr = filp->private_data;
6635         struct ring_buffer_event *event;
6636         enum event_trigger_type tt = ETT_NONE;
6637         struct trace_buffer *buffer;
6638         struct print_entry *entry;
6639         unsigned long irq_flags;
6640         ssize_t written;
6641         int size;
6642         int len;
6643
6644 /* Used in tracing_mark_raw_write() as well */
6645 #define FAULTED_STR "<faulted>"
6646 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6647
6648         if (tracing_disabled)
6649                 return -EINVAL;
6650
6651         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6652                 return -EINVAL;
6653
6654         if (cnt > TRACE_BUF_SIZE)
6655                 cnt = TRACE_BUF_SIZE;
6656
6657         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6658
6659         local_save_flags(irq_flags);
6660         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6661
6662         /* If less than "<faulted>", then make sure we can still add that */
6663         if (cnt < FAULTED_SIZE)
6664                 size += FAULTED_SIZE - cnt;
6665
6666         buffer = tr->array_buffer.buffer;
6667         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6668                                             irq_flags, preempt_count());
6669         if (unlikely(!event))
6670                 /* Ring buffer disabled, return as if not open for write */
6671                 return -EBADF;
6672
6673         entry = ring_buffer_event_data(event);
6674         entry->ip = _THIS_IP_;
6675
6676         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6677         if (len) {
6678                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6679                 cnt = FAULTED_SIZE;
6680                 written = -EFAULT;
6681         } else
6682                 written = cnt;
6683         len = cnt;
6684
6685         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6686                 /* do not add \n before testing triggers, but add \0 */
6687                 entry->buf[cnt] = '\0';
6688                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6689         }
6690
6691         if (entry->buf[cnt - 1] != '\n') {
6692                 entry->buf[cnt] = '\n';
6693                 entry->buf[cnt + 1] = '\0';
6694         } else
6695                 entry->buf[cnt] = '\0';
6696
6697         if (static_branch_unlikely(&trace_marker_exports_enabled))
6698                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6699         __buffer_unlock_commit(buffer, event);
6700
6701         if (tt)
6702                 event_triggers_post_call(tr->trace_marker_file, tt);
6703
6704         if (written > 0)
6705                 *fpos += written;
6706
6707         return written;
6708 }
6709
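/*
 * Usage sketch (illustrative only, not compiled): writing to trace_marker,
 * the path handled by tracing_mark_write() above. Each write() becomes one
 * TRACE_PRINT event; the text is truncated to TRACE_BUF_SIZE and a trailing
 * newline is appended if missing. The path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int trace_marker_puts(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		// One write() per message. With trace_marker_exports_enabled
 *		// set, the event is also handed to TRACE_EXPORT_MARKER
 *		// exporters via ftrace_exports() before being committed.
 *		ret = write(fd, msg, strlen(msg));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */
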
6710 /* Limit it for now to 3K (including tag) */
6711 #define RAW_DATA_MAX_SIZE (1024*3)
6712
6713 static ssize_t
6714 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6715                                         size_t cnt, loff_t *fpos)
6716 {
6717         struct trace_array *tr = filp->private_data;
6718         struct ring_buffer_event *event;
6719         struct trace_buffer *buffer;
6720         struct raw_data_entry *entry;
6721         unsigned long irq_flags;
6722         ssize_t written;
6723         int size;
6724         int len;
6725
6726 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6727
6728         if (tracing_disabled)
6729                 return -EINVAL;
6730
6731         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6732                 return -EINVAL;
6733
6734         /* The marker must at least have a tag id */
6735         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6736                 return -EINVAL;
6737
6738         if (cnt > TRACE_BUF_SIZE)
6739                 cnt = TRACE_BUF_SIZE;
6740
6741         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6742
6743         local_save_flags(irq_flags);
6744         size = sizeof(*entry) + cnt;
6745         if (cnt < FAULT_SIZE_ID)
6746                 size += FAULT_SIZE_ID - cnt;
6747
6748         buffer = tr->array_buffer.buffer;
6749         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6750                                             irq_flags, preempt_count());
6751         if (!event)
6752                 /* Ring buffer disabled, return as if not open for write */
6753                 return -EBADF;
6754
6755         entry = ring_buffer_event_data(event);
6756
6757         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6758         if (len) {
6759                 entry->id = -1;
6760                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6761                 written = -EFAULT;
6762         } else
6763                 written = cnt;
6764
6765         __buffer_unlock_commit(buffer, event);
6766
6767         if (written > 0)
6768                 *fpos += written;
6769
6770         return written;
6771 }
6772
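/*
 * Usage sketch (illustrative only, not compiled): trace_marker_raw, handled
 * by tracing_mark_raw_write() above, expects a binary payload whose first
 * bytes are an int tag id (at least sizeof(unsigned int), at most
 * RAW_DATA_MAX_SIZE in total). The path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int trace_marker_raw_put(int id, const void *data, size_t len)
 *	{
 *		char buf[256];
 *		ssize_t ret;
 *		int fd;
 *
 *		if (len > sizeof(buf) - sizeof(id))
 *			return -1;
 *		memcpy(buf, &id, sizeof(id));		// leading tag id
 *		memcpy(buf + sizeof(id), data, len);	// opaque payload
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, buf, sizeof(id) + len);
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */
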
6773 static int tracing_clock_show(struct seq_file *m, void *v)
6774 {
6775         struct trace_array *tr = m->private;
6776         int i;
6777
6778         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6779                 seq_printf(m,
6780                         "%s%s%s%s", i ? " " : "",
6781                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6782                         i == tr->clock_id ? "]" : "");
6783         seq_putc(m, '\n');
6784
6785         return 0;
6786 }
6787
6788 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6789 {
6790         int i;
6791
6792         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6793                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6794                         break;
6795         }
6796         if (i == ARRAY_SIZE(trace_clocks))
6797                 return -EINVAL;
6798
6799         mutex_lock(&trace_types_lock);
6800
6801         tr->clock_id = i;
6802
6803         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6804
6805         /*
6806          * New clock may not be consistent with the previous clock.
6807          * Reset the buffer so that it doesn't have incomparable timestamps.
6808          */
6809         tracing_reset_online_cpus(&tr->array_buffer);
6810
6811 #ifdef CONFIG_TRACER_MAX_TRACE
6812         if (tr->max_buffer.buffer)
6813                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6814         tracing_reset_online_cpus(&tr->max_buffer);
6815 #endif
6816
6817         mutex_unlock(&trace_types_lock);
6818
6819         return 0;
6820 }
6821
6822 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6823                                    size_t cnt, loff_t *fpos)
6824 {
6825         struct seq_file *m = filp->private_data;
6826         struct trace_array *tr = m->private;
6827         char buf[64];
6828         const char *clockstr;
6829         int ret;
6830
6831         if (cnt >= sizeof(buf))
6832                 return -EINVAL;
6833
6834         if (copy_from_user(buf, ubuf, cnt))
6835                 return -EFAULT;
6836
6837         buf[cnt] = 0;
6838
6839         clockstr = strstrip(buf);
6840
6841         ret = tracing_set_clock(tr, clockstr);
6842         if (ret)
6843                 return ret;
6844
6845         *fpos += cnt;
6846
6847         return cnt;
6848 }
6849
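/*
 * Usage sketch (illustrative only, not compiled): selecting a ring buffer
 * clock is a plain write of one of the names listed by reading trace_clock
 * (the current clock is shown in brackets). As tracing_set_clock() above
 * notes, the buffers are reset because timestamps from different clocks are
 * not comparable. The path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_trace_clock(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name));	// e.g. "mono"
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */
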
6850 static int tracing_clock_open(struct inode *inode, struct file *file)
6851 {
6852         struct trace_array *tr = inode->i_private;
6853         int ret;
6854
6855         ret = tracing_check_open_get_tr(tr);
6856         if (ret)
6857                 return ret;
6858
6859         ret = single_open(file, tracing_clock_show, inode->i_private);
6860         if (ret < 0)
6861                 trace_array_put(tr);
6862
6863         return ret;
6864 }
6865
6866 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6867 {
6868         struct trace_array *tr = m->private;
6869
6870         mutex_lock(&trace_types_lock);
6871
6872         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6873                 seq_puts(m, "delta [absolute]\n");
6874         else
6875                 seq_puts(m, "[delta] absolute\n");
6876
6877         mutex_unlock(&trace_types_lock);
6878
6879         return 0;
6880 }
6881
6882 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6883 {
6884         struct trace_array *tr = inode->i_private;
6885         int ret;
6886
6887         ret = tracing_check_open_get_tr(tr);
6888         if (ret)
6889                 return ret;
6890
6891         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6892         if (ret < 0)
6893                 trace_array_put(tr);
6894
6895         return ret;
6896 }
6897
6898 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6899 {
6900         int ret = 0;
6901
6902         mutex_lock(&trace_types_lock);
6903
6904         if (abs && tr->time_stamp_abs_ref++)
6905                 goto out;
6906
6907         if (!abs) {
6908                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6909                         ret = -EINVAL;
6910                         goto out;
6911                 }
6912
6913                 if (--tr->time_stamp_abs_ref)
6914                         goto out;
6915         }
6916
6917         ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6918
6919 #ifdef CONFIG_TRACER_MAX_TRACE
6920         if (tr->max_buffer.buffer)
6921                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6922 #endif
6923  out:
6924         mutex_unlock(&trace_types_lock);
6925
6926         return ret;
6927 }
6928
6929 struct ftrace_buffer_info {
6930         struct trace_iterator   iter;
6931         void                    *spare;
6932         unsigned int            spare_cpu;
6933         unsigned int            read;
6934 };
6935
6936 #ifdef CONFIG_TRACER_SNAPSHOT
6937 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6938 {
6939         struct trace_array *tr = inode->i_private;
6940         struct trace_iterator *iter;
6941         struct seq_file *m;
6942         int ret;
6943
6944         ret = tracing_check_open_get_tr(tr);
6945         if (ret)
6946                 return ret;
6947
6948         if (file->f_mode & FMODE_READ) {
6949                 iter = __tracing_open(inode, file, true);
6950                 if (IS_ERR(iter))
6951                         ret = PTR_ERR(iter);
6952         } else {
6953                 /* Writes still need the seq_file to hold the private data */
6954                 ret = -ENOMEM;
6955                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6956                 if (!m)
6957                         goto out;
6958                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6959                 if (!iter) {
6960                         kfree(m);
6961                         goto out;
6962                 }
6963                 ret = 0;
6964
6965                 iter->tr = tr;
6966                 iter->array_buffer = &tr->max_buffer;
6967                 iter->cpu_file = tracing_get_cpu(inode);
6968                 m->private = iter;
6969                 file->private_data = m;
6970         }
6971 out:
6972         if (ret < 0)
6973                 trace_array_put(tr);
6974
6975         return ret;
6976 }
6977
6978 static ssize_t
6979 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6980                        loff_t *ppos)
6981 {
6982         struct seq_file *m = filp->private_data;
6983         struct trace_iterator *iter = m->private;
6984         struct trace_array *tr = iter->tr;
6985         unsigned long val;
6986         int ret;
6987
6988         ret = tracing_update_buffers();
6989         if (ret < 0)
6990                 return ret;
6991
6992         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6993         if (ret)
6994                 return ret;
6995
6996         mutex_lock(&trace_types_lock);
6997
6998         if (tr->current_trace->use_max_tr) {
6999                 ret = -EBUSY;
7000                 goto out;
7001         }
7002
7003         arch_spin_lock(&tr->max_lock);
7004         if (tr->cond_snapshot)
7005                 ret = -EBUSY;
7006         arch_spin_unlock(&tr->max_lock);
7007         if (ret)
7008                 goto out;
7009
7010         switch (val) {
7011         case 0:
7012                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7013                         ret = -EINVAL;
7014                         break;
7015                 }
7016                 if (tr->allocated_snapshot)
7017                         free_snapshot(tr);
7018                 break;
7019         case 1:
7020 /* Only allow per-cpu swap if the ring buffer supports it */
7021 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7022                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7023                         ret = -EINVAL;
7024                         break;
7025                 }
7026 #endif
7027                 if (tr->allocated_snapshot)
7028                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7029                                         &tr->array_buffer, iter->cpu_file);
7030                 else
7031                         ret = tracing_alloc_snapshot_instance(tr);
7032                 if (ret < 0)
7033                         break;
7034                 local_irq_disable();
7035                 /* Now, we're going to swap */
7036                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7037                         update_max_tr(tr, current, smp_processor_id(), NULL);
7038                 else
7039                         update_max_tr_single(tr, current, iter->cpu_file);
7040                 local_irq_enable();
7041                 break;
7042         default:
7043                 if (tr->allocated_snapshot) {
7044                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7045                                 tracing_reset_online_cpus(&tr->max_buffer);
7046                         else
7047                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7048                 }
7049                 break;
7050         }
7051
7052         if (ret >= 0) {
7053                 *ppos += cnt;
7054                 ret = cnt;
7055         }
7056 out:
7057         mutex_unlock(&trace_types_lock);
7058         return ret;
7059 }
7060
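/*
 * Usage sketch (illustrative only, not compiled): the switch statement in
 * tracing_snapshot_write() above defines the snapshot file's interface:
 * writing 0 frees the snapshot buffer, 1 allocates it if needed and swaps it
 * with the live buffer, and any other value clears the snapshot contents.
 * The path and helper name are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, "1", 1);	// swap live buffer into snapshot
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */
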
7061 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7062 {
7063         struct seq_file *m = file->private_data;
7064         int ret;
7065
7066         ret = tracing_release(inode, file);
7067
7068         if (file->f_mode & FMODE_READ)
7069                 return ret;
7070
7071         /* If write only, the seq_file is just a stub */
7072         if (m)
7073                 kfree(m->private);
7074         kfree(m);
7075
7076         return 0;
7077 }
7078
7079 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7080 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7081                                     size_t count, loff_t *ppos);
7082 static int tracing_buffers_release(struct inode *inode, struct file *file);
7083 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7084                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7085
7086 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7087 {
7088         struct ftrace_buffer_info *info;
7089         int ret;
7090
7091         /* The following checks for tracefs lockdown */
7092         ret = tracing_buffers_open(inode, filp);
7093         if (ret < 0)
7094                 return ret;
7095
7096         info = filp->private_data;
7097
7098         if (info->iter.trace->use_max_tr) {
7099                 tracing_buffers_release(inode, filp);
7100                 return -EBUSY;
7101         }
7102
7103         info->iter.snapshot = true;
7104         info->iter.array_buffer = &info->iter.tr->max_buffer;
7105
7106         return ret;
7107 }
7108
7109 #endif /* CONFIG_TRACER_SNAPSHOT */
7110
7111
7112 static const struct file_operations tracing_thresh_fops = {
7113         .open           = tracing_open_generic,
7114         .read           = tracing_thresh_read,
7115         .write          = tracing_thresh_write,
7116         .llseek         = generic_file_llseek,
7117 };
7118
7119 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7120 static const struct file_operations tracing_max_lat_fops = {
7121         .open           = tracing_open_generic,
7122         .read           = tracing_max_lat_read,
7123         .write          = tracing_max_lat_write,
7124         .llseek         = generic_file_llseek,
7125 };
7126 #endif
7127
7128 static const struct file_operations set_tracer_fops = {
7129         .open           = tracing_open_generic,
7130         .read           = tracing_set_trace_read,
7131         .write          = tracing_set_trace_write,
7132         .llseek         = generic_file_llseek,
7133 };
7134
7135 static const struct file_operations tracing_pipe_fops = {
7136         .open           = tracing_open_pipe,
7137         .poll           = tracing_poll_pipe,
7138         .read           = tracing_read_pipe,
7139         .splice_read    = tracing_splice_read_pipe,
7140         .release        = tracing_release_pipe,
7141         .llseek         = no_llseek,
7142 };
7143
7144 static const struct file_operations tracing_entries_fops = {
7145         .open           = tracing_open_generic_tr,
7146         .read           = tracing_entries_read,
7147         .write          = tracing_entries_write,
7148         .llseek         = generic_file_llseek,
7149         .release        = tracing_release_generic_tr,
7150 };
7151
7152 static const struct file_operations tracing_total_entries_fops = {
7153         .open           = tracing_open_generic_tr,
7154         .read           = tracing_total_entries_read,
7155         .llseek         = generic_file_llseek,
7156         .release        = tracing_release_generic_tr,
7157 };
7158
7159 static const struct file_operations tracing_free_buffer_fops = {
7160         .open           = tracing_open_generic_tr,
7161         .write          = tracing_free_buffer_write,
7162         .release        = tracing_free_buffer_release,
7163 };
7164
7165 static const struct file_operations tracing_mark_fops = {
7166         .open           = tracing_open_generic_tr,
7167         .write          = tracing_mark_write,
7168         .llseek         = generic_file_llseek,
7169         .release        = tracing_release_generic_tr,
7170 };
7171
7172 static const struct file_operations tracing_mark_raw_fops = {
7173         .open           = tracing_open_generic_tr,
7174         .write          = tracing_mark_raw_write,
7175         .llseek         = generic_file_llseek,
7176         .release        = tracing_release_generic_tr,
7177 };
7178
7179 static const struct file_operations trace_clock_fops = {
7180         .open           = tracing_clock_open,
7181         .read           = seq_read,
7182         .llseek         = seq_lseek,
7183         .release        = tracing_single_release_tr,
7184         .write          = tracing_clock_write,
7185 };
7186
7187 static const struct file_operations trace_time_stamp_mode_fops = {
7188         .open           = tracing_time_stamp_mode_open,
7189         .read           = seq_read,
7190         .llseek         = seq_lseek,
7191         .release        = tracing_single_release_tr,
7192 };
7193
7194 #ifdef CONFIG_TRACER_SNAPSHOT
7195 static const struct file_operations snapshot_fops = {
7196         .open           = tracing_snapshot_open,
7197         .read           = seq_read,
7198         .write          = tracing_snapshot_write,
7199         .llseek         = tracing_lseek,
7200         .release        = tracing_snapshot_release,
7201 };
7202
7203 static const struct file_operations snapshot_raw_fops = {
7204         .open           = snapshot_raw_open,
7205         .read           = tracing_buffers_read,
7206         .release        = tracing_buffers_release,
7207         .splice_read    = tracing_buffers_splice_read,
7208         .llseek         = no_llseek,
7209 };
7210
7211 #endif /* CONFIG_TRACER_SNAPSHOT */
7212
7213 #define TRACING_LOG_ERRS_MAX    8
7214 #define TRACING_LOG_LOC_MAX     128
7215
7216 #define CMD_PREFIX "  Command: "
7217
7218 struct err_info {
7219         const char      **errs; /* ptr to loc-specific array of err strings */
7220         u8              type;   /* index into errs -> specific err string */
7221         u8              pos;    /* caret position; cmd is MAX_FILTER_STR_VAL (256) bytes */
7222         u64             ts;
7223 };
7224
7225 struct tracing_log_err {
7226         struct list_head        list;
7227         struct err_info         info;
7228         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7229         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7230 };
7231
7232 static DEFINE_MUTEX(tracing_err_log_lock);
7233
7234 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7235 {
7236         struct tracing_log_err *err;
7237
7238         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7239                 err = kzalloc(sizeof(*err), GFP_KERNEL);
7240                 if (!err)
7241                         err = ERR_PTR(-ENOMEM);
7242                 tr->n_err_log_entries++;
7243
7244                 return err;
7245         }
7246
7247         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7248         list_del(&err->list);
7249
7250         return err;
7251 }
7252
7253 /**
7254  * err_pos - find the position of a string within a command for error careting
7255  * @cmd: The tracing command that caused the error
7256  * @str: The string to position the caret at within @cmd
7257  *
7258  * Finds the position of the first occurrence of @str within @cmd.  The
7259  * return value can be passed to tracing_log_err() for caret placement
7260  * within @cmd.
7261  *
7262  * Returns the index within @cmd of the first occurrence of @str or 0
7263  * if @str was not found.
7264  */
7265 unsigned int err_pos(char *cmd, const char *str)
7266 {
7267         char *found;
7268
7269         if (WARN_ON(!strlen(cmd)))
7270                 return 0;
7271
7272         found = strstr(cmd, str);
7273         if (found)
7274                 return found - cmd;
7275
7276         return 0;
7277 }
7278
7279 /**
7280  * tracing_log_err - write an error to the tracing error log
7281  * @tr: The associated trace array for the error (NULL for top level array)
7282  * @loc: A string describing where the error occurred
7283  * @cmd: The tracing command that caused the error
7284  * @errs: The array of loc-specific static error strings
7285  * @type: The index into errs[], which produces the specific static err string
7286  * @pos: The position the caret should be placed in the cmd
7287  *
7288  * Writes an error into tracing/error_log of the form:
7289  *
7290  * <loc>: error: <text>
7291  *   Command: <cmd>
7292  *              ^
7293  *
7294  * tracing/error_log is a small log file containing the last
7295  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7296  * unless there has been a tracing error, and the error log can be
7297  * cleared and have its memory freed by writing the empty string in
7298  * truncation mode to it, i.e. echo > tracing/error_log.
7299  *
7300  * NOTE: the @errs array along with the @type param are used to
7301  * produce a static error string - this string is not copied and saved
7302  * when the error is logged - only a pointer to it is saved.  See
7303  * existing callers for examples of how static strings are typically
7304  * defined for use with tracing_log_err().
7305  */
7306 void tracing_log_err(struct trace_array *tr,
7307                      const char *loc, const char *cmd,
7308                      const char **errs, u8 type, u8 pos)
7309 {
7310         struct tracing_log_err *err;
7311
7312         if (!tr)
7313                 tr = &global_trace;
7314
7315         mutex_lock(&tracing_err_log_lock);
7316         err = get_tracing_log_err(tr);
7317         if (PTR_ERR(err) == -ENOMEM) {
7318                 mutex_unlock(&tracing_err_log_lock);
7319                 return;
7320         }
7321
7322         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7323         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7324
7325         err->info.errs = errs;
7326         err->info.type = type;
7327         err->info.pos = pos;
7328         err->info.ts = local_clock();
7329
7330         list_add_tail(&err->list, &tr->err_log);
7331         mutex_unlock(&tracing_err_log_lock);
7332 }
7333
7334 static void clear_tracing_err_log(struct trace_array *tr)
7335 {
7336         struct tracing_log_err *err, *next;
7337
7338         mutex_lock(&tracing_err_log_lock);
7339         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7340                 list_del(&err->list);
7341                 kfree(err);
7342         }
7343
7344         tr->n_err_log_entries = 0;
7345         mutex_unlock(&tracing_err_log_lock);
7346 }
7347
7348 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7349 {
7350         struct trace_array *tr = m->private;
7351
7352         mutex_lock(&tracing_err_log_lock);
7353
7354         return seq_list_start(&tr->err_log, *pos);
7355 }
7356
7357 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7358 {
7359         struct trace_array *tr = m->private;
7360
7361         return seq_list_next(v, &tr->err_log, pos);
7362 }
7363
7364 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7365 {
7366         mutex_unlock(&tracing_err_log_lock);
7367 }
7368
7369 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7370 {
7371         u8 i;
7372
7373         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7374                 seq_putc(m, ' ');
7375         for (i = 0; i < pos; i++)
7376                 seq_putc(m, ' ');
7377         seq_puts(m, "^\n");
7378 }
7379
7380 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7381 {
7382         struct tracing_log_err *err = v;
7383
7384         if (err) {
7385                 const char *err_text = err->info.errs[err->info.type];
7386                 u64 sec = err->info.ts;
7387                 u32 nsec;
7388
7389                 nsec = do_div(sec, NSEC_PER_SEC);
7390                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7391                            err->loc, err_text);
7392                 seq_printf(m, "%s", err->cmd);
7393                 tracing_err_log_show_pos(m, err->info.pos);
7394         }
7395
7396         return 0;
7397 }
7398
7399 static const struct seq_operations tracing_err_log_seq_ops = {
7400         .start  = tracing_err_log_seq_start,
7401         .next   = tracing_err_log_seq_next,
7402         .stop   = tracing_err_log_seq_stop,
7403         .show   = tracing_err_log_seq_show
7404 };
7405
7406 static int tracing_err_log_open(struct inode *inode, struct file *file)
7407 {
7408         struct trace_array *tr = inode->i_private;
7409         int ret = 0;
7410
7411         ret = tracing_check_open_get_tr(tr);
7412         if (ret)
7413                 return ret;
7414
7415         /* If this file was opened for write, then erase contents */
7416         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7417                 clear_tracing_err_log(tr);
7418
7419         if (file->f_mode & FMODE_READ) {
7420                 ret = seq_open(file, &tracing_err_log_seq_ops);
7421                 if (!ret) {
7422                         struct seq_file *m = file->private_data;
7423                         m->private = tr;
7424                 } else {
7425                         trace_array_put(tr);
7426                 }
7427         }
7428         return ret;
7429 }
7430
7431 static ssize_t tracing_err_log_write(struct file *file,
7432                                      const char __user *buffer,
7433                                      size_t count, loff_t *ppos)
7434 {
7435         return count;
7436 }
7437
7438 static int tracing_err_log_release(struct inode *inode, struct file *file)
7439 {
7440         struct trace_array *tr = inode->i_private;
7441
7442         trace_array_put(tr);
7443
7444         if (file->f_mode & FMODE_READ)
7445                 seq_release(inode, file);
7446
7447         return 0;
7448 }
7449
7450 static const struct file_operations tracing_err_log_fops = {
7451         .open           = tracing_err_log_open,
7452         .write          = tracing_err_log_write,
7453         .read           = seq_read,
7454         .llseek         = seq_lseek,
7455         .release        = tracing_err_log_release,
7456 };
7457
7458 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7459 {
7460         struct trace_array *tr = inode->i_private;
7461         struct ftrace_buffer_info *info;
7462         int ret;
7463
7464         ret = tracing_check_open_get_tr(tr);
7465         if (ret)
7466                 return ret;
7467
7468         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7469         if (!info) {
7470                 trace_array_put(tr);
7471                 return -ENOMEM;
7472         }
7473
7474         mutex_lock(&trace_types_lock);
7475
7476         info->iter.tr           = tr;
7477         info->iter.cpu_file     = tracing_get_cpu(inode);
7478         info->iter.trace        = tr->current_trace;
7479         info->iter.array_buffer = &tr->array_buffer;
7480         info->spare             = NULL;
7481         /* Force reading ring buffer for first read */
7482         info->read              = (unsigned int)-1;
7483
7484         filp->private_data = info;
7485
7486         tr->trace_ref++;
7487
7488         mutex_unlock(&trace_types_lock);
7489
7490         ret = nonseekable_open(inode, filp);
7491         if (ret < 0)
7492                 trace_array_put(tr);
7493
7494         return ret;
7495 }
7496
7497 static __poll_t
7498 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7499 {
7500         struct ftrace_buffer_info *info = filp->private_data;
7501         struct trace_iterator *iter = &info->iter;
7502
7503         return trace_poll(iter, filp, poll_table);
7504 }
7505
7506 static ssize_t
7507 tracing_buffers_read(struct file *filp, char __user *ubuf,
7508                      size_t count, loff_t *ppos)
7509 {
7510         struct ftrace_buffer_info *info = filp->private_data;
7511         struct trace_iterator *iter = &info->iter;
7512         ssize_t ret = 0;
7513         ssize_t size;
7514
7515         if (!count)
7516                 return 0;
7517
7518 #ifdef CONFIG_TRACER_MAX_TRACE
7519         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7520                 return -EBUSY;
7521 #endif
7522
7523         if (!info->spare) {
7524                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7525                                                           iter->cpu_file);
7526                 if (IS_ERR(info->spare)) {
7527                         ret = PTR_ERR(info->spare);
7528                         info->spare = NULL;
7529                 } else {
7530                         info->spare_cpu = iter->cpu_file;
7531                 }
7532         }
7533         if (!info->spare)
7534                 return ret;
7535
7536         /* Do we have previous read data to read? */
7537         if (info->read < PAGE_SIZE)
7538                 goto read;
7539
7540  again:
7541         trace_access_lock(iter->cpu_file);
7542         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7543                                     &info->spare,
7544                                     count,
7545                                     iter->cpu_file, 0);
7546         trace_access_unlock(iter->cpu_file);
7547
7548         if (ret < 0) {
7549                 if (trace_empty(iter)) {
7550                         if ((filp->f_flags & O_NONBLOCK))
7551                                 return -EAGAIN;
7552
7553                         ret = wait_on_pipe(iter, 0);
7554                         if (ret)
7555                                 return ret;
7556
7557                         goto again;
7558                 }
7559                 return 0;
7560         }
7561
7562         info->read = 0;
7563  read:
7564         size = PAGE_SIZE - info->read;
7565         if (size > count)
7566                 size = count;
7567
7568         ret = copy_to_user(ubuf, info->spare + info->read, size);
7569         if (ret == size)
7570                 return -EFAULT;
7571
7572         size -= ret;
7573
7574         *ppos += size;
7575         info->read += size;
7576
7577         return size;
7578 }
7579
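/*
 * Usage sketch (illustrative only, not compiled): tracing_buffers_read()
 * above backs the per-CPU trace_pipe_raw files, which hand out whole ring
 * buffer pages in binary form rather than formatted text. The path below and
 * the 4 KiB page size are assumptions for the example.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char page[4096];	// one ring buffer page per read()
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			fwrite(page, 1, n, stdout);	// raw binary page data
 *		close(fd);
 *		return 0;
 *	}
 */
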
7580 static int tracing_buffers_release(struct inode *inode, struct file *file)
7581 {
7582         struct ftrace_buffer_info *info = file->private_data;
7583         struct trace_iterator *iter = &info->iter;
7584
7585         mutex_lock(&trace_types_lock);
7586
7587         iter->tr->trace_ref--;
7588
7589         __trace_array_put(iter->tr);
7590
7591         if (info->spare)
7592                 ring_buffer_free_read_page(iter->array_buffer->buffer,
7593                                            info->spare_cpu, info->spare);
7594         kvfree(info);
7595
7596         mutex_unlock(&trace_types_lock);
7597
7598         return 0;
7599 }
7600
7601 struct buffer_ref {
7602         struct trace_buffer     *buffer;
7603         void                    *page;
7604         int                     cpu;
7605         refcount_t              refcount;
7606 };
7607
7608 static void buffer_ref_release(struct buffer_ref *ref)
7609 {
7610         if (!refcount_dec_and_test(&ref->refcount))
7611                 return;
7612         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7613         kfree(ref);
7614 }
7615
7616 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7617                                     struct pipe_buffer *buf)
7618 {
7619         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7620
7621         buffer_ref_release(ref);
7622         buf->private = 0;
7623 }
7624
7625 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7626                                 struct pipe_buffer *buf)
7627 {
7628         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7629
7630         if (refcount_read(&ref->refcount) > INT_MAX/2)
7631                 return false;
7632
7633         refcount_inc(&ref->refcount);
7634         return true;
7635 }
7636
7637 /* Pipe buffer operations for a buffer. */
7638 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7639         .release                = buffer_pipe_buf_release,
7640         .get                    = buffer_pipe_buf_get,
7641 };
7642
7643 /*
7644  * Callback from splice_to_pipe(), if we need to release some pages
7645  * at the end of the spd in case we errored out while filling the pipe.
7646  */
7647 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7648 {
7649         struct buffer_ref *ref =
7650                 (struct buffer_ref *)spd->partial[i].private;
7651
7652         buffer_ref_release(ref);
7653         spd->partial[i].private = 0;
7654 }
7655
7656 static ssize_t
7657 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7658                             struct pipe_inode_info *pipe, size_t len,
7659                             unsigned int flags)
7660 {
7661         struct ftrace_buffer_info *info = file->private_data;
7662         struct trace_iterator *iter = &info->iter;
7663         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7664         struct page *pages_def[PIPE_DEF_BUFFERS];
7665         struct splice_pipe_desc spd = {
7666                 .pages          = pages_def,
7667                 .partial        = partial_def,
7668                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7669                 .ops            = &buffer_pipe_buf_ops,
7670                 .spd_release    = buffer_spd_release,
7671         };
7672         struct buffer_ref *ref;
7673         int entries, i;
7674         ssize_t ret = 0;
7675
7676 #ifdef CONFIG_TRACER_MAX_TRACE
7677         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7678                 return -EBUSY;
7679 #endif
7680
7681         if (*ppos & (PAGE_SIZE - 1))
7682                 return -EINVAL;
7683
7684         if (len & (PAGE_SIZE - 1)) {
7685                 if (len < PAGE_SIZE)
7686                         return -EINVAL;
7687                 len &= PAGE_MASK;
7688         }
7689
7690         if (splice_grow_spd(pipe, &spd))
7691                 return -ENOMEM;
7692
7693  again:
7694         trace_access_lock(iter->cpu_file);
7695         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7696
7697         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7698                 struct page *page;
7699                 int r;
7700
7701                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7702                 if (!ref) {
7703                         ret = -ENOMEM;
7704                         break;
7705                 }
7706
7707                 refcount_set(&ref->refcount, 1);
7708                 ref->buffer = iter->array_buffer->buffer;
7709                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7710                 if (IS_ERR(ref->page)) {
7711                         ret = PTR_ERR(ref->page);
7712                         ref->page = NULL;
7713                         kfree(ref);
7714                         break;
7715                 }
7716                 ref->cpu = iter->cpu_file;
7717
7718                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7719                                           len, iter->cpu_file, 1);
7720                 if (r < 0) {
7721                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7722                                                    ref->page);
7723                         kfree(ref);
7724                         break;
7725                 }
7726
7727                 page = virt_to_page(ref->page);
7728
7729                 spd.pages[i] = page;
7730                 spd.partial[i].len = PAGE_SIZE;
7731                 spd.partial[i].offset = 0;
7732                 spd.partial[i].private = (unsigned long)ref;
7733                 spd.nr_pages++;
7734                 *ppos += PAGE_SIZE;
7735
7736                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7737         }
7738
7739         trace_access_unlock(iter->cpu_file);
7740         spd.nr_pages = i;
7741
7742         /* did we read anything? */
7743         if (!spd.nr_pages) {
7744                 if (ret)
7745                         goto out;
7746
7747                 ret = -EAGAIN;
7748                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7749                         goto out;
7750
7751                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7752                 if (ret)
7753                         goto out;
7754
7755                 goto again;
7756         }
7757
7758         ret = splice_to_pipe(pipe, &spd);
7759 out:
7760         splice_shrink_spd(&spd);
7761
7762         return ret;
7763 }
7764
7765 static const struct file_operations tracing_buffers_fops = {
7766         .open           = tracing_buffers_open,
7767         .read           = tracing_buffers_read,
7768         .poll           = tracing_buffers_poll,
7769         .release        = tracing_buffers_release,
7770         .splice_read    = tracing_buffers_splice_read,
7771         .llseek         = no_llseek,
7772 };
7773
7774 static ssize_t
7775 tracing_stats_read(struct file *filp, char __user *ubuf,
7776                    size_t count, loff_t *ppos)
7777 {
7778         struct inode *inode = file_inode(filp);
7779         struct trace_array *tr = inode->i_private;
7780         struct array_buffer *trace_buf = &tr->array_buffer;
7781         int cpu = tracing_get_cpu(inode);
7782         struct trace_seq *s;
7783         unsigned long cnt;
7784         unsigned long long t;
7785         unsigned long usec_rem;
7786
7787         s = kmalloc(sizeof(*s), GFP_KERNEL);
7788         if (!s)
7789                 return -ENOMEM;
7790
7791         trace_seq_init(s);
7792
7793         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7794         trace_seq_printf(s, "entries: %ld\n", cnt);
7795
7796         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7797         trace_seq_printf(s, "overrun: %ld\n", cnt);
7798
7799         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7800         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7801
7802         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7803         trace_seq_printf(s, "bytes: %ld\n", cnt);
7804
7805         if (trace_clocks[tr->clock_id].in_ns) {
7806                 /* local or global for trace_clock */
7807                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7808                 usec_rem = do_div(t, USEC_PER_SEC);
7809                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7810                                                                 t, usec_rem);
7811
7812                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7813                 usec_rem = do_div(t, USEC_PER_SEC);
7814                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7815         } else {
7816                 /* counter or tsc mode for trace_clock */
7817                 trace_seq_printf(s, "oldest event ts: %llu\n",
7818                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7819
7820                 trace_seq_printf(s, "now ts: %llu\n",
7821                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7822         }
7823
7824         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7825         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7826
7827         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7828         trace_seq_printf(s, "read events: %ld\n", cnt);
7829
7830         count = simple_read_from_buffer(ubuf, count, ppos,
7831                                         s->buffer, trace_seq_used(s));
7832
7833         kfree(s);
7834
7835         return count;
7836 }
7837
7838 static const struct file_operations tracing_stats_fops = {
7839         .open           = tracing_open_generic_tr,
7840         .read           = tracing_stats_read,
7841         .llseek         = generic_file_llseek,
7842         .release        = tracing_release_generic_tr,
7843 };
7844
7845 #ifdef CONFIG_DYNAMIC_FTRACE
7846
7847 static ssize_t
7848 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7849                   size_t cnt, loff_t *ppos)
7850 {
7851         ssize_t ret;
7852         char *buf;
7853         int r;
7854
7855         /* 256 should be plenty to hold the amount needed */
7856         buf = kmalloc(256, GFP_KERNEL);
7857         if (!buf)
7858                 return -ENOMEM;
7859
7860         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7861                       ftrace_update_tot_cnt,
7862                       ftrace_number_of_pages,
7863                       ftrace_number_of_groups);
7864
7865         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7866         kfree(buf);
7867         return ret;
7868 }
7869
7870 static const struct file_operations tracing_dyn_info_fops = {
7871         .open           = tracing_open_generic,
7872         .read           = tracing_read_dyn_info,
7873         .llseek         = generic_file_llseek,
7874 };
7875 #endif /* CONFIG_DYNAMIC_FTRACE */
7876
7877 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7878 static void
7879 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7880                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7881                 void *data)
7882 {
7883         tracing_snapshot_instance(tr);
7884 }
7885
7886 static void
7887 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7888                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7889                       void *data)
7890 {
7891         struct ftrace_func_mapper *mapper = data;
7892         long *count = NULL;
7893
7894         if (mapper)
7895                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7896
7897         if (count) {
7898
7899                 if (*count <= 0)
7900                         return;
7901
7902                 (*count)--;
7903         }
7904
7905         tracing_snapshot_instance(tr);
7906 }
7907
7908 static int
7909 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7910                       struct ftrace_probe_ops *ops, void *data)
7911 {
7912         struct ftrace_func_mapper *mapper = data;
7913         long *count = NULL;
7914
7915         seq_printf(m, "%ps:", (void *)ip);
7916
7917         seq_puts(m, "snapshot");
7918
7919         if (mapper)
7920                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7921
7922         if (count)
7923                 seq_printf(m, ":count=%ld\n", *count);
7924         else
7925                 seq_puts(m, ":unlimited\n");
7926
7927         return 0;
7928 }
7929
7930 static int
7931 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7932                      unsigned long ip, void *init_data, void **data)
7933 {
7934         struct ftrace_func_mapper *mapper = *data;
7935
7936         if (!mapper) {
7937                 mapper = allocate_ftrace_func_mapper();
7938                 if (!mapper)
7939                         return -ENOMEM;
7940                 *data = mapper;
7941         }
7942
7943         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7944 }
7945
7946 static void
7947 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7948                      unsigned long ip, void *data)
7949 {
7950         struct ftrace_func_mapper *mapper = data;
7951
7952         if (!ip) {
7953                 if (!mapper)
7954                         return;
7955                 free_ftrace_func_mapper(mapper, NULL);
7956                 return;
7957         }
7958
7959         ftrace_func_mapper_remove_ip(mapper, ip);
7960 }
7961
7962 static struct ftrace_probe_ops snapshot_probe_ops = {
7963         .func                   = ftrace_snapshot,
7964         .print                  = ftrace_snapshot_print,
7965 };
7966
7967 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7968         .func                   = ftrace_count_snapshot,
7969         .print                  = ftrace_snapshot_print,
7970         .init                   = ftrace_snapshot_init,
7971         .free                   = ftrace_snapshot_free,
7972 };
7973
7974 static int
7975 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7976                                char *glob, char *cmd, char *param, int enable)
7977 {
7978         struct ftrace_probe_ops *ops;
7979         void *count = (void *)-1;
7980         char *number;
7981         int ret;
7982
7983         if (!tr)
7984                 return -ENODEV;
7985
7986         /* hash funcs only work with set_ftrace_filter */
7987         if (!enable)
7988                 return -EINVAL;
7989
7990         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7991
7992         if (glob[0] == '!')
7993                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7994
7995         if (!param)
7996                 goto out_reg;
7997
7998         number = strsep(&param, ":");
7999
8000         if (!strlen(number))
8001                 goto out_reg;
8002
8003         /*
8004          * We use the callback data field (which is a pointer)
8005          * as our counter.
8006          */
8007         ret = kstrtoul(number, 0, (unsigned long *)&count);
8008         if (ret)
8009                 return ret;
8010
8011  out_reg:
8012         ret = tracing_alloc_snapshot_instance(tr);
8013         if (ret < 0)
8014                 goto out;
8015
8016         ret = register_ftrace_function_probe(glob, tr, ops, count);
8017
8018  out:
8019         return ret < 0 ? ret : 0;
8020 }
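
/*
 * Editor's note (illustrative sketch, not part of the original source):
 * the "snapshot" command registered below is driven from user space
 * through set_ftrace_filter. Assuming tracefs is mounted at
 * /sys/kernel/tracing, a session might look like:
 *
 *   # take a snapshot every time do_sys_open() is hit
 *   echo 'do_sys_open:snapshot' > set_ftrace_filter
 *
 *   # or only for the first 5 hits, via the optional ":count" parameter
 *   echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 *   # remove the probe again (handled by the '!' branch above)
 *   echo '!do_sys_open:snapshot' > set_ftrace_filter
 */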
8021
8022 static struct ftrace_func_command ftrace_snapshot_cmd = {
8023         .name                   = "snapshot",
8024         .func                   = ftrace_trace_snapshot_callback,
8025 };
8026
8027 static __init int register_snapshot_cmd(void)
8028 {
8029         return register_ftrace_command(&ftrace_snapshot_cmd);
8030 }
8031 #else
8032 static inline __init int register_snapshot_cmd(void) { return 0; }
8033 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8034
8035 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8036 {
8037         if (WARN_ON(!tr->dir))
8038                 return ERR_PTR(-ENODEV);
8039
8040         /* Top directory uses NULL as the parent */
8041         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8042                 return NULL;
8043
8044         /* All sub buffers have a descriptor */
8045         return tr->dir;
8046 }
8047
8048 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8049 {
8050         struct dentry *d_tracer;
8051
8052         if (tr->percpu_dir)
8053                 return tr->percpu_dir;
8054
8055         d_tracer = tracing_get_dentry(tr);
8056         if (IS_ERR(d_tracer))
8057                 return NULL;
8058
8059         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8060
8061         MEM_FAIL(!tr->percpu_dir,
8062                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8063
8064         return tr->percpu_dir;
8065 }
8066
8067 static struct dentry *
8068 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8069                       void *data, long cpu, const struct file_operations *fops)
8070 {
8071         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8072
8073         if (ret) /* See tracing_get_cpu() */
8074                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8075         return ret;
8076 }
8077
8078 static void
8079 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8080 {
8081         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8082         struct dentry *d_cpu;
8083         char cpu_dir[30]; /* 30 characters should be more than enough */
8084
8085         if (!d_percpu)
8086                 return;
8087
8088         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8089         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8090         if (!d_cpu) {
8091                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8092                 return;
8093         }
8094
8095         /* per cpu trace_pipe */
8096         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8097                                 tr, cpu, &tracing_pipe_fops);
8098
8099         /* per cpu trace */
8100         trace_create_cpu_file("trace", 0644, d_cpu,
8101                                 tr, cpu, &tracing_fops);
8102
8103         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8104                                 tr, cpu, &tracing_buffers_fops);
8105
8106         trace_create_cpu_file("stats", 0444, d_cpu,
8107                                 tr, cpu, &tracing_stats_fops);
8108
8109         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8110                                 tr, cpu, &tracing_entries_fops);
8111
8112 #ifdef CONFIG_TRACER_SNAPSHOT
8113         trace_create_cpu_file("snapshot", 0644, d_cpu,
8114                                 tr, cpu, &snapshot_fops);
8115
8116         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8117                                 tr, cpu, &snapshot_raw_fops);
8118 #endif
8119 }
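
/*
 * Editor's note (illustrative, not part of the original source): the calls
 * above give each CPU its own directory under the instance, e.g. for CPU 0
 * of the top level instance:
 *
 *   /sys/kernel/tracing/per_cpu/cpu0/trace
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw
 *   /sys/kernel/tracing/per_cpu/cpu0/stats
 *   /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *   /sys/kernel/tracing/per_cpu/cpu0/snapshot      (CONFIG_TRACER_SNAPSHOT)
 *   /sys/kernel/tracing/per_cpu/cpu0/snapshot_raw  (CONFIG_TRACER_SNAPSHOT)
 *
 * Reading per_cpu/cpuN/trace shows only that CPU's events; the top level
 * "trace" file merges all CPUs.
 */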
8120
8121 #ifdef CONFIG_FTRACE_SELFTEST
8122 /* Let selftest have access to static functions in this file */
8123 #include "trace_selftest.c"
8124 #endif
8125
8126 static ssize_t
8127 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8128                         loff_t *ppos)
8129 {
8130         struct trace_option_dentry *topt = filp->private_data;
8131         char *buf;
8132
8133         if (topt->flags->val & topt->opt->bit)
8134                 buf = "1\n";
8135         else
8136                 buf = "0\n";
8137
8138         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8139 }
8140
8141 static ssize_t
8142 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8143                          loff_t *ppos)
8144 {
8145         struct trace_option_dentry *topt = filp->private_data;
8146         unsigned long val;
8147         int ret;
8148
8149         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8150         if (ret)
8151                 return ret;
8152
8153         if (val != 0 && val != 1)
8154                 return -EINVAL;
8155
8156         if (!!(topt->flags->val & topt->opt->bit) != val) {
8157                 mutex_lock(&trace_types_lock);
8158                 ret = __set_tracer_option(topt->tr, topt->flags,
8159                                           topt->opt, !val);
8160                 mutex_unlock(&trace_types_lock);
8161                 if (ret)
8162                         return ret;
8163         }
8164
8165         *ppos += cnt;
8166
8167         return cnt;
8168 }
8169
8170
8171 static const struct file_operations trace_options_fops = {
8172         .open = tracing_open_generic,
8173         .read = trace_options_read,
8174         .write = trace_options_write,
8175         .llseek = generic_file_llseek,
8176 };
8177
8178 /*
8179  * In order to pass in both the trace_array descriptor as well as the index
8180  * to the flag that the trace option file represents, the trace_array
8181  * has a character array of trace_flags_index[], which holds the index
8182  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8183  * The address of this character array is passed to the flag option file
8184  * read/write callbacks.
8185  *
8186  * In order to extract both the index and the trace_array descriptor,
8187  * get_tr_index() uses the following algorithm.
8188  *
8189  *   idx = *ptr;
8190  *
8191  * Because the value stored at that address is the index itself (remember
8192  * index[1] == 1), dereferencing the pointer yields the index.
8193  *
8194  * Then, subtracting that index from the pointer lands on the start of
8195  * the index array:
8196  *
8197  *   ptr - idx == &index[0]
8198  *
8199  * Then a simple container_of() from that pointer gets us to the
8200  * trace_array descriptor.
8201  */
8202 static void get_tr_index(void *data, struct trace_array **ptr,
8203                          unsigned int *pindex)
8204 {
8205         *pindex = *(unsigned char *)data;
8206
8207         *ptr = container_of(data - *pindex, struct trace_array,
8208                             trace_flags_index);
8209 }
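
/*
 * Worked example (editor's sketch, not part of the original source): if the
 * option file for bit 3 was created with data == &tr->trace_flags_index[3],
 * then *(unsigned char *)data == 3 (because index[i] == i), data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address recovers
 * the enclosing trace_array:
 */
#if 0	/* illustration only, never compiled */
static void get_tr_index_example(struct trace_array *tr)
{
	void *data = &tr->trace_flags_index[3];
	struct trace_array *found;
	unsigned int index;

	get_tr_index(data, &found, &index);
	/* here: found == tr and index == 3 */
}
#endif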
8210
8211 static ssize_t
8212 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8213                         loff_t *ppos)
8214 {
8215         void *tr_index = filp->private_data;
8216         struct trace_array *tr;
8217         unsigned int index;
8218         char *buf;
8219
8220         get_tr_index(tr_index, &tr, &index);
8221
8222         if (tr->trace_flags & (1 << index))
8223                 buf = "1\n";
8224         else
8225                 buf = "0\n";
8226
8227         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8228 }
8229
8230 static ssize_t
8231 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8232                          loff_t *ppos)
8233 {
8234         void *tr_index = filp->private_data;
8235         struct trace_array *tr;
8236         unsigned int index;
8237         unsigned long val;
8238         int ret;
8239
8240         get_tr_index(tr_index, &tr, &index);
8241
8242         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8243         if (ret)
8244                 return ret;
8245
8246         if (val != 0 && val != 1)
8247                 return -EINVAL;
8248
8249         mutex_lock(&event_mutex);
8250         mutex_lock(&trace_types_lock);
8251         ret = set_tracer_flag(tr, 1 << index, val);
8252         mutex_unlock(&trace_types_lock);
8253         mutex_unlock(&event_mutex);
8254
8255         if (ret < 0)
8256                 return ret;
8257
8258         *ppos += cnt;
8259
8260         return cnt;
8261 }
8262
8263 static const struct file_operations trace_options_core_fops = {
8264         .open = tracing_open_generic,
8265         .read = trace_options_core_read,
8266         .write = trace_options_core_write,
8267         .llseek = generic_file_llseek,
8268 };
8269
8270 struct dentry *trace_create_file(const char *name,
8271                                  umode_t mode,
8272                                  struct dentry *parent,
8273                                  void *data,
8274                                  const struct file_operations *fops)
8275 {
8276         struct dentry *ret;
8277
8278         ret = tracefs_create_file(name, mode, parent, data, fops);
8279         if (!ret)
8280                 pr_warn("Could not create tracefs '%s' entry\n", name);
8281
8282         return ret;
8283 }
8284
8285
8286 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8287 {
8288         struct dentry *d_tracer;
8289
8290         if (tr->options)
8291                 return tr->options;
8292
8293         d_tracer = tracing_get_dentry(tr);
8294         if (IS_ERR(d_tracer))
8295                 return NULL;
8296
8297         tr->options = tracefs_create_dir("options", d_tracer);
8298         if (!tr->options) {
8299                 pr_warn("Could not create tracefs directory 'options'\n");
8300                 return NULL;
8301         }
8302
8303         return tr->options;
8304 }
8305
8306 static void
8307 create_trace_option_file(struct trace_array *tr,
8308                          struct trace_option_dentry *topt,
8309                          struct tracer_flags *flags,
8310                          struct tracer_opt *opt)
8311 {
8312         struct dentry *t_options;
8313
8314         t_options = trace_options_init_dentry(tr);
8315         if (!t_options)
8316                 return;
8317
8318         topt->flags = flags;
8319         topt->opt = opt;
8320         topt->tr = tr;
8321
8322         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8323                                     &trace_options_fops);
8324
8325 }
8326
8327 static void
8328 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8329 {
8330         struct trace_option_dentry *topts;
8331         struct trace_options *tr_topts;
8332         struct tracer_flags *flags;
8333         struct tracer_opt *opts;
8334         int cnt;
8335         int i;
8336
8337         if (!tracer)
8338                 return;
8339
8340         flags = tracer->flags;
8341
8342         if (!flags || !flags->opts)
8343                 return;
8344
8345         /*
8346          * If this is an instance, only create flags for tracers
8347          * the instance may have.
8348          */
8349         if (!trace_ok_for_array(tracer, tr))
8350                 return;
8351
8352         for (i = 0; i < tr->nr_topts; i++) {
8353                 /* Make sure there are no duplicate flags. */
8354                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8355                         return;
8356         }
8357
8358         opts = flags->opts;
8359
8360         for (cnt = 0; opts[cnt].name; cnt++)
8361                 ;
8362
8363         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8364         if (!topts)
8365                 return;
8366
8367         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8368                             GFP_KERNEL);
8369         if (!tr_topts) {
8370                 kfree(topts);
8371                 return;
8372         }
8373
8374         tr->topts = tr_topts;
8375         tr->topts[tr->nr_topts].tracer = tracer;
8376         tr->topts[tr->nr_topts].topts = topts;
8377         tr->nr_topts++;
8378
8379         for (cnt = 0; opts[cnt].name; cnt++) {
8380                 create_trace_option_file(tr, &topts[cnt], flags,
8381                                          &opts[cnt]);
8382                 MEM_FAIL(topts[cnt].entry == NULL,
8383                           "Failed to create trace option: %s",
8384                           opts[cnt].name);
8385         }
8386 }
8387
8388 static struct dentry *
8389 create_trace_option_core_file(struct trace_array *tr,
8390                               const char *option, long index)
8391 {
8392         struct dentry *t_options;
8393
8394         t_options = trace_options_init_dentry(tr);
8395         if (!t_options)
8396                 return NULL;
8397
8398         return trace_create_file(option, 0644, t_options,
8399                                  (void *)&tr->trace_flags_index[index],
8400                                  &trace_options_core_fops);
8401 }
8402
8403 static void create_trace_options_dir(struct trace_array *tr)
8404 {
8405         struct dentry *t_options;
8406         bool top_level = tr == &global_trace;
8407         int i;
8408
8409         t_options = trace_options_init_dentry(tr);
8410         if (!t_options)
8411                 return;
8412
8413         for (i = 0; trace_options[i]; i++) {
8414                 if (top_level ||
8415                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8416                         create_trace_option_core_file(tr, trace_options[i], i);
8417         }
8418 }
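
/*
 * Editor's note (illustrative, not part of the original source): the files
 * created above expose each trace flag as a boolean under the instance's
 * "options" directory, for example:
 *
 *   cat /sys/kernel/tracing/options/sym-offset       # prints 0 or 1
 *   echo 1 > /sys/kernel/tracing/options/sym-offset  # set the flag
 *   echo 0 > /sys/kernel/tracing/options/sym-offset  # clear the flag
 *
 * The same flags can also be toggled through the "trace_options" file,
 * e.g. "echo nosym-offset > trace_options".
 */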
8419
8420 static ssize_t
8421 rb_simple_read(struct file *filp, char __user *ubuf,
8422                size_t cnt, loff_t *ppos)
8423 {
8424         struct trace_array *tr = filp->private_data;
8425         char buf[64];
8426         int r;
8427
8428         r = tracer_tracing_is_on(tr);
8429         r = sprintf(buf, "%d\n", r);
8430
8431         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8432 }
8433
8434 static ssize_t
8435 rb_simple_write(struct file *filp, const char __user *ubuf,
8436                 size_t cnt, loff_t *ppos)
8437 {
8438         struct trace_array *tr = filp->private_data;
8439         struct trace_buffer *buffer = tr->array_buffer.buffer;
8440         unsigned long val;
8441         int ret;
8442
8443         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8444         if (ret)
8445                 return ret;
8446
8447         if (buffer) {
8448                 mutex_lock(&trace_types_lock);
8449                 if (!!val == tracer_tracing_is_on(tr)) {
8450                         val = 0; /* do nothing */
8451                 } else if (val) {
8452                         tracer_tracing_on(tr);
8453                         if (tr->current_trace->start)
8454                                 tr->current_trace->start(tr);
8455                 } else {
8456                         tracer_tracing_off(tr);
8457                         if (tr->current_trace->stop)
8458                                 tr->current_trace->stop(tr);
8459                 }
8460                 mutex_unlock(&trace_types_lock);
8461         }
8462
8463         (*ppos)++;
8464
8465         return cnt;
8466 }
8467
8468 static const struct file_operations rb_simple_fops = {
8469         .open           = tracing_open_generic_tr,
8470         .read           = rb_simple_read,
8471         .write          = rb_simple_write,
8472         .release        = tracing_release_generic_tr,
8473         .llseek         = default_llseek,
8474 };
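
/*
 * Editor's note (illustrative, not part of the original source): the
 * rb_simple_* handlers above back the per-instance "tracing_on" file:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume; also calls the current
 *                                             # tracer's ->start() if it has one
 *   cat /sys/kernel/tracing/tracing_on        # show the current state
 */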
8475
8476 static ssize_t
8477 buffer_percent_read(struct file *filp, char __user *ubuf,
8478                     size_t cnt, loff_t *ppos)
8479 {
8480         struct trace_array *tr = filp->private_data;
8481         char buf[64];
8482         int r;
8483
8484         r = tr->buffer_percent;
8485         r = sprintf(buf, "%d\n", r);
8486
8487         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8488 }
8489
8490 static ssize_t
8491 buffer_percent_write(struct file *filp, const char __user *ubuf,
8492                      size_t cnt, loff_t *ppos)
8493 {
8494         struct trace_array *tr = filp->private_data;
8495         unsigned long val;
8496         int ret;
8497
8498         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8499         if (ret)
8500                 return ret;
8501
8502         if (val > 100)
8503                 return -EINVAL;
8504
8505         if (!val)
8506                 val = 1;
8507
8508         tr->buffer_percent = val;
8509
8510         (*ppos)++;
8511
8512         return cnt;
8513 }
8514
8515 static const struct file_operations buffer_percent_fops = {
8516         .open           = tracing_open_generic_tr,
8517         .read           = buffer_percent_read,
8518         .write          = buffer_percent_write,
8519         .release        = tracing_release_generic_tr,
8520         .llseek         = default_llseek,
8521 };
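
/*
 * Editor's note (illustrative, not part of the original source):
 * "buffer_percent" is the watermark for how full the ring buffer must be
 * before readers blocked on it (e.g. blocking reads of the per-CPU
 * trace_pipe_raw files) are woken up. The default of 50 is set in
 * init_tracer_tracefs() below; for example:
 *
 *   echo 10 > /sys/kernel/tracing/buffer_percent    # wake readers earlier
 *   echo 100 > /sys/kernel/tracing/buffer_percent   # wait for a full buffer
 */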
8522
8523 static struct dentry *trace_instance_dir;
8524
8525 static void
8526 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8527
8528 static int
8529 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8530 {
8531         enum ring_buffer_flags rb_flags;
8532
8533         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8534
8535         buf->tr = tr;
8536
8537         buf->buffer = ring_buffer_alloc(size, rb_flags);
8538         if (!buf->buffer)
8539                 return -ENOMEM;
8540
8541         buf->data = alloc_percpu(struct trace_array_cpu);
8542         if (!buf->data) {
8543                 ring_buffer_free(buf->buffer);
8544                 buf->buffer = NULL;
8545                 return -ENOMEM;
8546         }
8547
8548         /* Allocate the first page for all buffers */
8549         set_buffer_entries(&tr->array_buffer,
8550                            ring_buffer_size(tr->array_buffer.buffer, 0));
8551
8552         return 0;
8553 }
8554
8555 static int allocate_trace_buffers(struct trace_array *tr, int size)
8556 {
8557         int ret;
8558
8559         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8560         if (ret)
8561                 return ret;
8562
8563 #ifdef CONFIG_TRACER_MAX_TRACE
8564         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8565                                     allocate_snapshot ? size : 1);
8566         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8567                 ring_buffer_free(tr->array_buffer.buffer);
8568                 tr->array_buffer.buffer = NULL;
8569                 free_percpu(tr->array_buffer.data);
8570                 tr->array_buffer.data = NULL;
8571                 return -ENOMEM;
8572         }
8573         tr->allocated_snapshot = allocate_snapshot;
8574
8575         /*
8576          * Only the top level trace array gets its snapshot allocated
8577          * from the kernel command line.
8578          */
8579         allocate_snapshot = false;
8580 #endif
8581
8582         return 0;
8583 }
8584
8585 static void free_trace_buffer(struct array_buffer *buf)
8586 {
8587         if (buf->buffer) {
8588                 ring_buffer_free(buf->buffer);
8589                 buf->buffer = NULL;
8590                 free_percpu(buf->data);
8591                 buf->data = NULL;
8592         }
8593 }
8594
8595 static void free_trace_buffers(struct trace_array *tr)
8596 {
8597         if (!tr)
8598                 return;
8599
8600         free_trace_buffer(&tr->array_buffer);
8601
8602 #ifdef CONFIG_TRACER_MAX_TRACE
8603         free_trace_buffer(&tr->max_buffer);
8604 #endif
8605 }
8606
8607 static void init_trace_flags_index(struct trace_array *tr)
8608 {
8609         int i;
8610
8611         /* Used by the trace options files */
8612         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8613                 tr->trace_flags_index[i] = i;
8614 }
8615
8616 static void __update_tracer_options(struct trace_array *tr)
8617 {
8618         struct tracer *t;
8619
8620         for (t = trace_types; t; t = t->next)
8621                 add_tracer_options(tr, t);
8622 }
8623
8624 static void update_tracer_options(struct trace_array *tr)
8625 {
8626         mutex_lock(&trace_types_lock);
8627         __update_tracer_options(tr);
8628         mutex_unlock(&trace_types_lock);
8629 }
8630
8631 /* Must have trace_types_lock held */
8632 struct trace_array *trace_array_find(const char *instance)
8633 {
8634         struct trace_array *tr, *found = NULL;
8635
8636         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8637                 if (tr->name && strcmp(tr->name, instance) == 0) {
8638                         found = tr;
8639                         break;
8640                 }
8641         }
8642
8643         return found;
8644 }
8645
8646 struct trace_array *trace_array_find_get(const char *instance)
8647 {
8648         struct trace_array *tr;
8649
8650         mutex_lock(&trace_types_lock);
8651         tr = trace_array_find(instance);
8652         if (tr)
8653                 tr->ref++;
8654         mutex_unlock(&trace_types_lock);
8655
8656         return tr;
8657 }
8658
8659 static struct trace_array *trace_array_create(const char *name)
8660 {
8661         struct trace_array *tr;
8662         int ret;
8663
8664         ret = -ENOMEM;
8665         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8666         if (!tr)
8667                 return ERR_PTR(ret);
8668
8669         tr->name = kstrdup(name, GFP_KERNEL);
8670         if (!tr->name)
8671                 goto out_free_tr;
8672
8673         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8674                 goto out_free_tr;
8675
8676         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8677
8678         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8679
8680         raw_spin_lock_init(&tr->start_lock);
8681
8682         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8683
8684         tr->current_trace = &nop_trace;
8685
8686         INIT_LIST_HEAD(&tr->systems);
8687         INIT_LIST_HEAD(&tr->events);
8688         INIT_LIST_HEAD(&tr->hist_vars);
8689         INIT_LIST_HEAD(&tr->err_log);
8690
8691         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8692                 goto out_free_tr;
8693
8694         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8695         if (!tr->dir)
8696                 goto out_free_tr;
8697
8698         ret = event_trace_add_tracer(tr->dir, tr);
8699         if (ret) {
8700                 tracefs_remove(tr->dir);
8701                 goto out_free_tr;
8702         }
8703
8704         ftrace_init_trace_array(tr);
8705
8706         init_tracer_tracefs(tr, tr->dir);
8707         init_trace_flags_index(tr);
8708         __update_tracer_options(tr);
8709
8710         list_add(&tr->list, &ftrace_trace_arrays);
8711
8712         tr->ref++;
8713
8714
8715         return tr;
8716
8717  out_free_tr:
8718         free_trace_buffers(tr);
8719         free_cpumask_var(tr->tracing_cpumask);
8720         kfree(tr->name);
8721         kfree(tr);
8722
8723         return ERR_PTR(ret);
8724 }
8725
8726 static int instance_mkdir(const char *name)
8727 {
8728         struct trace_array *tr;
8729         int ret;
8730
8731         mutex_lock(&event_mutex);
8732         mutex_lock(&trace_types_lock);
8733
8734         ret = -EEXIST;
8735         if (trace_array_find(name))
8736                 goto out_unlock;
8737
8738         tr = trace_array_create(name);
8739
8740         ret = PTR_ERR_OR_ZERO(tr);
8741
8742 out_unlock:
8743         mutex_unlock(&trace_types_lock);
8744         mutex_unlock(&event_mutex);
8745         return ret;
8746 }
8747
8748 /**
8749  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8750  * @name: The name of the trace array to be looked up/created.
8751  *
8752  * Returns a pointer to the trace array with the given name, or
8753  * NULL if it cannot be created.
8754  *
8755  * NOTE: This function increments the reference counter associated with the
8756  * trace array returned. This makes sure it cannot be freed while in use.
8757  * Use trace_array_put() once the trace array is no longer needed.
8758  * If the trace_array is to be freed, trace_array_destroy() needs to
8759  * be called after the trace_array_put(), or simply let user space delete
8760  * it from the tracefs instances directory. But until the
8761  * trace_array_put() is called, user space cannot delete it.
8762  *
8763  */
8764 struct trace_array *trace_array_get_by_name(const char *name)
8765 {
8766         struct trace_array *tr;
8767
8768         mutex_lock(&event_mutex);
8769         mutex_lock(&trace_types_lock);
8770
8771         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8772                 if (tr->name && strcmp(tr->name, name) == 0)
8773                         goto out_unlock;
8774         }
8775
8776         tr = trace_array_create(name);
8777
8778         if (IS_ERR(tr))
8779                 tr = NULL;
8780 out_unlock:
8781         if (tr)
8782                 tr->ref++;
8783
8784         mutex_unlock(&trace_types_lock);
8785         mutex_unlock(&event_mutex);
8786         return tr;
8787 }
8788 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
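
/*
 * Editor's sketch (illustrative, not part of the original source): how a
 * kernel module might use this API, in the spirit of
 * samples/ftrace/sample-trace-array.c. The module and instance names are
 * hypothetical; trace_array_set_clr_event() is assumed to be available
 * (it is exported for modules in recent kernels).
 */
#if 0	/* illustration only, never compiled */
#include <linux/module.h>
#include <linux/trace.h>
#include <linux/trace_events.h>

static struct trace_array *example_tr;

static int __init example_init(void)
{
	/* Creates the instance if it does not exist and takes a reference. */
	example_tr = trace_array_get_by_name("example-instance");
	if (!example_tr)
		return -ENOMEM;

	/* Direct an existing event into this instance's buffer. */
	trace_array_set_clr_event(example_tr, "sched", "sched_switch", true);
	return 0;
}

static void __exit example_exit(void)
{
	trace_array_set_clr_event(example_tr, "sched", "sched_switch", false);
	trace_array_put(example_tr);		/* drop our reference */
	trace_array_destroy(example_tr);	/* then remove the instance */
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
#endif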
8789
8790 static int __remove_instance(struct trace_array *tr)
8791 {
8792         int i;
8793
8794         /* Reference counter for a newly created trace array = 1. */
8795         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8796                 return -EBUSY;
8797
8798         list_del(&tr->list);
8799
8800         /* Disable all the flags that were enabled coming in */
8801         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8802                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8803                         set_tracer_flag(tr, 1 << i, 0);
8804         }
8805
8806         tracing_set_nop(tr);
8807         clear_ftrace_function_probes(tr);
8808         event_trace_del_tracer(tr);
8809         ftrace_clear_pids(tr);
8810         ftrace_destroy_function_files(tr);
8811         tracefs_remove(tr->dir);
8812         free_trace_buffers(tr);
8813
8814         for (i = 0; i < tr->nr_topts; i++) {
8815                 kfree(tr->topts[i].topts);
8816         }
8817         kfree(tr->topts);
8818
8819         free_cpumask_var(tr->tracing_cpumask);
8820         kfree(tr->name);
8821         kfree(tr);
8822         tr = NULL;
8823
8824         return 0;
8825 }
8826
8827 int trace_array_destroy(struct trace_array *this_tr)
8828 {
8829         struct trace_array *tr;
8830         int ret;
8831
8832         if (!this_tr)
8833                 return -EINVAL;
8834
8835         mutex_lock(&event_mutex);
8836         mutex_lock(&trace_types_lock);
8837
8838         ret = -ENODEV;
8839
8840         /* Make sure the trace array exists before destroying it. */
8841         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8842                 if (tr == this_tr) {
8843                         ret = __remove_instance(tr);
8844                         break;
8845                 }
8846         }
8847
8848         mutex_unlock(&trace_types_lock);
8849         mutex_unlock(&event_mutex);
8850
8851         return ret;
8852 }
8853 EXPORT_SYMBOL_GPL(trace_array_destroy);
8854
8855 static int instance_rmdir(const char *name)
8856 {
8857         struct trace_array *tr;
8858         int ret;
8859
8860         mutex_lock(&event_mutex);
8861         mutex_lock(&trace_types_lock);
8862
8863         ret = -ENODEV;
8864         tr = trace_array_find(name);
8865         if (tr)
8866                 ret = __remove_instance(tr);
8867
8868         mutex_unlock(&trace_types_lock);
8869         mutex_unlock(&event_mutex);
8870
8871         return ret;
8872 }
8873
8874 static __init void create_trace_instances(struct dentry *d_tracer)
8875 {
8876         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8877                                                          instance_mkdir,
8878                                                          instance_rmdir);
8879         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8880                 return;
8881 }
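
/*
 * Editor's note (illustrative, not part of the original source): the
 * "instances" directory created above lets user space create and remove
 * trace arrays with plain mkdir/rmdir, which land in instance_mkdir()
 * and instance_rmdir() respectively:
 *
 *   mkdir /sys/kernel/tracing/instances/foo   # new instance, own ring buffer
 *   ls /sys/kernel/tracing/instances/foo      # trace, trace_pipe, events/, ...
 *   rmdir /sys/kernel/tracing/instances/foo   # -EBUSY while still in use
 */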
8882
8883 static void
8884 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8885 {
8886         struct trace_event_file *file;
8887         int cpu;
8888
8889         trace_create_file("available_tracers", 0444, d_tracer,
8890                         tr, &show_traces_fops);
8891
8892         trace_create_file("current_tracer", 0644, d_tracer,
8893                         tr, &set_tracer_fops);
8894
8895         trace_create_file("tracing_cpumask", 0644, d_tracer,
8896                           tr, &tracing_cpumask_fops);
8897
8898         trace_create_file("trace_options", 0644, d_tracer,
8899                           tr, &tracing_iter_fops);
8900
8901         trace_create_file("trace", 0644, d_tracer,
8902                           tr, &tracing_fops);
8903
8904         trace_create_file("trace_pipe", 0444, d_tracer,
8905                           tr, &tracing_pipe_fops);
8906
8907         trace_create_file("buffer_size_kb", 0644, d_tracer,
8908                           tr, &tracing_entries_fops);
8909
8910         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8911                           tr, &tracing_total_entries_fops);
8912
8913         trace_create_file("free_buffer", 0200, d_tracer,
8914                           tr, &tracing_free_buffer_fops);
8915
8916         trace_create_file("trace_marker", 0220, d_tracer,
8917                           tr, &tracing_mark_fops);
8918
8919         file = __find_event_file(tr, "ftrace", "print");
8920         if (file && file->dir)
8921                 trace_create_file("trigger", 0644, file->dir, file,
8922                                   &event_trigger_fops);
8923         tr->trace_marker_file = file;
8924
8925         trace_create_file("trace_marker_raw", 0220, d_tracer,
8926                           tr, &tracing_mark_raw_fops);
8927
8928         trace_create_file("trace_clock", 0644, d_tracer, tr,
8929                           &trace_clock_fops);
8930
8931         trace_create_file("tracing_on", 0644, d_tracer,
8932                           tr, &rb_simple_fops);
8933
8934         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8935                           &trace_time_stamp_mode_fops);
8936
8937         tr->buffer_percent = 50;
8938
8939         trace_create_file("buffer_percent", 0444, d_tracer,
8940                         tr, &buffer_percent_fops);
8941
8942         create_trace_options_dir(tr);
8943
8944 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8945         trace_create_maxlat_file(tr, d_tracer);
8946 #endif
8947
8948         if (ftrace_create_function_files(tr, d_tracer))
8949                 MEM_FAIL(1, "Could not allocate function filter files");
8950
8951 #ifdef CONFIG_TRACER_SNAPSHOT
8952         trace_create_file("snapshot", 0644, d_tracer,
8953                           tr, &snapshot_fops);
8954 #endif
8955
8956         trace_create_file("error_log", 0644, d_tracer,
8957                           tr, &tracing_err_log_fops);
8958
8959         for_each_tracing_cpu(cpu)
8960                 tracing_init_tracefs_percpu(tr, cpu);
8961
8962         ftrace_init_tracefs(tr, d_tracer);
8963 }
8964
8965 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
8966 {
8967         struct vfsmount *mnt;
8968         struct file_system_type *type;
8969
8970         /*
8971          * To maintain backward compatibility for tools that mount
8972          * debugfs to get to the tracing facility, tracefs is automatically
8973          * mounted to the debugfs/tracing directory.
8974          */
8975         type = get_fs_type("tracefs");
8976         if (!type)
8977                 return NULL;
8978         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8979         put_filesystem(type);
8980         if (IS_ERR(mnt))
8981                 return NULL;
8982         mntget(mnt);
8983
8984         return mnt;
8985 }
8986
8987 /**
8988  * tracing_init_dentry - initialize top level trace array
8989  *
8990  * This is called when creating files or directories in the tracing
8991  * directory. It is called via fs_initcall() by the boot-up code and
8992  * returns the dentry of the top level tracing directory.
8993  */
8994 struct dentry *tracing_init_dentry(void)
8995 {
8996         struct trace_array *tr = &global_trace;
8997
8998         if (security_locked_down(LOCKDOWN_TRACEFS)) {
8999                 pr_warn("Tracing disabled due to lockdown\n");
9000                 return ERR_PTR(-EPERM);
9001         }
9002
9003         /* The top level trace array uses NULL as parent */
9004         if (tr->dir)
9005                 return NULL;
9006
9007         if (WARN_ON(!tracefs_initialized()))
9008                 return ERR_PTR(-ENODEV);
9009
9010         /*
9011          * As there may still be users that expect the tracing
9012          * files to exist in debugfs/tracing, we must automount
9013          * the tracefs file system there, so older tools still
9014          * work with the newer kernel.
9015          */
9016         tr->dir = debugfs_create_automount("tracing", NULL,
9017                                            trace_automount, NULL);
9018
9019         return NULL;
9020 }
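
/*
 * Editor's note (illustrative, not part of the original source): the
 * automount set up above is why both of these paths reach the same tracefs
 * files on a typical system:
 *
 *   /sys/kernel/tracing          # tracefs mounted directly
 *   /sys/kernel/debug/tracing    # automounted for older tools
 *
 * A direct mount is simply:  mount -t tracefs nodev /sys/kernel/tracing
 */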
9021
9022 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9023 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9024
9025 static void __init trace_eval_init(void)
9026 {
9027         int len;
9028
9029         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9030         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9031 }
9032
9033 #ifdef CONFIG_MODULES
9034 static void trace_module_add_evals(struct module *mod)
9035 {
9036         if (!mod->num_trace_evals)
9037                 return;
9038
9039         /*
9040          * Modules with bad taint do not have events created, so do
9041          * not bother with enums either.
9042          */
9043         if (trace_module_has_bad_taint(mod))
9044                 return;
9045
9046         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9047 }
9048
9049 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9050 static void trace_module_remove_evals(struct module *mod)
9051 {
9052         union trace_eval_map_item *map;
9053         union trace_eval_map_item **last = &trace_eval_maps;
9054
9055         if (!mod->num_trace_evals)
9056                 return;
9057
9058         mutex_lock(&trace_eval_mutex);
9059
9060         map = trace_eval_maps;
9061
9062         while (map) {
9063                 if (map->head.mod == mod)
9064                         break;
9065                 map = trace_eval_jmp_to_tail(map);
9066                 last = &map->tail.next;
9067                 map = map->tail.next;
9068         }
9069         if (!map)
9070                 goto out;
9071
9072         *last = trace_eval_jmp_to_tail(map)->tail.next;
9073         kfree(map);
9074  out:
9075         mutex_unlock(&trace_eval_mutex);
9076 }
9077 #else
9078 static inline void trace_module_remove_evals(struct module *mod) { }
9079 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9080
9081 static int trace_module_notify(struct notifier_block *self,
9082                                unsigned long val, void *data)
9083 {
9084         struct module *mod = data;
9085
9086         switch (val) {
9087         case MODULE_STATE_COMING:
9088                 trace_module_add_evals(mod);
9089                 break;
9090         case MODULE_STATE_GOING:
9091                 trace_module_remove_evals(mod);
9092                 break;
9093         }
9094
9095         return 0;
9096 }
9097
9098 static struct notifier_block trace_module_nb = {
9099         .notifier_call = trace_module_notify,
9100         .priority = 0,
9101 };
9102 #endif /* CONFIG_MODULES */
9103
9104 static __init int tracer_init_tracefs(void)
9105 {
9106         struct dentry *d_tracer;
9107
9108         trace_access_lock_init();
9109
9110         d_tracer = tracing_init_dentry();
9111         if (IS_ERR(d_tracer))
9112                 return 0;
9113
9114         event_trace_init();
9115
9116         init_tracer_tracefs(&global_trace, d_tracer);
9117         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9118
9119         trace_create_file("tracing_thresh", 0644, d_tracer,
9120                         &global_trace, &tracing_thresh_fops);
9121
9122         trace_create_file("README", 0444, d_tracer,
9123                         NULL, &tracing_readme_fops);
9124
9125         trace_create_file("saved_cmdlines", 0444, d_tracer,
9126                         NULL, &tracing_saved_cmdlines_fops);
9127
9128         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9129                           NULL, &tracing_saved_cmdlines_size_fops);
9130
9131         trace_create_file("saved_tgids", 0444, d_tracer,
9132                         NULL, &tracing_saved_tgids_fops);
9133
9134         trace_eval_init();
9135
9136         trace_create_eval_file(d_tracer);
9137
9138 #ifdef CONFIG_MODULES
9139         register_module_notifier(&trace_module_nb);
9140 #endif
9141
9142 #ifdef CONFIG_DYNAMIC_FTRACE
9143         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9144                         NULL, &tracing_dyn_info_fops);
9145 #endif
9146
9147         create_trace_instances(d_tracer);
9148
9149         update_tracer_options(&global_trace);
9150
9151         return 0;
9152 }
9153
9154 static int trace_panic_handler(struct notifier_block *this,
9155                                unsigned long event, void *unused)
9156 {
9157         if (ftrace_dump_on_oops)
9158                 ftrace_dump(ftrace_dump_on_oops);
9159         return NOTIFY_OK;
9160 }
9161
9162 static struct notifier_block trace_panic_notifier = {
9163         .notifier_call  = trace_panic_handler,
9164         .next           = NULL,
9165         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9166 };
9167
9168 static int trace_die_handler(struct notifier_block *self,
9169                              unsigned long val,
9170                              void *data)
9171 {
9172         switch (val) {
9173         case DIE_OOPS:
9174                 if (ftrace_dump_on_oops)
9175                         ftrace_dump(ftrace_dump_on_oops);
9176                 break;
9177         default:
9178                 break;
9179         }
9180         return NOTIFY_OK;
9181 }
9182
9183 static struct notifier_block trace_die_notifier = {
9184         .notifier_call = trace_die_handler,
9185         .priority = 200
9186 };
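
/*
 * Editor's note (illustrative, not part of the original source): the two
 * notifiers above implement the ftrace_dump_on_oops behaviour, typically
 * enabled with one of:
 *
 *   ftrace_dump_on_oops            (kernel command line, dump all CPUs)
 *   ftrace_dump_on_oops=orig_cpu   (dump only the CPU that triggered the oops)
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   (at run time)
 */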
9187
9188 /*
9189  * printk is set to a max of 1024; we really don't need it that big.
9190  * Nothing should be printing 1000 characters anyway.
9191  */
9192 #define TRACE_MAX_PRINT         1000
9193
9194 /*
9195  * Define here KERN_TRACE so that we have one place to modify
9196  * it if we decide to change what log level the ftrace dump
9197  * should be at.
9198  */
9199 #define KERN_TRACE              KERN_EMERG
9200
9201 void
9202 trace_printk_seq(struct trace_seq *s)
9203 {
9204         /* Probably should print a warning here. */
9205         if (s->seq.len >= TRACE_MAX_PRINT)
9206                 s->seq.len = TRACE_MAX_PRINT;
9207
9208         /*
9209          * More paranoid code. Although the buffer size is set to
9210          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9211          * an extra layer of protection.
9212          */
9213         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9214                 s->seq.len = s->seq.size - 1;
9215
9216         /* Should be zero terminated, but we are paranoid. */
9217         s->buffer[s->seq.len] = 0;
9218
9219         printk(KERN_TRACE "%s", s->buffer);
9220
9221         trace_seq_init(s);
9222 }
9223
9224 void trace_init_global_iter(struct trace_iterator *iter)
9225 {
9226         iter->tr = &global_trace;
9227         iter->trace = iter->tr->current_trace;
9228         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9229         iter->array_buffer = &global_trace.array_buffer;
9230
9231         if (iter->trace && iter->trace->open)
9232                 iter->trace->open(iter);
9233
9234         /* Annotate start of buffers if we had overruns */
9235         if (ring_buffer_overruns(iter->array_buffer->buffer))
9236                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9237
9238         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9239         if (trace_clocks[iter->tr->clock_id].in_ns)
9240                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9241 }
9242
9243 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9244 {
9245         /* use static because iter can be a bit big for the stack */
9246         static struct trace_iterator iter;
9247         static atomic_t dump_running;
9248         struct trace_array *tr = &global_trace;
9249         unsigned int old_userobj;
9250         unsigned long flags;
9251         int cnt = 0, cpu;
9252
9253         /* Only allow one dump user at a time. */
9254         if (atomic_inc_return(&dump_running) != 1) {
9255                 atomic_dec(&dump_running);
9256                 return;
9257         }
9258
9259         /*
9260          * Always turn off tracing when we dump.
9261          * We don't need to show trace output of what happens
9262          * between multiple crashes.
9263          *
9264          * If the user does a sysrq-z, then they can re-enable
9265          * tracing with echo 1 > tracing_on.
9266          */
9267         tracing_off();
9268
9269         local_irq_save(flags);
9270         printk_nmi_direct_enter();
9271
9272         /* Simulate the iterator */
9273         trace_init_global_iter(&iter);
9274         /* Cannot use kmalloc for iter.temp */
9275         iter.temp = static_temp_buf;
9276         iter.temp_size = STATIC_TEMP_BUF_SIZE;
9277
9278         for_each_tracing_cpu(cpu) {
9279                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9280         }
9281
9282         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9283
9284         /* don't look at user memory in panic mode */
9285         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9286
9287         switch (oops_dump_mode) {
9288         case DUMP_ALL:
9289                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9290                 break;
9291         case DUMP_ORIG:
9292                 iter.cpu_file = raw_smp_processor_id();
9293                 break;
9294         case DUMP_NONE:
9295                 goto out_enable;
9296         default:
9297                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9298                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9299         }
9300
9301         printk(KERN_TRACE "Dumping ftrace buffer:\n");
9302
9303         /* Did function tracer already get disabled? */
9304         if (ftrace_is_dead()) {
9305                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9306                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9307         }
9308
9309         /*
9310          * We need to stop all tracing on all CPUs to read
9311          * the next buffer. This is a bit expensive, but it is
9312          * not done often. We read everything we can,
9313          * and then release the locks again.
9314          */
9315
9316         while (!trace_empty(&iter)) {
9317
9318                 if (!cnt)
9319                         printk(KERN_TRACE "---------------------------------\n");
9320
9321                 cnt++;
9322
9323                 trace_iterator_reset(&iter);
9324                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9325
9326                 if (trace_find_next_entry_inc(&iter) != NULL) {
9327                         int ret;
9328
9329                         ret = print_trace_line(&iter);
9330                         if (ret != TRACE_TYPE_NO_CONSUME)
9331                                 trace_consume(&iter);
9332                 }
9333                 touch_nmi_watchdog();
9334
9335                 trace_printk_seq(&iter.seq);
9336         }
9337
9338         if (!cnt)
9339                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
9340         else
9341                 printk(KERN_TRACE "---------------------------------\n");
9342
9343  out_enable:
9344         tr->trace_flags |= old_userobj;
9345
9346         for_each_tracing_cpu(cpu) {
9347                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9348         }
9349         atomic_dec(&dump_running);
9350         printk_nmi_direct_exit();
9351         local_irq_restore(flags);
9352 }
9353 EXPORT_SYMBOL_GPL(ftrace_dump);
9354
9355 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9356 {
9357         char **argv;
9358         int argc, ret;
9359
9360         argc = 0;
9361         ret = 0;
9362         argv = argv_split(GFP_KERNEL, buf, &argc);
9363         if (!argv)
9364                 return -ENOMEM;
9365
9366         if (argc)
9367                 ret = createfn(argc, argv);
9368
9369         argv_free(argv);
9370
9371         return ret;
9372 }
9373
9374 #define WRITE_BUFSIZE  4096
9375
9376 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9377                                 size_t count, loff_t *ppos,
9378                                 int (*createfn)(int, char **))
9379 {
9380         char *kbuf, *buf, *tmp;
9381         int ret = 0;
9382         size_t done = 0;
9383         size_t size;
9384
9385         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9386         if (!kbuf)
9387                 return -ENOMEM;
9388
9389         while (done < count) {
9390                 size = count - done;
9391
9392                 if (size >= WRITE_BUFSIZE)
9393                         size = WRITE_BUFSIZE - 1;
9394
9395                 if (copy_from_user(kbuf, buffer + done, size)) {
9396                         ret = -EFAULT;
9397                         goto out;
9398                 }
9399                 kbuf[size] = '\0';
9400                 buf = kbuf;
9401                 do {
9402                         tmp = strchr(buf, '\n');
9403                         if (tmp) {
9404                                 *tmp = '\0';
9405                                 size = tmp - buf + 1;
9406                         } else {
9407                                 size = strlen(buf);
9408                                 if (done + size < count) {
9409                                         if (buf != kbuf)
9410                                                 break;
9411                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9412                                         pr_warn("Line length is too long: Should be less than %d\n",
9413                                                 WRITE_BUFSIZE - 2);
9414                                         ret = -EINVAL;
9415                                         goto out;
9416                                 }
9417                         }
9418                         done += size;
9419
9420                         /* Remove comments */
9421                         tmp = strchr(buf, '#');
9422
9423                         if (tmp)
9424                                 *tmp = '\0';
9425
9426                         ret = trace_run_command(buf, createfn);
9427                         if (ret)
9428                                 goto out;
9429                         buf += size;
9430
9431                 } while (done < count);
9432         }
9433         ret = done;
9434
9435 out:
9436         kfree(kbuf);
9437
9438         return ret;
9439 }
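
/*
 * Editor's note (illustrative, not part of the original source): this helper
 * backs the write() path of the dynamic event files; kprobe_events, for
 * instance, passes its create/delete routine as @createfn, so that
 *
 *   echo 'p:myprobe do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *
 * ends up here, is split into argv ("p:myprobe", "do_sys_open") by
 * trace_run_command(), and is then handed to the kprobe parser. Text after
 * '#' is stripped and blank lines never reach the callback.
 */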
9440
9441 __init static int tracer_alloc_buffers(void)
9442 {
9443         int ring_buf_size;
9444         int ret = -ENOMEM;
9445
9446
9447         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9448                 pr_warn("Tracing disabled due to lockdown\n");
9449                 return -EPERM;
9450         }
9451
9452         /*
9453          * Make sure we don't accidentally add more trace options
9454          * than we have bits for.
9455          */
9456         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9457
9458         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9459                 goto out;
9460
9461         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9462                 goto out_free_buffer_mask;
9463
9464         /* Only allocate trace_printk buffers if a trace_printk exists */
9465         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9466                 /* Must be called before global_trace.buffer is allocated */
9467                 trace_printk_init_buffers();
9468
9469         /* To save memory, keep the ring buffer size to its minimum */
9470         if (ring_buffer_expanded)
9471                 ring_buf_size = trace_buf_size;
9472         else
9473                 ring_buf_size = 1;
9474
9475         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9476         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9477
9478         raw_spin_lock_init(&global_trace.start_lock);
9479
9480         /*
9481          * The prepare callback allocates some memory for the ring buffer. We
9482          * don't free the buffer if the CPU goes down. If we were to free
9483          * the buffer, then the user would lose any trace that was in the
9484          * buffer. The memory will be removed once the "instance" is removed.
9485          */
9486         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9487                                       "trace/RB:preapre", trace_rb_cpu_prepare,
9488                                       NULL);
9489         if (ret < 0)
9490                 goto out_free_cpumask;
9491         /* Used for event triggers */
9492         ret = -ENOMEM;
9493         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9494         if (!temp_buffer)
9495                 goto out_rm_hp_state;
9496
9497         if (trace_create_savedcmd() < 0)
9498                 goto out_free_temp_buffer;
9499
9500         /* TODO: make the number of buffers hot pluggable with CPUs */
9501         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9502                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9503                 goto out_free_savedcmd;
9504         }
9505
9506         if (global_trace.buffer_disabled)
9507                 tracing_off();
9508
9509         if (trace_boot_clock) {
9510                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9511                 if (ret < 0)
9512                         pr_warn("Trace clock %s not defined, going back to default\n",
9513                                 trace_boot_clock);
9514         }
9515
9516         /*
9517          * register_tracer() might reference current_trace, so it
9518          * needs to be set before we register anything. This is
9519          * just a bootstrap of current_trace anyway.
9520          */
9521         global_trace.current_trace = &nop_trace;
9522
9523         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9524
9525         ftrace_init_global_array_ops(&global_trace);
9526
9527         init_trace_flags_index(&global_trace);
9528
9529         register_tracer(&nop_trace);
9530
9531         /* Function tracing may start here (via kernel command line) */
9532         init_function_trace();
9533
9534         /* All seems OK, enable tracing */
9535         tracing_disabled = 0;
9536
9537         atomic_notifier_chain_register(&panic_notifier_list,
9538                                        &trace_panic_notifier);
9539
9540         register_die_notifier(&trace_die_notifier);
9541
9542         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9543
9544         INIT_LIST_HEAD(&global_trace.systems);
9545         INIT_LIST_HEAD(&global_trace.events);
9546         INIT_LIST_HEAD(&global_trace.hist_vars);
9547         INIT_LIST_HEAD(&global_trace.err_log);
9548         list_add(&global_trace.list, &ftrace_trace_arrays);
9549
9550         apply_trace_boot_options();
9551
9552         register_snapshot_cmd();
9553
9554         return 0;
9555
9556 out_free_savedcmd:
9557         free_saved_cmdlines_buffer(savedcmd);
9558 out_free_temp_buffer:
9559         ring_buffer_free(temp_buffer);
9560 out_rm_hp_state:
9561         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9562 out_free_cpumask:
9563         free_cpumask_var(global_trace.tracing_cpumask);
9564 out_free_buffer_mask:
9565         free_cpumask_var(tracing_buffer_mask);
9566 out:
9567         return ret;
9568 }
9569
9570 void __init early_trace_init(void)
9571 {
9572         if (tracepoint_printk) {
9573                 tracepoint_print_iter =
9574                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9575                 if (MEM_FAIL(!tracepoint_print_iter,
9576                              "Failed to allocate trace iterator\n"))
9577                         tracepoint_printk = 0;
9578                 else
9579                         static_key_enable(&tracepoint_printk_key.key);
9580         }
9581         tracer_alloc_buffers();
9582 }
9583
9584 void __init trace_init(void)
9585 {
9586         trace_event_init();
9587 }
9588
9589 __init static int clear_boot_tracer(void)
9590 {
9591         /*
9592          * The default bootup tracer name points into an init section
9593          * that will be freed. This function is called at late init;
9594          * if the boot tracer was not registered by then, clear the
9595          * pointer to prevent a later registration from accessing the
9596          * buffer that is about to be freed.
9597          */
9598         if (!default_bootup_tracer)
9599                 return 0;
9600
9601         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9602                default_bootup_tracer);
9603         default_bootup_tracer = NULL;
9604
9605         return 0;
9606 }
9607
9608 fs_initcall(tracer_init_tracefs);
9609 late_initcall_sync(clear_boot_tracer);
9610
9611 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9612 __init static int tracing_set_default_clock(void)
9613 {
9614         /* sched_clock_stable() is determined in late_initcall */
9615         if (!trace_boot_clock && !sched_clock_stable()) {
9616                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9617                         pr_warn("Can not set tracing clock due to lockdown\n");
9618                         return -EPERM;
9619                 }
9620
9621                 printk(KERN_WARNING
9622                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9623                        "If you want to keep using the local clock, then add:\n"
9624                        "  \"trace_clock=local\"\n"
9625                        "on the kernel command line\n");
9626                 tracing_set_clock(&global_trace, "global");
9627         }
9628
9629         return 0;
9630 }
9631 late_initcall_sync(tracing_set_default_clock);
9632 #endif