// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>

#include "trace_output.h"
/*
 * On boot up, the ring buffer is set to the minimum size, so that
 * we do not waste memory on systems that are not using tracing.
 */
bool ring_buffer_expanded;
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring-buffer, such as trace_printk(), could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;
/*
 * If boot-time tracing (including tracers/events set up via the kernel
 * command line) is running, we do not want to run the startup selftests.
 */
bool __read_mostly tracing_selftest_disabled;
#ifdef CONFIG_FTRACE_STARTUP_TEST
void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#endif
/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}
/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
static DEFINE_PER_CPU(bool, trace_taskinfo_save);
/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is default off, but you can enable it with either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 */

enum ftrace_dump_mode ftrace_dump_on_oops;
/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module		*mod;
	unsigned long		length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

#define MAX_TRACER_SIZE		100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static bool snapshot_at_boot;
static int __init set_cmdline_ftrace(char *str)
{
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	ring_buffer_expanded = true;
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (*str++ != '=' || !*str || !strcmp("1", str)) {
		ftrace_dump_on_oops = DUMP_ALL;
		return 1;
	}

	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
		ftrace_dump_on_oops = DUMP_ORIG;
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);

static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);
static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
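
/*
 * Illustrative example (not part of the original file): the boot
 * parameters handled above can be combined on the kernel command line,
 * for instance:
 *
 *	ftrace=function_graph trace_options=nooverwrite trace_clock=global
 *	ftrace_dump_on_oops=orig_cpu traceoff_on_warning tp_printk
 *
 * The exact option and clock names accepted by trace_options= and
 * trace_clock= depend on the kernel configuration.
 */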
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}
static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}
static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	preempt_disable_notrace();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}

	preempt_enable_notrace();
}
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}

static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}
int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	mutex_lock(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	int ret;

	mutex_lock(&ftrace_export_lock);

	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
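
/*
 * Illustrative sketch (not part of the original file): a module that wants
 * to receive trace data through this interface fills in its own
 * struct trace_export (declared in <linux/trace.h>) with a write()
 * callback and registers it.  The my_export_write()/my_export names below
 * are hypothetical.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		(push the raw trace entry to some other channel here)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */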
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
	 TRACE_ITER_HASH_PTR)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

LIST_HEAD(ftrace_trace_arrays);
int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;
	int ret = -ENODEV;

	mutex_lock(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			ret = 0;
			break;
		}
	}
	mutex_unlock(&trace_types_lock);

	return ret;
}
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}
EXPORT_SYMBOL_GPL(trace_array_put);
int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}
int call_filter_check_discard(struct trace_event_call *call, void *rec,
			      struct trace_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
		__trace_event_discard_commit(buffer, event);
		return 1;
	}

	return 0;
}
/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}
/**
 * trace_ignore_this_task - should a task be ignored for tracing
 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids not to be traced
 * @task: The task that should be ignored if not filtered
 *
 * Checks if @task should be traced or not from @filtered_pids.
 * Returns true if @task should *NOT* be traced.
 * Returns false if @task should be traced.
 */
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
		       struct trace_pid_list *filtered_no_pids,
		       struct task_struct *task)
{
	/*
	 * If filtered_no_pids is not empty, and the task's pid is listed
	 * in filtered_no_pids, then return true.
	 * Otherwise, if filtered_pids is empty, that means we can
	 * trace all tasks. If it has content, then only trace pids
	 * within filtered_pids.
	 */

	return (filtered_pids &&
		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
		(filtered_no_pids &&
		 trace_find_filtered_pid(filtered_no_pids, task->pid));
}
/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	/* For forks, we only add if the forking task is listed */
	if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
	}

	/* "self" is set for forks, and NULL for exits */
	if (self)
		trace_pid_list_set(pid_list, task->pid);
	else
		trace_pid_list_clear(pid_list, task->pid);
}
/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long pid = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
		return NULL;

	pid = next;

	/* Return pid + 1 to allow zero to be represented */
	return (void *)(pid + 1);
}
/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	pid = first;

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;

	return (void *)pid;
}
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}
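
/*
 * Illustrative sketch (not part of the original file): these helpers are
 * typically wired into a seq_file operations table for a pid-filter file.
 * The example_* wrappers below are hypothetical; they would translate the
 * file's private data into the trace_pid_list before delegating to
 * trace_pid_start()/trace_pid_next().
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,
 *		.next	= example_pid_next,
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */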
/* 128 should be much more than enough */
#define PID_BUF_SIZE		127

int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {
		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		if (!trace_parser_loaded(&parser))
			break;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
/* trace_types holds a linked list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);
/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (as returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to system.
 *
 * These primitives allow multi process access to different cpu ring buffers
 * concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif
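
/*
 * Illustrative sketch (not part of the original file): a reader of a
 * single CPU buffer is expected to bracket its accesses with the helpers
 * above, roughly like:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead serializes against all per-cpu
 * readers at once.
 */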
#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs);

#else
static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}

#endif
static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned int trace_ctx)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, type, trace_ctx);
}

static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
			    int type,
			    unsigned long len,
			    unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, len);
	if (event != NULL)
		trace_event_setup(event, type, trace_ctx);

	return event;
}
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
		/* ring_buffer_unlock_commit() enables preemption */
		preempt_enable_notrace();
	} else
		ring_buffer_unlock_commit(buffer, event);
}
/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip:	   The address of the caller
 * @str:   The constant string to write
 * @size:  The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	unsigned int trace_ctx;
	int alloc;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	trace_ctx = tracing_gen_ctx();
	buffer = global_trace.array_buffer.buffer;
	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    trace_ctx);
	if (!event) {
		size = 0;
		goto out;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
 out:
	ring_buffer_nest_end(buffer);
	return size;
}
EXPORT_SYMBOL_GPL(__trace_puts);
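
/*
 * Illustrative note (not part of the original file): callers normally do
 * not use __trace_puts() directly; the trace_puts() macro picks between
 * __trace_puts() and __trace_bputs() depending on whether the string is a
 * builtin constant, e.g.:
 *
 *	trace_puts("reached the retry path\n");
 */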
/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip:	   The address of the caller
 * @str:   The constant string to write to the buffer to
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct bputs_entry *entry;
	unsigned int trace_ctx;
	int size = sizeof(struct bputs_entry);
	int ret = 0;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	trace_ctx = tracing_gen_ctx();
	buffer = global_trace.array_buffer.buffer;

	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    trace_ctx);
	if (!event)
		goto out;

	entry = ring_buffer_event_data(event);
	entry->ip  = ip;
	entry->str = str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);

	ret = 1;
 out:
	ring_buffer_nest_end(buffer);
	return ret;
}
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					    void *cond_data)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		internal_trace_puts("*** snapshot is being ignored ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
		internal_trace_puts("*** stopping trace here! ***\n");
		tracing_off();
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}
void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int ret;

	if (!tr->allocated_snapshot) {

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
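
/*
 * Illustrative sketch (not part of the original file): a typical in-kernel
 * user allocates the snapshot buffer once from sleepable context and later
 * triggers snapshots from wherever the interesting condition fires (the
 * condition below is hypothetical):
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (suspicious_condition)
 *		tracing_snapshot();
 *
 * The captured data can then be read from the "snapshot" file in tracefs.
 */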
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr:		The tracing instance
 * @cond_data:	User data to associate with the snapshot
 * @update:	Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot;
	int ret = 0;

	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	mutex_lock(&trace_types_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret)
		goto fail_unlock;

	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = cond_snapshot;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	mutex_unlock(&trace_types_lock);

	return ret;

 fail_unlock:
	mutex_unlock(&trace_types_lock);
	kfree(cond_snapshot);
	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr:		The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
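
/*
 * Illustrative sketch (not part of the original file): a conditional
 * snapshot pairs an update() callback (of type cond_update_fn_t, as used
 * in the prototype above) with optional user data.  The my_* names below
 * are hypothetical.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *state = cond_data;
 *
 *		return state->hit_threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */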
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#endif /* CONFIG_TRACER_SNAPSHOT */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();
	}
}
/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
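
/*
 * Illustrative sketch (not part of the original file): tracing_on(),
 * tracing_off() and tracing_is_on() are exported so that other kernel code
 * can freeze the ring buffer around an interesting event while the tracers
 * themselves keep running (the condition below is hypothetical):
 *
 *	if (unexpected_state)
 *		tracing_off();
 *
 * The buffer contents can then be inspected later via the "trace" file and
 * recording re-enabled with tracing_on().
 */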
static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);
static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
};
bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}
/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

 out:
	return ret;
}
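
/*
 * Illustrative sketch (not part of the original file): a write() handler
 * that accepts a space-separated list typically drives the parser like
 * this (mirroring trace_pid_write() above); MAX_TOKEN is a hypothetical
 * bound on the token length.
 *
 *	struct trace_parser parser;
 *	loff_t pos = 0;
 *
 *	if (trace_parser_get_init(&parser, MAX_TOKEN + 1))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		(handle parser.buffer here)
 *		ubuf += ret;
 *		cnt -= ret;
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */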
/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->seq.readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->seq.readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->seq.readpos, cnt);

	s->seq.readpos += cnt;
	return cnt;
}
unsigned long __read_mostly	tracing_thresh;

static const struct file_operations tracing_max_lat_fops;
#ifdef LATENCY_FS_NOTIFY

static struct workqueue_struct *fsnotify_wq;

static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}

static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}

static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, &tr->max_latency,
					      &tracing_max_lat_fops);
}

__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);

void latency_fsnotify(struct trace_array *tr)
{
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
	|| defined(CONFIG_OSNOISE_TRACER)

#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)

#else
#define trace_create_maxlat_file(tr, d_tracer)	do { } while (0)
#endif
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct array_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this task's comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: the trace array to snapshot
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->max_buffer.buffer);
	else
		ring_buffer_record_off(tr->max_buffer.buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
		goto out_unlock;
#endif

	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

 out_unlock:
	arch_spin_unlock(&tr->max_lock);
}
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: the trace array to copy from
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 */
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
				       "Failed to swap buffers due to commit in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
				full);
}
#ifdef CONFIG_FTRACE_STARTUP_TEST
static bool selftests_can_run;

struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);

static int save_selftest(struct tracer *type)
{
	struct trace_selftests *selftest;

	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
	if (!selftest)
		return -ENOMEM;

	selftest->type = type;
	list_add(&selftest->list, &postponed_selftests);
	return 0;
}
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	mutex_lock(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		goto out;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

 out:
	mutex_unlock(&trace_types_lock);

	return 0;
}
core_initcall(init_trace_selftests);
#else
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */

static void add_tracer_options(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
int __init register_tracer(struct tracer *type)
{
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	tracing_selftest_running = true;

	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	if (!type->set_flag)
		type->set_flag = &dummy_set_flag;
	if (!type->flags) {
		/*allocate a dummy tracer_flags*/
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
		if (!type->flags) {
			ret = -ENOMEM;
			goto out;
		}
		type->flags->val = 0;
		type->flags->opts = dummy_tracer_opt;
	} else
		if (!type->flags->opts)
			type->flags->opts = dummy_tracer_opt;

	/* store the tracer for __set_tracer_option */
	type->flags->trace = type;

	ret = run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	type->next = trace_types;
	trace_types = type;
	add_tracer_options(&global_trace, type);

 out:
	tracing_selftest_running = false;
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		goto out_unlock;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		goto out_unlock;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	tracing_set_tracer(&global_trace, type->name);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

 out_unlock:
	return ret;
}
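
/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * plugin only needs a name plus init/reset callbacks before it can be
 * registered from an __init function.  The mytrace_* names below are
 * hypothetical.
 *
 *	static struct tracer mytrace_tracer __read_mostly = {
 *		.name	= "mytrace",
 *		.init	= mytrace_init,
 *		.reset	= mytrace_reset,
 *	};
 *
 *	static __init int init_mytrace_tracer(void)
 *	{
 *		return register_tracer(&mytrace_tracer);
 *	}
 *	core_initcall(init_mytrace_tracer);
 */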
static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}
/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus(void)
{
	struct trace_array *tr;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
#endif
	}
}
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupt is disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char *saved_cmdlines;
};
static struct saved_cmdlines_buffer *savedcmd;
static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
{
	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
static int allocate_cmdlines_buffer(unsigned int val,
				    struct saved_cmdlines_buffer *s)
{
	s->map_cmdline_to_pid = kmalloc_array(val,
					      sizeof(*s->map_cmdline_to_pid),
					      GFP_KERNEL);
	if (!s->map_cmdline_to_pid)
		return -ENOMEM;

	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
	if (!s->saved_cmdlines) {
		kfree(s->map_cmdline_to_pid);
		return -ENOMEM;
	}

	s->cmdline_idx = 0;
	s->cmdline_num = val;
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return 0;
}
static int trace_create_savedcmd(void)
{
	int ret;

	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
	if (!savedcmd)
		return -ENOMEM;

	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);

	return ret;
}
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (--global_trace.stop_count) {
		if (global_trace.stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			global_trace.stop_count = 0;
		}
		goto out;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&global_trace.max_lock);

	buffer = global_trace.array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&global_trace.max_lock);

 out:
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	/* If global, we need to also start the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_start();

	raw_spin_lock_irqsave(&tr->start_lock, flags);

	if (--tr->stop_count) {
		if (tr->stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			tr->stop_count = 0;
		}
		goto out;
	}

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (global_trace.stop_count++)
		goto out;

	/* Prevent the buffers from switching */
	arch_spin_lock(&global_trace.max_lock);

	buffer = global_trace.array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&global_trace.max_lock);

 out:
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	/* If global, we need to also stop the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_stop();

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (tr->stop_count++)
		goto out;

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
2411 static int trace_save_cmdline(struct task_struct *tsk)
2415 /* treat recording of idle task as a success */
2419 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2422 * It's not the end of the world if we don't get
2423 * the lock, but we also don't want to spin
2424 * nor do we want to disable interrupts,
2425 * so if we miss here, then better luck next time.
2427 * This is called from within the scheduler and from wakeups, so
2428 * interrupts should already be disabled and the run queue lock held.
2430 lockdep_assert_preemption_disabled();
2431 if (!arch_spin_trylock(&trace_cmdline_lock))
2434 idx = savedcmd->map_pid_to_cmdline[tpid];
2435 if (idx == NO_CMDLINE_MAP) {
2436 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2438 savedcmd->map_pid_to_cmdline[tpid] = idx;
2439 savedcmd->cmdline_idx = idx;
2442 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2443 set_cmdline(idx, tsk->comm);
2445 arch_spin_unlock(&trace_cmdline_lock);
2450 static void __trace_find_cmdline(int pid, char comm[])
2456 strcpy(comm, "<idle>");
2460 if (WARN_ON_ONCE(pid < 0)) {
2461 strcpy(comm, "<XXX>");
2465 tpid = pid & (PID_MAX_DEFAULT - 1);
2466 map = savedcmd->map_pid_to_cmdline[tpid];
2467 if (map != NO_CMDLINE_MAP) {
2468 tpid = savedcmd->map_cmdline_to_pid[map];
2470 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2474 strcpy(comm, "<...>");
2477 void trace_find_cmdline(int pid, char comm[])
2480 arch_spin_lock(&trace_cmdline_lock);
2482 __trace_find_cmdline(pid, comm);
2484 arch_spin_unlock(&trace_cmdline_lock);
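/*
 * Example (illustrative sketch of the saved_cmdlines cache above; the
 * pid and comm values are made up):
 *
 *	trace_save_cmdline(task pid=1234, comm="bash"):
 *		tpid = 1234 & (PID_MAX_DEFAULT - 1);
 *		idx  = map_pid_to_cmdline[tpid];     (allocated if unmapped)
 *		map_cmdline_to_pid[idx] = 1234;
 *		saved_cmdlines[idx * TASK_COMM_LEN] = "bash";
 *
 *	trace_find_cmdline(1234, comm):
 *		map = map_pid_to_cmdline[1234 & (PID_MAX_DEFAULT - 1)];
 *		if the pid stored in map_cmdline_to_pid[map] still matches,
 *		comm becomes "bash"; if the slot was recycled by another
 *		task (or never mapped), "<...>" is printed instead.
 */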
2488 static int *trace_find_tgid_ptr(int pid)
2491 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2492 * if we observe a non-NULL tgid_map then we also observe the correct
2495 int *map = smp_load_acquire(&tgid_map);
2497 if (unlikely(!map || pid > tgid_map_max))
2503 int trace_find_tgid(int pid)
2505 int *ptr = trace_find_tgid_ptr(pid);
2507 return ptr ? *ptr : 0;
2510 static int trace_save_tgid(struct task_struct *tsk)
2514 /* treat recording of idle task as a success */
2518 ptr = trace_find_tgid_ptr(tsk->pid);
2526 static bool tracing_record_taskinfo_skip(int flags)
2528 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2530 if (!__this_cpu_read(trace_taskinfo_save))
2536 * tracing_record_taskinfo - record the task info of a task
2538 * @task: task to record
2539 * @flags: TRACE_RECORD_CMDLINE for recording comm
2540 * TRACE_RECORD_TGID for recording tgid
2542 void tracing_record_taskinfo(struct task_struct *task, int flags)
2546 if (tracing_record_taskinfo_skip(flags))
2550 * Record as much task information as possible. If some fail, continue
2551 * to try to record the others.
2553 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2554 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2556 /* If recording any information failed, retry again soon. */
2560 __this_cpu_write(trace_taskinfo_save, false);
2564 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2566 * @prev: previous task during sched_switch
2567 * @next: next task during sched_switch
2568 * @flags: TRACE_RECORD_CMDLINE for recording comm
2569 * TRACE_RECORD_TGID for recording tgid
2571 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2572 struct task_struct *next, int flags)
2576 if (tracing_record_taskinfo_skip(flags))
2580 * Record as much task information as possible. If some fail, continue
2581 * to try to record the others.
2583 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2584 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2585 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2586 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2588 /* If recording any information failed, retry again soon. */
2592 __this_cpu_write(trace_taskinfo_save, false);
2595 /* Helpers to record a specific task information */
2596 void tracing_record_cmdline(struct task_struct *task)
2598 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2601 void tracing_record_tgid(struct task_struct *task)
2603 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2607 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2608 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2609 * simplifies those functions and keeps them in sync.
2611 enum print_line_t trace_handle_return(struct trace_seq *s)
2613 return trace_seq_has_overflowed(s) ?
2614 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2616 EXPORT_SYMBOL_GPL(trace_handle_return);
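/*
 * Example (minimal sketch of a caller; the function name is made up):
 * an output routine writes into the trace_seq and lets the helper pick
 * the return value instead of open-coding the overflow check.
 *
 *	static enum print_line_t my_print_line(struct trace_iterator *iter)
 *	{
 *		trace_seq_printf(&iter->seq, "%d %llu\n",
 *				 iter->ent->pid, iter->ts);
 *		return trace_handle_return(&iter->seq);
 *	}
 */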
2618 static unsigned short migration_disable_value(void)
2620 #if defined(CONFIG_SMP)
2621 return current->migration_disabled;
2627 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2629 unsigned int trace_flags = irqs_status;
2632 pc = preempt_count();
2635 trace_flags |= TRACE_FLAG_NMI;
2636 if (pc & HARDIRQ_MASK)
2637 trace_flags |= TRACE_FLAG_HARDIRQ;
2638 if (in_serving_softirq())
2639 trace_flags |= TRACE_FLAG_SOFTIRQ;
2640 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2641 trace_flags |= TRACE_FLAG_BH_OFF;
2643 if (tif_need_resched())
2644 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2645 if (test_preempt_need_resched())
2646 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2647 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2648 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
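/*
 * Layout of the trace_ctx word packed by the return statement above:
 *
 *	bits  0- 3: preempt_count() depth, clamped to 15
 *	bits  4- 7: migrate-disable depth, clamped to 15
 *	bits  8-15: currently unused
 *	bits 16-23: the TRACE_FLAG_* bits collected above
 *
 * So, for example, a hard interrupt taken with interrupts already off
 * and a preempt depth of 2 would be recorded roughly as
 * (TRACE_FLAG_HARDIRQ | TRACE_FLAG_IRQS_OFF) << 16 | 0x2.
 */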
2651 struct ring_buffer_event *
2652 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2655 unsigned int trace_ctx)
2657 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2660 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2661 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2662 static int trace_buffered_event_ref;
2665 * trace_buffered_event_enable - enable buffering events
2667 * When events are being filtered, it is quicker to use a temporary
2668 * buffer to write the event data into if there's a likely chance
2669 * that it will not be committed. The discard of the ring buffer
2670 * is not as fast as committing, and is much slower than copying
2673 * When an event is to be filtered, allocate per cpu buffers to
2674 * write the event data into, and if the event is filtered and discarded
2675 * it is simply dropped, otherwise, the entire data is to be committed
2678 void trace_buffered_event_enable(void)
2680 struct ring_buffer_event *event;
2684 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2686 if (trace_buffered_event_ref++)
2689 for_each_tracing_cpu(cpu) {
2690 page = alloc_pages_node(cpu_to_node(cpu),
2691 GFP_KERNEL | __GFP_NORETRY, 0);
2695 event = page_address(page);
2696 memset(event, 0, sizeof(*event));
2698 per_cpu(trace_buffered_event, cpu) = event;
2701 if (cpu == smp_processor_id() &&
2702 __this_cpu_read(trace_buffered_event) !=
2703 per_cpu(trace_buffered_event, cpu))
2710 trace_buffered_event_disable();
2713 static void enable_trace_buffered_event(void *data)
2715 /* Probably not needed, but do it anyway */
2717 this_cpu_dec(trace_buffered_event_cnt);
2720 static void disable_trace_buffered_event(void *data)
2722 this_cpu_inc(trace_buffered_event_cnt);
2726 * trace_buffered_event_disable - disable buffering events
2728 * When a filter is removed, it is faster to not use the buffered
2729 * events, and to commit directly into the ring buffer. Free up
2730 * the temp buffers when there are no more users. This requires
2731 * special synchronization with current events.
2733 void trace_buffered_event_disable(void)
2737 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2739 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2742 if (--trace_buffered_event_ref)
2746 /* For each CPU, set the buffer as used. */
2747 smp_call_function_many(tracing_buffer_mask,
2748 disable_trace_buffered_event, NULL, 1);
2751 /* Wait for all current users to finish */
2754 for_each_tracing_cpu(cpu) {
2755 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2756 per_cpu(trace_buffered_event, cpu) = NULL;
2759 * Make sure trace_buffered_event is NULL before clearing
2760 * trace_buffered_event_cnt.
2765 /* Do the work on each cpu */
2766 smp_call_function_many(tracing_buffer_mask,
2767 enable_trace_buffered_event, NULL, 1);
2771 static struct trace_buffer *temp_buffer;
2773 struct ring_buffer_event *
2774 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2775 struct trace_event_file *trace_file,
2776 int type, unsigned long len,
2777 unsigned int trace_ctx)
2779 struct ring_buffer_event *entry;
2780 struct trace_array *tr = trace_file->tr;
2783 *current_rb = tr->array_buffer.buffer;
2785 if (!tr->no_filter_buffering_ref &&
2786 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2787 preempt_disable_notrace();
2789 * Filtering is on, so try to use the per cpu buffer first.
2790 * This buffer will simulate a ring_buffer_event,
2791 * where the type_len is zero and the array[0] will
2792 * hold the full length.
2793 * (see include/linux/ring_buffer.h for details on
2794 * how the ring_buffer_event is structured).
2796 * Using a temp buffer during filtering and copying it
2797 * on a matched filter is quicker than writing directly
2798 * into the ring buffer and then discarding it when
2799 * it doesn't match. That is because the discard
2800 * requires several atomic operations to get right.
2801 * Copying on match and doing nothing on a failed match
2802 * is still quicker than no copy on match, but having
2803 * to discard out of the ring buffer on a failed match.
2805 if ((entry = __this_cpu_read(trace_buffered_event))) {
2806 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2808 val = this_cpu_inc_return(trace_buffered_event_cnt);
2811 * Preemption is disabled, but interrupts and NMIs
2812 * can still come in now. If that happens after
2813 * the above increment, then it will have to go
2814 * back to the old method of allocating the event
2815 * on the ring buffer, and if the filter fails, it
2816 * will have to call ring_buffer_discard_commit()
2819 * Need to also check the unlikely case that the
2820 * length is bigger than the temp buffer size.
2821 * If that happens, then the reserve is pretty much
2822 * guaranteed to fail, as the ring buffer currently
2823 * only allows events less than a page. But that may
2824 * change in the future, so let the ring buffer reserve
2825 * handle the failure in that case.
2827 if (val == 1 && likely(len <= max_len)) {
2828 trace_event_setup(entry, type, trace_ctx);
2829 entry->array[0] = len;
2830 /* Return with preemption disabled */
2833 this_cpu_dec(trace_buffered_event_cnt);
2835 /* __trace_buffer_lock_reserve() disables preemption */
2836 preempt_enable_notrace();
2839 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2842 * If tracing is off, but we have triggers enabled
2843 * we still need to look at the event data. Use the temp_buffer
2844 * to store the trace event for the trigger to use. It's recursion
2845 * safe and will not be recorded anywhere.
2847 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2848 *current_rb = temp_buffer;
2849 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2854 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
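/*
 * Rough shape of the faked event that lives in the per-cpu
 * trace_buffered_event page used above (a sketch; see
 * include/linux/ring_buffer.h for the real structure):
 *
 *	struct ring_buffer_event {
 *		u32 type_len:5;		(0 here: "length is in array[0]")
 *		u32 time_delta:27;	(unused for the temp copy)
 *		u32 array[];		(array[0] = len, data follows)
 *	};
 *
 * A matched filter then copies the data into the real ring buffer; a
 * failed match simply drops it, which avoids the comparatively
 * expensive ring_buffer_discard_commit() path.
 */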
2856 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2857 static DEFINE_MUTEX(tracepoint_printk_mutex);
2859 static void output_printk(struct trace_event_buffer *fbuffer)
2861 struct trace_event_call *event_call;
2862 struct trace_event_file *file;
2863 struct trace_event *event;
2864 unsigned long flags;
2865 struct trace_iterator *iter = tracepoint_print_iter;
2867 /* We should never get here if iter is NULL */
2868 if (WARN_ON_ONCE(!iter))
2871 event_call = fbuffer->trace_file->event_call;
2872 if (!event_call || !event_call->event.funcs ||
2873 !event_call->event.funcs->trace)
2876 file = fbuffer->trace_file;
2877 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2878 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2879 !filter_match_preds(file->filter, fbuffer->entry)))
2882 event = &fbuffer->trace_file->event_call->event;
2884 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2885 trace_seq_init(&iter->seq);
2886 iter->ent = fbuffer->entry;
2887 event_call->event.funcs->trace(iter, 0, event);
2888 trace_seq_putc(&iter->seq, 0);
2889 printk("%s", iter->seq.buffer);
2891 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2894 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2895 void *buffer, size_t *lenp,
2898 int save_tracepoint_printk;
2901 mutex_lock(&tracepoint_printk_mutex);
2902 save_tracepoint_printk = tracepoint_printk;
2904 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2907 * This will force exiting early, as tracepoint_printk
2908 * is always zero when tracepoint_print_iter is not allocated
2910 if (!tracepoint_print_iter)
2911 tracepoint_printk = 0;
2913 if (save_tracepoint_printk == tracepoint_printk)
2916 if (tracepoint_printk)
2917 static_key_enable(&tracepoint_printk_key.key);
2919 static_key_disable(&tracepoint_printk_key.key);
2922 mutex_unlock(&tracepoint_printk_mutex);
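/*
 * Example (usage sketch): the static key above is what keeps the
 * tp_printk path free when it is off. It is normally flipped either at
 * boot with the tp_printk kernel parameter or at run time through the
 * sysctl handled here:
 *
 *	# echo 1 > /proc/sys/kernel/tracepoint_printk	(mirror events to printk)
 *	# echo 0 > /proc/sys/kernel/tracepoint_printk	(stop mirroring)
 */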
2927 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2929 enum event_trigger_type tt = ETT_NONE;
2930 struct trace_event_file *file = fbuffer->trace_file;
2932 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2933 fbuffer->entry, &tt))
2936 if (static_key_false(&tracepoint_printk_key.key))
2937 output_printk(fbuffer);
2939 if (static_branch_unlikely(&trace_event_exports_enabled))
2940 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2942 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2943 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2947 event_triggers_post_call(file, tt);
2950 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2955 * trace_buffer_unlock_commit_regs()
2956 * trace_event_buffer_commit()
2957 * trace_event_raw_event_xxx()
2959 # define STACK_SKIP 3
2961 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2962 struct trace_buffer *buffer,
2963 struct ring_buffer_event *event,
2964 unsigned int trace_ctx,
2965 struct pt_regs *regs)
2967 __buffer_unlock_commit(buffer, event);
2970 * If regs is not set, then skip the necessary functions.
2971 * Note, we can still get here via blktrace, wakeup tracer
2972 * and mmiotrace, but that's ok if they lose a function or
2973 * two. They are not that meaningful.
2975 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2976 ftrace_trace_userstack(tr, buffer, trace_ctx);
2980 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2983 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2984 struct ring_buffer_event *event)
2986 __buffer_unlock_commit(buffer, event);
2990 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2991 parent_ip, unsigned int trace_ctx)
2993 struct trace_event_call *call = &event_function;
2994 struct trace_buffer *buffer = tr->array_buffer.buffer;
2995 struct ring_buffer_event *event;
2996 struct ftrace_entry *entry;
2998 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3002 entry = ring_buffer_event_data(event);
3004 entry->parent_ip = parent_ip;
3006 if (!call_filter_check_discard(call, entry, buffer, event)) {
3007 if (static_branch_unlikely(&trace_function_exports_enabled))
3008 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3009 __buffer_unlock_commit(buffer, event);
3013 #ifdef CONFIG_STACKTRACE
3015 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3016 #define FTRACE_KSTACK_NESTING 4
3018 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3020 struct ftrace_stack {
3021 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3025 struct ftrace_stacks {
3026 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3029 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3030 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
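/*
 * Rough sizing, assuming 4 KiB pages and 8-byte longs:
 * FTRACE_KSTACK_ENTRIES = 4096 / 4 = 1024 entries per nesting level, so
 * each struct ftrace_stack is 8 KiB and the per-cpu ftrace_stacks array
 * is 4 * 8 KiB = 32 KiB. ftrace_stack_reserve selects which of the four
 * levels (normal, softirq, irq, NMI) the current context writes into.
 */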
3032 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3033 unsigned int trace_ctx,
3034 int skip, struct pt_regs *regs)
3036 struct trace_event_call *call = &event_kernel_stack;
3037 struct ring_buffer_event *event;
3038 unsigned int size, nr_entries;
3039 struct ftrace_stack *fstack;
3040 struct stack_entry *entry;
3044 * Add one, for this function and the call to stack_trace_save().
3045 * If regs is set, then these functions will not be in the way.
3047 #ifndef CONFIG_UNWINDER_ORC
3052 preempt_disable_notrace();
3054 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3056 /* This should never happen. If it does, yell once and skip */
3057 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3061 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3062 * interrupt will either see the value pre increment or post
3063 * increment. If the interrupt happens pre increment it will have
3064 * restored the counter when it returns. We just need a barrier to
3065 * keep gcc from moving things around.
3069 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3070 size = ARRAY_SIZE(fstack->calls);
3073 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3076 nr_entries = stack_trace_save(fstack->calls, size, skip);
3079 size = nr_entries * sizeof(unsigned long);
3080 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3081 (sizeof(*entry) - sizeof(entry->caller)) + size,
3085 entry = ring_buffer_event_data(event);
3087 memcpy(&entry->caller, fstack->calls, size);
3088 entry->size = nr_entries;
3090 if (!call_filter_check_discard(call, entry, buffer, event))
3091 __buffer_unlock_commit(buffer, event);
3094 /* Again, don't let gcc optimize things here */
3096 __this_cpu_dec(ftrace_stack_reserve);
3097 preempt_enable_notrace();
3101 static inline void ftrace_trace_stack(struct trace_array *tr,
3102 struct trace_buffer *buffer,
3103 unsigned int trace_ctx,
3104 int skip, struct pt_regs *regs)
3106 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3109 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3112 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3115 struct trace_buffer *buffer = tr->array_buffer.buffer;
3117 if (rcu_is_watching()) {
3118 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3123 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3124 * but if the above rcu_is_watching() failed, then the NMI
3125 * triggered someplace critical, and ct_irq_enter() should
3126 * not be called from NMI.
3128 if (unlikely(in_nmi()))
3131 ct_irq_enter_irqson();
3132 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3133 ct_irq_exit_irqson();
3137 * trace_dump_stack - record a stack back trace in the trace buffer
3138 * @skip: Number of functions to skip (helper handlers)
3140 void trace_dump_stack(int skip)
3142 if (tracing_disabled || tracing_selftest_running)
3145 #ifndef CONFIG_UNWINDER_ORC
3146 /* Skip 1 to skip this function. */
3149 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3150 tracing_gen_ctx(), skip, NULL);
3152 EXPORT_SYMBOL_GPL(trace_dump_stack);
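/*
 * Example (usage sketch): a driver chasing a "how did I get here?"
 * problem can drop a kernel stack trace into the trace buffer instead
 * of the console:
 *
 *	trace_dump_stack(0);	(record from the caller on down)
 *
 * A non-zero skip trims helper frames, e.g. trace_dump_stack(1) when
 * the call is buried inside a small wrapper of your own.
 */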
3154 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3155 static DEFINE_PER_CPU(int, user_stack_count);
3158 ftrace_trace_userstack(struct trace_array *tr,
3159 struct trace_buffer *buffer, unsigned int trace_ctx)
3161 struct trace_event_call *call = &event_user_stack;
3162 struct ring_buffer_event *event;
3163 struct userstack_entry *entry;
3165 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3169 * NMIs can not handle page faults, even with fixups.
3170 * Saving the user stack can (and often does) fault.
3172 if (unlikely(in_nmi()))
3176 * prevent recursion, since the user stack tracing may
3177 * trigger other kernel events.
3180 if (__this_cpu_read(user_stack_count))
3183 __this_cpu_inc(user_stack_count);
3185 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3186 sizeof(*entry), trace_ctx);
3188 goto out_drop_count;
3189 entry = ring_buffer_event_data(event);
3191 entry->tgid = current->tgid;
3192 memset(&entry->caller, 0, sizeof(entry->caller));
3194 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3195 if (!call_filter_check_discard(call, entry, buffer, event))
3196 __buffer_unlock_commit(buffer, event);
3199 __this_cpu_dec(user_stack_count);
3203 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3204 static void ftrace_trace_userstack(struct trace_array *tr,
3205 struct trace_buffer *buffer,
3206 unsigned int trace_ctx)
3209 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3211 #endif /* CONFIG_STACKTRACE */
3214 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3215 unsigned long long delta)
3217 entry->bottom_delta_ts = delta & U32_MAX;
3218 entry->top_delta_ts = (delta >> 32);
3221 void trace_last_func_repeats(struct trace_array *tr,
3222 struct trace_func_repeats *last_info,
3223 unsigned int trace_ctx)
3225 struct trace_buffer *buffer = tr->array_buffer.buffer;
3226 struct func_repeats_entry *entry;
3227 struct ring_buffer_event *event;
3230 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3231 sizeof(*entry), trace_ctx);
3235 delta = ring_buffer_event_time_stamp(buffer, event) -
3236 last_info->ts_last_call;
3238 entry = ring_buffer_event_data(event);
3239 entry->ip = last_info->ip;
3240 entry->parent_ip = last_info->parent_ip;
3241 entry->count = last_info->count;
3242 func_repeats_set_delta_ts(entry, delta);
3244 __buffer_unlock_commit(buffer, event);
3247 /* created for use with alloc_percpu */
3248 struct trace_buffer_struct {
3250 char buffer[4][TRACE_BUF_SIZE];
3253 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3256 * This allows for lockless recording. If we're nested too deeply, then
3257 * this returns NULL.
3259 static char *get_trace_buf(void)
3261 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3263 if (!trace_percpu_buffer || buffer->nesting >= 4)
3268 /* Interrupts must see nesting incremented before we use the buffer */
3270 return &buffer->buffer[buffer->nesting - 1][0];
3273 static void put_trace_buf(void)
3275 /* Don't let the decrement of nesting leak before this */
3277 this_cpu_dec(trace_percpu_buffer->nesting);
3280 static int alloc_percpu_trace_buffer(void)
3282 struct trace_buffer_struct __percpu *buffers;
3284 if (trace_percpu_buffer)
3287 buffers = alloc_percpu(struct trace_buffer_struct);
3288 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3291 trace_percpu_buffer = buffers;
3295 static int buffers_allocated;
3297 void trace_printk_init_buffers(void)
3299 if (buffers_allocated)
3302 if (alloc_percpu_trace_buffer())
3305 /* trace_printk() is for debug use only. Don't use it in production. */
3308 pr_warn("**********************************************************\n");
3309 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3311 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3313 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3314 pr_warn("** unsafe for production use. **\n");
3316 pr_warn("** If you see this message and you are not debugging **\n");
3317 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3319 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3320 pr_warn("**********************************************************\n");
3322 /* Expand the buffers to set size */
3323 tracing_update_buffers();
3325 buffers_allocated = 1;
3328 * trace_printk_init_buffers() can be called by modules.
3329 * If that happens, then we need to start cmdline recording
3330 * directly here. If the global_trace.buffer is already
3331 * allocated here, then this was called by module code.
3333 if (global_trace.array_buffer.buffer)
3334 tracing_start_cmdline_record();
3336 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
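/*
 * Example (debug-only sketch): trace_printk() is the usual consumer of
 * these buffers. A temporary debugging hack might look like:
 *
 *	trace_printk("request %d took %llu ns\n", req_id, delta_ns);
 *
 * where req_id and delta_ns are whatever the code being debugged has at
 * hand. Having such a call built into the kernel (or a loaded module)
 * is what triggers the banner above and expands the ring buffer via
 * tracing_update_buffers().
 */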
3338 void trace_printk_start_comm(void)
3340 /* Start tracing comms if trace printk is set */
3341 if (!buffers_allocated)
3343 tracing_start_cmdline_record();
3346 static void trace_printk_start_stop_comm(int enabled)
3348 if (!buffers_allocated)
3352 tracing_start_cmdline_record();
3354 tracing_stop_cmdline_record();
3358 * trace_vbprintk - write binary msg to tracing buffer
3359 * @ip: The address of the caller
3360 * @fmt: The string format to write to the buffer
3361 * @args: Arguments for @fmt
3363 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3365 struct trace_event_call *call = &event_bprint;
3366 struct ring_buffer_event *event;
3367 struct trace_buffer *buffer;
3368 struct trace_array *tr = &global_trace;
3369 struct bprint_entry *entry;
3370 unsigned int trace_ctx;
3374 if (unlikely(tracing_selftest_running || tracing_disabled))
3377 /* Don't pollute graph traces with trace_vprintk internals */
3378 pause_graph_tracing();
3380 trace_ctx = tracing_gen_ctx();
3381 preempt_disable_notrace();
3383 tbuffer = get_trace_buf();
3389 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3391 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3394 size = sizeof(*entry) + sizeof(u32) * len;
3395 buffer = tr->array_buffer.buffer;
3396 ring_buffer_nest_start(buffer);
3397 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3401 entry = ring_buffer_event_data(event);
3405 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3406 if (!call_filter_check_discard(call, entry, buffer, event)) {
3407 __buffer_unlock_commit(buffer, event);
3408 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3412 ring_buffer_nest_end(buffer);
3417 preempt_enable_notrace();
3418 unpause_graph_tracing();
3422 EXPORT_SYMBOL_GPL(trace_vbprintk);
3426 __trace_array_vprintk(struct trace_buffer *buffer,
3427 unsigned long ip, const char *fmt, va_list args)
3429 struct trace_event_call *call = &event_print;
3430 struct ring_buffer_event *event;
3432 struct print_entry *entry;
3433 unsigned int trace_ctx;
3436 if (tracing_disabled || tracing_selftest_running)
3439 /* Don't pollute graph traces with trace_vprintk internals */
3440 pause_graph_tracing();
3442 trace_ctx = tracing_gen_ctx();
3443 preempt_disable_notrace();
3446 tbuffer = get_trace_buf();
3452 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3454 size = sizeof(*entry) + len + 1;
3455 ring_buffer_nest_start(buffer);
3456 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3460 entry = ring_buffer_event_data(event);
3463 memcpy(&entry->buf, tbuffer, len + 1);
3464 if (!call_filter_check_discard(call, entry, buffer, event)) {
3465 __buffer_unlock_commit(buffer, event);
3466 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3470 ring_buffer_nest_end(buffer);
3474 preempt_enable_notrace();
3475 unpause_graph_tracing();
3481 int trace_array_vprintk(struct trace_array *tr,
3482 unsigned long ip, const char *fmt, va_list args)
3484 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3488 * trace_array_printk - Print a message to a specific instance
3489 * @tr: The instance trace_array descriptor
3490 * @ip: The instruction pointer that this is called from.
3491 * @fmt: The format to print (printf format)
3493 * If a subsystem sets up its own instance, they have the right to
3494 * printk strings into their tracing instance buffer using this
3495 * function. Note, this function will not write into the top level
3496 * buffer (use trace_printk() for that), as writing into the top level
3497 * buffer should only have events that can be individually disabled.
3498 * trace_printk() is only meant for debugging a kernel, and should
3499 * never be incorporated into normal, production use.
3501 * trace_array_printk() can be used, as it will not add noise to the
3502 * top level tracing buffer.
3504 * Note, trace_array_init_printk() must be called on @tr before this
3508 int trace_array_printk(struct trace_array *tr,
3509 unsigned long ip, const char *fmt, ...)
3517 /* This is only allowed for created instances */
3518 if (tr == &global_trace)
3521 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3525 ret = trace_array_vprintk(tr, ip, fmt, ap);
3529 EXPORT_SYMBOL_GPL(trace_array_printk);
3532 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3533 * @tr: The trace array to initialize the buffers for
3535 * As trace_array_printk() only writes into instances, they are OK to
3536 * have in the kernel (unlike trace_printk()). This needs to be called
3537 * before trace_array_printk() can be used on a trace_array.
3539 int trace_array_init_printk(struct trace_array *tr)
3544 /* This is only allowed for created instances */
3545 if (tr == &global_trace)
3548 return alloc_percpu_trace_buffer();
3550 EXPORT_SYMBOL_GPL(trace_array_init_printk);
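/*
 * Example (sketch of a module writing to its own instance; it assumes
 * the handle came from trace_array_get_by_name(), the usual way a
 * module obtains one, and "my_instance"/"n" are placeholders):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe %d fired\n", n);
 */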
3553 int trace_array_printk_buf(struct trace_buffer *buffer,
3554 unsigned long ip, const char *fmt, ...)
3559 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3563 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3569 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3571 return trace_array_vprintk(&global_trace, ip, fmt, args);
3573 EXPORT_SYMBOL_GPL(trace_vprintk);
3575 static void trace_iterator_increment(struct trace_iterator *iter)
3577 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3581 ring_buffer_iter_advance(buf_iter);
3584 static struct trace_entry *
3585 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3586 unsigned long *lost_events)
3588 struct ring_buffer_event *event;
3589 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3592 event = ring_buffer_iter_peek(buf_iter, ts);
3594 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3595 (unsigned long)-1 : 0;
3597 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3602 iter->ent_size = ring_buffer_event_length(event);
3603 return ring_buffer_event_data(event);
3609 static struct trace_entry *
3610 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3611 unsigned long *missing_events, u64 *ent_ts)
3613 struct trace_buffer *buffer = iter->array_buffer->buffer;
3614 struct trace_entry *ent, *next = NULL;
3615 unsigned long lost_events = 0, next_lost = 0;
3616 int cpu_file = iter->cpu_file;
3617 u64 next_ts = 0, ts;
3623 * If we are in a per_cpu trace file, don't bother iterating over
3624 * all CPUs; just peek directly at that one.
3626 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3627 if (ring_buffer_empty_cpu(buffer, cpu_file))
3629 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3631 *ent_cpu = cpu_file;
3636 for_each_tracing_cpu(cpu) {
3638 if (ring_buffer_empty_cpu(buffer, cpu))
3641 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3644 * Pick the entry with the smallest timestamp:
3646 if (ent && (!next || ts < next_ts)) {
3650 next_lost = lost_events;
3651 next_size = iter->ent_size;
3655 iter->ent_size = next_size;
3658 *ent_cpu = next_cpu;
3664 *missing_events = next_lost;
3669 #define STATIC_FMT_BUF_SIZE 128
3670 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3672 static char *trace_iter_expand_format(struct trace_iterator *iter)
3677 * iter->tr is NULL when used with tp_printk, which makes
3678 * this get called where it is not safe to call krealloc().
3680 if (!iter->tr || iter->fmt == static_fmt_buf)
3683 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3686 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3693 /* Returns true if the string is safe to dereference from an event */
3694 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3697 unsigned long addr = (unsigned long)str;
3698 struct trace_event *trace_event;
3699 struct trace_event_call *event;
3701 /* Ignore strings with no length */
3705 /* OK if part of the event data */
3706 if ((addr >= (unsigned long)iter->ent) &&
3707 (addr < (unsigned long)iter->ent + iter->ent_size))
3710 /* OK if part of the temp seq buffer */
3711 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3712 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3715 /* Core rodata can not be freed */
3716 if (is_kernel_rodata(addr))
3719 if (trace_is_tracepoint_string(str))
3723 * Now this could be a module event, referencing core module
3724 * data, which is OK.
3729 trace_event = ftrace_find_event(iter->ent->type);
3733 event = container_of(trace_event, struct trace_event_call, event);
3734 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3737 /* Would rather have rodata, but this will suffice */
3738 if (within_module_core(addr, event->module))
3744 static const char *show_buffer(struct trace_seq *s)
3746 struct seq_buf *seq = &s->seq;
3748 seq_buf_terminate(seq);
3753 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3755 static int test_can_verify_check(const char *fmt, ...)
3762 * The verifier depends on vsnprintf() modifying the va_list that is
3763 * passed to it, where it is sent by reference. Some architectures
3764 * (like x86_32) pass it by value, which means that vsnprintf()
3765 * does not modify the va_list passed to it, and the verifier
3766 * would then need to be able to understand all the values that
3767 * vsnprintf can use. If it is passed by value, then the verifier
3771 vsnprintf(buf, 16, "%d", ap);
3772 ret = va_arg(ap, int);
3778 static void test_can_verify(void)
3780 if (!test_can_verify_check("%d %d", 0, 1)) {
3781 pr_info("trace event string verifier disabled\n");
3782 static_branch_inc(&trace_no_verify);
3787 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3788 * @iter: The iterator that holds the seq buffer and the event being printed
3789 * @fmt: The format used to print the event
3790 * @ap: The va_list holding the data to print from @fmt.
3792 * This writes the data into the @iter->seq buffer using the data from
3793 * @fmt and @ap. If the format has a %s, then the source of the string
3794 * is examined to make sure it is safe to print, otherwise it will
3795 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3798 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3801 const char *p = fmt;
3805 if (WARN_ON_ONCE(!fmt))
3808 if (static_branch_unlikely(&trace_no_verify))
3811 /* Don't bother checking when doing a ftrace_dump() */
3812 if (iter->fmt == static_fmt_buf)
3821 /* We only care about %s and variants */
3822 for (i = 0; p[i]; i++) {
3823 if (i + 1 >= iter->fmt_size) {
3825 * If we can't expand the copy buffer,
3828 if (!trace_iter_expand_format(iter))
3832 if (p[i] == '\\' && p[i+1]) {
3837 /* Need to test cases like %08.*s */
3838 for (j = 1; p[i+j]; j++) {
3839 if (isdigit(p[i+j]) ||
3842 if (p[i+j] == '*') {
3854 /* If no %s found then just print normally */
3858 /* Copy up to the %s, and print that */
3859 strncpy(iter->fmt, p, i);
3860 iter->fmt[i] = '\0';
3861 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3864 * If iter->seq is full, the above call no longer guarantees
3865 * that ap is in sync with fmt processing, and further calls
3866 * to va_arg() can return wrong positional arguments.
3868 * Ensure that ap is no longer used in this case.
3870 if (iter->seq.full) {
3876 len = va_arg(ap, int);
3878 /* The ap now points to the string data of the %s */
3879 str = va_arg(ap, const char *);
3882 * If you hit this warning, it is likely that the
3883 * trace event in question used %s on a string that
3884 * was saved at the time of the event, but may not be
3885 * around when the trace is read. Use __string(),
3886 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3887 * instead. See samples/trace_events/trace-events-sample.h
3890 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3891 "fmt: '%s' current_buffer: '%s'",
3892 fmt, show_buffer(&iter->seq))) {
3895 /* Try to safely read the string */
3897 if (len + 1 > iter->fmt_size)
3898 len = iter->fmt_size - 1;
3901 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3905 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3909 trace_seq_printf(&iter->seq, "(0x%px)", str);
3911 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3913 str = "[UNSAFE-MEMORY]";
3914 strcpy(iter->fmt, "%s");
3916 strncpy(iter->fmt, p + i, j + 1);
3917 iter->fmt[j+1] = '\0';
3920 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3922 trace_seq_printf(&iter->seq, iter->fmt, str);
3928 trace_seq_vprintf(&iter->seq, p, ap);
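/*
 * Example (sketch of the safe pattern the warning above points at,
 * loosely following samples/trace_events/trace-events-sample.h): copy
 * the string into the event at record time rather than storing a
 * pointer that may be gone by the time the trace is read.
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("%s", __get_str(name))
 *	);
 */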
3931 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3933 const char *p, *new_fmt;
3936 if (WARN_ON_ONCE(!fmt))
3939 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3943 new_fmt = q = iter->fmt;
3945 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3946 if (!trace_iter_expand_format(iter))
3949 q += iter->fmt - new_fmt;
3950 new_fmt = iter->fmt;
3955 /* Replace %p with %px */
3959 } else if (p[0] == 'p' && !isalnum(p[1])) {
3970 #define STATIC_TEMP_BUF_SIZE 128
3971 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3973 /* Find the next real entry, without updating the iterator itself */
3974 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3975 int *ent_cpu, u64 *ent_ts)
3977 /* __find_next_entry will reset ent_size */
3978 int ent_size = iter->ent_size;
3979 struct trace_entry *entry;
3982 * If called from ftrace_dump(), then the iter->temp buffer
3983 * will be the static_temp_buf and not created from kmalloc.
3984 * If the entry size is greater than the buffer, we can
3985 * not save it. Just return NULL in that case. This is only
3986 * used to add markers when two consecutive events' time
3987 * stamps have a large delta. See trace_print_lat_context()
3989 if (iter->temp == static_temp_buf &&
3990 STATIC_TEMP_BUF_SIZE < ent_size)
3994 * The __find_next_entry() may call peek_next_entry(), which may
3995 * call ring_buffer_peek() that may make the contents of iter->ent
3996 * undefined. Need to copy iter->ent now.
3998 if (iter->ent && iter->ent != iter->temp) {
3999 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4000 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4002 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4007 iter->temp_size = iter->ent_size;
4009 memcpy(iter->temp, iter->ent, iter->ent_size);
4010 iter->ent = iter->temp;
4012 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4013 /* Put back the original ent_size */
4014 iter->ent_size = ent_size;
4019 /* Find the next real entry, and increment the iterator to the next entry */
4020 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4022 iter->ent = __find_next_entry(iter, &iter->cpu,
4023 &iter->lost_events, &iter->ts);
4026 trace_iterator_increment(iter);
4028 return iter->ent ? iter : NULL;
4031 static void trace_consume(struct trace_iterator *iter)
4033 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4034 &iter->lost_events);
4037 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4039 struct trace_iterator *iter = m->private;
4043 WARN_ON_ONCE(iter->leftover);
4047 /* can't go backwards */
4052 ent = trace_find_next_entry_inc(iter);
4056 while (ent && iter->idx < i)
4057 ent = trace_find_next_entry_inc(iter);
4064 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4066 struct ring_buffer_iter *buf_iter;
4067 unsigned long entries = 0;
4070 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4072 buf_iter = trace_buffer_iter(iter, cpu);
4076 ring_buffer_iter_reset(buf_iter);
4079 * We could have the case with the max latency tracers
4080 * that a reset never took place on a cpu. This is evidenced
4081 * by the timestamp being before the start of the buffer.
4083 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4084 if (ts >= iter->array_buffer->time_start)
4087 ring_buffer_iter_advance(buf_iter);
4090 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4094 * The current tracer is copied to avoid a global locking
4097 static void *s_start(struct seq_file *m, loff_t *pos)
4099 struct trace_iterator *iter = m->private;
4100 struct trace_array *tr = iter->tr;
4101 int cpu_file = iter->cpu_file;
4107 * copy the tracer to avoid using a global lock all around.
4108 * iter->trace is a copy of current_trace, the pointer to the
4109 * name may be used instead of a strcmp(), as iter->trace->name
4110 * will point to the same string as current_trace->name.
4112 mutex_lock(&trace_types_lock);
4113 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4114 *iter->trace = *tr->current_trace;
4115 mutex_unlock(&trace_types_lock);
4117 #ifdef CONFIG_TRACER_MAX_TRACE
4118 if (iter->snapshot && iter->trace->use_max_tr)
4119 return ERR_PTR(-EBUSY);
4122 if (*pos != iter->pos) {
4127 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4128 for_each_tracing_cpu(cpu)
4129 tracing_iter_reset(iter, cpu);
4131 tracing_iter_reset(iter, cpu_file);
4134 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4139 * If we overflowed the seq_file before, then we want
4140 * to just reuse the trace_seq buffer again.
4146 p = s_next(m, p, &l);
4150 trace_event_read_lock();
4151 trace_access_lock(cpu_file);
4155 static void s_stop(struct seq_file *m, void *p)
4157 struct trace_iterator *iter = m->private;
4159 #ifdef CONFIG_TRACER_MAX_TRACE
4160 if (iter->snapshot && iter->trace->use_max_tr)
4164 trace_access_unlock(iter->cpu_file);
4165 trace_event_read_unlock();
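/*
 * Reading the "trace" file goes through the seq_file machinery, which
 * drives the callbacks above roughly like this for each read() chunk:
 *
 *	s_start(m, &pos)			take locks, position the iterator
 *	  s_show(m, v), s_next(m, v, &pos)	repeated until the buffer fills
 *	s_stop(m, v)				drop the locks taken in s_start()
 *
 * which is why s_start() and s_stop() have to stay balanced, including
 * on the early-return paths.
 */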
4169 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4170 unsigned long *entries, int cpu)
4172 unsigned long count;
4174 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4176 * If this buffer has skipped entries, then we hold all
4177 * entries for the trace and we need to ignore the
4178 * ones before the time stamp.
4180 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4181 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4182 /* total is the same as the entries */
4186 ring_buffer_overrun_cpu(buf->buffer, cpu);
4191 get_total_entries(struct array_buffer *buf,
4192 unsigned long *total, unsigned long *entries)
4200 for_each_tracing_cpu(cpu) {
4201 get_total_entries_cpu(buf, &t, &e, cpu);
4207 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4209 unsigned long total, entries;
4214 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4219 unsigned long trace_total_entries(struct trace_array *tr)
4221 unsigned long total, entries;
4226 get_total_entries(&tr->array_buffer, &total, &entries);
4231 static void print_lat_help_header(struct seq_file *m)
4233 seq_puts(m, "# _------=> CPU# \n"
4234 "# / _-----=> irqs-off/BH-disabled\n"
4235 "# | / _----=> need-resched \n"
4236 "# || / _---=> hardirq/softirq \n"
4237 "# ||| / _--=> preempt-depth \n"
4238 "# |||| / _-=> migrate-disable \n"
4239 "# ||||| / delay \n"
4240 "# cmd pid |||||| time | caller \n"
4241 "# \\ / |||||| \\ | / \n");
4244 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4246 unsigned long total;
4247 unsigned long entries;
4249 get_total_entries(buf, &total, &entries);
4250 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4251 entries, total, num_online_cpus());
4255 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4258 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4260 print_event_info(buf, m);
4262 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4263 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4266 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4269 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4270 static const char space[] = " ";
4271 int prec = tgid ? 12 : 2;
4273 print_event_info(buf, m);
4275 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4276 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4277 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4278 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4279 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4280 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4281 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4282 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4286 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4288 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4289 struct array_buffer *buf = iter->array_buffer;
4290 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4291 struct tracer *type = iter->trace;
4292 unsigned long entries;
4293 unsigned long total;
4294 const char *name = type->name;
4296 get_total_entries(buf, &total, &entries);
4298 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4300 seq_puts(m, "# -----------------------------------"
4301 "---------------------------------\n");
4302 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4303 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4304 nsecs_to_usecs(data->saved_latency),
4308 preempt_model_none() ? "server" :
4309 preempt_model_voluntary() ? "desktop" :
4310 preempt_model_full() ? "preempt" :
4311 preempt_model_rt() ? "preempt_rt" :
4313 /* These are reserved for later use */
4316 seq_printf(m, " #P:%d)\n", num_online_cpus());
4320 seq_puts(m, "# -----------------\n");
4321 seq_printf(m, "# | task: %.16s-%d "
4322 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4323 data->comm, data->pid,
4324 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4325 data->policy, data->rt_priority);
4326 seq_puts(m, "# -----------------\n");
4328 if (data->critical_start) {
4329 seq_puts(m, "# => started at: ");
4330 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4331 trace_print_seq(m, &iter->seq);
4332 seq_puts(m, "\n# => ended at: ");
4333 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4334 trace_print_seq(m, &iter->seq);
4335 seq_puts(m, "\n#\n");
4341 static void test_cpu_buff_start(struct trace_iterator *iter)
4343 struct trace_seq *s = &iter->seq;
4344 struct trace_array *tr = iter->tr;
4346 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4349 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4352 if (cpumask_available(iter->started) &&
4353 cpumask_test_cpu(iter->cpu, iter->started))
4356 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4359 if (cpumask_available(iter->started))
4360 cpumask_set_cpu(iter->cpu, iter->started);
4362 /* Don't print started cpu buffer for the first entry of the trace */
4364 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4368 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4370 struct trace_array *tr = iter->tr;
4371 struct trace_seq *s = &iter->seq;
4372 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4373 struct trace_entry *entry;
4374 struct trace_event *event;
4378 test_cpu_buff_start(iter);
4380 event = ftrace_find_event(entry->type);
4382 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4383 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4384 trace_print_lat_context(iter);
4386 trace_print_context(iter);
4389 if (trace_seq_has_overflowed(s))
4390 return TRACE_TYPE_PARTIAL_LINE;
4393 return event->funcs->trace(iter, sym_flags, event);
4395 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4397 return trace_handle_return(s);
4400 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4402 struct trace_array *tr = iter->tr;
4403 struct trace_seq *s = &iter->seq;
4404 struct trace_entry *entry;
4405 struct trace_event *event;
4409 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4410 trace_seq_printf(s, "%d %d %llu ",
4411 entry->pid, iter->cpu, iter->ts);
4413 if (trace_seq_has_overflowed(s))
4414 return TRACE_TYPE_PARTIAL_LINE;
4416 event = ftrace_find_event(entry->type);
4418 return event->funcs->raw(iter, 0, event);
4420 trace_seq_printf(s, "%d ?\n", entry->type);
4422 return trace_handle_return(s);
4425 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4427 struct trace_array *tr = iter->tr;
4428 struct trace_seq *s = &iter->seq;
4429 unsigned char newline = '\n';
4430 struct trace_entry *entry;
4431 struct trace_event *event;
4435 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4436 SEQ_PUT_HEX_FIELD(s, entry->pid);
4437 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4438 SEQ_PUT_HEX_FIELD(s, iter->ts);
4439 if (trace_seq_has_overflowed(s))
4440 return TRACE_TYPE_PARTIAL_LINE;
4443 event = ftrace_find_event(entry->type);
4445 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4446 if (ret != TRACE_TYPE_HANDLED)
4450 SEQ_PUT_FIELD(s, newline);
4452 return trace_handle_return(s);
4455 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4457 struct trace_array *tr = iter->tr;
4458 struct trace_seq *s = &iter->seq;
4459 struct trace_entry *entry;
4460 struct trace_event *event;
4464 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4465 SEQ_PUT_FIELD(s, entry->pid);
4466 SEQ_PUT_FIELD(s, iter->cpu);
4467 SEQ_PUT_FIELD(s, iter->ts);
4468 if (trace_seq_has_overflowed(s))
4469 return TRACE_TYPE_PARTIAL_LINE;
4472 event = ftrace_find_event(entry->type);
4473 return event ? event->funcs->binary(iter, 0, event) :
4477 int trace_empty(struct trace_iterator *iter)
4479 struct ring_buffer_iter *buf_iter;
4482 /* If we are looking at one CPU buffer, only check that one */
4483 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4484 cpu = iter->cpu_file;
4485 buf_iter = trace_buffer_iter(iter, cpu);
4487 if (!ring_buffer_iter_empty(buf_iter))
4490 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4496 for_each_tracing_cpu(cpu) {
4497 buf_iter = trace_buffer_iter(iter, cpu);
4499 if (!ring_buffer_iter_empty(buf_iter))
4502 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4510 /* Called with trace_event_read_lock() held. */
4511 enum print_line_t print_trace_line(struct trace_iterator *iter)
4513 struct trace_array *tr = iter->tr;
4514 unsigned long trace_flags = tr->trace_flags;
4515 enum print_line_t ret;
4517 if (iter->lost_events) {
4518 if (iter->lost_events == (unsigned long)-1)
4519 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4522 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4523 iter->cpu, iter->lost_events);
4524 if (trace_seq_has_overflowed(&iter->seq))
4525 return TRACE_TYPE_PARTIAL_LINE;
4528 if (iter->trace && iter->trace->print_line) {
4529 ret = iter->trace->print_line(iter);
4530 if (ret != TRACE_TYPE_UNHANDLED)
4534 if (iter->ent->type == TRACE_BPUTS &&
4535 trace_flags & TRACE_ITER_PRINTK &&
4536 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4537 return trace_print_bputs_msg_only(iter);
4539 if (iter->ent->type == TRACE_BPRINT &&
4540 trace_flags & TRACE_ITER_PRINTK &&
4541 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4542 return trace_print_bprintk_msg_only(iter);
4544 if (iter->ent->type == TRACE_PRINT &&
4545 trace_flags & TRACE_ITER_PRINTK &&
4546 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4547 return trace_print_printk_msg_only(iter);
4549 if (trace_flags & TRACE_ITER_BIN)
4550 return print_bin_fmt(iter);
4552 if (trace_flags & TRACE_ITER_HEX)
4553 return print_hex_fmt(iter);
4555 if (trace_flags & TRACE_ITER_RAW)
4556 return print_raw_fmt(iter);
4558 return print_trace_fmt(iter);
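/*
 * Example (usage sketch): the precedence above maps onto the tracefs
 * option files, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	# echo 1 > /sys/kernel/tracing/options/bin	-> print_bin_fmt()
 *	# echo 1 > /sys/kernel/tracing/options/hex	-> print_hex_fmt()
 *	# echo 1 > /sys/kernel/tracing/options/raw	-> print_raw_fmt()
 *	(none of the above set)				-> print_trace_fmt()
 */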
4561 void trace_latency_header(struct seq_file *m)
4563 struct trace_iterator *iter = m->private;
4564 struct trace_array *tr = iter->tr;
4566 /* print nothing if the buffers are empty */
4567 if (trace_empty(iter))
4570 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4571 print_trace_header(m, iter);
4573 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4574 print_lat_help_header(m);
4577 void trace_default_header(struct seq_file *m)
4579 struct trace_iterator *iter = m->private;
4580 struct trace_array *tr = iter->tr;
4581 unsigned long trace_flags = tr->trace_flags;
4583 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4586 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4587 /* print nothing if the buffers are empty */
4588 if (trace_empty(iter))
4590 print_trace_header(m, iter);
4591 if (!(trace_flags & TRACE_ITER_VERBOSE))
4592 print_lat_help_header(m);
4594 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4595 if (trace_flags & TRACE_ITER_IRQ_INFO)
4596 print_func_help_header_irq(iter->array_buffer,
4599 print_func_help_header(iter->array_buffer, m,
4605 static void test_ftrace_alive(struct seq_file *m)
4607 if (!ftrace_is_dead())
4609 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4610 "# MAY BE MISSING FUNCTION EVENTS\n");
4613 #ifdef CONFIG_TRACER_MAX_TRACE
4614 static void show_snapshot_main_help(struct seq_file *m)
4616 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4617 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4618 "# Takes a snapshot of the main buffer.\n"
4619 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4620 "# (Doesn't have to be '2' works with any number that\n"
4621 "# is not a '0' or '1')\n");
4624 static void show_snapshot_percpu_help(struct seq_file *m)
4626 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4627 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4628 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4629 "# Takes a snapshot of the main buffer for this cpu.\n");
4631 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4632 "# Must use main snapshot file to allocate.\n");
4634 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4635 "# (Doesn't have to be '2' works with any number that\n"
4636 "# is not a '0' or '1')\n");
4639 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4641 if (iter->tr->allocated_snapshot)
4642 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4644 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4646 seq_puts(m, "# Snapshot commands:\n");
4647 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4648 show_snapshot_main_help(m);
4650 show_snapshot_percpu_help(m);
4653 /* Should never be called */
4654 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4657 static int s_show(struct seq_file *m, void *v)
4659 struct trace_iterator *iter = v;
4662 if (iter->ent == NULL) {
4664 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4666 test_ftrace_alive(m);
4668 if (iter->snapshot && trace_empty(iter))
4669 print_snapshot_help(m, iter);
4670 else if (iter->trace && iter->trace->print_header)
4671 iter->trace->print_header(m);
4673 trace_default_header(m);
4675 } else if (iter->leftover) {
4677 * If we filled the seq_file buffer earlier, we
4678 * want to just show it now.
4680 ret = trace_print_seq(m, &iter->seq);
4682 /* ret should this time be zero, but you never know */
4683 iter->leftover = ret;
4686 print_trace_line(iter);
4687 ret = trace_print_seq(m, &iter->seq);
4689 * If we overflow the seq_file buffer, then it will
4690 * ask us for this data again at start up.
4692 * ret is 0 if seq_file write succeeded.
4695 iter->leftover = ret;
4702 * Should be used after trace_array_get(); trace_types_lock
4703 * ensures that i_cdev was already initialized.
4705 static inline int tracing_get_cpu(struct inode *inode)
4707 if (inode->i_cdev) /* See trace_create_cpu_file() */
4708 return (long)inode->i_cdev - 1;
4709 return RING_BUFFER_ALL_CPUS;
4712 static const struct seq_operations tracer_seq_ops = {
4719 static struct trace_iterator *
4720 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4722 struct trace_array *tr = inode->i_private;
4723 struct trace_iterator *iter;
4726 if (tracing_disabled)
4727 return ERR_PTR(-ENODEV);
4729 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4731 return ERR_PTR(-ENOMEM);
4733 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4735 if (!iter->buffer_iter)
4739 * trace_find_next_entry() may need to save off iter->ent.
4740 * It will place it into the iter->temp buffer. As most
4741 * events are less than 128 bytes, allocate a buffer of that size.
4742 * If one is greater, then trace_find_next_entry() will
4743 * allocate a new buffer to adjust for the bigger iter->ent.
4744 * It's not critical if it fails to get allocated here.
4746 iter->temp = kmalloc(128, GFP_KERNEL);
4748 iter->temp_size = 128;
4751 * trace_event_printf() may need to modify the given format
4752 * string to replace %p with %px so that it shows the real address
4753 * instead of a hashed value. However, that is only needed for event
4754 * tracing; other tracers may not need it. Defer the allocation
4755 * until it is needed.
4761 * We make a copy of the current tracer to avoid concurrent
4762 * changes on it while we are reading.
4764 mutex_lock(&trace_types_lock);
4765 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4769 *iter->trace = *tr->current_trace;
4771 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4776 #ifdef CONFIG_TRACER_MAX_TRACE
4777 /* Currently only the top directory has a snapshot */
4778 if (tr->current_trace->print_max || snapshot)
4779 iter->array_buffer = &tr->max_buffer;
4782 iter->array_buffer = &tr->array_buffer;
4783 iter->snapshot = snapshot;
4785 iter->cpu_file = tracing_get_cpu(inode);
4786 mutex_init(&iter->mutex);
4788 /* Notify the tracer early; before we stop tracing. */
4789 if (iter->trace->open)
4790 iter->trace->open(iter);
4792 /* Annotate start of buffers if we had overruns */
4793 if (ring_buffer_overruns(iter->array_buffer->buffer))
4794 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4796 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4797 if (trace_clocks[tr->clock_id].in_ns)
4798 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4801 * If pause-on-trace is enabled, then stop the trace while
4802 * dumping, unless this is the "snapshot" file
4804 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4805 tracing_stop_tr(tr);
4807 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4808 for_each_tracing_cpu(cpu) {
4809 iter->buffer_iter[cpu] =
4810 ring_buffer_read_prepare(iter->array_buffer->buffer,
4813 ring_buffer_read_prepare_sync();
4814 for_each_tracing_cpu(cpu) {
4815 ring_buffer_read_start(iter->buffer_iter[cpu]);
4816 tracing_iter_reset(iter, cpu);
4819 cpu = iter->cpu_file;
4820 iter->buffer_iter[cpu] =
4821 ring_buffer_read_prepare(iter->array_buffer->buffer,
4823 ring_buffer_read_prepare_sync();
4824 ring_buffer_read_start(iter->buffer_iter[cpu]);
4825 tracing_iter_reset(iter, cpu);
4828 mutex_unlock(&trace_types_lock);
4833 mutex_unlock(&trace_types_lock);
4836 kfree(iter->buffer_iter);
4838 seq_release_private(inode, file);
4839 return ERR_PTR(-ENOMEM);
4842 int tracing_open_generic(struct inode *inode, struct file *filp)
4846 ret = tracing_check_open_get_tr(NULL);
4850 filp->private_data = inode->i_private;
4854 bool tracing_is_disabled(void)
4856 return (tracing_disabled) ? true : false;
4860 * Open and update trace_array ref count.
4861 * Must have the current trace_array passed to it.
4863 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4865 struct trace_array *tr = inode->i_private;
4868 ret = tracing_check_open_get_tr(tr);
4872 filp->private_data = inode->i_private;
4877 static int tracing_mark_open(struct inode *inode, struct file *filp)
4879 stream_open(inode, filp);
4880 return tracing_open_generic_tr(inode, filp);
4883 static int tracing_release(struct inode *inode, struct file *file)
4885 struct trace_array *tr = inode->i_private;
4886 struct seq_file *m = file->private_data;
4887 struct trace_iterator *iter;
4890 if (!(file->f_mode & FMODE_READ)) {
4891 trace_array_put(tr);
4895 /* Writes do not use seq_file */
4897 mutex_lock(&trace_types_lock);
4899 for_each_tracing_cpu(cpu) {
4900 if (iter->buffer_iter[cpu])
4901 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4904 if (iter->trace && iter->trace->close)
4905 iter->trace->close(iter);
4907 if (!iter->snapshot && tr->stop_count)
4908 /* reenable tracing if it was previously enabled */
4909 tracing_start_tr(tr);
4911 __trace_array_put(tr);
4913 mutex_unlock(&trace_types_lock);
4915 mutex_destroy(&iter->mutex);
4916 free_cpumask_var(iter->started);
4920 kfree(iter->buffer_iter);
4921 seq_release_private(inode, file);
4926 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4928 struct trace_array *tr = inode->i_private;
4930 trace_array_put(tr);
4934 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4936 struct trace_array *tr = inode->i_private;
4938 trace_array_put(tr);
4940 return single_release(inode, file);
4943 static int tracing_open(struct inode *inode, struct file *file)
4945 struct trace_array *tr = inode->i_private;
4946 struct trace_iterator *iter;
4949 ret = tracing_check_open_get_tr(tr);
4953 /* If this file was open for write, then erase contents */
4954 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4955 int cpu = tracing_get_cpu(inode);
4956 struct array_buffer *trace_buf = &tr->array_buffer;
4958 #ifdef CONFIG_TRACER_MAX_TRACE
4959 if (tr->current_trace->print_max)
4960 trace_buf = &tr->max_buffer;
4963 if (cpu == RING_BUFFER_ALL_CPUS)
4964 tracing_reset_online_cpus(trace_buf);
4966 tracing_reset_cpu(trace_buf, cpu);
4969 if (file->f_mode & FMODE_READ) {
4970 iter = __tracing_open(inode, file, false);
4972 ret = PTR_ERR(iter);
4973 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4974 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4978 trace_array_put(tr);
4984 * Some tracers are not suitable for instance buffers.
4985 * A tracer is always available for the global array (toplevel)
4986 * or if it explicitly states that it is.
4989 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4991 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4994 /* Find the next tracer that this trace array may use */
4995 static struct tracer *
4996 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4998 while (t && !trace_ok_for_array(t, tr))
5005 t_next(struct seq_file *m, void *v, loff_t *pos)
5007 struct trace_array *tr = m->private;
5008 struct tracer *t = v;
5013 t = get_tracer_for_array(tr, t->next);
5018 static void *t_start(struct seq_file *m, loff_t *pos)
5020 struct trace_array *tr = m->private;
5024 mutex_lock(&trace_types_lock);
5026 t = get_tracer_for_array(tr, trace_types);
5027 for (; t && l < *pos; t = t_next(m, t, &l))
5033 static void t_stop(struct seq_file *m, void *p)
5035 mutex_unlock(&trace_types_lock);
5038 static int t_show(struct seq_file *m, void *v)
5040 struct tracer *t = v;
5045 seq_puts(m, t->name);
5054 static const struct seq_operations show_traces_seq_ops = {
5061 static int show_traces_open(struct inode *inode, struct file *file)
5063 struct trace_array *tr = inode->i_private;
5067 ret = tracing_check_open_get_tr(tr);
5071 ret = seq_open(file, &show_traces_seq_ops);
5073 trace_array_put(tr);
5077 m = file->private_data;
5083 static int show_traces_release(struct inode *inode, struct file *file)
5085 struct trace_array *tr = inode->i_private;
5087 trace_array_put(tr);
5088 return seq_release(inode, file);
5092 tracing_write_stub(struct file *filp, const char __user *ubuf,
5093 size_t count, loff_t *ppos)
5098 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5102 if (file->f_mode & FMODE_READ)
5103 ret = seq_lseek(file, offset, whence);
5105 file->f_pos = ret = 0;
5110 static const struct file_operations tracing_fops = {
5111 .open = tracing_open,
5113 .write = tracing_write_stub,
5114 .llseek = tracing_lseek,
5115 .release = tracing_release,
5118 static const struct file_operations show_traces_fops = {
5119 .open = show_traces_open,
5121 .llseek = seq_lseek,
5122 .release = show_traces_release,
5126 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5127 size_t count, loff_t *ppos)
5129 struct trace_array *tr = file_inode(filp)->i_private;
5133 len = snprintf(NULL, 0, "%*pb\n",
5134 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5135 mask_str = kmalloc(len, GFP_KERNEL);
5139 len = snprintf(mask_str, len, "%*pb\n",
5140 cpumask_pr_args(tr->tracing_cpumask));
5145 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5153 int tracing_set_cpumask(struct trace_array *tr,
5154 cpumask_var_t tracing_cpumask_new)
5161 local_irq_disable();
5162 arch_spin_lock(&tr->max_lock);
5163 for_each_tracing_cpu(cpu) {
5165 * Increase/decrease the disabled counter if we are
5166 * about to flip a bit in the cpumask:
5168 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5169 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5170 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5171 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5173 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5174 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5175 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5176 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5179 arch_spin_unlock(&tr->max_lock);
5182 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5188 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5189 size_t count, loff_t *ppos)
5191 struct trace_array *tr = file_inode(filp)->i_private;
5192 cpumask_var_t tracing_cpumask_new;
5195 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5198 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5202 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5206 free_cpumask_var(tracing_cpumask_new);
5211 free_cpumask_var(tracing_cpumask_new);
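/*
 * Illustrative shell usage of the tracing_cpumask file (a sketch; the
 * tracefs mount point may vary). The value is a hex CPU mask, parsed by
 * cpumask_parse_user() above:
 *
 *   # cat /sys/kernel/tracing/tracing_cpumask       (e.g. "ff")
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask  (trace only CPUs 0 and 1)
 */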
5216 static const struct file_operations tracing_cpumask_fops = {
5217 .open = tracing_open_generic_tr,
5218 .read = tracing_cpumask_read,
5219 .write = tracing_cpumask_write,
5220 .release = tracing_release_generic_tr,
5221 .llseek = generic_file_llseek,
5224 static int tracing_trace_options_show(struct seq_file *m, void *v)
5226 struct tracer_opt *trace_opts;
5227 struct trace_array *tr = m->private;
5231 mutex_lock(&trace_types_lock);
5232 tracer_flags = tr->current_trace->flags->val;
5233 trace_opts = tr->current_trace->flags->opts;
5235 for (i = 0; trace_options[i]; i++) {
5236 if (tr->trace_flags & (1 << i))
5237 seq_printf(m, "%s\n", trace_options[i]);
5239 seq_printf(m, "no%s\n", trace_options[i]);
5242 for (i = 0; trace_opts[i].name; i++) {
5243 if (tracer_flags & trace_opts[i].bit)
5244 seq_printf(m, "%s\n", trace_opts[i].name);
5246 seq_printf(m, "no%s\n", trace_opts[i].name);
5248 mutex_unlock(&trace_types_lock);
5253 static int __set_tracer_option(struct trace_array *tr,
5254 struct tracer_flags *tracer_flags,
5255 struct tracer_opt *opts, int neg)
5257 struct tracer *trace = tracer_flags->trace;
5260 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5265 tracer_flags->val &= ~opts->bit;
5267 tracer_flags->val |= opts->bit;
5271 /* Try to assign a tracer specific option */
5272 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5274 struct tracer *trace = tr->current_trace;
5275 struct tracer_flags *tracer_flags = trace->flags;
5276 struct tracer_opt *opts = NULL;
5279 for (i = 0; tracer_flags->opts[i].name; i++) {
5280 opts = &tracer_flags->opts[i];
5282 if (strcmp(cmp, opts->name) == 0)
5283 return __set_tracer_option(tr, trace->flags, opts, neg);
5289 /* Some tracers require overwrite to stay enabled */
5290 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5292 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5298 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5302 if ((mask == TRACE_ITER_RECORD_TGID) ||
5303 (mask == TRACE_ITER_RECORD_CMD))
5304 lockdep_assert_held(&event_mutex);
5306 /* do nothing if flag is already set */
5307 if (!!(tr->trace_flags & mask) == !!enabled)
5310 /* Give the tracer a chance to approve the change */
5311 if (tr->current_trace->flag_changed)
5312 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5316 tr->trace_flags |= mask;
5318 tr->trace_flags &= ~mask;
5320 if (mask == TRACE_ITER_RECORD_CMD)
5321 trace_event_enable_cmd_record(enabled);
5323 if (mask == TRACE_ITER_RECORD_TGID) {
5325 tgid_map_max = pid_max;
5326 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5330 * Pairs with smp_load_acquire() in
5331 * trace_find_tgid_ptr() to ensure that if it observes
5332 * the tgid_map we just allocated then it also observes
5333 * the corresponding tgid_map_max value.
5335 smp_store_release(&tgid_map, map);
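/*
 * Reader-side sketch of that pairing (illustration of what
 * trace_find_tgid_ptr() is expected to do, not a definition):
 *
 *	int *map = smp_load_acquire(&tgid_map);
 *
 *	if (!map || pid > tgid_map_max)
 *		return NULL;
 *	return &map[pid];
 */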
5338 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5342 trace_event_enable_tgid_record(enabled);
5345 if (mask == TRACE_ITER_EVENT_FORK)
5346 trace_event_follow_fork(tr, enabled);
5348 if (mask == TRACE_ITER_FUNC_FORK)
5349 ftrace_pid_follow_fork(tr, enabled);
5351 if (mask == TRACE_ITER_OVERWRITE) {
5352 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5353 #ifdef CONFIG_TRACER_MAX_TRACE
5354 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5358 if (mask == TRACE_ITER_PRINTK) {
5359 trace_printk_start_stop_comm(enabled);
5360 trace_printk_control(enabled);
5366 int trace_set_options(struct trace_array *tr, char *option)
5371 size_t orig_len = strlen(option);
5374 cmp = strstrip(option);
5376 len = str_has_prefix(cmp, "no");
5382 mutex_lock(&event_mutex);
5383 mutex_lock(&trace_types_lock);
5385 ret = match_string(trace_options, -1, cmp);
5386 /* If no option could be set, test the specific tracer options */
5388 ret = set_tracer_option(tr, cmp, neg);
5390 ret = set_tracer_flag(tr, 1 << ret, !neg);
5392 mutex_unlock(&trace_types_lock);
5393 mutex_unlock(&event_mutex);
5396 * If the first trailing whitespace is replaced with '\0' by strstrip,
5397 * turn it back into a space.
5399 if (orig_len > strlen(option))
5400 option[strlen(option)] = ' ';
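/*
 * Illustrative usage of the options handled above (a sketch; paths assume
 * the usual tracefs mount):
 *
 *   # echo noprint-parent > /sys/kernel/tracing/trace_options
 *   # echo sym-offset > /sys/kernel/tracing/trace_options
 *
 * or, on the kernel command line, trace_options=noprint-parent,sym-offset
 * which is replayed by apply_trace_boot_options() below.
 */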
5405 static void __init apply_trace_boot_options(void)
5407 char *buf = trace_boot_options_buf;
5411 option = strsep(&buf, ",");
5417 trace_set_options(&global_trace, option);
5419 /* Put back the comma to allow this to be called again */
5426 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5427 size_t cnt, loff_t *ppos)
5429 struct seq_file *m = filp->private_data;
5430 struct trace_array *tr = m->private;
5434 if (cnt >= sizeof(buf))
5437 if (copy_from_user(buf, ubuf, cnt))
5442 ret = trace_set_options(tr, buf);
5451 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5453 struct trace_array *tr = inode->i_private;
5456 ret = tracing_check_open_get_tr(tr);
5460 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5462 trace_array_put(tr);
5467 static const struct file_operations tracing_iter_fops = {
5468 .open = tracing_trace_options_open,
5470 .llseek = seq_lseek,
5471 .release = tracing_single_release_tr,
5472 .write = tracing_trace_options_write,
5475 static const char readme_msg[] =
5476 "tracing mini-HOWTO:\n\n"
5477 "# echo 0 > tracing_on : quick way to disable tracing\n"
5478 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5479 " Important files:\n"
5480 " trace\t\t\t- The static contents of the buffer\n"
5481 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5482 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5483 " current_tracer\t- function and latency tracers\n"
5484 " available_tracers\t- list of configured tracers for current_tracer\n"
5485 " error_log\t- error log for failed commands (that support it)\n"
5486 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5487 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5488 " trace_clock\t\t- change the clock used to order events\n"
5489 " local: Per cpu clock but may not be synced across CPUs\n"
5490 " global: Synced across CPUs but slows tracing down.\n"
5491 " counter: Not a clock, but just an increment\n"
5492 " uptime: Jiffy counter from time of boot\n"
5493 " perf: Same clock that perf events use\n"
5494 #ifdef CONFIG_X86_64
5495 " x86-tsc: TSC cycle counter\n"
5497 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5498 " delta: Delta difference against a buffer-wide timestamp\n"
5499 " absolute: Absolute (standalone) timestamp\n"
5500 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5501 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5502 " tracing_cpumask\t- Limit which CPUs to trace\n"
5503 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5504 "\t\t\t Remove sub-buffer with rmdir\n"
5505 " trace_options\t\t- Set format or modify how tracing happens\n"
5506 "\t\t\t Disable an option by prefixing 'no' to the\n"
5507 "\t\t\t option name\n"
5508 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5509 #ifdef CONFIG_DYNAMIC_FTRACE
5510 "\n available_filter_functions - list of functions that can be filtered on\n"
5511 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5512 "\t\t\t functions\n"
5513 "\t accepts: func_full_name or glob-matching-pattern\n"
5514 "\t modules: Can select a group via module\n"
5515 "\t Format: :mod:<module-name>\n"
5516 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5517 "\t triggers: a command to perform when function is hit\n"
5518 "\t Format: <function>:<trigger>[:count]\n"
5519 "\t trigger: traceon, traceoff\n"
5520 "\t\t enable_event:<system>:<event>\n"
5521 "\t\t disable_event:<system>:<event>\n"
5522 #ifdef CONFIG_STACKTRACE
5525 #ifdef CONFIG_TRACER_SNAPSHOT
5530 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5531 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5532 "\t The first one will disable tracing every time do_fault is hit\n"
5533 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5534 "\t The first time do trap is hit and it disables tracing, the\n"
5535 "\t counter will decrement to 2. If tracing is already disabled,\n"
5536 "\t the counter will not decrement. It only decrements when the\n"
5537 "\t trigger did work\n"
5538 "\t To remove trigger without count:\n"
5539 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5540 "\t To remove trigger with a count:\n"
5541 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5542 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5543 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5544 "\t modules: Can select a group via module command :mod:\n"
5545 "\t Does not accept triggers\n"
5546 #endif /* CONFIG_DYNAMIC_FTRACE */
5547 #ifdef CONFIG_FUNCTION_TRACER
5548 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5550 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5553 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5554 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5555 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5556 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5560 "\t\t\t snapshot buffer. Read the contents for more\n"
5561 "\t\t\t information\n"
5563 #ifdef CONFIG_STACK_TRACER
5564 " stack_trace\t\t- Shows the max stack trace when active\n"
5565 " stack_max_size\t- Shows current max stack size that was traced\n"
5566 "\t\t\t Write into this file to reset the max size (trigger a\n"
5567 "\t\t\t new trace)\n"
5568 #ifdef CONFIG_DYNAMIC_FTRACE
5569 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5572 #endif /* CONFIG_STACK_TRACER */
5573 #ifdef CONFIG_DYNAMIC_EVENTS
5574 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5575 "\t\t\t Write into this file to define/undefine new trace events.\n"
5577 #ifdef CONFIG_KPROBE_EVENTS
5578 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5579 "\t\t\t Write into this file to define/undefine new trace events.\n"
5581 #ifdef CONFIG_UPROBE_EVENTS
5582 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5583 "\t\t\t Write into this file to define/undefine new trace events.\n"
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586 "\t accepts: event-definitions (one definition per line)\n"
5587 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5588 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5589 #ifdef CONFIG_HIST_TRIGGERS
5590 "\t s:[synthetic/]<event> <field> [<field>]\n"
5592 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5593 "\t -:[<group>/][<event>]\n"
5594 #ifdef CONFIG_KPROBE_EVENTS
5595 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5596 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5598 #ifdef CONFIG_UPROBE_EVENTS
5599 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5601 "\t args: <name>=fetcharg[:type]\n"
5602 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5603 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5604 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5606 "\t $stack<index>, $stack, $retval, $comm,\n"
5608 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5609 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5610 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5611 "\t <type>\\[<array-size>\\]\n"
5612 #ifdef CONFIG_HIST_TRIGGERS
5613 "\t field: <stype> <name>;\n"
5614 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5615 "\t [unsigned] char/int/long\n"
5617 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5618 "\t of the <attached-group>/<attached-event>.\n"
5620 " events/\t\t- Directory containing all trace event subsystems:\n"
5621 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5622 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5623 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5625 " filter\t\t- If set, only events passing filter are traced\n"
5626 " events/<system>/<event>/\t- Directory containing control files for\n"
5628 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5629 " filter\t\t- If set, only events passing filter are traced\n"
5630 " trigger\t\t- If set, a command to perform when event is hit\n"
5631 "\t Format: <trigger>[:count][if <filter>]\n"
5632 "\t trigger: traceon, traceoff\n"
5633 "\t enable_event:<system>:<event>\n"
5634 "\t disable_event:<system>:<event>\n"
5635 #ifdef CONFIG_HIST_TRIGGERS
5636 "\t enable_hist:<system>:<event>\n"
5637 "\t disable_hist:<system>:<event>\n"
5639 #ifdef CONFIG_STACKTRACE
5642 #ifdef CONFIG_TRACER_SNAPSHOT
5645 #ifdef CONFIG_HIST_TRIGGERS
5646 "\t\t hist (see below)\n"
5648 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5649 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5650 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5651 "\t events/block/block_unplug/trigger\n"
5652 "\t The first disables tracing every time block_unplug is hit.\n"
5653 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5654 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5655 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5656 "\t Like function triggers, the counter is only decremented if it\n"
5657 "\t enabled or disabled tracing.\n"
5658 "\t To remove a trigger without a count:\n"
5659 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5660 "\t To remove a trigger with a count:\n"
5661 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5662 "\t Filters can be ignored when removing a trigger.\n"
5663 #ifdef CONFIG_HIST_TRIGGERS
5664 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5665 "\t Format: hist:keys=<field1[,field2,...]>\n"
5666 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5667 "\t [:values=<field1[,field2,...]>]\n"
5668 "\t [:sort=<field1[,field2,...]>]\n"
5669 "\t [:size=#entries]\n"
5670 "\t [:pause][:continue][:clear]\n"
5671 "\t [:name=histname1]\n"
5672 "\t [:<handler>.<action>]\n"
5673 "\t [if <filter>]\n\n"
5674 "\t Note, special fields can be used as well:\n"
5675 "\t common_timestamp - to record current timestamp\n"
5676 "\t common_cpu - to record the CPU the event happened on\n"
5678 "\t A hist trigger variable can be:\n"
5679 "\t - a reference to a field e.g. x=current_timestamp,\n"
5680 "\t - a reference to another variable e.g. y=$x,\n"
5681 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5682 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5684 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5685 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5686 "\t variable reference, field or numeric literal.\n"
5688 "\t When a matching event is hit, an entry is added to a hash\n"
5689 "\t table using the key(s) and value(s) named, and the value of a\n"
5690 "\t sum called 'hitcount' is incremented. Keys and values\n"
5691 "\t correspond to fields in the event's format description. Keys\n"
5692 "\t can be any field, or the special string 'stacktrace'.\n"
5693 "\t Compound keys consisting of up to two fields can be specified\n"
5694 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5695 "\t fields. Sort keys consisting of up to two fields can be\n"
5696 "\t specified using the 'sort' keyword. The sort direction can\n"
5697 "\t be modified by appending '.descending' or '.ascending' to a\n"
5698 "\t sort field. The 'size' parameter can be used to specify more\n"
5699 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5700 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5701 "\t its histogram data will be shared with other triggers of the\n"
5702 "\t same name, and trigger hits will update this common data.\n\n"
5703 "\t Reading the 'hist' file for the event will dump the hash\n"
5704 "\t table in its entirety to stdout. If there are multiple hist\n"
5705 "\t triggers attached to an event, there will be a table for each\n"
5706 "\t trigger in the output. The table displayed for a named\n"
5707 "\t trigger will be the same as any other instance having the\n"
5708 "\t same name. The default format used to display a given field\n"
5709 "\t can be modified by appending any of the following modifiers\n"
5710 "\t to the field name, as applicable:\n\n"
5711 "\t .hex display a number as a hex value\n"
5712 "\t .sym display an address as a symbol\n"
5713 "\t .sym-offset display an address as a symbol and offset\n"
5714 "\t .execname display a common_pid as a program name\n"
5715 "\t .syscall display a syscall id as a syscall name\n"
5716 "\t .log2 display log2 value rather than raw number\n"
5717 "\t .buckets=size display values in groups of size rather than raw number\n"
5718 "\t .usecs display a common_timestamp in microseconds\n\n"
5719 "\t The 'pause' parameter can be used to pause an existing hist\n"
5720 "\t trigger or to start a hist trigger but not log any events\n"
5721 "\t until told to do so. 'continue' can be used to start or\n"
5722 "\t restart a paused hist trigger.\n\n"
5723 "\t The 'clear' parameter will clear the contents of a running\n"
5724 "\t hist trigger and leave its current paused/active state\n"
5726 "\t The enable_hist and disable_hist triggers can be used to\n"
5727 "\t have one event conditionally start and stop another event's\n"
5728 "\t already-attached hist trigger. The syntax is analogous to\n"
5729 "\t the enable_event and disable_event triggers.\n\n"
5730 "\t Hist trigger handlers and actions are executed whenever a\n"
5731 "\t a histogram entry is added or updated. They take the form:\n\n"
5732 "\t <handler>.<action>\n\n"
5733 "\t The available handlers are:\n\n"
5734 "\t onmatch(matching.event) - invoke on addition or update\n"
5735 "\t onmax(var) - invoke if var exceeds current max\n"
5736 "\t onchange(var) - invoke action if var changes\n\n"
5737 "\t The available actions are:\n\n"
5738 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5739 "\t save(field,...) - save current event fields\n"
5740 #ifdef CONFIG_TRACER_SNAPSHOT
5741 "\t snapshot() - snapshot the trace buffer\n\n"
5743 #ifdef CONFIG_SYNTH_EVENTS
5744 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5745 "\t Write into this file to define/undefine new synthetic events.\n"
5746 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5752 tracing_readme_read(struct file *filp, char __user *ubuf,
5753 size_t cnt, loff_t *ppos)
5755 return simple_read_from_buffer(ubuf, cnt, ppos,
5756 readme_msg, strlen(readme_msg));
5759 static const struct file_operations tracing_readme_fops = {
5760 .open = tracing_open_generic,
5761 .read = tracing_readme_read,
5762 .llseek = generic_file_llseek,
5765 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5769 return trace_find_tgid_ptr(pid);
5772 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5776 return trace_find_tgid_ptr(pid);
5779 static void saved_tgids_stop(struct seq_file *m, void *v)
5783 static int saved_tgids_show(struct seq_file *m, void *v)
5785 int *entry = (int *)v;
5786 int pid = entry - tgid_map;
5792 seq_printf(m, "%d %d\n", pid, tgid);
5796 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5797 .start = saved_tgids_start,
5798 .stop = saved_tgids_stop,
5799 .next = saved_tgids_next,
5800 .show = saved_tgids_show,
5803 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5807 ret = tracing_check_open_get_tr(NULL);
5811 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5815 static const struct file_operations tracing_saved_tgids_fops = {
5816 .open = tracing_saved_tgids_open,
5818 .llseek = seq_lseek,
5819 .release = seq_release,
5822 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5824 unsigned int *ptr = v;
5826 if (*pos || m->count)
5831 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5833 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5842 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5848 arch_spin_lock(&trace_cmdline_lock);
5850 v = &savedcmd->map_cmdline_to_pid[0];
5852 v = saved_cmdlines_next(m, v, &l);
5860 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5862 arch_spin_unlock(&trace_cmdline_lock);
5866 static int saved_cmdlines_show(struct seq_file *m, void *v)
5868 char buf[TASK_COMM_LEN];
5869 unsigned int *pid = v;
5871 __trace_find_cmdline(*pid, buf);
5872 seq_printf(m, "%d %s\n", *pid, buf);
5876 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5877 .start = saved_cmdlines_start,
5878 .next = saved_cmdlines_next,
5879 .stop = saved_cmdlines_stop,
5880 .show = saved_cmdlines_show,
5883 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5887 ret = tracing_check_open_get_tr(NULL);
5891 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5894 static const struct file_operations tracing_saved_cmdlines_fops = {
5895 .open = tracing_saved_cmdlines_open,
5897 .llseek = seq_lseek,
5898 .release = seq_release,
5902 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5903 size_t cnt, loff_t *ppos)
5909 arch_spin_lock(&trace_cmdline_lock);
5910 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5911 arch_spin_unlock(&trace_cmdline_lock);
5914 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5917 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5919 kfree(s->saved_cmdlines);
5920 kfree(s->map_cmdline_to_pid);
5924 static int tracing_resize_saved_cmdlines(unsigned int val)
5926 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5928 s = kmalloc(sizeof(*s), GFP_KERNEL);
5932 if (allocate_cmdlines_buffer(val, s) < 0) {
5938 arch_spin_lock(&trace_cmdline_lock);
5939 savedcmd_temp = savedcmd;
5941 arch_spin_unlock(&trace_cmdline_lock);
5943 free_saved_cmdlines_buffer(savedcmd_temp);
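/*
 * Illustrative resize from user space (a sketch): writing a count into
 * saved_cmdlines_size reallocates the comm cache via the helper above, e.g.
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   1024
 */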
5949 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5950 size_t cnt, loff_t *ppos)
5955 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5959 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5960 if (!val || val > PID_MAX_DEFAULT)
5963 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5972 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5973 .open = tracing_open_generic,
5974 .read = tracing_saved_cmdlines_size_read,
5975 .write = tracing_saved_cmdlines_size_write,
5978 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5979 static union trace_eval_map_item *
5980 update_eval_map(union trace_eval_map_item *ptr)
5982 if (!ptr->map.eval_string) {
5983 if (ptr->tail.next) {
5984 ptr = ptr->tail.next;
5985 /* Set ptr to the next real item (skip head) */
5993 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5995 union trace_eval_map_item *ptr = v;
5998 * Paranoid! If ptr points to end, we don't want to increment past it.
5999 * This really should never happen.
6002 ptr = update_eval_map(ptr);
6003 if (WARN_ON_ONCE(!ptr))
6007 ptr = update_eval_map(ptr);
6012 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6014 union trace_eval_map_item *v;
6017 mutex_lock(&trace_eval_mutex);
6019 v = trace_eval_maps;
6023 while (v && l < *pos) {
6024 v = eval_map_next(m, v, &l);
6030 static void eval_map_stop(struct seq_file *m, void *v)
6032 mutex_unlock(&trace_eval_mutex);
6035 static int eval_map_show(struct seq_file *m, void *v)
6037 union trace_eval_map_item *ptr = v;
6039 seq_printf(m, "%s %ld (%s)\n",
6040 ptr->map.eval_string, ptr->map.eval_value,
6046 static const struct seq_operations tracing_eval_map_seq_ops = {
6047 .start = eval_map_start,
6048 .next = eval_map_next,
6049 .stop = eval_map_stop,
6050 .show = eval_map_show,
6053 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6057 ret = tracing_check_open_get_tr(NULL);
6061 return seq_open(filp, &tracing_eval_map_seq_ops);
6064 static const struct file_operations tracing_eval_map_fops = {
6065 .open = tracing_eval_map_open,
6067 .llseek = seq_lseek,
6068 .release = seq_release,
6071 static inline union trace_eval_map_item *
6072 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6074 /* Return tail of array given the head */
6075 return ptr + ptr->head.length + 1;
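/*
 * Layout assumed by the helper above (illustration, derived from
 * trace_insert_eval_map_file() below, which allocates len + 2 items):
 *
 *   map_array[0]           head  (.mod, .length = len)
 *   map_array[1..len]      the copied trace_eval_map entries
 *   map_array[len + 1]     tail  (.next -> next chunk, or zeroed)
 *
 * hence "ptr + ptr->head.length + 1" lands on the tail item.
 */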
6079 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6082 struct trace_eval_map **stop;
6083 struct trace_eval_map **map;
6084 union trace_eval_map_item *map_array;
6085 union trace_eval_map_item *ptr;
6090 * The trace_eval_maps contains the map plus a head and tail item,
6091 * where the head holds the module and length of array, and the
6092 * tail holds a pointer to the next list.
6094 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6096 pr_warn("Unable to allocate trace eval mapping\n");
6100 mutex_lock(&trace_eval_mutex);
6102 if (!trace_eval_maps)
6103 trace_eval_maps = map_array;
6105 ptr = trace_eval_maps;
6107 ptr = trace_eval_jmp_to_tail(ptr);
6108 if (!ptr->tail.next)
6110 ptr = ptr->tail.next;
6113 ptr->tail.next = map_array;
6115 map_array->head.mod = mod;
6116 map_array->head.length = len;
6119 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6120 map_array->map = **map;
6123 memset(map_array, 0, sizeof(*map_array));
6125 mutex_unlock(&trace_eval_mutex);
6128 static void trace_create_eval_file(struct dentry *d_tracer)
6130 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6131 NULL, &tracing_eval_map_fops);
6134 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6135 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6136 static inline void trace_insert_eval_map_file(struct module *mod,
6137 struct trace_eval_map **start, int len) { }
6138 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6140 static void trace_insert_eval_map(struct module *mod,
6141 struct trace_eval_map **start, int len)
6143 struct trace_eval_map **map;
6150 trace_event_eval_update(map, len);
6152 trace_insert_eval_map_file(mod, start, len);
6156 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6157 size_t cnt, loff_t *ppos)
6159 struct trace_array *tr = filp->private_data;
6160 char buf[MAX_TRACER_SIZE+2];
6163 mutex_lock(&trace_types_lock);
6164 r = sprintf(buf, "%s\n", tr->current_trace->name);
6165 mutex_unlock(&trace_types_lock);
6167 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6170 int tracer_init(struct tracer *t, struct trace_array *tr)
6172 tracing_reset_online_cpus(&tr->array_buffer);
6176 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6180 for_each_tracing_cpu(cpu)
6181 per_cpu_ptr(buf->data, cpu)->entries = val;
6184 #ifdef CONFIG_TRACER_MAX_TRACE
6185 /* resize @tr's buffer to the size of @size_tr's entries */
6186 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6187 struct array_buffer *size_buf, int cpu_id)
6191 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6192 for_each_tracing_cpu(cpu) {
6193 ret = ring_buffer_resize(trace_buf->buffer,
6194 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6197 per_cpu_ptr(trace_buf->data, cpu)->entries =
6198 per_cpu_ptr(size_buf->data, cpu)->entries;
6201 ret = ring_buffer_resize(trace_buf->buffer,
6202 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6204 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6205 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6210 #endif /* CONFIG_TRACER_MAX_TRACE */
6212 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6213 unsigned long size, int cpu)
6218 * If the kernel or the user changes the size of the ring buffer
6219 * we use the size that was given, and we can forget about
6220 * expanding it later.
6222 ring_buffer_expanded = true;
6224 /* May be called before buffers are initialized */
6225 if (!tr->array_buffer.buffer)
6228 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6232 #ifdef CONFIG_TRACER_MAX_TRACE
6233 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6234 !tr->current_trace->use_max_tr)
6237 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6239 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6240 &tr->array_buffer, cpu);
6243 * AARGH! We are left with different
6244 * size max buffer!!!!
6245 * The max buffer is our "snapshot" buffer.
6246 * When a tracer needs a snapshot (one of the
6247 * latency tracers), it swaps the max buffer
6248 * with the saved snapshot. We succeeded in
6249 * updating the size of the main buffer, but failed to
6250 * update the size of the max buffer. But when we tried
6251 * to reset the main buffer to the original size, we
6252 * failed there too. This is very unlikely to
6253 * happen, but if it does, warn and kill all
6257 tracing_disabled = 1;
6262 if (cpu == RING_BUFFER_ALL_CPUS)
6263 set_buffer_entries(&tr->max_buffer, size);
6265 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6268 #endif /* CONFIG_TRACER_MAX_TRACE */
6270 if (cpu == RING_BUFFER_ALL_CPUS)
6271 set_buffer_entries(&tr->array_buffer, size);
6273 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6278 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6279 unsigned long size, int cpu_id)
6283 mutex_lock(&trace_types_lock);
6285 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6286 /* make sure, this cpu is enabled in the mask */
6287 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6293 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6298 mutex_unlock(&trace_types_lock);
6305 * tracing_update_buffers - used by tracing facility to expand ring buffers
6307 * To save memory when tracing is never used on a system that has it
6308 * configured in, the ring buffers are set to a minimum size. But once
6309 * a user starts to use the tracing facility, then they need to grow
6310 * to their default size.
6312 * This function is to be called when a tracer is about to be used.
6314 int tracing_update_buffers(void)
6318 mutex_lock(&trace_types_lock);
6319 if (!ring_buffer_expanded)
6320 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6321 RING_BUFFER_ALL_CPUS);
6322 mutex_unlock(&trace_types_lock);
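/*
 * From user space, the same expansion can be triggered explicitly by sizing
 * the buffers (illustrative; the value is per-CPU kilobytes):
 *
 *   # echo 1408 > /sys/kernel/tracing/buffer_size_kb
 *   # cat /sys/kernel/tracing/buffer_total_size_kb
 */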
6327 struct trace_option_dentry;
6330 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6333 * Used to clear out the tracer before deletion of an instance.
6334 * Must have trace_types_lock held.
6336 static void tracing_set_nop(struct trace_array *tr)
6338 if (tr->current_trace == &nop_trace)
6341 tr->current_trace->enabled--;
6343 if (tr->current_trace->reset)
6344 tr->current_trace->reset(tr);
6346 tr->current_trace = &nop_trace;
6349 static bool tracer_options_updated;
6351 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6353 /* Only enable if the directory has been created already. */
6357 /* Only create trace option files after update_tracer_options finish */
6358 if (!tracer_options_updated)
6361 create_trace_option_files(tr, t);
6364 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6367 #ifdef CONFIG_TRACER_MAX_TRACE
6372 mutex_lock(&trace_types_lock);
6374 if (!ring_buffer_expanded) {
6375 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6376 RING_BUFFER_ALL_CPUS);
6382 for (t = trace_types; t; t = t->next) {
6383 if (strcmp(t->name, buf) == 0)
6390 if (t == tr->current_trace)
6393 #ifdef CONFIG_TRACER_SNAPSHOT
6394 if (t->use_max_tr) {
6395 local_irq_disable();
6396 arch_spin_lock(&tr->max_lock);
6397 if (tr->cond_snapshot)
6399 arch_spin_unlock(&tr->max_lock);
6405 /* Some tracers won't work on kernel command line */
6406 if (system_state < SYSTEM_RUNNING && t->noboot) {
6407 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6412 /* Some tracers are only allowed for the top level buffer */
6413 if (!trace_ok_for_array(t, tr)) {
6418 /* If trace pipe files are being read, we can't change the tracer */
6419 if (tr->trace_ref) {
6424 trace_branch_disable();
6426 tr->current_trace->enabled--;
6428 if (tr->current_trace->reset)
6429 tr->current_trace->reset(tr);
6431 #ifdef CONFIG_TRACER_MAX_TRACE
6432 had_max_tr = tr->current_trace->use_max_tr;
6434 /* Current trace needs to be nop_trace before synchronize_rcu */
6435 tr->current_trace = &nop_trace;
6437 if (had_max_tr && !t->use_max_tr) {
6439 * We need to make sure that the update_max_tr sees that
6440 * current_trace changed to nop_trace to keep it from
6441 * swapping the buffers after we resize it.
6442 * update_max_tr() is called with interrupts disabled,
6443 * so a synchronize_rcu() is sufficient.
6449 if (t->use_max_tr && !tr->allocated_snapshot) {
6450 ret = tracing_alloc_snapshot_instance(tr);
6455 tr->current_trace = &nop_trace;
6459 ret = tracer_init(t, tr);
6464 tr->current_trace = t;
6465 tr->current_trace->enabled++;
6466 trace_branch_enable(tr);
6468 mutex_unlock(&trace_types_lock);
6474 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6475 size_t cnt, loff_t *ppos)
6477 struct trace_array *tr = filp->private_data;
6478 char buf[MAX_TRACER_SIZE+1];
6485 if (cnt > MAX_TRACER_SIZE)
6486 cnt = MAX_TRACER_SIZE;
6488 if (copy_from_user(buf, ubuf, cnt))
6495 err = tracing_set_tracer(tr, name);
6505 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6506 size_t cnt, loff_t *ppos)
6511 r = snprintf(buf, sizeof(buf), "%ld\n",
6512 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6513 if (r > sizeof(buf))
6515 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6519 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6520 size_t cnt, loff_t *ppos)
6525 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6535 tracing_thresh_read(struct file *filp, char __user *ubuf,
6536 size_t cnt, loff_t *ppos)
6538 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6542 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6543 size_t cnt, loff_t *ppos)
6545 struct trace_array *tr = filp->private_data;
6548 mutex_lock(&trace_types_lock);
6549 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6553 if (tr->current_trace->update_thresh) {
6554 ret = tr->current_trace->update_thresh(tr);
6561 mutex_unlock(&trace_types_lock);
6566 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6569 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6570 size_t cnt, loff_t *ppos)
6572 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6576 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6577 size_t cnt, loff_t *ppos)
6579 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6584 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6586 struct trace_array *tr = inode->i_private;
6587 struct trace_iterator *iter;
6590 ret = tracing_check_open_get_tr(tr);
6594 mutex_lock(&trace_types_lock);
6596 /* create a buffer to store the information to pass to userspace */
6597 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6600 __trace_array_put(tr);
6604 trace_seq_init(&iter->seq);
6605 iter->trace = tr->current_trace;
6607 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6612 /* trace pipe does not show start of buffer */
6613 cpumask_setall(iter->started);
6615 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6616 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6618 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6619 if (trace_clocks[tr->clock_id].in_ns)
6620 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6623 iter->array_buffer = &tr->array_buffer;
6624 iter->cpu_file = tracing_get_cpu(inode);
6625 mutex_init(&iter->mutex);
6626 filp->private_data = iter;
6628 if (iter->trace->pipe_open)
6629 iter->trace->pipe_open(iter);
6631 nonseekable_open(inode, filp);
6635 mutex_unlock(&trace_types_lock);
6640 __trace_array_put(tr);
6641 mutex_unlock(&trace_types_lock);
6645 static int tracing_release_pipe(struct inode *inode, struct file *file)
6647 struct trace_iterator *iter = file->private_data;
6648 struct trace_array *tr = inode->i_private;
6650 mutex_lock(&trace_types_lock);
6654 if (iter->trace->pipe_close)
6655 iter->trace->pipe_close(iter);
6657 mutex_unlock(&trace_types_lock);
6659 free_cpumask_var(iter->started);
6661 mutex_destroy(&iter->mutex);
6664 trace_array_put(tr);
6670 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6672 struct trace_array *tr = iter->tr;
6674 /* Iterators are static, they should be filled or empty */
6675 if (trace_buffer_iter(iter, iter->cpu_file))
6676 return EPOLLIN | EPOLLRDNORM;
6678 if (tr->trace_flags & TRACE_ITER_BLOCK)
6680 * Always select as readable when in blocking mode
6682 return EPOLLIN | EPOLLRDNORM;
6684 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6685 filp, poll_table, iter->tr->buffer_percent);
6689 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6691 struct trace_iterator *iter = filp->private_data;
6693 return trace_poll(iter, filp, poll_table);
6696 /* Must be called with iter->mutex held. */
6697 static int tracing_wait_pipe(struct file *filp)
6699 struct trace_iterator *iter = filp->private_data;
6702 while (trace_empty(iter)) {
6704 if ((filp->f_flags & O_NONBLOCK)) {
6709 * We block until we read something and tracing is disabled.
6710 * We still block if tracing is disabled, but we have never
6711 * read anything. This allows a user to cat this file, and
6712 * then enable tracing. But after we have read something,
6713 * we give an EOF when tracing is again disabled.
6715 * iter->pos will be 0 if we haven't read anything.
6717 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6720 mutex_unlock(&iter->mutex);
6722 ret = wait_on_pipe(iter, 0);
6724 mutex_lock(&iter->mutex);
6737 tracing_read_pipe(struct file *filp, char __user *ubuf,
6738 size_t cnt, loff_t *ppos)
6740 struct trace_iterator *iter = filp->private_data;
6744 * Avoid more than one consumer on a single file descriptor.
6745 * This is just a matter of trace coherency; the ring buffer itself
6748 mutex_lock(&iter->mutex);
6750 /* return any leftover data */
6751 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6755 trace_seq_init(&iter->seq);
6757 if (iter->trace->read) {
6758 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6764 sret = tracing_wait_pipe(filp);
6768 /* stop when tracing is finished */
6769 if (trace_empty(iter)) {
6774 if (cnt >= PAGE_SIZE)
6775 cnt = PAGE_SIZE - 1;
6777 /* reset all but tr, trace, and overruns */
6778 trace_iterator_reset(iter);
6779 cpumask_clear(iter->started);
6780 trace_seq_init(&iter->seq);
6782 trace_event_read_lock();
6783 trace_access_lock(iter->cpu_file);
6784 while (trace_find_next_entry_inc(iter) != NULL) {
6785 enum print_line_t ret;
6786 int save_len = iter->seq.seq.len;
6788 ret = print_trace_line(iter);
6789 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6790 /* don't print partial lines */
6791 iter->seq.seq.len = save_len;
6794 if (ret != TRACE_TYPE_NO_CONSUME)
6795 trace_consume(iter);
6797 if (trace_seq_used(&iter->seq) >= cnt)
6801 * Setting the full flag means we reached the trace_seq buffer
6802 * size and we should have left via the partial output condition above.
6803 * One of the trace_seq_* functions is not used properly.
6805 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6808 trace_access_unlock(iter->cpu_file);
6809 trace_event_read_unlock();
6811 /* Now copy what we have to the user */
6812 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6813 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6814 trace_seq_init(&iter->seq);
6817 * If there was nothing to send to user, in spite of consuming trace
6818 * entries, go back to wait for more entries.
6824 mutex_unlock(&iter->mutex);
6829 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6832 __free_page(spd->pages[idx]);
6836 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6842 /* Seq buffer is page-sized, exactly what we need. */
6844 save_len = iter->seq.seq.len;
6845 ret = print_trace_line(iter);
6847 if (trace_seq_has_overflowed(&iter->seq)) {
6848 iter->seq.seq.len = save_len;
6853 * This should not be hit, because it should only
6854 * be set if the iter->seq overflowed. But check it
6855 * anyway to be safe.
6857 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6858 iter->seq.seq.len = save_len;
6862 count = trace_seq_used(&iter->seq) - save_len;
6865 iter->seq.seq.len = save_len;
6869 if (ret != TRACE_TYPE_NO_CONSUME)
6870 trace_consume(iter);
6872 if (!trace_find_next_entry_inc(iter)) {
6882 static ssize_t tracing_splice_read_pipe(struct file *filp,
6884 struct pipe_inode_info *pipe,
6888 struct page *pages_def[PIPE_DEF_BUFFERS];
6889 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6890 struct trace_iterator *iter = filp->private_data;
6891 struct splice_pipe_desc spd = {
6893 .partial = partial_def,
6894 .nr_pages = 0, /* This gets updated below. */
6895 .nr_pages_max = PIPE_DEF_BUFFERS,
6896 .ops = &default_pipe_buf_ops,
6897 .spd_release = tracing_spd_release_pipe,
6903 if (splice_grow_spd(pipe, &spd))
6906 mutex_lock(&iter->mutex);
6908 if (iter->trace->splice_read) {
6909 ret = iter->trace->splice_read(iter, filp,
6910 ppos, pipe, len, flags);
6915 ret = tracing_wait_pipe(filp);
6919 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6924 trace_event_read_lock();
6925 trace_access_lock(iter->cpu_file);
6927 /* Fill as many pages as possible. */
6928 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6929 spd.pages[i] = alloc_page(GFP_KERNEL);
6933 rem = tracing_fill_pipe_page(rem, iter);
6935 /* Copy the data into the page, so we can start over. */
6936 ret = trace_seq_to_buffer(&iter->seq,
6937 page_address(spd.pages[i]),
6938 trace_seq_used(&iter->seq));
6940 __free_page(spd.pages[i]);
6943 spd.partial[i].offset = 0;
6944 spd.partial[i].len = trace_seq_used(&iter->seq);
6946 trace_seq_init(&iter->seq);
6949 trace_access_unlock(iter->cpu_file);
6950 trace_event_read_unlock();
6951 mutex_unlock(&iter->mutex);
6956 ret = splice_to_pipe(pipe, &spd);
6960 splice_shrink_spd(&spd);
6964 mutex_unlock(&iter->mutex);
6969 tracing_entries_read(struct file *filp, char __user *ubuf,
6970 size_t cnt, loff_t *ppos)
6972 struct inode *inode = file_inode(filp);
6973 struct trace_array *tr = inode->i_private;
6974 int cpu = tracing_get_cpu(inode);
6979 mutex_lock(&trace_types_lock);
6981 if (cpu == RING_BUFFER_ALL_CPUS) {
6982 int cpu, buf_size_same;
6987 /* check if all cpu sizes are same */
6988 for_each_tracing_cpu(cpu) {
6989 /* fill in the size from first enabled cpu */
6991 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6992 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6998 if (buf_size_same) {
6999 if (!ring_buffer_expanded)
7000 r = sprintf(buf, "%lu (expanded: %lu)\n",
7002 trace_buf_size >> 10);
7004 r = sprintf(buf, "%lu\n", size >> 10);
7006 r = sprintf(buf, "X\n");
7008 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7010 mutex_unlock(&trace_types_lock);
7012 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7017 tracing_entries_write(struct file *filp, const char __user *ubuf,
7018 size_t cnt, loff_t *ppos)
7020 struct inode *inode = file_inode(filp);
7021 struct trace_array *tr = inode->i_private;
7025 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7029 /* must have at least 1 entry */
7033 /* value is in KB */
7035 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7045 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7046 size_t cnt, loff_t *ppos)
7048 struct trace_array *tr = filp->private_data;
7051 unsigned long size = 0, expanded_size = 0;
7053 mutex_lock(&trace_types_lock);
7054 for_each_tracing_cpu(cpu) {
7055 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7056 if (!ring_buffer_expanded)
7057 expanded_size += trace_buf_size >> 10;
7059 if (ring_buffer_expanded)
7060 r = sprintf(buf, "%lu\n", size);
7062 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7063 mutex_unlock(&trace_types_lock);
7065 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7069 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7070 size_t cnt, loff_t *ppos)
7073 * There is no need to read what the user has written; this function
7074 * just makes sure that there is no error when "echo" is used
7083 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7085 struct trace_array *tr = inode->i_private;
7087 /* disable tracing ? */
7088 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7089 tracer_tracing_off(tr);
7090 /* resize the ring buffer to 0 */
7091 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7093 trace_array_put(tr);
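/*
 * tracing_mark_write() below backs the trace_marker file. Illustrative use
 * from user space (a sketch; mount point may vary):
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace as a print event; a trailing newline is
 * appended if the write did not include one (see the '\n' handling below).
 */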
7099 tracing_mark_write(struct file *filp, const char __user *ubuf,
7100 size_t cnt, loff_t *fpos)
7102 struct trace_array *tr = filp->private_data;
7103 struct ring_buffer_event *event;
7104 enum event_trigger_type tt = ETT_NONE;
7105 struct trace_buffer *buffer;
7106 struct print_entry *entry;
7111 /* Used in tracing_mark_raw_write() as well */
7112 #define FAULTED_STR "<faulted>"
7113 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7115 if (tracing_disabled)
7118 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7121 if (cnt > TRACE_BUF_SIZE)
7122 cnt = TRACE_BUF_SIZE;
7124 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7126 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7128 /* If less than "<faulted>", then make sure we can still add that */
7129 if (cnt < FAULTED_SIZE)
7130 size += FAULTED_SIZE - cnt;
7132 buffer = tr->array_buffer.buffer;
7133 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7135 if (unlikely(!event))
7136 /* Ring buffer disabled, return as if not open for write */
7139 entry = ring_buffer_event_data(event);
7140 entry->ip = _THIS_IP_;
7142 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7144 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7150 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7151 /* do not add \n before testing triggers, but add \0 */
7152 entry->buf[cnt] = '\0';
7153 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7156 if (entry->buf[cnt - 1] != '\n') {
7157 entry->buf[cnt] = '\n';
7158 entry->buf[cnt + 1] = '\0';
7160 entry->buf[cnt] = '\0';
7162 if (static_branch_unlikely(&trace_marker_exports_enabled))
7163 ftrace_exports(event, TRACE_EXPORT_MARKER);
7164 __buffer_unlock_commit(buffer, event);
7167 event_triggers_post_call(tr->trace_marker_file, tt);
7172 /* Limit it for now to 3K (including tag) */
7173 #define RAW_DATA_MAX_SIZE (1024*3)
7176 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7177 size_t cnt, loff_t *fpos)
7179 struct trace_array *tr = filp->private_data;
7180 struct ring_buffer_event *event;
7181 struct trace_buffer *buffer;
7182 struct raw_data_entry *entry;
7187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7189 if (tracing_disabled)
7192 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7195 /* The marker must at least have a tag id */
7196 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7199 if (cnt > TRACE_BUF_SIZE)
7200 cnt = TRACE_BUF_SIZE;
7202 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7204 size = sizeof(*entry) + cnt;
7205 if (cnt < FAULT_SIZE_ID)
7206 size += FAULT_SIZE_ID - cnt;
7208 buffer = tr->array_buffer.buffer;
7209 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7212 /* Ring buffer disabled, return as if not open for write */
7215 entry = ring_buffer_event_data(event);
7217 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7220 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7225 __buffer_unlock_commit(buffer, event);
7230 static int tracing_clock_show(struct seq_file *m, void *v)
7232 struct trace_array *tr = m->private;
7235 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7237 "%s%s%s%s", i ? " " : "",
7238 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7239 i == tr->clock_id ? "]" : "");
7245 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7249 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7250 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7253 if (i == ARRAY_SIZE(trace_clocks))
7256 mutex_lock(&trace_types_lock);
7260 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7263 * New clock may not be consistent with the previous clock.
7264 * Reset the buffer so that it doesn't have incomparable timestamps.
7266 tracing_reset_online_cpus(&tr->array_buffer);
7268 #ifdef CONFIG_TRACER_MAX_TRACE
7269 if (tr->max_buffer.buffer)
7270 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7271 tracing_reset_online_cpus(&tr->max_buffer);
7274 mutex_unlock(&trace_types_lock);
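/*
 * Usage note (illustrative): reading the trace_clock file lists the
 * available clocks with the current one in brackets; writing a name,
 * e.g. "echo global > trace_clock", switches to it and resets the
 * buffers, as noted above, since old and new timestamps may not be
 * comparable.
 */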
7279 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7280 size_t cnt, loff_t *fpos)
7282 struct seq_file *m = filp->private_data;
7283 struct trace_array *tr = m->private;
7285 const char *clockstr;
7288 if (cnt >= sizeof(buf))
7291 if (copy_from_user(buf, ubuf, cnt))
7296 clockstr = strstrip(buf);
7298 ret = tracing_set_clock(tr, clockstr);
7307 static int tracing_clock_open(struct inode *inode, struct file *file)
7309 struct trace_array *tr = inode->i_private;
7312 ret = tracing_check_open_get_tr(tr);
7316 ret = single_open(file, tracing_clock_show, inode->i_private);
7318 trace_array_put(tr);
7323 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7325 struct trace_array *tr = m->private;
7327 mutex_lock(&trace_types_lock);
7329 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7330 seq_puts(m, "delta [absolute]\n");
7332 seq_puts(m, "[delta] absolute\n");
7334 mutex_unlock(&trace_types_lock);
7339 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7341 struct trace_array *tr = inode->i_private;
7344 ret = tracing_check_open_get_tr(tr);
7348 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7350 trace_array_put(tr);
7355 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7357 if (rbe == this_cpu_read(trace_buffered_event))
7358 return ring_buffer_time_stamp(buffer);
7360 return ring_buffer_event_time_stamp(buffer, rbe);
7364 * Set or disable using the per CPU trace_buffered_event when possible.
7366 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7370 mutex_lock(&trace_types_lock);
7372 if (set && tr->no_filter_buffering_ref++)
7376 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7381 --tr->no_filter_buffering_ref;
7384 mutex_unlock(&trace_types_lock);
7389 struct ftrace_buffer_info {
7390 struct trace_iterator iter;
7392 unsigned int spare_cpu;
7396 #ifdef CONFIG_TRACER_SNAPSHOT
7397 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7399 struct trace_array *tr = inode->i_private;
7400 struct trace_iterator *iter;
7404 ret = tracing_check_open_get_tr(tr);
7408 if (file->f_mode & FMODE_READ) {
7409 iter = __tracing_open(inode, file, true);
7411 ret = PTR_ERR(iter);
7413 /* Writes still need the seq_file to hold the private data */
7415 m = kzalloc(sizeof(*m), GFP_KERNEL);
7418 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7426 iter->array_buffer = &tr->max_buffer;
7427 iter->cpu_file = tracing_get_cpu(inode);
7429 file->private_data = m;
7433 trace_array_put(tr);
7439 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7442 struct seq_file *m = filp->private_data;
7443 struct trace_iterator *iter = m->private;
7444 struct trace_array *tr = iter->tr;
7448 ret = tracing_update_buffers();
7452 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7456 mutex_lock(&trace_types_lock);
7458 if (tr->current_trace->use_max_tr) {
7463 local_irq_disable();
7464 arch_spin_lock(&tr->max_lock);
7465 if (tr->cond_snapshot)
7467 arch_spin_unlock(&tr->max_lock);
7474 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7478 if (tr->allocated_snapshot)
7482 /* Only allow per-cpu swap if the ring buffer supports it */
7483 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7484 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7489 if (tr->allocated_snapshot)
7490 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7491 &tr->array_buffer, iter->cpu_file);
7493 ret = tracing_alloc_snapshot_instance(tr);
7496 local_irq_disable();
7497 /* Now, we're going to swap */
7498 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7499 update_max_tr(tr, current, smp_processor_id(), NULL);
7501 update_max_tr_single(tr, current, iter->cpu_file);
7505 if (tr->allocated_snapshot) {
7506 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7507 tracing_reset_online_cpus(&tr->max_buffer);
7509 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7519 mutex_unlock(&trace_types_lock);
7523 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7525 struct seq_file *m = file->private_data;
7528 ret = tracing_release(inode, file);
7530 if (file->f_mode & FMODE_READ)
7533 /* If write only, the seq_file is just a stub */
7541 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7542 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7543 size_t count, loff_t *ppos);
7544 static int tracing_buffers_release(struct inode *inode, struct file *file);
7545 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7546 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7548 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7550 struct ftrace_buffer_info *info;
7553 /* The following checks for tracefs lockdown */
7554 ret = tracing_buffers_open(inode, filp);
7558 info = filp->private_data;
7560 if (info->iter.trace->use_max_tr) {
7561 tracing_buffers_release(inode, filp);
7565 info->iter.snapshot = true;
7566 info->iter.array_buffer = &info->iter.tr->max_buffer;
7571 #endif /* CONFIG_TRACER_SNAPSHOT */
7574 static const struct file_operations tracing_thresh_fops = {
7575 .open = tracing_open_generic,
7576 .read = tracing_thresh_read,
7577 .write = tracing_thresh_write,
7578 .llseek = generic_file_llseek,
7581 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7582 static const struct file_operations tracing_max_lat_fops = {
7583 .open = tracing_open_generic,
7584 .read = tracing_max_lat_read,
7585 .write = tracing_max_lat_write,
7586 .llseek = generic_file_llseek,
7590 static const struct file_operations set_tracer_fops = {
7591 .open = tracing_open_generic,
7592 .read = tracing_set_trace_read,
7593 .write = tracing_set_trace_write,
7594 .llseek = generic_file_llseek,
7597 static const struct file_operations tracing_pipe_fops = {
7598 .open = tracing_open_pipe,
7599 .poll = tracing_poll_pipe,
7600 .read = tracing_read_pipe,
7601 .splice_read = tracing_splice_read_pipe,
7602 .release = tracing_release_pipe,
7603 .llseek = no_llseek,
7606 static const struct file_operations tracing_entries_fops = {
7607 .open = tracing_open_generic_tr,
7608 .read = tracing_entries_read,
7609 .write = tracing_entries_write,
7610 .llseek = generic_file_llseek,
7611 .release = tracing_release_generic_tr,
7614 static const struct file_operations tracing_total_entries_fops = {
7615 .open = tracing_open_generic_tr,
7616 .read = tracing_total_entries_read,
7617 .llseek = generic_file_llseek,
7618 .release = tracing_release_generic_tr,
7621 static const struct file_operations tracing_free_buffer_fops = {
7622 .open = tracing_open_generic_tr,
7623 .write = tracing_free_buffer_write,
7624 .release = tracing_free_buffer_release,
7627 static const struct file_operations tracing_mark_fops = {
7628 .open = tracing_mark_open,
7629 .write = tracing_mark_write,
7630 .release = tracing_release_generic_tr,
7633 static const struct file_operations tracing_mark_raw_fops = {
7634 .open = tracing_mark_open,
7635 .write = tracing_mark_raw_write,
7636 .release = tracing_release_generic_tr,
7639 static const struct file_operations trace_clock_fops = {
7640 .open = tracing_clock_open,
7642 .llseek = seq_lseek,
7643 .release = tracing_single_release_tr,
7644 .write = tracing_clock_write,
7647 static const struct file_operations trace_time_stamp_mode_fops = {
7648 .open = tracing_time_stamp_mode_open,
7650 .llseek = seq_lseek,
7651 .release = tracing_single_release_tr,
7654 #ifdef CONFIG_TRACER_SNAPSHOT
7655 static const struct file_operations snapshot_fops = {
7656 .open = tracing_snapshot_open,
7658 .write = tracing_snapshot_write,
7659 .llseek = tracing_lseek,
7660 .release = tracing_snapshot_release,
7663 static const struct file_operations snapshot_raw_fops = {
7664 .open = snapshot_raw_open,
7665 .read = tracing_buffers_read,
7666 .release = tracing_buffers_release,
7667 .splice_read = tracing_buffers_splice_read,
7668 .llseek = no_llseek,
7671 #endif /* CONFIG_TRACER_SNAPSHOT */
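/*
 * Usage note (illustrative, based on the snapshot file semantics): writing
 * 1 to the "snapshot" file allocates the spare buffer if needed and takes
 * a snapshot, writing 0 frees the spare buffer, and writing 2 clears the
 * snapshot contents without freeing it.
 */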
7674 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7675 * @filp: The active open file structure
7676 * @ubuf: The user-space buffer containing the value to write
7677 * @cnt: The number of bytes to read from @ubuf
7678 * @ppos: The current "file" position
7680 * This function implements the write interface for a struct trace_min_max_param.
7681 * The filp->private_data must point to a trace_min_max_param structure that
7682 * defines where to write the value, the min and the max acceptable values,
7683 * and a lock to protect the write.
7686 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7688 struct trace_min_max_param *param = filp->private_data;
7695 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7700 mutex_lock(param->lock);
7702 if (param->min && val < *param->min)
7705 if (param->max && val > *param->max)
7712 mutex_unlock(param->lock);
7721 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7722 * @filp: The active open file structure
7723 * @ubuf: The userspace provided buffer to read value into
7724 * @cnt: The maximum number of bytes to read
7725 * @ppos: The current "file" position
7727 * This function implements the read interface for a struct trace_min_max_param.
7728 * The filp->private_data must point to a trace_min_max_param struct with valid data.
7732 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7734 struct trace_min_max_param *param = filp->private_data;
7735 char buf[U64_STR_SIZE];
7744 if (cnt > sizeof(buf))
7747 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7749 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7752 const struct file_operations trace_min_max_fops = {
7753 .open = tracing_open_generic,
7754 .read = trace_min_max_read,
7755 .write = trace_min_max_write,
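/*
 * Illustrative sketch (not from this file): a tracer that wants a bounded
 * u64 control file can wire one up with trace_min_max_fops.  All names
 * below ("example_lock", "example_val", "example_max", "example_us") are
 * made up.
 *
 *	static u64 example_val, example_max = 1000000;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= NULL,			// no lower bound
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_us", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */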
7758 #define TRACING_LOG_ERRS_MAX 8
7759 #define TRACING_LOG_LOC_MAX 128
7761 #define CMD_PREFIX " Command: "
7764 const char **errs; /* ptr to loc-specific array of err strings */
7765 u8 type; /* index into errs -> specific err string */
7766 u16 pos; /* caret position */
7770 struct tracing_log_err {
7771 struct list_head list;
7772 struct err_info info;
7773 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7774 char *cmd; /* what caused err */
7777 static DEFINE_MUTEX(tracing_err_log_lock);
7779 static struct tracing_log_err *alloc_tracing_log_err(int len)
7781 struct tracing_log_err *err;
7783 err = kzalloc(sizeof(*err), GFP_KERNEL);
7785 return ERR_PTR(-ENOMEM);
7787 err->cmd = kzalloc(len, GFP_KERNEL);
7790 return ERR_PTR(-ENOMEM);
7796 static void free_tracing_log_err(struct tracing_log_err *err)
7802 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7805 struct tracing_log_err *err;
7807 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7808 err = alloc_tracing_log_err(len);
7809 if (PTR_ERR(err) != -ENOMEM)
7810 tr->n_err_log_entries++;
7815 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7817 err->cmd = kzalloc(len, GFP_KERNEL);
7819 return ERR_PTR(-ENOMEM);
7820 list_del(&err->list);
7826 * err_pos - find the position of a string within a command for error careting
7827 * @cmd: The tracing command that caused the error
7828 * @str: The string to position the caret at within @cmd
7830 * Finds the position of the first occurrence of @str within @cmd. The
7831 * return value can be passed to tracing_log_err() for caret placement within @cmd.
7834 * Returns the index within @cmd of the first occurrence of @str or 0
7835 * if @str was not found.
7837 unsigned int err_pos(char *cmd, const char *str)
7841 if (WARN_ON(!strlen(cmd)))
7844 found = strstr(cmd, str);
7852 * tracing_log_err - write an error to the tracing error log
7853 * @tr: The associated trace array for the error (NULL for top level array)
7854 * @loc: A string describing where the error occurred
7855 * @cmd: The tracing command that caused the error
7856 * @errs: The array of loc-specific static error strings
7857 * @type: The index into errs[], which produces the specific static err string
7858 * @pos: The position the caret should be placed in the cmd
7860 * Writes an error into tracing/error_log of the form:
7862 * <loc>: error: <text>
7866 * tracing/error_log is a small log file containing the last
7867 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7868 * unless there has been a tracing error, and the error log can be
7869 * cleared and have its memory freed by writing the empty string in
7870 * truncation mode to it, i.e. echo > tracing/error_log.
7872 * NOTE: the @errs array along with the @type param is used to
7873 * produce a static error string - this string is not copied and saved
7874 * when the error is logged - only a pointer to it is saved. See
7875 * existing callers for examples of how static strings are typically
7876 * defined for use with tracing_log_err().
7878 void tracing_log_err(struct trace_array *tr,
7879 const char *loc, const char *cmd,
7880 const char **errs, u8 type, u16 pos)
7882 struct tracing_log_err *err;
7888 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7890 mutex_lock(&tracing_err_log_lock);
7891 err = get_tracing_log_err(tr, len);
7892 if (PTR_ERR(err) == -ENOMEM) {
7893 mutex_unlock(&tracing_err_log_lock);
7897 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7898 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7900 err->info.errs = errs;
7901 err->info.type = type;
7902 err->info.pos = pos;
7903 err->info.ts = local_clock();
7905 list_add_tail(&err->list, &tr->err_log);
7906 mutex_unlock(&tracing_err_log_lock);
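/*
 * Illustrative sketch (not from this file): a typical caller keeps a static
 * array of error strings indexed by an enum and points the caret at the
 * offending token with err_pos().  The names below are made up.
 *
 *	static const char *example_errs[] = { "bad field", "duplicate name" };
 *	enum { EXAMPLE_ERR_BAD_FIELD, EXAMPLE_ERR_DUP_NAME };
 *
 *	tracing_log_err(tr, "example: subsys", cmd, example_errs,
 *			EXAMPLE_ERR_BAD_FIELD, err_pos(cmd, "bogus"));
 */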
7909 static void clear_tracing_err_log(struct trace_array *tr)
7911 struct tracing_log_err *err, *next;
7913 mutex_lock(&tracing_err_log_lock);
7914 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7915 list_del(&err->list);
7916 free_tracing_log_err(err);
7919 tr->n_err_log_entries = 0;
7920 mutex_unlock(&tracing_err_log_lock);
7923 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7925 struct trace_array *tr = m->private;
7927 mutex_lock(&tracing_err_log_lock);
7929 return seq_list_start(&tr->err_log, *pos);
7932 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7934 struct trace_array *tr = m->private;
7936 return seq_list_next(v, &tr->err_log, pos);
7939 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7941 mutex_unlock(&tracing_err_log_lock);
7944 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7948 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7950 for (i = 0; i < pos; i++)
7955 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7957 struct tracing_log_err *err = v;
7960 const char *err_text = err->info.errs[err->info.type];
7961 u64 sec = err->info.ts;
7964 nsec = do_div(sec, NSEC_PER_SEC);
7965 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7966 err->loc, err_text);
7967 seq_printf(m, "%s", err->cmd);
7968 tracing_err_log_show_pos(m, err->info.pos);
7974 static const struct seq_operations tracing_err_log_seq_ops = {
7975 .start = tracing_err_log_seq_start,
7976 .next = tracing_err_log_seq_next,
7977 .stop = tracing_err_log_seq_stop,
7978 .show = tracing_err_log_seq_show
7981 static int tracing_err_log_open(struct inode *inode, struct file *file)
7983 struct trace_array *tr = inode->i_private;
7986 ret = tracing_check_open_get_tr(tr);
7990 /* If this file was opened for write, then erase contents */
7991 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7992 clear_tracing_err_log(tr);
7994 if (file->f_mode & FMODE_READ) {
7995 ret = seq_open(file, &tracing_err_log_seq_ops);
7997 struct seq_file *m = file->private_data;
8000 trace_array_put(tr);
8006 static ssize_t tracing_err_log_write(struct file *file,
8007 const char __user *buffer,
8008 size_t count, loff_t *ppos)
8013 static int tracing_err_log_release(struct inode *inode, struct file *file)
8015 struct trace_array *tr = inode->i_private;
8017 trace_array_put(tr);
8019 if (file->f_mode & FMODE_READ)
8020 seq_release(inode, file);
8025 static const struct file_operations tracing_err_log_fops = {
8026 .open = tracing_err_log_open,
8027 .write = tracing_err_log_write,
8029 .llseek = seq_lseek,
8030 .release = tracing_err_log_release,
8033 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8035 struct trace_array *tr = inode->i_private;
8036 struct ftrace_buffer_info *info;
8039 ret = tracing_check_open_get_tr(tr);
8043 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8045 trace_array_put(tr);
8049 mutex_lock(&trace_types_lock);
8052 info->iter.cpu_file = tracing_get_cpu(inode);
8053 info->iter.trace = tr->current_trace;
8054 info->iter.array_buffer = &tr->array_buffer;
8056 /* Force reading ring buffer for first read */
8057 info->read = (unsigned int)-1;
8059 filp->private_data = info;
8063 mutex_unlock(&trace_types_lock);
8065 ret = nonseekable_open(inode, filp);
8067 trace_array_put(tr);
8073 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8075 struct ftrace_buffer_info *info = filp->private_data;
8076 struct trace_iterator *iter = &info->iter;
8078 return trace_poll(iter, filp, poll_table);
8082 tracing_buffers_read(struct file *filp, char __user *ubuf,
8083 size_t count, loff_t *ppos)
8085 struct ftrace_buffer_info *info = filp->private_data;
8086 struct trace_iterator *iter = &info->iter;
8093 #ifdef CONFIG_TRACER_MAX_TRACE
8094 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8099 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8101 if (IS_ERR(info->spare)) {
8102 ret = PTR_ERR(info->spare);
8105 info->spare_cpu = iter->cpu_file;
8111 /* Do we have previous read data to read? */
8112 if (info->read < PAGE_SIZE)
8116 trace_access_lock(iter->cpu_file);
8117 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8121 trace_access_unlock(iter->cpu_file);
8124 if (trace_empty(iter)) {
8125 if ((filp->f_flags & O_NONBLOCK))
8128 ret = wait_on_pipe(iter, 0);
8139 size = PAGE_SIZE - info->read;
8143 ret = copy_to_user(ubuf, info->spare + info->read, size);
8155 static int tracing_buffers_release(struct inode *inode, struct file *file)
8157 struct ftrace_buffer_info *info = file->private_data;
8158 struct trace_iterator *iter = &info->iter;
8160 mutex_lock(&trace_types_lock);
8162 iter->tr->trace_ref--;
8164 __trace_array_put(iter->tr);
8167 /* Make sure the waiters see the new wait_index */
8170 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8173 ring_buffer_free_read_page(iter->array_buffer->buffer,
8174 info->spare_cpu, info->spare);
8177 mutex_unlock(&trace_types_lock);
8183 struct trace_buffer *buffer;
8186 refcount_t refcount;
8189 static void buffer_ref_release(struct buffer_ref *ref)
8191 if (!refcount_dec_and_test(&ref->refcount))
8193 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8197 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8198 struct pipe_buffer *buf)
8200 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8202 buffer_ref_release(ref);
8206 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8207 struct pipe_buffer *buf)
8209 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8211 if (refcount_read(&ref->refcount) > INT_MAX/2)
8214 refcount_inc(&ref->refcount);
8218 /* Pipe buffer operations for a buffer. */
8219 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8220 .release = buffer_pipe_buf_release,
8221 .get = buffer_pipe_buf_get,
8225 * Callback from splice_to_pipe(); releases pages still referenced
8226 * at the end of the spd in case we errored out while filling the pipe.
8228 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8230 struct buffer_ref *ref =
8231 (struct buffer_ref *)spd->partial[i].private;
8233 buffer_ref_release(ref);
8234 spd->partial[i].private = 0;
8238 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8239 struct pipe_inode_info *pipe, size_t len,
8242 struct ftrace_buffer_info *info = file->private_data;
8243 struct trace_iterator *iter = &info->iter;
8244 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8245 struct page *pages_def[PIPE_DEF_BUFFERS];
8246 struct splice_pipe_desc spd = {
8248 .partial = partial_def,
8249 .nr_pages_max = PIPE_DEF_BUFFERS,
8250 .ops = &buffer_pipe_buf_ops,
8251 .spd_release = buffer_spd_release,
8253 struct buffer_ref *ref;
8257 #ifdef CONFIG_TRACER_MAX_TRACE
8258 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8262 if (*ppos & (PAGE_SIZE - 1))
8265 if (len & (PAGE_SIZE - 1)) {
8266 if (len < PAGE_SIZE)
8271 if (splice_grow_spd(pipe, &spd))
8275 trace_access_lock(iter->cpu_file);
8276 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8278 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8282 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8288 refcount_set(&ref->refcount, 1);
8289 ref->buffer = iter->array_buffer->buffer;
8290 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8291 if (IS_ERR(ref->page)) {
8292 ret = PTR_ERR(ref->page);
8297 ref->cpu = iter->cpu_file;
8299 r = ring_buffer_read_page(ref->buffer, &ref->page,
8300 len, iter->cpu_file, 1);
8302 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8308 page = virt_to_page(ref->page);
8310 spd.pages[i] = page;
8311 spd.partial[i].len = PAGE_SIZE;
8312 spd.partial[i].offset = 0;
8313 spd.partial[i].private = (unsigned long)ref;
8317 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8320 trace_access_unlock(iter->cpu_file);
8323 /* did we read anything? */
8324 if (!spd.nr_pages) {
8331 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8334 wait_index = READ_ONCE(iter->wait_index);
8336 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8340 /* No need to wait after waking up when tracing is off */
8341 if (!tracer_tracing_is_on(iter->tr))
8344 /* Make sure we see the new wait_index */
8346 if (wait_index != iter->wait_index)
8352 ret = splice_to_pipe(pipe, &spd);
8354 splice_shrink_spd(&spd);
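/*
 * Usage note (illustrative): per_cpu/cpuN/trace_pipe_raw hands out whole
 * ring-buffer pages, so splice offsets and lengths must be page aligned
 * and page sized (enforced above); tools such as trace-cmd splice these
 * pages straight to a file without an extra copy through user space.
 */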
8359 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8360 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8362 struct ftrace_buffer_info *info = file->private_data;
8363 struct trace_iterator *iter = &info->iter;
8366 return -ENOIOCTLCMD;
8368 mutex_lock(&trace_types_lock);
8371 /* Make sure the waiters see the new wait_index */
8374 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8376 mutex_unlock(&trace_types_lock);
8380 static const struct file_operations tracing_buffers_fops = {
8381 .open = tracing_buffers_open,
8382 .read = tracing_buffers_read,
8383 .poll = tracing_buffers_poll,
8384 .release = tracing_buffers_release,
8385 .splice_read = tracing_buffers_splice_read,
8386 .unlocked_ioctl = tracing_buffers_ioctl,
8387 .llseek = no_llseek,
8391 tracing_stats_read(struct file *filp, char __user *ubuf,
8392 size_t count, loff_t *ppos)
8394 struct inode *inode = file_inode(filp);
8395 struct trace_array *tr = inode->i_private;
8396 struct array_buffer *trace_buf = &tr->array_buffer;
8397 int cpu = tracing_get_cpu(inode);
8398 struct trace_seq *s;
8400 unsigned long long t;
8401 unsigned long usec_rem;
8403 s = kmalloc(sizeof(*s), GFP_KERNEL);
8409 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8410 trace_seq_printf(s, "entries: %ld\n", cnt);
8412 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8413 trace_seq_printf(s, "overrun: %ld\n", cnt);
8415 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8416 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8418 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8419 trace_seq_printf(s, "bytes: %ld\n", cnt);
8421 if (trace_clocks[tr->clock_id].in_ns) {
8422 /* local or global for trace_clock */
8423 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8424 usec_rem = do_div(t, USEC_PER_SEC);
8425 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8428 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8429 usec_rem = do_div(t, USEC_PER_SEC);
8430 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8432 /* counter or tsc mode for trace_clock */
8433 trace_seq_printf(s, "oldest event ts: %llu\n",
8434 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8436 trace_seq_printf(s, "now ts: %llu\n",
8437 ring_buffer_time_stamp(trace_buf->buffer));
8440 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8441 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8443 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8444 trace_seq_printf(s, "read events: %ld\n", cnt);
8446 count = simple_read_from_buffer(ubuf, count, ppos,
8447 s->buffer, trace_seq_used(s));
8454 static const struct file_operations tracing_stats_fops = {
8455 .open = tracing_open_generic_tr,
8456 .read = tracing_stats_read,
8457 .llseek = generic_file_llseek,
8458 .release = tracing_release_generic_tr,
8461 #ifdef CONFIG_DYNAMIC_FTRACE
8464 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8465 size_t cnt, loff_t *ppos)
8471 /* 256 should be plenty to hold the amount needed */
8472 buf = kmalloc(256, GFP_KERNEL);
8476 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8477 ftrace_update_tot_cnt,
8478 ftrace_number_of_pages,
8479 ftrace_number_of_groups);
8481 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8486 static const struct file_operations tracing_dyn_info_fops = {
8487 .open = tracing_open_generic,
8488 .read = tracing_read_dyn_info,
8489 .llseek = generic_file_llseek,
8491 #endif /* CONFIG_DYNAMIC_FTRACE */
8493 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8495 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8496 struct trace_array *tr, struct ftrace_probe_ops *ops,
8499 tracing_snapshot_instance(tr);
8503 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8504 struct trace_array *tr, struct ftrace_probe_ops *ops,
8507 struct ftrace_func_mapper *mapper = data;
8511 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8521 tracing_snapshot_instance(tr);
8525 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8526 struct ftrace_probe_ops *ops, void *data)
8528 struct ftrace_func_mapper *mapper = data;
8531 seq_printf(m, "%ps:", (void *)ip);
8533 seq_puts(m, "snapshot");
8536 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8539 seq_printf(m, ":count=%ld\n", *count);
8541 seq_puts(m, ":unlimited\n");
8547 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8548 unsigned long ip, void *init_data, void **data)
8550 struct ftrace_func_mapper *mapper = *data;
8553 mapper = allocate_ftrace_func_mapper();
8559 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8563 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8564 unsigned long ip, void *data)
8566 struct ftrace_func_mapper *mapper = data;
8571 free_ftrace_func_mapper(mapper, NULL);
8575 ftrace_func_mapper_remove_ip(mapper, ip);
8578 static struct ftrace_probe_ops snapshot_probe_ops = {
8579 .func = ftrace_snapshot,
8580 .print = ftrace_snapshot_print,
8583 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8584 .func = ftrace_count_snapshot,
8585 .print = ftrace_snapshot_print,
8586 .init = ftrace_snapshot_init,
8587 .free = ftrace_snapshot_free,
8591 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8592 char *glob, char *cmd, char *param, int enable)
8594 struct ftrace_probe_ops *ops;
8595 void *count = (void *)-1;
8602 /* hash funcs only work with set_ftrace_filter */
8606 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8609 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8614 number = strsep(¶m, ":");
8616 if (!strlen(number))
8620 * We use the callback data field (which is a pointer)
8623 ret = kstrtoul(number, 0, (unsigned long *)&count);
8628 ret = tracing_alloc_snapshot_instance(tr);
8632 ret = register_ftrace_function_probe(glob, tr, ops, count);
8635 return ret < 0 ? ret : 0;
8638 static struct ftrace_func_command ftrace_snapshot_cmd = {
8640 .func = ftrace_trace_snapshot_callback,
8643 static __init int register_snapshot_cmd(void)
8645 return register_ftrace_command(&ftrace_snapshot_cmd);
8648 static inline __init int register_snapshot_cmd(void) { return 0; }
8649 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
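/*
 * Usage note (illustrative, assuming the command is registered as
 * "snapshot"): with CONFIG_TRACER_SNAPSHOT and dynamic ftrace, a snapshot
 * can be triggered from a function via set_ftrace_filter, e.g.
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	# at most 5 times
 *	echo '!schedule:snapshot' > set_ftrace_filter	# remove the probe
 */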
8651 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8653 if (WARN_ON(!tr->dir))
8654 return ERR_PTR(-ENODEV);
8656 /* Top directory uses NULL as the parent */
8657 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8660 /* All sub buffers have a descriptor */
8664 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8666 struct dentry *d_tracer;
8669 return tr->percpu_dir;
8671 d_tracer = tracing_get_dentry(tr);
8672 if (IS_ERR(d_tracer))
8675 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8677 MEM_FAIL(!tr->percpu_dir,
8678 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8680 return tr->percpu_dir;
8683 static struct dentry *
8684 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8685 void *data, long cpu, const struct file_operations *fops)
8687 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8689 if (ret) /* See tracing_get_cpu() */
8690 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8695 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8697 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8698 struct dentry *d_cpu;
8699 char cpu_dir[30]; /* 30 characters should be more than enough */
8704 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8705 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8707 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8711 /* per cpu trace_pipe */
8712 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8713 tr, cpu, &tracing_pipe_fops);
8716 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8717 tr, cpu, &tracing_fops);
8719 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8720 tr, cpu, &tracing_buffers_fops);
8722 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8723 tr, cpu, &tracing_stats_fops);
8725 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8726 tr, cpu, &tracing_entries_fops);
8728 #ifdef CONFIG_TRACER_SNAPSHOT
8729 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8730 tr, cpu, &snapshot_fops);
8732 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8733 tr, cpu, &snapshot_raw_fops);
8737 #ifdef CONFIG_FTRACE_SELFTEST
8738 /* Let selftest have access to static functions in this file */
8739 #include "trace_selftest.c"
8743 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8746 struct trace_option_dentry *topt = filp->private_data;
8749 if (topt->flags->val & topt->opt->bit)
8754 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8758 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8761 struct trace_option_dentry *topt = filp->private_data;
8765 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8769 if (val != 0 && val != 1)
8772 if (!!(topt->flags->val & topt->opt->bit) != val) {
8773 mutex_lock(&trace_types_lock);
8774 ret = __set_tracer_option(topt->tr, topt->flags,
8776 mutex_unlock(&trace_types_lock);
8787 static const struct file_operations trace_options_fops = {
8788 .open = tracing_open_generic,
8789 .read = trace_options_read,
8790 .write = trace_options_write,
8791 .llseek = generic_file_llseek,
8795 * In order to pass in both the trace_array descriptor as well as the index
8796 * to the flag that the trace option file represents, the trace_array
8797 * has a character array of trace_flags_index[], which holds the index
8798 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8799 * The address of this character array is passed to the flag option file
8800 * read/write callbacks.
8802 * In order to extract both the index and the trace_array descriptor,
8803 * get_tr_index() uses the following algorithm.
8807 * As the pointer itself contains the address of the index (remember index[1] == 1).
8810 * Then to get the trace_array descriptor, by subtracting that index
8811 * from the ptr, we get to the start of the index itself.
8813 * ptr - idx == &index[0]
8815 * Then a simple container_of() from that pointer gets us to the
8816 * trace_array descriptor.
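 *
 * Worked example (illustrative): if data points at tr->trace_flags_index[3],
 * then *pindex == 3 (init_trace_flags_index() sets index[i] == i), so
 * data - *pindex == &tr->trace_flags_index[0], and container_of() on that
 * member address recovers the enclosing struct trace_array.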
8818 static void get_tr_index(void *data, struct trace_array **ptr,
8819 unsigned int *pindex)
8821 *pindex = *(unsigned char *)data;
8823 *ptr = container_of(data - *pindex, struct trace_array,
8828 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8831 void *tr_index = filp->private_data;
8832 struct trace_array *tr;
8836 get_tr_index(tr_index, &tr, &index);
8838 if (tr->trace_flags & (1 << index))
8843 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8847 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8850 void *tr_index = filp->private_data;
8851 struct trace_array *tr;
8856 get_tr_index(tr_index, &tr, &index);
8858 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8862 if (val != 0 && val != 1)
8865 mutex_lock(&event_mutex);
8866 mutex_lock(&trace_types_lock);
8867 ret = set_tracer_flag(tr, 1 << index, val);
8868 mutex_unlock(&trace_types_lock);
8869 mutex_unlock(&event_mutex);
8879 static const struct file_operations trace_options_core_fops = {
8880 .open = tracing_open_generic,
8881 .read = trace_options_core_read,
8882 .write = trace_options_core_write,
8883 .llseek = generic_file_llseek,
8886 struct dentry *trace_create_file(const char *name,
8888 struct dentry *parent,
8890 const struct file_operations *fops)
8894 ret = tracefs_create_file(name, mode, parent, data, fops);
8896 pr_warn("Could not create tracefs '%s' entry\n", name);
8902 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8904 struct dentry *d_tracer;
8909 d_tracer = tracing_get_dentry(tr);
8910 if (IS_ERR(d_tracer))
8913 tr->options = tracefs_create_dir("options", d_tracer);
8915 pr_warn("Could not create tracefs directory 'options'\n");
8923 create_trace_option_file(struct trace_array *tr,
8924 struct trace_option_dentry *topt,
8925 struct tracer_flags *flags,
8926 struct tracer_opt *opt)
8928 struct dentry *t_options;
8930 t_options = trace_options_init_dentry(tr);
8934 topt->flags = flags;
8938 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8939 t_options, topt, &trace_options_fops);
8944 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8946 struct trace_option_dentry *topts;
8947 struct trace_options *tr_topts;
8948 struct tracer_flags *flags;
8949 struct tracer_opt *opts;
8956 flags = tracer->flags;
8958 if (!flags || !flags->opts)
8962 * If this is an instance, only create flags for tracers
8963 * the instance may have.
8965 if (!trace_ok_for_array(tracer, tr))
8968 for (i = 0; i < tr->nr_topts; i++) {
8969 /* Make sure there are no duplicate flags. */
8970 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8976 for (cnt = 0; opts[cnt].name; cnt++)
8979 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8983 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8990 tr->topts = tr_topts;
8991 tr->topts[tr->nr_topts].tracer = tracer;
8992 tr->topts[tr->nr_topts].topts = topts;
8995 for (cnt = 0; opts[cnt].name; cnt++) {
8996 create_trace_option_file(tr, &topts[cnt], flags,
8998 MEM_FAIL(topts[cnt].entry == NULL,
8999 "Failed to create trace option: %s",
9004 static struct dentry *
9005 create_trace_option_core_file(struct trace_array *tr,
9006 const char *option, long index)
9008 struct dentry *t_options;
9010 t_options = trace_options_init_dentry(tr);
9014 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9015 (void *)&tr->trace_flags_index[index],
9016 &trace_options_core_fops);
9019 static void create_trace_options_dir(struct trace_array *tr)
9021 struct dentry *t_options;
9022 bool top_level = tr == &global_trace;
9025 t_options = trace_options_init_dentry(tr);
9029 for (i = 0; trace_options[i]; i++) {
9031 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9032 create_trace_option_core_file(tr, trace_options[i], i);
9037 rb_simple_read(struct file *filp, char __user *ubuf,
9038 size_t cnt, loff_t *ppos)
9040 struct trace_array *tr = filp->private_data;
9044 r = tracer_tracing_is_on(tr);
9045 r = sprintf(buf, "%d\n", r);
9047 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9051 rb_simple_write(struct file *filp, const char __user *ubuf,
9052 size_t cnt, loff_t *ppos)
9054 struct trace_array *tr = filp->private_data;
9055 struct trace_buffer *buffer = tr->array_buffer.buffer;
9059 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9064 mutex_lock(&trace_types_lock);
9065 if (!!val == tracer_tracing_is_on(tr)) {
9066 val = 0; /* do nothing */
9068 tracer_tracing_on(tr);
9069 if (tr->current_trace->start)
9070 tr->current_trace->start(tr);
9072 tracer_tracing_off(tr);
9073 if (tr->current_trace->stop)
9074 tr->current_trace->stop(tr);
9075 /* Wake up any waiters */
9076 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9078 mutex_unlock(&trace_types_lock);
9086 static const struct file_operations rb_simple_fops = {
9087 .open = tracing_open_generic_tr,
9088 .read = rb_simple_read,
9089 .write = rb_simple_write,
9090 .release = tracing_release_generic_tr,
9091 .llseek = default_llseek,
9095 buffer_percent_read(struct file *filp, char __user *ubuf,
9096 size_t cnt, loff_t *ppos)
9098 struct trace_array *tr = filp->private_data;
9102 r = tr->buffer_percent;
9103 r = sprintf(buf, "%d\n", r);
9105 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9109 buffer_percent_write(struct file *filp, const char __user *ubuf,
9110 size_t cnt, loff_t *ppos)
9112 struct trace_array *tr = filp->private_data;
9116 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9126 tr->buffer_percent = val;
9133 static const struct file_operations buffer_percent_fops = {
9134 .open = tracing_open_generic_tr,
9135 .read = buffer_percent_read,
9136 .write = buffer_percent_write,
9137 .release = tracing_release_generic_tr,
9138 .llseek = default_llseek,
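/*
 * Usage note (illustrative): buffer_percent sets how full the ring buffer
 * must be before blocked readers (e.g. of trace_pipe_raw) are woken: 0
 * wakes them as soon as any data is present, 100 waits until the buffer
 * is essentially full, e.g.
 *
 *	echo 50 > buffer_percent
 */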
9141 static struct dentry *trace_instance_dir;
9144 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9147 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9149 enum ring_buffer_flags rb_flags;
9151 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9155 buf->buffer = ring_buffer_alloc(size, rb_flags);
9159 buf->data = alloc_percpu(struct trace_array_cpu);
9161 ring_buffer_free(buf->buffer);
9166 /* Allocate the first page for all buffers */
9167 set_buffer_entries(&tr->array_buffer,
9168 ring_buffer_size(tr->array_buffer.buffer, 0));
9173 static void free_trace_buffer(struct array_buffer *buf)
9176 ring_buffer_free(buf->buffer);
9178 free_percpu(buf->data);
9183 static int allocate_trace_buffers(struct trace_array *tr, int size)
9187 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9191 #ifdef CONFIG_TRACER_MAX_TRACE
9192 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9193 allocate_snapshot ? size : 1);
9194 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9195 free_trace_buffer(&tr->array_buffer);
9198 tr->allocated_snapshot = allocate_snapshot;
9201 * Only the top level trace array gets its snapshot allocated
9202 * from the kernel command line.
9204 allocate_snapshot = false;
9210 static void free_trace_buffers(struct trace_array *tr)
9215 free_trace_buffer(&tr->array_buffer);
9217 #ifdef CONFIG_TRACER_MAX_TRACE
9218 free_trace_buffer(&tr->max_buffer);
9222 static void init_trace_flags_index(struct trace_array *tr)
9226 /* Used by the trace options files */
9227 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9228 tr->trace_flags_index[i] = i;
9231 static void __update_tracer_options(struct trace_array *tr)
9235 for (t = trace_types; t; t = t->next)
9236 add_tracer_options(tr, t);
9239 static void update_tracer_options(struct trace_array *tr)
9241 mutex_lock(&trace_types_lock);
9242 tracer_options_updated = true;
9243 __update_tracer_options(tr);
9244 mutex_unlock(&trace_types_lock);
9247 /* Must have trace_types_lock held */
9248 struct trace_array *trace_array_find(const char *instance)
9250 struct trace_array *tr, *found = NULL;
9252 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9253 if (tr->name && strcmp(tr->name, instance) == 0) {
9262 struct trace_array *trace_array_find_get(const char *instance)
9264 struct trace_array *tr;
9266 mutex_lock(&trace_types_lock);
9267 tr = trace_array_find(instance);
9270 mutex_unlock(&trace_types_lock);
9275 static int trace_array_create_dir(struct trace_array *tr)
9279 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9283 ret = event_trace_add_tracer(tr->dir, tr);
9285 tracefs_remove(tr->dir);
9289 init_tracer_tracefs(tr, tr->dir);
9290 __update_tracer_options(tr);
9295 static struct trace_array *trace_array_create(const char *name)
9297 struct trace_array *tr;
9301 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9303 return ERR_PTR(ret);
9305 tr->name = kstrdup(name, GFP_KERNEL);
9309 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9312 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9314 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9316 raw_spin_lock_init(&tr->start_lock);
9318 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9320 tr->current_trace = &nop_trace;
9322 INIT_LIST_HEAD(&tr->systems);
9323 INIT_LIST_HEAD(&tr->events);
9324 INIT_LIST_HEAD(&tr->hist_vars);
9325 INIT_LIST_HEAD(&tr->err_log);
9327 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9330 if (ftrace_allocate_ftrace_ops(tr) < 0)
9333 ftrace_init_trace_array(tr);
9335 init_trace_flags_index(tr);
9337 if (trace_instance_dir) {
9338 ret = trace_array_create_dir(tr);
9342 __trace_early_add_events(tr);
9344 list_add(&tr->list, &ftrace_trace_arrays);
9351 ftrace_free_ftrace_ops(tr);
9352 free_trace_buffers(tr);
9353 free_cpumask_var(tr->tracing_cpumask);
9357 return ERR_PTR(ret);
9360 static int instance_mkdir(const char *name)
9362 struct trace_array *tr;
9365 mutex_lock(&event_mutex);
9366 mutex_lock(&trace_types_lock);
9369 if (trace_array_find(name))
9372 tr = trace_array_create(name);
9374 ret = PTR_ERR_OR_ZERO(tr);
9377 mutex_unlock(&trace_types_lock);
9378 mutex_unlock(&event_mutex);
9383 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9384 * @name: The name of the trace array to be looked up/created.
9386 * Returns a pointer to the trace array with the given name, or
9387 * NULL if it cannot be created.
9389 * NOTE: This function increments the reference counter associated with the
9390 * trace array returned. This makes sure it cannot be freed while in use.
9391 * Use trace_array_put() once the trace array is no longer needed.
9392 * If the trace_array is to be freed, trace_array_destroy() needs to
9393 * be called after the trace_array_put(), or simply let user space delete
9394 * it from the tracefs instances directory. But until the
9395 * trace_array_put() is called, user space cannot delete it.
9398 struct trace_array *trace_array_get_by_name(const char *name)
9400 struct trace_array *tr;
9402 mutex_lock(&event_mutex);
9403 mutex_lock(&trace_types_lock);
9405 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9406 if (tr->name && strcmp(tr->name, name) == 0)
9410 tr = trace_array_create(name);
9418 mutex_unlock(&trace_types_lock);
9419 mutex_unlock(&event_mutex);
9422 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
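/*
 * Illustrative sketch (not from this file): a module that wants its own
 * instance can do roughly the following ("example_inst" is a made-up name):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example_inst");
 *	if (!tr)
 *		return -ENOMEM;
 *	// ... use the instance ...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	// only if the instance should go away
 */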
9424 static int __remove_instance(struct trace_array *tr)
9428 /* Reference counter for a newly created trace array = 1. */
9429 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9432 list_del(&tr->list);
9434 /* Disable all the flags that were enabled coming in */
9435 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9436 if ((1 << i) & ZEROED_TRACE_FLAGS)
9437 set_tracer_flag(tr, 1 << i, 0);
9440 tracing_set_nop(tr);
9441 clear_ftrace_function_probes(tr);
9442 event_trace_del_tracer(tr);
9443 ftrace_clear_pids(tr);
9444 ftrace_destroy_function_files(tr);
9445 tracefs_remove(tr->dir);
9446 free_percpu(tr->last_func_repeats);
9447 free_trace_buffers(tr);
9449 for (i = 0; i < tr->nr_topts; i++) {
9450 kfree(tr->topts[i].topts);
9454 free_cpumask_var(tr->tracing_cpumask);
9461 int trace_array_destroy(struct trace_array *this_tr)
9463 struct trace_array *tr;
9469 mutex_lock(&event_mutex);
9470 mutex_lock(&trace_types_lock);
9474 /* Making sure trace array exists before destroying it. */
9475 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9476 if (tr == this_tr) {
9477 ret = __remove_instance(tr);
9482 mutex_unlock(&trace_types_lock);
9483 mutex_unlock(&event_mutex);
9487 EXPORT_SYMBOL_GPL(trace_array_destroy);
9489 static int instance_rmdir(const char *name)
9491 struct trace_array *tr;
9494 mutex_lock(&event_mutex);
9495 mutex_lock(&trace_types_lock);
9498 tr = trace_array_find(name);
9500 ret = __remove_instance(tr);
9502 mutex_unlock(&trace_types_lock);
9503 mutex_unlock(&event_mutex);
9508 static __init void create_trace_instances(struct dentry *d_tracer)
9510 struct trace_array *tr;
9512 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9515 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9518 mutex_lock(&event_mutex);
9519 mutex_lock(&trace_types_lock);
9521 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9524 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9525 "Failed to create instance directory\n"))
9529 mutex_unlock(&trace_types_lock);
9530 mutex_unlock(&event_mutex);
9534 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9536 struct trace_event_file *file;
9539 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9540 tr, &show_traces_fops);
9542 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9543 tr, &set_tracer_fops);
9545 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9546 tr, &tracing_cpumask_fops);
9548 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9549 tr, &tracing_iter_fops);
9551 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9554 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9555 tr, &tracing_pipe_fops);
9557 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9558 tr, &tracing_entries_fops);
9560 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9561 tr, &tracing_total_entries_fops);
9563 trace_create_file("free_buffer", 0200, d_tracer,
9564 tr, &tracing_free_buffer_fops);
9566 trace_create_file("trace_marker", 0220, d_tracer,
9567 tr, &tracing_mark_fops);
9569 file = __find_event_file(tr, "ftrace", "print");
9570 if (file && file->dir)
9571 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9572 file, &event_trigger_fops);
9573 tr->trace_marker_file = file;
9575 trace_create_file("trace_marker_raw", 0220, d_tracer,
9576 tr, &tracing_mark_raw_fops);
9578 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9581 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9582 tr, &rb_simple_fops);
9584 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9585 &trace_time_stamp_mode_fops);
9587 tr->buffer_percent = 50;
9589 trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9590 tr, &buffer_percent_fops);
9592 create_trace_options_dir(tr);
9594 trace_create_maxlat_file(tr, d_tracer);
9596 if (ftrace_create_function_files(tr, d_tracer))
9597 MEM_FAIL(1, "Could not allocate function filter files");
9599 #ifdef CONFIG_TRACER_SNAPSHOT
9600 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9601 tr, &snapshot_fops);
9604 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9605 tr, &tracing_err_log_fops);
9607 for_each_tracing_cpu(cpu)
9608 tracing_init_tracefs_percpu(tr, cpu);
9610 ftrace_init_tracefs(tr, d_tracer);
9613 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9615 struct vfsmount *mnt;
9616 struct file_system_type *type;
9619 * To maintain backward compatibility for tools that mount
9620 * debugfs to get to the tracing facility, tracefs is automatically
9621 * mounted to the debugfs/tracing directory.
9623 type = get_fs_type("tracefs");
9626 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9627 put_filesystem(type);
9636 * tracing_init_dentry - initialize top level trace array
9638 * This is called when creating files or directories in the tracing
9639 * directory. It is called via fs_initcall() by any of the boot up code
9640 * and makes sure the top level tracing directory is set up, returning zero on success.
9642 int tracing_init_dentry(void)
9644 struct trace_array *tr = &global_trace;
9646 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9647 pr_warn("Tracing disabled due to lockdown\n");
9651 /* The top level trace array uses NULL as parent */
9655 if (WARN_ON(!tracefs_initialized()))
9659 * As there may still be users that expect the tracing
9660 * files to exist in debugfs/tracing, we must automount
9661 * the tracefs file system there, so older tools still
9662 * work with the newer kernel.
9664 tr->dir = debugfs_create_automount("tracing", NULL,
9665 trace_automount, NULL);
9670 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9671 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9673 static struct workqueue_struct *eval_map_wq __initdata;
9674 static struct work_struct eval_map_work __initdata;
9675 static struct work_struct tracerfs_init_work __initdata;
9677 static void __init eval_map_work_func(struct work_struct *work)
9681 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9682 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9685 static int __init trace_eval_init(void)
9687 INIT_WORK(&eval_map_work, eval_map_work_func);
9689 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9691 pr_err("Unable to allocate eval_map_wq\n");
9693 eval_map_work_func(&eval_map_work);
9697 queue_work(eval_map_wq, &eval_map_work);
9701 subsys_initcall(trace_eval_init);
9703 static int __init trace_eval_sync(void)
9705 /* Make sure the eval map updates are finished */
9707 destroy_workqueue(eval_map_wq);
9711 late_initcall_sync(trace_eval_sync);
9714 #ifdef CONFIG_MODULES
9715 static void trace_module_add_evals(struct module *mod)
9717 if (!mod->num_trace_evals)
9721 * Modules with bad taint do not have events created, do
9722 * not bother with enums either.
9724 if (trace_module_has_bad_taint(mod))
9727 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9730 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9731 static void trace_module_remove_evals(struct module *mod)
9733 union trace_eval_map_item *map;
9734 union trace_eval_map_item **last = &trace_eval_maps;
9736 if (!mod->num_trace_evals)
9739 mutex_lock(&trace_eval_mutex);
9741 map = trace_eval_maps;
9744 if (map->head.mod == mod)
9746 map = trace_eval_jmp_to_tail(map);
9747 last = &map->tail.next;
9748 map = map->tail.next;
9753 *last = trace_eval_jmp_to_tail(map)->tail.next;
9756 mutex_unlock(&trace_eval_mutex);
9759 static inline void trace_module_remove_evals(struct module *mod) { }
9760 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9762 static int trace_module_notify(struct notifier_block *self,
9763 unsigned long val, void *data)
9765 struct module *mod = data;
9768 case MODULE_STATE_COMING:
9769 trace_module_add_evals(mod);
9771 case MODULE_STATE_GOING:
9772 trace_module_remove_evals(mod);
9779 static struct notifier_block trace_module_nb = {
9780 .notifier_call = trace_module_notify,
9783 #endif /* CONFIG_MODULES */
9785 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9790 init_tracer_tracefs(&global_trace, NULL);
9791 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9793 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9794 &global_trace, &tracing_thresh_fops);
9796 trace_create_file("README", TRACE_MODE_READ, NULL,
9797 NULL, &tracing_readme_fops);
9799 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9800 NULL, &tracing_saved_cmdlines_fops);
9802 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9803 NULL, &tracing_saved_cmdlines_size_fops);
9805 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9806 NULL, &tracing_saved_tgids_fops);
9808 trace_create_eval_file(NULL);
9810 #ifdef CONFIG_MODULES
9811 register_module_notifier(&trace_module_nb);
9814 #ifdef CONFIG_DYNAMIC_FTRACE
9815 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9816 NULL, &tracing_dyn_info_fops);
9819 create_trace_instances(NULL);
9821 update_tracer_options(&global_trace);
9824 static __init int tracer_init_tracefs(void)
9828 trace_access_lock_init();
9830 ret = tracing_init_dentry();
9835 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9836 queue_work(eval_map_wq, &tracerfs_init_work);
9838 tracer_init_tracefs_work_func(NULL);
9841 rv_init_interface();
9846 fs_initcall(tracer_init_tracefs);
9848 static int trace_panic_handler(struct notifier_block *this,
9849 unsigned long event, void *unused)
9851 if (ftrace_dump_on_oops)
9852 ftrace_dump(ftrace_dump_on_oops);
9856 static struct notifier_block trace_panic_notifier = {
9857 .notifier_call = trace_panic_handler,
9859 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9862 static int trace_die_handler(struct notifier_block *self,
9868 if (ftrace_dump_on_oops)
9869 ftrace_dump(ftrace_dump_on_oops);
9877 static struct notifier_block trace_die_notifier = {
9878 .notifier_call = trace_die_handler,
9883 * printk is set to a max of 1024; we really don't need it that big.
9884 * Nothing should be printing 1000 characters anyway.
9886 #define TRACE_MAX_PRINT 1000
9889 * Define here KERN_TRACE so that we have one place to modify
9890 * it if we decide to change what log level the ftrace dump
9893 #define KERN_TRACE KERN_EMERG
9896 trace_printk_seq(struct trace_seq *s)
9898 /* Probably should print a warning here. */
9899 if (s->seq.len >= TRACE_MAX_PRINT)
9900 s->seq.len = TRACE_MAX_PRINT;
9903 * More paranoid code. Although the buffer size is set to
9904 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9905 * an extra layer of protection.
9907 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9908 s->seq.len = s->seq.size - 1;
9910 /* should be NUL terminated, but we are paranoid. */
9911 s->buffer[s->seq.len] = 0;
9913 printk(KERN_TRACE "%s", s->buffer);
9918 void trace_init_global_iter(struct trace_iterator *iter)
9920 iter->tr = &global_trace;
9921 iter->trace = iter->tr->current_trace;
9922 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9923 iter->array_buffer = &global_trace.array_buffer;
9925 if (iter->trace && iter->trace->open)
9926 iter->trace->open(iter);
9928 /* Annotate start of buffers if we had overruns */
9929 if (ring_buffer_overruns(iter->array_buffer->buffer))
9930 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9932 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9933 if (trace_clocks[iter->tr->clock_id].in_ns)
9934 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9936 /* Cannot use kmalloc for iter.temp and iter.fmt */
9937 iter->temp = static_temp_buf;
9938 iter->temp_size = STATIC_TEMP_BUF_SIZE;
9939 iter->fmt = static_fmt_buf;
9940 iter->fmt_size = STATIC_FMT_BUF_SIZE;
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
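
/*
 * Illustrative usage (not part of the original file): ftrace_dump() is
 * exported for modules, so a hypothetical driver could flush the trace
 * buffers from its own fatal-error path, e.g.
 *
 *	if (fatal_hw_error)		// hypothetical condition
 *		ftrace_dump(DUMP_ALL);	// or DUMP_ORIG for this CPU only
 *
 * Keep in mind that the dump turns tracing off; re-enable it afterwards
 * with "echo 1 > tracing_on" if the system survives.
 */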
#define WRITE_BUFSIZE  4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');
			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
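
/*
 * Illustrative sketch (not part of the original file): callers typically
 * wire trace_parse_run_command() into a tracefs file's ->write handler
 * and supply a per-line callback. The names my_create_cmd and my_write
 * below are hypothetical:
 *
 *	static int my_create_cmd(const char *line)
 *	{
 *		// "line" is one input line, with its trailing '\n' and
 *		// any '#' comment already stripped by the parser above
 *		pr_debug("got command: %s\n", line);
 *		return 0;
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */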
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
void __init ftrace_boot_snapshot(void)
{
	if (snapshot_at_boot) {
		tracing_snapshot();
		internal_trace_puts("** Boot snapshot taken **\n");
	}
}
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
}
void __init trace_init(void)
{
	trace_event_init();
}
__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);