1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
55 #include "trace_output.h"
58 * On boot up, the ring buffer is set to the minimum size, so that
59 * we do not waste memory on systems that are not using tracing.
61 bool ring_buffer_expanded;
64 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring buffer, such as trace_printk(), could occur
 * at the same time, giving false positive or negative results.
 */
70 static bool __read_mostly tracing_selftest_running;
73 * If boot-time tracing including tracers/events via kernel cmdline
74 * is running, we do not want to run SELFTEST.
76 bool __read_mostly tracing_selftest_disabled;
78 #ifdef CONFIG_FTRACE_STARTUP_TEST
79 void __init disable_tracing_selftest(const char *reason)
81 if (!tracing_selftest_disabled) {
82 tracing_selftest_disabled = true;
83 pr_info("Ftrace startup test is disabled due to %s\n", reason);
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 /* For tracers that don't implement custom flags */
95 static struct tracer_opt dummy_tracer_opt[] = {
100 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
106 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
110 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
113 * Kill all tracing for good (never come back).
114 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
118 static int tracing_disabled = 1;
120 cpumask_var_t __read_mostly tracing_buffer_mask;
123 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
125 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
126 * is set, then ftrace_dump is called. This will output the contents
127 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 to dump the buffers of all CPUs.
 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 */
138 enum ftrace_dump_mode ftrace_dump_on_oops;
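/*
 * Illustrative sketch (not compiled as part of this file): besides the
 * oops path above, kernel code that has just detected a fatal condition
 * can dump the buffers itself with ftrace_dump(). DUMP_ALL and DUMP_ORIG
 * correspond to the 1 and 2 settings described above. The handler name
 * below is hypothetical.
 *
 *	static void my_fatal_condition_handler(void)
 *	{
 *		tracing_off();		// stop recording new events first
 *		ftrace_dump(DUMP_ORIG);	// dump only the current CPU's buffer
 *	}
 */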
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
147 unsigned long length;
150 union trace_eval_map_item;
152 struct trace_eval_map_tail {
 * "end" is first and points to NULL as it must be different
 * from "mod" or "eval_string".
 */
157 union trace_eval_map_item *next;
158 const char *end; /* points to NULL */
161 static DEFINE_MUTEX(trace_eval_mutex);
164 * The trace_eval_maps are saved in an array with two extra elements,
165 * one at the beginning, and one at the end. The beginning item contains
166 * the count of the saved maps (head.length), and the module they
167 * belong to if not built in (head.mod). The ending item contains a
168 * pointer to the next array of saved eval_map items.
170 union trace_eval_map_item {
171 struct trace_eval_map map;
172 struct trace_eval_map_head head;
173 struct trace_eval_map_tail tail;
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
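/*
 * Illustrative sketch (not compiled as part of this file): walking one
 * saved block laid out as described above, with item[0] the head,
 * item[1..length] the maps, and item[length + 1] the tail that links to
 * the next block. A real walker would hold trace_eval_mutex.
 *
 *	static void walk_eval_maps(union trace_eval_map_item *item)
 *	{
 *		while (item) {
 *			unsigned long len = item->head.length;
 *			unsigned long i;
 *
 *			for (i = 1; i <= len; i++)
 *				pr_info("%s: %s = %lu\n",
 *					item[i].map.system,
 *					item[i].map.eval_string,
 *					item[i].map.eval_value);
 *
 *			item = item[len + 1].tail.next;	// next saved block
 *		}
 *	}
 */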
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181 struct trace_buffer *buffer,
182 unsigned int trace_ctx);
184 #define MAX_TRACER_SIZE 100
185 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
186 static char *default_bootup_tracer;
188 static bool allocate_snapshot;
189 static bool snapshot_at_boot;
191 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_instance_index;
194 static int __init set_cmdline_ftrace(char *str)
196 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197 default_bootup_tracer = bootup_tracer_buf;
198 /* We are using ftrace early, expand it */
199 ring_buffer_expanded = true;
202 __setup("ftrace=", set_cmdline_ftrace);
204 static int __init set_ftrace_dump_on_oops(char *str)
206 if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 ftrace_dump_on_oops = DUMP_ALL;
211 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 ftrace_dump_on_oops = DUMP_ORIG;
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
220 static int __init stop_trace_on_warning(char *str)
222 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 __disable_trace_on_warning = 1;
226 __setup("traceoff_on_warning", stop_trace_on_warning);
228 static int __init boot_alloc_snapshot(char *str)
230 allocate_snapshot = true;
231 /* We also need the main ring buffer expanded */
232 ring_buffer_expanded = true;
235 __setup("alloc_snapshot", boot_alloc_snapshot);
238 static int __init boot_snapshot(char *str)
240 snapshot_at_boot = true;
241 boot_alloc_snapshot(str);
244 __setup("ftrace_boot_snapshot", boot_snapshot);
247 static int __init boot_instance(char *str)
249 char *slot = boot_instance_info + boot_instance_index;
250 int left = sizeof(boot_instance_info) - boot_instance_index;
253 if (strlen(str) >= left)
256 ret = snprintf(slot, left, "%s\t", str);
257 boot_instance_index += ret;
261 __setup("trace_instance=", boot_instance);
264 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
266 static int __init set_trace_boot_options(char *str)
268 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
271 __setup("trace_options=", set_trace_boot_options);
273 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
274 static char *trace_boot_clock __initdata;
276 static int __init set_trace_boot_clock(char *str)
278 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
279 trace_boot_clock = trace_boot_clock_buf;
282 __setup("trace_clock=", set_trace_boot_clock);
284 static int __init set_tracepoint_printk(char *str)
286 /* Ignore the "tp_printk_stop_on_boot" param */
290 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
291 tracepoint_printk = 1;
294 __setup("tp_printk", set_tracepoint_printk);
296 static int __init set_tracepoint_printk_stop(char *str)
298 tracepoint_printk_stop_on_boot = true;
301 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
303 unsigned long long ns2usecs(u64 nsec)
311 trace_process_export(struct trace_export *export,
312 struct ring_buffer_event *event, int flag)
314 struct trace_entry *entry;
315 unsigned int size = 0;
317 if (export->flags & flag) {
318 entry = ring_buffer_event_data(event);
319 size = ring_buffer_event_length(event);
320 export->write(export, entry, size);
324 static DEFINE_MUTEX(ftrace_export_lock);
326 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
328 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
329 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
330 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
332 static inline void ftrace_exports_enable(struct trace_export *export)
334 if (export->flags & TRACE_EXPORT_FUNCTION)
335 static_branch_inc(&trace_function_exports_enabled);
337 if (export->flags & TRACE_EXPORT_EVENT)
338 static_branch_inc(&trace_event_exports_enabled);
340 if (export->flags & TRACE_EXPORT_MARKER)
341 static_branch_inc(&trace_marker_exports_enabled);
344 static inline void ftrace_exports_disable(struct trace_export *export)
346 if (export->flags & TRACE_EXPORT_FUNCTION)
347 static_branch_dec(&trace_function_exports_enabled);
349 if (export->flags & TRACE_EXPORT_EVENT)
350 static_branch_dec(&trace_event_exports_enabled);
352 if (export->flags & TRACE_EXPORT_MARKER)
353 static_branch_dec(&trace_marker_exports_enabled);
356 static void ftrace_exports(struct ring_buffer_event *event, int flag)
358 struct trace_export *export;
360 preempt_disable_notrace();
362 export = rcu_dereference_raw_check(ftrace_exports_list);
364 trace_process_export(export, event, flag);
365 export = rcu_dereference_raw_check(export->next);
368 preempt_enable_notrace();
372 add_trace_export(struct trace_export **list, struct trace_export *export)
374 rcu_assign_pointer(export->next, *list);
 * We are adding export to the list, but another
 * CPU might be walking that list. We need to make sure
 * the export->next pointer is valid before another CPU sees
 * the export pointer included in the list.
 */
381 rcu_assign_pointer(*list, export);
385 rm_trace_export(struct trace_export **list, struct trace_export *export)
387 struct trace_export **p;
389 for (p = list; *p != NULL; p = &(*p)->next)
396 rcu_assign_pointer(*p, (*p)->next);
402 add_ftrace_export(struct trace_export **list, struct trace_export *export)
404 ftrace_exports_enable(export);
406 add_trace_export(list, export);
410 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
414 ret = rm_trace_export(list, export);
415 ftrace_exports_disable(export);
420 int register_ftrace_export(struct trace_export *export)
422 if (WARN_ON_ONCE(!export->write))
425 mutex_lock(&ftrace_export_lock);
427 add_ftrace_export(&ftrace_exports_list, export);
429 mutex_unlock(&ftrace_export_lock);
433 EXPORT_SYMBOL_GPL(register_ftrace_export);
435 int unregister_ftrace_export(struct trace_export *export)
439 mutex_lock(&ftrace_export_lock);
441 ret = rm_ftrace_export(&ftrace_exports_list, export);
443 mutex_unlock(&ftrace_export_lock);
447 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
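/*
 * Illustrative sketch of the export API above (hypothetical module code,
 * not part of this file): the write() callback receives the raw
 * ring-buffer payload of every exported event of the selected classes.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// e.g. push the binary event out to a device or the network
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
 *	};
 *
 *	// register_ftrace_export(&my_export);
 *	// ...
 *	// unregister_ftrace_export(&my_export);
 */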
449 /* trace_flags holds trace_options default values */
450 #define TRACE_DEFAULT_FLAGS \
451 (FUNCTION_DEFAULT_FLAGS | \
452 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
453 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
454 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
455 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
458 /* trace_options that are only supported by global_trace */
459 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
460 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
462 /* trace_flags that are default zero for instances */
463 #define ZEROED_TRACE_FLAGS \
464 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
467 * The global_trace is the descriptor that holds the top-level tracing
468 * buffers for the live tracing.
470 static struct trace_array global_trace = {
471 .trace_flags = TRACE_DEFAULT_FLAGS,
474 LIST_HEAD(ftrace_trace_arrays);
476 int trace_array_get(struct trace_array *this_tr)
478 struct trace_array *tr;
481 mutex_lock(&trace_types_lock);
482 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
489 mutex_unlock(&trace_types_lock);
494 static void __trace_array_put(struct trace_array *this_tr)
496 WARN_ON(!this_tr->ref);
501 * trace_array_put - Decrement the reference counter for this trace array.
502 * @this_tr : pointer to the trace array
504 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 */
509 void trace_array_put(struct trace_array *this_tr)
514 mutex_lock(&trace_types_lock);
515 __trace_array_put(this_tr);
516 mutex_unlock(&trace_types_lock);
518 EXPORT_SYMBOL_GPL(trace_array_put);
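/*
 * Illustrative sketch of the get/put pairing described above
 * (hypothetical caller, not part of this file). The instance name is
 * made up; trace_array_get_by_name() returns the instance with a
 * reference held (creating it if needed), and trace_array_put() drops
 * that reference when the caller is done.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENODEV;
 *	// ... use tr with the trace_array_*() APIs ...
 *	trace_array_put(tr);
 */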
520 int tracing_check_open_get_tr(struct trace_array *tr)
524 ret = security_locked_down(LOCKDOWN_TRACEFS);
528 if (tracing_disabled)
531 if (tr && trace_array_get(tr) < 0)
537 int call_filter_check_discard(struct trace_event_call *call, void *rec,
538 struct trace_buffer *buffer,
539 struct ring_buffer_event *event)
541 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
542 !filter_match_preds(call->filter, rec)) {
543 __trace_event_discard_commit(buffer, event);
551 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
552 * @filtered_pids: The list of pids to check
553 * @search_pid: The PID to find in @filtered_pids
555 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
558 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
560 return trace_pid_list_is_set(filtered_pids, search_pid);
564 * trace_ignore_this_task - should a task be ignored for tracing
565 * @filtered_pids: The list of pids to check
566 * @filtered_no_pids: The list of pids not to be traced
567 * @task: The task that should be ignored if not filtered
569 * Checks if @task should be traced or not from @filtered_pids.
570 * Returns true if @task should *NOT* be traced.
571 * Returns false if @task should be traced.
574 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
575 struct trace_pid_list *filtered_no_pids,
576 struct task_struct *task)
579 * If filtered_no_pids is not empty, and the task's pid is listed
580 * in filtered_no_pids, then return true.
581 * Otherwise, if filtered_pids is empty, that means we can
582 * trace all tasks. If it has content, then only trace pids
583 * within filtered_pids.
586 return (filtered_pids &&
587 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
589 trace_find_filtered_pid(filtered_no_pids, task->pid));
593 * trace_filter_add_remove_task - Add or remove a task from a pid_list
594 * @pid_list: The list to modify
595 * @self: The current task for fork or NULL for exit
596 * @task: The task to add or remove
598 * If adding a task, if @self is defined, the task is only added if @self
599 * is also included in @pid_list. This happens on fork and tasks should
600 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
604 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
605 struct task_struct *self,
606 struct task_struct *task)
611 /* For forks, we only add if the forking task is listed */
613 if (!trace_find_filtered_pid(pid_list, self->pid))
617 /* "self" is set for forks, and NULL for exits */
619 trace_pid_list_set(pid_list, task->pid);
621 trace_pid_list_clear(pid_list, task->pid);
625 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
626 * @pid_list: The pid list to show
627 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
628 * @pos: The position of the file
630 * This is used by the seq_file "next" operation to iterate the pids
631 * listed in a trace_pid_list structure.
633 * Returns the pid+1 as we want to display pid of zero, but NULL would
634 * stop the iteration.
636 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
638 long pid = (unsigned long)v;
643 /* pid already is +1 of the actual previous bit */
644 if (trace_pid_list_next(pid_list, pid, &next) < 0)
649 /* Return pid + 1 to allow zero to be represented */
650 return (void *)(pid + 1);
654 * trace_pid_start - Used for seq_file to start reading pid lists
655 * @pid_list: The pid list to show
656 * @pos: The position of the file
658 * This is used by seq_file "start" operation to start the iteration
661 * Returns the pid+1 as we want to display pid of zero, but NULL would
662 * stop the iteration.
664 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
670 if (trace_pid_list_first(pid_list, &first) < 0)
675 /* Return pid + 1 so that zero can be the exit value */
676 for (pid++; pid && l < *pos;
677 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
683 * trace_pid_show - show the current pid in seq_file processing
684 * @m: The seq_file structure to write into
685 * @v: A void pointer of the pid (+1) value to display
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
690 int trace_pid_show(struct seq_file *m, void *v)
692 unsigned long pid = (unsigned long)v - 1;
694 seq_printf(m, "%lu\n", pid);
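/*
 * Illustrative sketch (not part of this file): how trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are typically wired into a
 * seq_file, as the pid filter files do. "my_pid_list" is an assumed
 * struct trace_pid_list pointer owned by the file; the "my_" names are
 * hypothetical.
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void my_pids_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 */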
698 /* 128 should be much more than enough */
699 #define PID_BUF_SIZE 127
701 int trace_pid_write(struct trace_pid_list *filtered_pids,
702 struct trace_pid_list **new_pid_list,
703 const char __user *ubuf, size_t cnt)
705 struct trace_pid_list *pid_list;
706 struct trace_parser parser;
714 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 * Always recreate a new array. The write is an all-or-nothing
 * operation. Always create a new array when adding new pids by
 * the user. If the operation fails, then the current list is
 * not modified.
 */
723 pid_list = trace_pid_list_alloc();
725 trace_parser_put(&parser);
730 /* copy the current bits to the new max */
731 ret = trace_pid_list_first(filtered_pids, &pid);
733 trace_pid_list_set(pid_list, pid);
734 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
744 ret = trace_get_user(&parser, ubuf, cnt, &pos);
752 if (!trace_parser_loaded(&parser))
756 if (kstrtoul(parser.buffer, 0, &val))
761 if (trace_pid_list_set(pid_list, pid) < 0) {
767 trace_parser_clear(&parser);
770 trace_parser_put(&parser);
773 trace_pid_list_free(pid_list);
778 /* Cleared the list of pids */
779 trace_pid_list_free(pid_list);
783 *new_pid_list = pid_list;
788 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
792 /* Early boot up does not have a buffer yet */
794 return trace_clock_local();
796 ts = ring_buffer_time_stamp(buf->buffer);
797 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
802 u64 ftrace_now(int cpu)
804 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
808 * tracing_is_enabled - Show if global_trace has been enabled
810 * Shows if the global trace has been enabled or not. It uses the
811 * mirror flag "buffer_disabled" to be used in fast paths such as for
812 * the irqsoff tracer. But it may be inaccurate due to races. If you
813 * need to know the accurate state, use tracing_is_on() which is a little
814 * slower, but accurate.
816 int tracing_is_enabled(void)
819 * For quick access (irqsoff uses this in fast path), just
820 * return the mirror variable of the state of the ring buffer.
821 * It's a little racy, but we don't really care.
824 return !global_trace.buffer_disabled;
828 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If a dump on oops happens, it is much appreciated not to
 * have to wait for all that output. Anyway, this is both
 * boot-time and run-time configurable.
 */
837 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
839 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
841 /* trace_types holds a link list of available tracers. */
842 static struct tracer *trace_types __read_mostly;
845 * trace_types_lock is used to protect the trace_types list.
847 DEFINE_MUTEX(trace_types_lock);
 * serialize the access to the ring buffer
 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 * A) the page of the consumed events may become a normal page
 *    (not a reader page) in the ring buffer, and this page will be rewritten
 *    by the events producer.
 * B) the page of the consumed events may become a page for splice_read,
 *    and this page will be returned to the system.
 *
 * These primitives allow multi-process access to different per-CPU ring
 * buffers concurrently.
 *
 * These primitives don't distinguish between read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
872 static DECLARE_RWSEM(all_cpu_access_lock);
873 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
875 static inline void trace_access_lock(int cpu)
877 if (cpu == RING_BUFFER_ALL_CPUS) {
878 /* gain it for accessing the whole ring buffer. */
879 down_write(&all_cpu_access_lock);
881 /* gain it for accessing a cpu ring buffer. */
883 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
884 down_read(&all_cpu_access_lock);
886 /* Secondly block other access to this @cpu ring buffer. */
887 mutex_lock(&per_cpu(cpu_access_lock, cpu));
891 static inline void trace_access_unlock(int cpu)
893 if (cpu == RING_BUFFER_ALL_CPUS) {
894 up_write(&all_cpu_access_lock);
896 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
897 up_read(&all_cpu_access_lock);
901 static inline void trace_access_lock_init(void)
905 for_each_possible_cpu(cpu)
906 mutex_init(&per_cpu(cpu_access_lock, cpu));
911 static DEFINE_MUTEX(access_lock);
913 static inline void trace_access_lock(int cpu)
916 mutex_lock(&access_lock);
919 static inline void trace_access_unlock(int cpu)
922 mutex_unlock(&access_lock);
925 static inline void trace_access_lock_init(void)
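/*
 * Illustrative sketch of the locking scheme above (hypothetical reader,
 * not part of this file): consuming events of one CPU while excluding a
 * concurrent RING_BUFFER_ALL_CPUS reader. "buffer" is assumed to be the
 * tr->array_buffer.buffer being read.
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	// ... decode the event while its page cannot be recycled ...
 *	trace_access_unlock(cpu);
 */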
931 #ifdef CONFIG_STACKTRACE
932 static void __ftrace_trace_stack(struct trace_buffer *buffer,
933 unsigned int trace_ctx,
934 int skip, struct pt_regs *regs);
935 static inline void ftrace_trace_stack(struct trace_array *tr,
936 struct trace_buffer *buffer,
937 unsigned int trace_ctx,
938 int skip, struct pt_regs *regs);
941 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
942 unsigned int trace_ctx,
943 int skip, struct pt_regs *regs)
946 static inline void ftrace_trace_stack(struct trace_array *tr,
947 struct trace_buffer *buffer,
948 unsigned long trace_ctx,
949 int skip, struct pt_regs *regs)
955 static __always_inline void
956 trace_event_setup(struct ring_buffer_event *event,
957 int type, unsigned int trace_ctx)
959 struct trace_entry *ent = ring_buffer_event_data(event);
961 tracing_generic_entry_update(ent, type, trace_ctx);
964 static __always_inline struct ring_buffer_event *
965 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
968 unsigned int trace_ctx)
970 struct ring_buffer_event *event;
972 event = ring_buffer_lock_reserve(buffer, len);
974 trace_event_setup(event, type, trace_ctx);
979 void tracer_tracing_on(struct trace_array *tr)
981 if (tr->array_buffer.buffer)
982 ring_buffer_record_on(tr->array_buffer.buffer);
984 * This flag is looked at when buffers haven't been allocated
 * yet, or by some tracers (like irqsoff) that just want to
 * know if the ring buffer has been disabled, but it can handle
 * races where it gets disabled but we still do a record.
988 * As the check is in the fast path of the tracers, it is more
989 * important to be fast than accurate.
991 tr->buffer_disabled = 0;
992 /* Make the flag seen by readers */
997 * tracing_on - enable tracing buffers
999 * This function enables tracing buffers that may have been
1000 * disabled with tracing_off.
1002 void tracing_on(void)
1004 tracer_tracing_on(&global_trace);
1006 EXPORT_SYMBOL_GPL(tracing_on);
1009 static __always_inline void
1010 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1012 __this_cpu_write(trace_taskinfo_save, true);
1014 /* If this is the temp buffer, we need to commit fully */
1015 if (this_cpu_read(trace_buffered_event) == event) {
1016 /* Length is in event->array[0] */
1017 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1018 /* Release the temp buffer */
1019 this_cpu_dec(trace_buffered_event_cnt);
1020 /* ring_buffer_unlock_commit() enables preemption */
1021 preempt_enable_notrace();
1023 ring_buffer_unlock_commit(buffer);
1027 * __trace_puts - write a constant string into the trace buffer.
1028 * @ip: The address of the caller
1029 * @str: The constant string to write
1030 * @size: The size of the string.
1032 int __trace_puts(unsigned long ip, const char *str, int size)
1034 struct ring_buffer_event *event;
1035 struct trace_buffer *buffer;
1036 struct print_entry *entry;
1037 unsigned int trace_ctx;
1040 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1043 if (unlikely(tracing_selftest_running || tracing_disabled))
1046 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1048 trace_ctx = tracing_gen_ctx();
1049 buffer = global_trace.array_buffer.buffer;
1050 ring_buffer_nest_start(buffer);
1051 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1058 entry = ring_buffer_event_data(event);
1061 memcpy(&entry->buf, str, size);
1063 /* Add a newline if necessary */
1064 if (entry->buf[size - 1] != '\n') {
1065 entry->buf[size] = '\n';
1066 entry->buf[size + 1] = '\0';
1068 entry->buf[size] = '\0';
1070 __buffer_unlock_commit(buffer, event);
1071 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1073 ring_buffer_nest_end(buffer);
1076 EXPORT_SYMBOL_GPL(__trace_puts);
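/*
 * Illustrative sketch (not part of this file): writing a plain string
 * into the trace buffer. Kernel code normally goes through the
 * trace_puts() macro, which uses __trace_bputs() for compile-time
 * constant strings and falls back to __trace_puts() otherwise.
 *
 *	trace_puts("entering slow path\n");
 *
 *	// or, for a string built at run time ("buf" is hypothetical):
 *	__trace_puts(_THIS_IP_, buf, strlen(buf));
 */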
1079 * __trace_bputs - write the pointer to a constant string into trace buffer
1080 * @ip: The address of the caller
1081 * @str: The constant string to write to the buffer to
1083 int __trace_bputs(unsigned long ip, const char *str)
1085 struct ring_buffer_event *event;
1086 struct trace_buffer *buffer;
1087 struct bputs_entry *entry;
1088 unsigned int trace_ctx;
1089 int size = sizeof(struct bputs_entry);
1092 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1095 if (unlikely(tracing_selftest_running || tracing_disabled))
1098 trace_ctx = tracing_gen_ctx();
1099 buffer = global_trace.array_buffer.buffer;
1101 ring_buffer_nest_start(buffer);
1102 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1107 entry = ring_buffer_event_data(event);
1111 __buffer_unlock_commit(buffer, event);
1112 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1116 ring_buffer_nest_end(buffer);
1119 EXPORT_SYMBOL_GPL(__trace_bputs);
1121 #ifdef CONFIG_TRACER_SNAPSHOT
1122 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1125 struct tracer *tracer = tr->current_trace;
1126 unsigned long flags;
1129 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1130 internal_trace_puts("*** snapshot is being ignored ***\n");
1134 if (!tr->allocated_snapshot) {
1135 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1136 internal_trace_puts("*** stopping trace here! ***\n");
1141 /* Note, snapshot can not be used when the tracer uses it */
1142 if (tracer->use_max_tr) {
1143 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1144 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1148 local_irq_save(flags);
1149 update_max_tr(tr, current, smp_processor_id(), cond_data);
1150 local_irq_restore(flags);
1153 void tracing_snapshot_instance(struct trace_array *tr)
1155 tracing_snapshot_instance_cond(tr, NULL);
1159 * tracing_snapshot - take a snapshot of the current buffer.
1161 * This causes a swap between the snapshot buffer and the current live
1162 * tracing buffer. You can use this to take snapshots of the live
1163 * trace when some condition is triggered, but continue to trace.
 * Note, make sure to allocate the snapshot either with
 * tracing_snapshot_alloc(), or manually with:
 *	echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, tracing will be stopped,
 * basically making a permanent snapshot.
 */
1172 void tracing_snapshot(void)
1174 struct trace_array *tr = &global_trace;
1176 tracing_snapshot_instance(tr);
1178 EXPORT_SYMBOL_GPL(tracing_snapshot);
1181 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1182 * @tr: The tracing instance to snapshot
1183 * @cond_data: The data to be tested conditionally, and possibly saved
1185 * This is the same as tracing_snapshot() except that the snapshot is
1186 * conditional - the snapshot will only happen if the
1187 * cond_snapshot.update() implementation receiving the cond_data
1188 * returns true, which means that the trace array's cond_snapshot
1189 * update() operation used the cond_data to determine whether the
1190 * snapshot should be taken, and if it was, presumably saved it along
1191 * with the snapshot.
1193 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1195 tracing_snapshot_instance_cond(tr, cond_data);
1197 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1200 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1201 * @tr: The tracing instance
1203 * When the user enables a conditional snapshot using
1204 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1205 * with the snapshot. This accessor is used to retrieve it.
1207 * Should not be called from cond_snapshot.update(), since it takes
1208 * the tr->max_lock lock, which the code calling
1209 * cond_snapshot.update() has already done.
1211 * Returns the cond_data associated with the trace array's snapshot.
1213 void *tracing_cond_snapshot_data(struct trace_array *tr)
1215 void *cond_data = NULL;
1217 local_irq_disable();
1218 arch_spin_lock(&tr->max_lock);
1220 if (tr->cond_snapshot)
1221 cond_data = tr->cond_snapshot->cond_data;
1223 arch_spin_unlock(&tr->max_lock);
1228 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1230 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1231 struct array_buffer *size_buf, int cpu_id);
1232 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1234 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1238 if (!tr->allocated_snapshot) {
1240 /* allocate spare buffer */
1241 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1242 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1246 tr->allocated_snapshot = true;
1252 static void free_snapshot(struct trace_array *tr)
 * We don't free the ring buffer; instead, we resize it because
 * the max_tr ring buffer has some state (e.g. ring->clock) and
 * we want to preserve it.
 */
1259 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1260 set_buffer_entries(&tr->max_buffer, 1);
1261 tracing_reset_online_cpus(&tr->max_buffer);
1262 tr->allocated_snapshot = false;
1266 * tracing_alloc_snapshot - allocate snapshot buffer.
1268 * This only allocates the snapshot buffer if it isn't already
1269 * allocated - it doesn't also take a snapshot.
1271 * This is meant to be used in cases where the snapshot buffer needs
1272 * to be set up for events that can't sleep but need to be able to
1273 * trigger a snapshot.
1275 int tracing_alloc_snapshot(void)
1277 struct trace_array *tr = &global_trace;
1280 ret = tracing_alloc_snapshot_instance(tr);
1285 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1288 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1290 * This is similar to tracing_snapshot(), but it will allocate the
1291 * snapshot buffer if it isn't already allocated. Use this only
1292 * where it is safe to sleep, as the allocation may sleep.
1294 * This causes a swap between the snapshot buffer and the current live
1295 * tracing buffer. You can use this to take snapshots of the live
1296 * trace when some condition is triggered, but continue to trace.
1298 void tracing_snapshot_alloc(void)
1302 ret = tracing_alloc_snapshot();
1308 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
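/*
 * Illustrative sketch (not part of this file): allocate the snapshot
 * buffer from a context that may sleep, then trigger snapshots later
 * from atomic context when the interesting condition hits. The
 * condition check is hypothetical.
 *
 *	// at init time (may sleep):
 *	if (tracing_alloc_snapshot())
 *		return -ENOMEM;
 *
 *	// later, e.g. from an interrupt handler:
 *	if (something_went_wrong())
 *		tracing_snapshot();
 */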
1311 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1312 * @tr: The tracing instance
1313 * @cond_data: User data to associate with the snapshot
1314 * @update: Implementation of the cond_snapshot update function
1316 * Check whether the conditional snapshot for the given instance has
1317 * already been enabled, or if the current tracer is already using a
1318 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1319 * save the cond_data and update function inside.
1321 * Returns 0 if successful, error otherwise.
1323 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1324 cond_update_fn_t update)
1326 struct cond_snapshot *cond_snapshot;
1329 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1333 cond_snapshot->cond_data = cond_data;
1334 cond_snapshot->update = update;
1336 mutex_lock(&trace_types_lock);
1338 ret = tracing_alloc_snapshot_instance(tr);
1342 if (tr->current_trace->use_max_tr) {
1348 * The cond_snapshot can only change to NULL without the
1349 * trace_types_lock. We don't care if we race with it going
1350 * to NULL, but we want to make sure that it's not set to
1351 * something other than NULL when we get here, which we can
1352 * do safely with only holding the trace_types_lock and not
1353 * having to take the max_lock.
1355 if (tr->cond_snapshot) {
1360 local_irq_disable();
1361 arch_spin_lock(&tr->max_lock);
1362 tr->cond_snapshot = cond_snapshot;
1363 arch_spin_unlock(&tr->max_lock);
1366 mutex_unlock(&trace_types_lock);
1371 mutex_unlock(&trace_types_lock);
1372 kfree(cond_snapshot);
1375 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1378 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1379 * @tr: The tracing instance
1381 * Check whether the conditional snapshot for the given instance is
1382 * enabled; if so, free the cond_snapshot associated with it,
1383 * otherwise return -EINVAL.
1385 * Returns 0 if successful, error otherwise.
1387 int tracing_snapshot_cond_disable(struct trace_array *tr)
1391 local_irq_disable();
1392 arch_spin_lock(&tr->max_lock);
1394 if (!tr->cond_snapshot)
1397 kfree(tr->cond_snapshot);
1398 tr->cond_snapshot = NULL;
1401 arch_spin_unlock(&tr->max_lock);
1406 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
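/*
 * Illustrative sketch of the conditional snapshot API above (hypothetical
 * caller, not part of this file): only take the snapshot when the value
 * observed on the hot path exceeds a threshold stashed in cond_data.
 * my_current_value() and my_threshold are made-up names.
 *
 *	static unsigned long my_threshold = 100;
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long *threshold = cond_data;
 *
 *		return my_current_value() > *threshold;	// true => snapshot
 *	}
 *
 *	// setup (may sleep):
 *	ret = tracing_snapshot_cond_enable(tr, &my_threshold, my_update);
 *
 *	// hot path:
 *	tracing_snapshot_cond(tr, &my_threshold);
 *
 *	// teardown:
 *	tracing_snapshot_cond_disable(tr);
 */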
1408 void tracing_snapshot(void)
1410 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1412 EXPORT_SYMBOL_GPL(tracing_snapshot);
1413 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1415 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1418 int tracing_alloc_snapshot(void)
1420 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1423 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1424 void tracing_snapshot_alloc(void)
1429 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1430 void *tracing_cond_snapshot_data(struct trace_array *tr)
1434 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1435 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1439 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1440 int tracing_snapshot_cond_disable(struct trace_array *tr)
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1445 #define free_snapshot(tr) do { } while (0)
1446 #endif /* CONFIG_TRACER_SNAPSHOT */
1448 void tracer_tracing_off(struct trace_array *tr)
1450 if (tr->array_buffer.buffer)
1451 ring_buffer_record_off(tr->array_buffer.buffer);
1453 * This flag is looked at when buffers haven't been allocated
 * yet, or by some tracers (like irqsoff) that just want to
 * know if the ring buffer has been disabled, but it can handle
 * races where it gets disabled but we still do a record.
1457 * As the check is in the fast path of the tracers, it is more
1458 * important to be fast than accurate.
1460 tr->buffer_disabled = 1;
1461 /* Make the flag seen by readers */
1466 * tracing_off - turn off tracing buffers
1468 * This function stops the tracing buffers from recording data.
1469 * It does not disable any overhead the tracers themselves may
1470 * be causing. This function simply causes all recording to
1471 * the ring buffers to fail.
1473 void tracing_off(void)
1475 tracer_tracing_off(&global_trace);
1477 EXPORT_SYMBOL_GPL(tracing_off);
1479 void disable_trace_on_warning(void)
1481 if (__disable_trace_on_warning) {
1482 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1483 "Disabling tracing due to warning\n");
 * tracer_tracing_is_on - show the real state of the ring buffer
 * @tr : the trace array to check
 *
 * Shows the real state of the trace array's ring buffer: enabled or not.
1494 bool tracer_tracing_is_on(struct trace_array *tr)
1496 if (tr->array_buffer.buffer)
1497 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1498 return !tr->buffer_disabled;
1502 * tracing_is_on - show state of ring buffers enabled
1504 int tracing_is_on(void)
1506 return tracer_tracing_is_on(&global_trace);
1508 EXPORT_SYMBOL_GPL(tracing_is_on);
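/*
 * Illustrative sketch (not part of this file): freezing the ring buffers
 * as soon as a bug is detected, so the events leading up to it stay
 * available in the "trace" file. The corruption check is hypothetical.
 *
 *	if (data_is_corrupted(obj)) {
 *		tracing_off();		// stop all recording, keep the data
 *		pr_err("corruption detected, tracing stopped\n");
 *	}
 */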
1510 static int __init set_buf_size(char *str)
1512 unsigned long buf_size;
1516 buf_size = memparse(str, &str);
1518 * nr_entries can not be zero and the startup
1519 * tests require some buffer space. Therefore
1520 * ensure we have at least 4096 bytes of buffer.
1522 trace_buf_size = max(4096UL, buf_size);
1525 __setup("trace_buf_size=", set_buf_size);
1527 static int __init set_tracing_thresh(char *str)
1529 unsigned long threshold;
1534 ret = kstrtoul(str, 0, &threshold);
1537 tracing_thresh = threshold * 1000;
1540 __setup("tracing_thresh=", set_tracing_thresh);
1542 unsigned long nsecs_to_usecs(unsigned long nsecs)
1544 return nsecs / 1000;
1548 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1549 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1550 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1551 * of strings in the order that the evals (enum) were defined.
1556 /* These must match the bit positions in trace_iterator_flags */
1557 static const char *trace_options[] = {
1565 int in_ns; /* is this clock in nanoseconds? */
1566 } trace_clocks[] = {
1567 { trace_clock_local, "local", 1 },
1568 { trace_clock_global, "global", 1 },
1569 { trace_clock_counter, "counter", 0 },
1570 { trace_clock_jiffies, "uptime", 0 },
1571 { trace_clock, "perf", 1 },
1572 { ktime_get_mono_fast_ns, "mono", 1 },
1573 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1574 { ktime_get_boot_fast_ns, "boot", 1 },
1575 { ktime_get_tai_fast_ns, "tai", 1 },
1579 bool trace_clock_in_ns(struct trace_array *tr)
1581 if (trace_clocks[tr->clock_id].in_ns)
1588 * trace_parser_get_init - gets the buffer for trace parser
1590 int trace_parser_get_init(struct trace_parser *parser, int size)
1592 memset(parser, 0, sizeof(*parser));
1594 parser->buffer = kmalloc(size, GFP_KERNEL);
1595 if (!parser->buffer)
1598 parser->size = size;
1603 * trace_parser_put - frees the buffer for trace parser
1605 void trace_parser_put(struct trace_parser *parser)
1607 kfree(parser->buffer);
1608 parser->buffer = NULL;
1612 * trace_get_user - reads the user input string separated by space
1613 * (matched by isspace(ch))
1615 * For each string found the 'struct trace_parser' is updated,
1616 * and the function returns.
1618 * Returns number of bytes read.
1620 * See kernel/trace/trace.h for 'struct trace_parser' details.
1622 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1623 size_t cnt, loff_t *ppos)
1630 trace_parser_clear(parser);
1632 ret = get_user(ch, ubuf++);
1640 * The parser is not finished with the last write,
1641 * continue reading the user input without skipping spaces.
1643 if (!parser->cont) {
1644 /* skip white space */
1645 while (cnt && isspace(ch)) {
1646 ret = get_user(ch, ubuf++);
1655 /* only spaces were written */
1656 if (isspace(ch) || !ch) {
1663 /* read the non-space input */
1664 while (cnt && !isspace(ch) && ch) {
1665 if (parser->idx < parser->size - 1)
1666 parser->buffer[parser->idx++] = ch;
1671 ret = get_user(ch, ubuf++);
1678 /* We either got finished input or we have to wait for another call. */
1679 if (isspace(ch) || !ch) {
1680 parser->buffer[parser->idx] = 0;
1681 parser->cont = false;
1682 } else if (parser->idx < parser->size - 1) {
1683 parser->cont = true;
1684 parser->buffer[parser->idx++] = ch;
1685 /* Make sure the parsed string always terminates with '\0'. */
1686 parser->buffer[parser->idx] = 0;
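/*
 * Illustrative sketch (not part of this file): a typical write() handler
 * loop using the parser above to pull space-separated tokens out of user
 * space. "ubuf" and "cnt" come from the file_operations write() call;
 * my_handle_token() is hypothetical.
 *
 *	struct trace_parser parser;
 *	loff_t pos;
 *	int ret;
 *
 *	if (trace_parser_get_init(&parser, 64))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *
 *		my_handle_token(parser.buffer);
 *
 *		ubuf += ret;
 *		cnt -= ret;
 *		trace_parser_clear(&parser);
 *	}
 *
 *	trace_parser_put(&parser);
 */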
1699 /* TODO add a seq_buf_to_buffer() */
1700 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1704 if (trace_seq_used(s) <= s->seq.readpos)
1707 len = trace_seq_used(s) - s->seq.readpos;
1710 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1712 s->seq.readpos += cnt;
1716 unsigned long __read_mostly tracing_thresh;
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719 static const struct file_operations tracing_max_lat_fops;
1721 #ifdef LATENCY_FS_NOTIFY
1723 static struct workqueue_struct *fsnotify_wq;
1725 static void latency_fsnotify_workfn(struct work_struct *work)
1727 struct trace_array *tr = container_of(work, struct trace_array,
1729 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1732 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1734 struct trace_array *tr = container_of(iwork, struct trace_array,
1736 queue_work(fsnotify_wq, &tr->fsnotify_work);
1739 static void trace_create_maxlat_file(struct trace_array *tr,
1740 struct dentry *d_tracer)
1742 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1743 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1744 tr->d_max_latency = trace_create_file("tracing_max_latency",
1746 d_tracer, &tr->max_latency,
1747 &tracing_max_lat_fops);
1750 __init static int latency_fsnotify_init(void)
1752 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1753 WQ_UNBOUND | WQ_HIGHPRI, 0);
1755 pr_err("Unable to allocate tr_max_lat_wq\n");
1761 late_initcall_sync(latency_fsnotify_init);
1763 void latency_fsnotify(struct trace_array *tr)
1768 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1769 * possible that we are called from __schedule() or do_idle(), which
1770 * could cause a deadlock.
1772 irq_work_queue(&tr->fsnotify_irqwork);
1775 #else /* !LATENCY_FS_NOTIFY */
1777 #define trace_create_maxlat_file(tr, d_tracer) \
1778 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1779 d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1784 * Copy the new maximum trace into the separate maximum-trace
1785 * structure. (this way the maximum trace is permanently saved,
1786 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1789 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1791 struct array_buffer *trace_buf = &tr->array_buffer;
1792 struct array_buffer *max_buf = &tr->max_buffer;
1793 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1794 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1797 max_buf->time_start = data->preempt_timestamp;
1799 max_data->saved_latency = tr->max_latency;
1800 max_data->critical_start = data->critical_start;
1801 max_data->critical_end = data->critical_end;
1803 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1804 max_data->pid = tsk->pid;
1806 * If tsk == current, then use current_uid(), as that does not use
1807 * RCU. The irq tracer can be called out of RCU scope.
1810 max_data->uid = current_uid();
1812 max_data->uid = task_uid(tsk);
1814 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1815 max_data->policy = tsk->policy;
1816 max_data->rt_priority = tsk->rt_priority;
1818 /* record this tasks comm */
1819 tracing_record_cmdline(tsk);
1820 latency_fsnotify(tr);
1824 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1826 * @tsk: the task with the latency
1827 * @cpu: The cpu that initiated the trace.
1828 * @cond_data: User data associated with a conditional snapshot
1830 * Flip the buffers between the @tr and the max_tr and record information
1831 * about which task was the cause of this latency.
1834 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1840 WARN_ON_ONCE(!irqs_disabled());
1842 if (!tr->allocated_snapshot) {
1843 /* Only the nop tracer should hit this when disabling */
1844 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1848 arch_spin_lock(&tr->max_lock);
1850 /* Inherit the recordable setting from array_buffer */
1851 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1852 ring_buffer_record_on(tr->max_buffer.buffer);
1854 ring_buffer_record_off(tr->max_buffer.buffer);
1856 #ifdef CONFIG_TRACER_SNAPSHOT
1857 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1858 arch_spin_unlock(&tr->max_lock);
1862 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1864 __update_max_tr(tr, tsk, cpu);
1866 arch_spin_unlock(&tr->max_lock);
1870 * update_max_tr_single - only copy one trace over, and reset the rest
1872 * @tsk: task with the latency
1873 * @cpu: the cpu of the buffer to copy.
1875 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1878 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1885 WARN_ON_ONCE(!irqs_disabled());
1886 if (!tr->allocated_snapshot) {
1887 /* Only the nop tracer should hit this when disabling */
1888 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1892 arch_spin_lock(&tr->max_lock);
1894 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1896 if (ret == -EBUSY) {
1898 * We failed to swap the buffer due to a commit taking
1899 * place on this CPU. We fail to record, but we reset
1900 * the max trace buffer (no one writes directly to it)
1901 * and flag that it failed.
1903 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1904 "Failed to swap buffers due to commit in progress\n");
1907 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1909 __update_max_tr(tr, tsk, cpu);
1910 arch_spin_unlock(&tr->max_lock);
1913 #endif /* CONFIG_TRACER_MAX_TRACE */
1915 static int wait_on_pipe(struct trace_iterator *iter, int full)
1917 /* Iterators are static, they should be filled or empty */
1918 if (trace_buffer_iter(iter, iter->cpu_file))
1921 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1925 #ifdef CONFIG_FTRACE_STARTUP_TEST
1926 static bool selftests_can_run;
1928 struct trace_selftests {
1929 struct list_head list;
1930 struct tracer *type;
1933 static LIST_HEAD(postponed_selftests);
1935 static int save_selftest(struct tracer *type)
1937 struct trace_selftests *selftest;
1939 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1943 selftest->type = type;
1944 list_add(&selftest->list, &postponed_selftests);
1948 static int run_tracer_selftest(struct tracer *type)
1950 struct trace_array *tr = &global_trace;
1951 struct tracer *saved_tracer = tr->current_trace;
1954 if (!type->selftest || tracing_selftest_disabled)
1958 * If a tracer registers early in boot up (before scheduling is
1959 * initialized and such), then do not run its selftests yet.
1960 * Instead, run it a little later in the boot process.
1962 if (!selftests_can_run)
1963 return save_selftest(type);
1965 if (!tracing_is_on()) {
1966 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1972 * Run a selftest on this tracer.
1973 * Here we reset the trace buffer, and set the current
1974 * tracer to be this tracer. The tracer can then run some
1975 * internal tracing to verify that everything is in order.
1976 * If we fail, we do not register this tracer.
1978 tracing_reset_online_cpus(&tr->array_buffer);
1980 tr->current_trace = type;
1982 #ifdef CONFIG_TRACER_MAX_TRACE
1983 if (type->use_max_tr) {
1984 /* If we expanded the buffers, make sure the max is expanded too */
1985 if (ring_buffer_expanded)
1986 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1987 RING_BUFFER_ALL_CPUS);
1988 tr->allocated_snapshot = true;
1992 /* the test is responsible for initializing and enabling */
1993 pr_info("Testing tracer %s: ", type->name);
1994 ret = type->selftest(type, tr);
1995 /* the test is responsible for resetting too */
1996 tr->current_trace = saved_tracer;
1998 printk(KERN_CONT "FAILED!\n");
1999 /* Add the warning after printing 'FAILED' */
2003 /* Only reset on passing, to avoid touching corrupted buffers */
2004 tracing_reset_online_cpus(&tr->array_buffer);
2006 #ifdef CONFIG_TRACER_MAX_TRACE
2007 if (type->use_max_tr) {
2008 tr->allocated_snapshot = false;
2010 /* Shrink the max buffer again */
2011 if (ring_buffer_expanded)
2012 ring_buffer_resize(tr->max_buffer.buffer, 1,
2013 RING_BUFFER_ALL_CPUS);
2017 printk(KERN_CONT "PASSED\n");
2021 static __init int init_trace_selftests(void)
2023 struct trace_selftests *p, *n;
2024 struct tracer *t, **last;
2027 selftests_can_run = true;
2029 mutex_lock(&trace_types_lock);
2031 if (list_empty(&postponed_selftests))
2034 pr_info("Running postponed tracer tests:\n");
2036 tracing_selftest_running = true;
2037 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2038 /* This loop can take minutes when sanitizers are enabled, so
 * let's make sure we allow RCU processing.
2042 ret = run_tracer_selftest(p->type);
2043 /* If the test fails, then warn and remove from available_tracers */
2045 WARN(1, "tracer: %s failed selftest, disabling\n",
2047 last = &trace_types;
2048 for (t = trace_types; t; t = t->next) {
2059 tracing_selftest_running = false;
2062 mutex_unlock(&trace_types_lock);
2066 core_initcall(init_trace_selftests);
2068 static inline int run_tracer_selftest(struct tracer *type)
2072 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2074 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2076 static void __init apply_trace_boot_options(void);
2079 * register_tracer - register a tracer with the ftrace system.
2080 * @type: the plugin for the tracer
2082 * Register a new plugin tracer.
2084 int __init register_tracer(struct tracer *type)
2090 pr_info("Tracer must have a name\n");
2094 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2095 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2099 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2100 pr_warn("Can not register tracer %s due to lockdown\n",
2105 mutex_lock(&trace_types_lock);
2107 tracing_selftest_running = true;
2109 for (t = trace_types; t; t = t->next) {
2110 if (strcmp(type->name, t->name) == 0) {
2112 pr_info("Tracer %s already registered\n",
2119 if (!type->set_flag)
2120 type->set_flag = &dummy_set_flag;
/* Allocate a dummy tracer_flags */
2123 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2128 type->flags->val = 0;
2129 type->flags->opts = dummy_tracer_opt;
2131 if (!type->flags->opts)
2132 type->flags->opts = dummy_tracer_opt;
2134 /* store the tracer for __set_tracer_option */
2135 type->flags->trace = type;
2137 ret = run_tracer_selftest(type);
2141 type->next = trace_types;
2143 add_tracer_options(&global_trace, type);
2146 tracing_selftest_running = false;
2147 mutex_unlock(&trace_types_lock);
2149 if (ret || !default_bootup_tracer)
2152 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2155 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2156 /* Do we want this tracer to start on bootup? */
2157 tracing_set_tracer(&global_trace, type->name);
2158 default_bootup_tracer = NULL;
2160 apply_trace_boot_options();
2162 /* disable other selftests, since this will break it. */
2163 disable_tracing_selftest("running a tracer");
2169 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2171 struct trace_buffer *buffer = buf->buffer;
2176 ring_buffer_record_disable(buffer);
2178 /* Make sure all commits have finished */
2180 ring_buffer_reset_cpu(buffer, cpu);
2182 ring_buffer_record_enable(buffer);
2185 void tracing_reset_online_cpus(struct array_buffer *buf)
2187 struct trace_buffer *buffer = buf->buffer;
2192 ring_buffer_record_disable(buffer);
2194 /* Make sure all commits have finished */
2197 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2199 ring_buffer_reset_online_cpus(buffer);
2201 ring_buffer_record_enable(buffer);
2204 /* Must have trace_types_lock held */
2205 void tracing_reset_all_online_cpus_unlocked(void)
2207 struct trace_array *tr;
2209 lockdep_assert_held(&trace_types_lock);
2211 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2212 if (!tr->clear_trace)
2214 tr->clear_trace = false;
2215 tracing_reset_online_cpus(&tr->array_buffer);
2216 #ifdef CONFIG_TRACER_MAX_TRACE
2217 tracing_reset_online_cpus(&tr->max_buffer);
2222 void tracing_reset_all_online_cpus(void)
2224 mutex_lock(&trace_types_lock);
2225 tracing_reset_all_online_cpus_unlocked();
2226 mutex_unlock(&trace_types_lock);
2230 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2231 * is the tgid last observed corresponding to pid=i.
2233 static int *tgid_map;
2235 /* The maximum valid index into tgid_map. */
2236 static size_t tgid_map_max;
2238 #define SAVED_CMDLINES_DEFAULT 128
2239 #define NO_CMDLINE_MAP UINT_MAX
2241 * Preemption must be disabled before acquiring trace_cmdline_lock.
2242 * The various trace_arrays' max_lock must be acquired in a context
2243 * where interrupt is disabled.
2245 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2246 struct saved_cmdlines_buffer {
2247 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2248 unsigned *map_cmdline_to_pid;
2249 unsigned cmdline_num;
2251 char *saved_cmdlines;
2253 static struct saved_cmdlines_buffer *savedcmd;
2255 static inline char *get_saved_cmdlines(int idx)
2257 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2260 static inline void set_cmdline(int idx, const char *cmdline)
2262 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2265 static int allocate_cmdlines_buffer(unsigned int val,
2266 struct saved_cmdlines_buffer *s)
2268 s->map_cmdline_to_pid = kmalloc_array(val,
2269 sizeof(*s->map_cmdline_to_pid),
2271 if (!s->map_cmdline_to_pid)
2274 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2275 if (!s->saved_cmdlines) {
2276 kfree(s->map_cmdline_to_pid);
2281 s->cmdline_num = val;
2282 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2283 sizeof(s->map_pid_to_cmdline));
2284 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2285 val * sizeof(*s->map_cmdline_to_pid));
2290 static int trace_create_savedcmd(void)
2294 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2298 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2308 int is_tracing_stopped(void)
2310 return global_trace.stop_count;
2314 * tracing_start - quick start of the tracer
2316 * If tracing is enabled but was stopped by tracing_stop,
2317 * this will start the tracer back up.
2319 void tracing_start(void)
2321 struct trace_buffer *buffer;
2322 unsigned long flags;
2324 if (tracing_disabled)
2327 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2328 if (--global_trace.stop_count) {
2329 if (global_trace.stop_count < 0) {
2330 /* Someone screwed up their debugging */
2332 global_trace.stop_count = 0;
2337 /* Prevent the buffers from switching */
2338 arch_spin_lock(&global_trace.max_lock);
2340 buffer = global_trace.array_buffer.buffer;
2342 ring_buffer_record_enable(buffer);
2344 #ifdef CONFIG_TRACER_MAX_TRACE
2345 buffer = global_trace.max_buffer.buffer;
2347 ring_buffer_record_enable(buffer);
2350 arch_spin_unlock(&global_trace.max_lock);
2353 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2356 static void tracing_start_tr(struct trace_array *tr)
2358 struct trace_buffer *buffer;
2359 unsigned long flags;
2361 if (tracing_disabled)
2364 /* If global, we need to also start the max tracer */
2365 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2366 return tracing_start();
2368 raw_spin_lock_irqsave(&tr->start_lock, flags);
2370 if (--tr->stop_count) {
2371 if (tr->stop_count < 0) {
2372 /* Someone screwed up their debugging */
2379 buffer = tr->array_buffer.buffer;
2381 ring_buffer_record_enable(buffer);
2384 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2388 * tracing_stop - quick stop of the tracer
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
2393 void tracing_stop(void)
2395 struct trace_buffer *buffer;
2396 unsigned long flags;
2398 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2399 if (global_trace.stop_count++)
2402 /* Prevent the buffers from switching */
2403 arch_spin_lock(&global_trace.max_lock);
2405 buffer = global_trace.array_buffer.buffer;
2407 ring_buffer_record_disable(buffer);
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410 buffer = global_trace.max_buffer.buffer;
2412 ring_buffer_record_disable(buffer);
2415 arch_spin_unlock(&global_trace.max_lock);
2418 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
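/*
 * Illustrative sketch (not part of the original file): as the kernel-doc
 * above says, tracing_stop() is meant to be paired with tracing_start(),
 * e.g. to freeze the ring buffer around a condition of interest:
 *
 *	if (suspicious_condition) {
 *		tracing_stop();
 *		...inspect or dump the buffer, then resume...
 *		tracing_start();
 *	}
 *
 * The stop_count handling above lets such pairs nest and balance out.
 */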
2421 static void tracing_stop_tr(struct trace_array *tr)
2423 struct trace_buffer *buffer;
2424 unsigned long flags;
2426 /* If global, we need to also stop the max tracer */
2427 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2428 return tracing_stop();
2430 raw_spin_lock_irqsave(&tr->start_lock, flags);
2431 if (tr->stop_count++)
2434 buffer = tr->array_buffer.buffer;
2436 ring_buffer_record_disable(buffer);
2439 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2442 static int trace_save_cmdline(struct task_struct *tsk)
2446 /* treat recording of idle task as a success */
2450 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2453 * It's not the end of the world if we don't get
2454 * the lock, but we also don't want to spin
2455 * nor do we want to disable interrupts,
2456 * so if we miss here, then better luck next time.
2458 * This is called within the scheduler and wakeup paths, so interrupts
2459 * had better be disabled and the run queue lock held.
2461 lockdep_assert_preemption_disabled();
2462 if (!arch_spin_trylock(&trace_cmdline_lock))
2465 idx = savedcmd->map_pid_to_cmdline[tpid];
2466 if (idx == NO_CMDLINE_MAP) {
2467 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2469 savedcmd->map_pid_to_cmdline[tpid] = idx;
2470 savedcmd->cmdline_idx = idx;
2473 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2474 set_cmdline(idx, tsk->comm);
2476 arch_spin_unlock(&trace_cmdline_lock);
2481 static void __trace_find_cmdline(int pid, char comm[])
2487 strcpy(comm, "<idle>");
2491 if (WARN_ON_ONCE(pid < 0)) {
2492 strcpy(comm, "<XXX>");
2496 tpid = pid & (PID_MAX_DEFAULT - 1);
2497 map = savedcmd->map_pid_to_cmdline[tpid];
2498 if (map != NO_CMDLINE_MAP) {
2499 tpid = savedcmd->map_cmdline_to_pid[map];
2501 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2505 strcpy(comm, "<...>");
2508 void trace_find_cmdline(int pid, char comm[])
2511 arch_spin_lock(&trace_cmdline_lock);
2513 __trace_find_cmdline(pid, comm);
2515 arch_spin_unlock(&trace_cmdline_lock);
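/*
 * Worked example (illustrative): with the default PID_MAX_DEFAULT of
 * 0x8000, pid 40000 hashes to slot 40000 & 0x7fff == 7232. If a later
 * task whose pid hashes to the same slot records its comm, the stored
 * map_cmdline_to_pid entry no longer matches the original pid, and the
 * lookup above falls back to "<...>" instead of reporting a stale comm.
 */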
2519 static int *trace_find_tgid_ptr(int pid)
2522 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2523 * if we observe a non-NULL tgid_map then we also observe the correct
2526 int *map = smp_load_acquire(&tgid_map);
2528 if (unlikely(!map || pid > tgid_map_max))
2534 int trace_find_tgid(int pid)
2536 int *ptr = trace_find_tgid_ptr(pid);
2538 return ptr ? *ptr : 0;
2541 static int trace_save_tgid(struct task_struct *tsk)
2545 /* treat recording of idle task as a success */
2549 ptr = trace_find_tgid_ptr(tsk->pid);
2557 static bool tracing_record_taskinfo_skip(int flags)
2559 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2561 if (!__this_cpu_read(trace_taskinfo_save))
2567 * tracing_record_taskinfo - record the task info of a task
2569 * @task: task to record
2570 * @flags: TRACE_RECORD_CMDLINE for recording comm
2571 * TRACE_RECORD_TGID for recording tgid
2573 void tracing_record_taskinfo(struct task_struct *task, int flags)
2577 if (tracing_record_taskinfo_skip(flags))
2581 * Record as much task information as possible. If some fail, continue
2582 * to try to record the others.
2584 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2585 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2587 /* If recording any information failed, retry again soon. */
2591 __this_cpu_write(trace_taskinfo_save, false);
2595 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2597 * @prev: previous task during sched_switch
2598 * @next: next task during sched_switch
2599 * @flags: TRACE_RECORD_CMDLINE for recording comm
2600 * TRACE_RECORD_TGID for recording tgid
2602 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2603 struct task_struct *next, int flags)
2607 if (tracing_record_taskinfo_skip(flags))
2611 * Record as much task information as possible. If some fail, continue
2612 * to try to record the others.
2614 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2615 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2616 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2617 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2619 /* If recording any information failed, retry again soon. */
2623 __this_cpu_write(trace_taskinfo_save, false);
2626 /* Helpers to record a specific task information */
2627 void tracing_record_cmdline(struct task_struct *task)
2629 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2632 void tracing_record_tgid(struct task_struct *task)
2634 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2638 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2639 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2640 * simplifies those functions and keeps them in sync.
2642 enum print_line_t trace_handle_return(struct trace_seq *s)
2644 return trace_seq_has_overflowed(s) ?
2645 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2647 EXPORT_SYMBOL_GPL(trace_handle_return);
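/*
 * Illustrative sketch (not from this file): a trace_event output callback
 * typically tail-calls trace_handle_return() so seq overflow is reported
 * consistently:
 *
 *	static enum print_line_t foo_output(struct trace_iterator *iter,
 *					    int flags, struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "foo happened\n");
 *		return trace_handle_return(s);
 *	}
 */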
2649 static unsigned short migration_disable_value(void)
2651 #if defined(CONFIG_SMP)
2652 return current->migration_disabled;
2658 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2660 unsigned int trace_flags = irqs_status;
2663 pc = preempt_count();
2666 trace_flags |= TRACE_FLAG_NMI;
2667 if (pc & HARDIRQ_MASK)
2668 trace_flags |= TRACE_FLAG_HARDIRQ;
2669 if (in_serving_softirq())
2670 trace_flags |= TRACE_FLAG_SOFTIRQ;
2671 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2672 trace_flags |= TRACE_FLAG_BH_OFF;
2674 if (tif_need_resched())
2675 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2676 if (test_preempt_need_resched())
2677 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2678 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2679 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
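/*
 * Layout of the value built above, with a worked example (illustrative):
 *
 *	bits  0- 3: preemption depth, clamped to 15
 *	bits  4- 7: migrate-disable depth, clamped to 15
 *	bits 16-23: TRACE_FLAG_* bits (irq/softirq/NMI/resched state)
 *
 * e.g. preempt_count() == 2 with migration disabled once, while serving a
 * hard interrupt, packs to (TRACE_FLAG_HARDIRQ << 16) | (1 << 4) | 2,
 * plus whatever bits the caller passed in via @irqs_status.
 */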
2682 struct ring_buffer_event *
2683 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2686 unsigned int trace_ctx)
2688 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2691 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2692 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2693 static int trace_buffered_event_ref;
2696 * trace_buffered_event_enable - enable buffering events
2698 * When events are being filtered, it is quicker to use a temporary
2699 * buffer to write the event data into if there's a likely chance
2700 * that it will not be committed. The discard of the ring buffer
2701 * is not as fast as committing, and is much slower than copying into a temporary buffer.
2704 * When an event is to be filtered, allocate per cpu buffers to
2705 * write the event data into, and if the event is filtered and discarded
2706 * it is simply dropped; otherwise the entire data is committed to the ring buffer.
2709 void trace_buffered_event_enable(void)
2711 struct ring_buffer_event *event;
2715 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2717 if (trace_buffered_event_ref++)
2720 for_each_tracing_cpu(cpu) {
2721 page = alloc_pages_node(cpu_to_node(cpu),
2722 GFP_KERNEL | __GFP_NORETRY, 0);
2726 event = page_address(page);
2727 memset(event, 0, sizeof(*event));
2729 per_cpu(trace_buffered_event, cpu) = event;
2732 if (cpu == smp_processor_id() &&
2733 __this_cpu_read(trace_buffered_event) !=
2734 per_cpu(trace_buffered_event, cpu))
2741 trace_buffered_event_disable();
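/*
 * Usage note (illustrative): enabling and disabling is reference counted
 * and must happen under event_mutex, so a filter-install path typically
 * looks like:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...install the filter...
 *	mutex_unlock(&event_mutex);
 *
 * with trace_buffered_event_disable() called from the matching removal path.
 */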
2744 static void enable_trace_buffered_event(void *data)
2746 /* Probably not needed, but do it anyway */
2748 this_cpu_dec(trace_buffered_event_cnt);
2751 static void disable_trace_buffered_event(void *data)
2753 this_cpu_inc(trace_buffered_event_cnt);
2757 * trace_buffered_event_disable - disable buffering events
2759 * When a filter is removed, it is faster to not use the buffered
2760 * events, and to commit directly into the ring buffer. Free up
2761 * the temp buffers when there are no more users. This requires
2762 * special synchronization with current events.
2764 void trace_buffered_event_disable(void)
2768 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2770 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2773 if (--trace_buffered_event_ref)
2777 /* For each CPU, set the buffer as used. */
2778 smp_call_function_many(tracing_buffer_mask,
2779 disable_trace_buffered_event, NULL, 1);
2782 /* Wait for all current users to finish */
2785 for_each_tracing_cpu(cpu) {
2786 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2787 per_cpu(trace_buffered_event, cpu) = NULL;
2790 * Make sure trace_buffered_event is NULL before clearing
2791 * trace_buffered_event_cnt.
2796 /* Do the work on each cpu */
2797 smp_call_function_many(tracing_buffer_mask,
2798 enable_trace_buffered_event, NULL, 1);
2802 static struct trace_buffer *temp_buffer;
2804 struct ring_buffer_event *
2805 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2806 struct trace_event_file *trace_file,
2807 int type, unsigned long len,
2808 unsigned int trace_ctx)
2810 struct ring_buffer_event *entry;
2811 struct trace_array *tr = trace_file->tr;
2814 *current_rb = tr->array_buffer.buffer;
2816 if (!tr->no_filter_buffering_ref &&
2817 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2818 preempt_disable_notrace();
2820 * Filtering is on, so try to use the per cpu buffer first.
2821 * This buffer will simulate a ring_buffer_event,
2822 * where the type_len is zero and the array[0] will
2823 * hold the full length.
2824 * (see include/linux/ring_buffer.h for details on
2825 * how the ring_buffer_event is structured).
2827 * Using a temp buffer during filtering and copying it
2828 * on a matched filter is quicker than writing directly
2829 * into the ring buffer and then discarding it when
2830 * it doesn't match. That is because the discard
2831 * requires several atomic operations to get right.
2832 * Copying on match and doing nothing on a failed match
2833 * is still quicker than no copy on match, but having
2834 * to discard out of the ring buffer on a failed match.
2836 if ((entry = __this_cpu_read(trace_buffered_event))) {
2837 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2839 val = this_cpu_inc_return(trace_buffered_event_cnt);
2842 * Preemption is disabled, but interrupts and NMIs
2843 * can still come in now. If that happens after
2844 * the above increment, then it will have to go
2845 * back to the old method of allocating the event
2846 * on the ring buffer, and if the filter fails, it
2847 * will have to call ring_buffer_discard_commit()
2850 * Need to also check the unlikely case that the
2851 * length is bigger than the temp buffer size.
2852 * If that happens, then the reserve is pretty much
2853 * guaranteed to fail, as the ring buffer currently
2854 * only allows events less than a page. But that may
2855 * change in the future, so let the ring buffer reserve
2856 * handle the failure in that case.
2858 if (val == 1 && likely(len <= max_len)) {
2859 trace_event_setup(entry, type, trace_ctx);
2860 entry->array[0] = len;
2861 /* Return with preemption disabled */
2864 this_cpu_dec(trace_buffered_event_cnt);
2866 /* __trace_buffer_lock_reserve() disables preemption */
2867 preempt_enable_notrace();
2870 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2873 * If tracing is off, but we have triggers enabled
2874 * we still need to look at the event data. Use the temp_buffer
2875 * to store the trace event for the trigger to use. It's recursion
2876 * safe and will not be recorded anywhere.
2878 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2879 *current_rb = temp_buffer;
2880 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2885 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2887 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2888 static DEFINE_MUTEX(tracepoint_printk_mutex);
2890 static void output_printk(struct trace_event_buffer *fbuffer)
2892 struct trace_event_call *event_call;
2893 struct trace_event_file *file;
2894 struct trace_event *event;
2895 unsigned long flags;
2896 struct trace_iterator *iter = tracepoint_print_iter;
2898 /* We should never get here if iter is NULL */
2899 if (WARN_ON_ONCE(!iter))
2902 event_call = fbuffer->trace_file->event_call;
2903 if (!event_call || !event_call->event.funcs ||
2904 !event_call->event.funcs->trace)
2907 file = fbuffer->trace_file;
2908 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2909 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2910 !filter_match_preds(file->filter, fbuffer->entry)))
2913 event = &fbuffer->trace_file->event_call->event;
2915 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2916 trace_seq_init(&iter->seq);
2917 iter->ent = fbuffer->entry;
2918 event_call->event.funcs->trace(iter, 0, event);
2919 trace_seq_putc(&iter->seq, 0);
2920 printk("%s", iter->seq.buffer);
2922 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2925 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2926 void *buffer, size_t *lenp,
2929 int save_tracepoint_printk;
2932 mutex_lock(&tracepoint_printk_mutex);
2933 save_tracepoint_printk = tracepoint_printk;
2935 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2938 * This will force exiting early, as tracepoint_printk
2939 * is always zero when tracepoint_print_iter is not allocated.
2941 if (!tracepoint_print_iter)
2942 tracepoint_printk = 0;
2944 if (save_tracepoint_printk == tracepoint_printk)
2947 if (tracepoint_printk)
2948 static_key_enable(&tracepoint_printk_key.key);
2950 static_key_disable(&tracepoint_printk_key.key);
2953 mutex_unlock(&tracepoint_printk_mutex);
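/*
 * Illustrative usage (not part of this file): the static key managed above
 * is driven by the kernel.tracepoint_printk sysctl, so tp_printk behaviour
 * can be toggled at run time:
 *
 *	# echo 1 > /proc/sys/kernel/tracepoint_printk
 *	# echo 0 > /proc/sys/kernel/tracepoint_printk
 *
 * This only works if the print iterator was allocated at boot via the
 * "tp_printk" command line option; otherwise the handler forces the
 * value back to zero.
 */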
2958 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2960 enum event_trigger_type tt = ETT_NONE;
2961 struct trace_event_file *file = fbuffer->trace_file;
2963 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2964 fbuffer->entry, &tt))
2967 if (static_key_false(&tracepoint_printk_key.key))
2968 output_printk(fbuffer);
2970 if (static_branch_unlikely(&trace_event_exports_enabled))
2971 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2973 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2974 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2978 event_triggers_post_call(file, tt);
2981 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
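/*
 * Rough sketch (illustrative, mirroring what the TRACE_EVENT() generated
 * probes do; "struct foo_entry" and its "value" field are placeholders):
 * reserve space, fill in the entry, then commit, letting the helpers
 * above handle filters, triggers and tp_printk:
 *
 *	struct trace_event_buffer fbuffer;
 *	struct foo_entry *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->value = value;
 *	trace_event_buffer_commit(&fbuffer);
 */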
2986 * trace_buffer_unlock_commit_regs()
2987 * trace_event_buffer_commit()
2988 * trace_event_raw_event_xxx()
2990 # define STACK_SKIP 3
2992 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2993 struct trace_buffer *buffer,
2994 struct ring_buffer_event *event,
2995 unsigned int trace_ctx,
2996 struct pt_regs *regs)
2998 __buffer_unlock_commit(buffer, event);
3001 * If regs is not set, then skip the necessary functions.
3002 * Note, we can still get here via blktrace, wakeup tracer
3003 * and mmiotrace, but that's ok if they lose a function or
3004 * two. They are not that meaningful.
3006 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3007 ftrace_trace_userstack(tr, buffer, trace_ctx);
3011 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3014 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3015 struct ring_buffer_event *event)
3017 __buffer_unlock_commit(buffer, event);
3021 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3022 parent_ip, unsigned int trace_ctx)
3024 struct trace_event_call *call = &event_function;
3025 struct trace_buffer *buffer = tr->array_buffer.buffer;
3026 struct ring_buffer_event *event;
3027 struct ftrace_entry *entry;
3029 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3033 entry = ring_buffer_event_data(event);
3035 entry->parent_ip = parent_ip;
3037 if (!call_filter_check_discard(call, entry, buffer, event)) {
3038 if (static_branch_unlikely(&trace_function_exports_enabled))
3039 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3040 __buffer_unlock_commit(buffer, event);
3044 #ifdef CONFIG_STACKTRACE
3046 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3047 #define FTRACE_KSTACK_NESTING 4
3049 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3051 struct ftrace_stack {
3052 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3056 struct ftrace_stacks {
3057 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3060 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3061 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3063 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3064 unsigned int trace_ctx,
3065 int skip, struct pt_regs *regs)
3067 struct trace_event_call *call = &event_kernel_stack;
3068 struct ring_buffer_event *event;
3069 unsigned int size, nr_entries;
3070 struct ftrace_stack *fstack;
3071 struct stack_entry *entry;
3075 * Add one, for this function and the call to save_stack_trace().
3076 * If regs is set, then these functions will not be in the way.
3078 #ifndef CONFIG_UNWINDER_ORC
3083 preempt_disable_notrace();
3085 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3087 /* This should never happen. If it does, yell once and skip */
3088 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3092 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3093 * interrupt will either see the value pre increment or post
3094 * increment. If the interrupt happens pre increment it will have
3095 * restored the counter when it returns. We just need a barrier to
3096 * keep gcc from moving things around.
3100 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3101 size = ARRAY_SIZE(fstack->calls);
3104 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3107 nr_entries = stack_trace_save(fstack->calls, size, skip);
3110 size = nr_entries * sizeof(unsigned long);
3111 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3112 (sizeof(*entry) - sizeof(entry->caller)) + size,
3116 entry = ring_buffer_event_data(event);
3118 memcpy(&entry->caller, fstack->calls, size);
3119 entry->size = nr_entries;
3121 if (!call_filter_check_discard(call, entry, buffer, event))
3122 __buffer_unlock_commit(buffer, event);
3125 /* Again, don't let gcc optimize things here */
3127 __this_cpu_dec(ftrace_stack_reserve);
3128 preempt_enable_notrace();
3132 static inline void ftrace_trace_stack(struct trace_array *tr,
3133 struct trace_buffer *buffer,
3134 unsigned int trace_ctx,
3135 int skip, struct pt_regs *regs)
3137 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3140 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3143 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3146 struct trace_buffer *buffer = tr->array_buffer.buffer;
3148 if (rcu_is_watching()) {
3149 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3154 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3155 * but if the above rcu_is_watching() failed, then the NMI
3156 * triggered someplace critical, and ct_irq_enter() should
3157 * not be called from NMI.
3159 if (unlikely(in_nmi()))
3162 ct_irq_enter_irqson();
3163 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3164 ct_irq_exit_irqson();
3168 * trace_dump_stack - record a stack back trace in the trace buffer
3169 * @skip: Number of functions to skip (helper handlers)
3171 void trace_dump_stack(int skip)
3173 if (tracing_disabled || tracing_selftest_running)
3176 #ifndef CONFIG_UNWINDER_ORC
3177 /* Skip 1 to skip this function. */
3180 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3181 tracing_gen_ctx(), skip, NULL);
3183 EXPORT_SYMBOL_GPL(trace_dump_stack);
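/*
 * Illustrative use (not from this file): a debugging site can record the
 * current backtrace into the trace buffer instead of spamming the console:
 *
 *	if (WARN_ON_ONCE(broken_state))
 *		trace_dump_stack(0);
 *
 * A non-zero @skip hides that many callers, which helps when the call is
 * buried inside a helper function. "broken_state" stands for whatever
 * condition the caller wants to catch.
 */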
3185 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3186 static DEFINE_PER_CPU(int, user_stack_count);
3189 ftrace_trace_userstack(struct trace_array *tr,
3190 struct trace_buffer *buffer, unsigned int trace_ctx)
3192 struct trace_event_call *call = &event_user_stack;
3193 struct ring_buffer_event *event;
3194 struct userstack_entry *entry;
3196 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3200 * NMIs can not handle page faults, even with fix ups.
3201 * Saving the user stack can (and often does) fault.
3203 if (unlikely(in_nmi()))
3207 * prevent recursion, since the user stack tracing may
3208 * trigger other kernel events.
3211 if (__this_cpu_read(user_stack_count))
3214 __this_cpu_inc(user_stack_count);
3216 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3217 sizeof(*entry), trace_ctx);
3219 goto out_drop_count;
3220 entry = ring_buffer_event_data(event);
3222 entry->tgid = current->tgid;
3223 memset(&entry->caller, 0, sizeof(entry->caller));
3225 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3226 if (!call_filter_check_discard(call, entry, buffer, event))
3227 __buffer_unlock_commit(buffer, event);
3230 __this_cpu_dec(user_stack_count);
3234 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3235 static void ftrace_trace_userstack(struct trace_array *tr,
3236 struct trace_buffer *buffer,
3237 unsigned int trace_ctx)
3240 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3242 #endif /* CONFIG_STACKTRACE */
3245 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3246 unsigned long long delta)
3248 entry->bottom_delta_ts = delta & U32_MAX;
3249 entry->top_delta_ts = (delta >> 32);
3252 void trace_last_func_repeats(struct trace_array *tr,
3253 struct trace_func_repeats *last_info,
3254 unsigned int trace_ctx)
3256 struct trace_buffer *buffer = tr->array_buffer.buffer;
3257 struct func_repeats_entry *entry;
3258 struct ring_buffer_event *event;
3261 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3262 sizeof(*entry), trace_ctx);
3266 delta = ring_buffer_event_time_stamp(buffer, event) -
3267 last_info->ts_last_call;
3269 entry = ring_buffer_event_data(event);
3270 entry->ip = last_info->ip;
3271 entry->parent_ip = last_info->parent_ip;
3272 entry->count = last_info->count;
3273 func_repeats_set_delta_ts(entry, delta);
3275 __buffer_unlock_commit(buffer, event);
3278 /* created for use with alloc_percpu */
3279 struct trace_buffer_struct {
3281 char buffer[4][TRACE_BUF_SIZE];
3284 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3287 * This allows for lockless recording. If we're nested too deeply, then
3288 * this returns NULL.
3290 static char *get_trace_buf(void)
3292 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3294 if (!trace_percpu_buffer || buffer->nesting >= 4)
3299 /* Interrupts must see nesting incremented before we use the buffer */
3301 return &buffer->buffer[buffer->nesting - 1][0];
3304 static void put_trace_buf(void)
3306 /* Don't let the decrement of nesting leak before this */
3308 this_cpu_dec(trace_percpu_buffer->nesting);
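/*
 * Usage note (illustrative): get_trace_buf() and put_trace_buf() must be
 * paired in the same context, with preemption disabled by the caller as
 * trace_vbprintk()/__trace_array_vprintk() below do:
 *
 *	char *tbuffer = get_trace_buf();
 *
 *	if (tbuffer) {
 *		...format into tbuffer...
 *		put_trace_buf();
 *	}
 *
 * The four nesting slots cover normal, softirq, hardirq and NMI context
 * interrupting one another on the same CPU.
 */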
3311 static int alloc_percpu_trace_buffer(void)
3313 struct trace_buffer_struct __percpu *buffers;
3315 if (trace_percpu_buffer)
3318 buffers = alloc_percpu(struct trace_buffer_struct);
3319 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3322 trace_percpu_buffer = buffers;
3326 static int buffers_allocated;
3328 void trace_printk_init_buffers(void)
3330 if (buffers_allocated)
3333 if (alloc_percpu_trace_buffer())
3336 /* trace_printk() is for debug use only. Don't use it in production. */
3339 pr_warn("**********************************************************\n");
3340 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3342 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3344 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3345 pr_warn("** unsafe for production use. **\n");
3347 pr_warn("** If you see this message and you are not debugging **\n");
3348 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3350 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3351 pr_warn("**********************************************************\n");
3353 /* Expand the buffers to set size */
3354 tracing_update_buffers();
3356 buffers_allocated = 1;
3359 * trace_printk_init_buffers() can be called by modules.
3360 * If that happens, then we need to start cmdline recording
3361 * directly here. If the global_trace.buffer is already
3362 * allocated here, then this was called by module code.
3364 if (global_trace.array_buffer.buffer)
3365 tracing_start_cmdline_record();
3367 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
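/*
 * Illustrative usage (not part of this file): trace_printk() is the debug
 * front end that ends up in trace_vbprintk()/trace_vprintk() below:
 *
 *	trace_printk("processed %d packets on cpu %d\n", count, cpu);
 *
 * where "count" and "cpu" stand for the caller's own variables. The output
 * appears in the top level buffer (e.g. /sys/kernel/tracing/trace), not in
 * the kernel log.
 */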
3369 void trace_printk_start_comm(void)
3371 /* Start tracing comms if trace printk is set */
3372 if (!buffers_allocated)
3374 tracing_start_cmdline_record();
3377 static void trace_printk_start_stop_comm(int enabled)
3379 if (!buffers_allocated)
3383 tracing_start_cmdline_record();
3385 tracing_stop_cmdline_record();
3389 * trace_vbprintk - write binary msg to tracing buffer
3390 * @ip: The address of the caller
3391 * @fmt: The string format to write to the buffer
3392 * @args: Arguments for @fmt
3394 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3396 struct trace_event_call *call = &event_bprint;
3397 struct ring_buffer_event *event;
3398 struct trace_buffer *buffer;
3399 struct trace_array *tr = &global_trace;
3400 struct bprint_entry *entry;
3401 unsigned int trace_ctx;
3405 if (unlikely(tracing_selftest_running || tracing_disabled))
3408 /* Don't pollute graph traces with trace_vprintk internals */
3409 pause_graph_tracing();
3411 trace_ctx = tracing_gen_ctx();
3412 preempt_disable_notrace();
3414 tbuffer = get_trace_buf();
3420 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3422 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3425 size = sizeof(*entry) + sizeof(u32) * len;
3426 buffer = tr->array_buffer.buffer;
3427 ring_buffer_nest_start(buffer);
3428 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3432 entry = ring_buffer_event_data(event);
3436 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3437 if (!call_filter_check_discard(call, entry, buffer, event)) {
3438 __buffer_unlock_commit(buffer, event);
3439 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3443 ring_buffer_nest_end(buffer);
3448 preempt_enable_notrace();
3449 unpause_graph_tracing();
3453 EXPORT_SYMBOL_GPL(trace_vbprintk);
3457 __trace_array_vprintk(struct trace_buffer *buffer,
3458 unsigned long ip, const char *fmt, va_list args)
3460 struct trace_event_call *call = &event_print;
3461 struct ring_buffer_event *event;
3463 struct print_entry *entry;
3464 unsigned int trace_ctx;
3467 if (tracing_disabled || tracing_selftest_running)
3470 /* Don't pollute graph traces with trace_vprintk internals */
3471 pause_graph_tracing();
3473 trace_ctx = tracing_gen_ctx();
3474 preempt_disable_notrace();
3477 tbuffer = get_trace_buf();
3483 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3485 size = sizeof(*entry) + len + 1;
3486 ring_buffer_nest_start(buffer);
3487 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3491 entry = ring_buffer_event_data(event);
3494 memcpy(&entry->buf, tbuffer, len + 1);
3495 if (!call_filter_check_discard(call, entry, buffer, event)) {
3496 __buffer_unlock_commit(buffer, event);
3497 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3501 ring_buffer_nest_end(buffer);
3505 preempt_enable_notrace();
3506 unpause_graph_tracing();
3512 int trace_array_vprintk(struct trace_array *tr,
3513 unsigned long ip, const char *fmt, va_list args)
3515 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3519 * trace_array_printk - Print a message to a specific instance
3520 * @tr: The instance trace_array descriptor
3521 * @ip: The instruction pointer that this is called from.
3522 * @fmt: The format to print (printf format)
3524 * If a subsystem sets up its own instance, it has the right to
3525 * printk strings into its tracing instance buffer using this
3526 * function. Note, this function will not write into the top level
3527 * buffer (use trace_printk() for that), as the top level buffer
3528 * should only contain events that can be individually disabled.
3529 * trace_printk() is only used for debugging a kernel, and should
3530 * never be incorporated into normal use.
3532 * trace_array_printk() can be used, as it will not add noise to the
3533 * top level tracing buffer.
3535 * Note, trace_array_init_printk() must be called on @tr before this
3539 int trace_array_printk(struct trace_array *tr,
3540 unsigned long ip, const char *fmt, ...)
3548 /* This is only allowed for created instances */
3549 if (tr == &global_trace)
3552 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3556 ret = trace_array_vprintk(tr, ip, fmt, ap);
3560 EXPORT_SYMBOL_GPL(trace_array_printk);
3563 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3564 * @tr: The trace array to initialize the buffers for
3566 * As trace_array_printk() only writes into instances, they are OK to
3567 * have in the kernel (unlike trace_printk()). This needs to be called
3568 * before trace_array_printk() can be used on a trace_array.
3570 int trace_array_init_printk(struct trace_array *tr)
3575 /* This is only allowed for created instances */
3576 if (tr == &global_trace)
3579 return alloc_percpu_trace_buffer();
3581 EXPORT_SYMBOL_GPL(trace_array_init_printk);
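/*
 * Illustrative sketch (the instance name "example" and the message are
 * made up): a module that owns its own instance can combine the two
 * helpers above like this:
 *
 *	struct trace_array *tr = trace_array_get_by_name("example");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "widget %d ready\n", id);
 *	if (tr)
 *		trace_array_put(tr);
 */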
3584 int trace_array_printk_buf(struct trace_buffer *buffer,
3585 unsigned long ip, const char *fmt, ...)
3590 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3594 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3600 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3602 return trace_array_vprintk(&global_trace, ip, fmt, args);
3604 EXPORT_SYMBOL_GPL(trace_vprintk);
3606 static void trace_iterator_increment(struct trace_iterator *iter)
3608 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3612 ring_buffer_iter_advance(buf_iter);
3615 static struct trace_entry *
3616 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3617 unsigned long *lost_events)
3619 struct ring_buffer_event *event;
3620 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3623 event = ring_buffer_iter_peek(buf_iter, ts);
3625 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3626 (unsigned long)-1 : 0;
3628 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3633 iter->ent_size = ring_buffer_event_length(event);
3634 return ring_buffer_event_data(event);
3640 static struct trace_entry *
3641 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3642 unsigned long *missing_events, u64 *ent_ts)
3644 struct trace_buffer *buffer = iter->array_buffer->buffer;
3645 struct trace_entry *ent, *next = NULL;
3646 unsigned long lost_events = 0, next_lost = 0;
3647 int cpu_file = iter->cpu_file;
3648 u64 next_ts = 0, ts;
3654 * If we are in a per_cpu trace file, don't bother iterating over
3655 * all CPUs; peek at that one directly.
3657 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3658 if (ring_buffer_empty_cpu(buffer, cpu_file))
3660 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3662 *ent_cpu = cpu_file;
3667 for_each_tracing_cpu(cpu) {
3669 if (ring_buffer_empty_cpu(buffer, cpu))
3672 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3675 * Pick the entry with the smallest timestamp:
3677 if (ent && (!next || ts < next_ts)) {
3681 next_lost = lost_events;
3682 next_size = iter->ent_size;
3686 iter->ent_size = next_size;
3689 *ent_cpu = next_cpu;
3695 *missing_events = next_lost;
3700 #define STATIC_FMT_BUF_SIZE 128
3701 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3703 static char *trace_iter_expand_format(struct trace_iterator *iter)
3708 * iter->tr is NULL when used with tp_printk, which makes
3709 * this get called where it is not safe to call krealloc().
3711 if (!iter->tr || iter->fmt == static_fmt_buf)
3714 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3717 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3724 /* Returns true if the string is safe to dereference from an event */
3725 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3728 unsigned long addr = (unsigned long)str;
3729 struct trace_event *trace_event;
3730 struct trace_event_call *event;
3732 /* Ignore strings with no length */
3736 /* OK if part of the event data */
3737 if ((addr >= (unsigned long)iter->ent) &&
3738 (addr < (unsigned long)iter->ent + iter->ent_size))
3741 /* OK if part of the temp seq buffer */
3742 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3743 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3746 /* Core rodata can not be freed */
3747 if (is_kernel_rodata(addr))
3750 if (trace_is_tracepoint_string(str))
3754 * Now this could be a module event, referencing core module
3755 * data, which is OK.
3760 trace_event = ftrace_find_event(iter->ent->type);
3764 event = container_of(trace_event, struct trace_event_call, event);
3765 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3768 /* Would rather have rodata, but this will suffice */
3769 if (within_module_core(addr, event->module))
3775 static const char *show_buffer(struct trace_seq *s)
3777 struct seq_buf *seq = &s->seq;
3779 seq_buf_terminate(seq);
3784 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3786 static int test_can_verify_check(const char *fmt, ...)
3793 * The verifier depends on vsnprintf() modifying the va_list
3794 * passed to it, where it is sent as a reference. Some architectures
3795 * (like x86_32) pass it by value, which means that vsnprintf()
3796 * does not modify the va_list passed to it, and the verifier
3797 * would then need to be able to understand all the values that
3798 * vsnprintf can use. If it is passed by value, then the verifier is disabled.
3802 vsnprintf(buf, 16, "%d", ap);
3803 ret = va_arg(ap, int);
3809 static void test_can_verify(void)
3811 if (!test_can_verify_check("%d %d", 0, 1)) {
3812 pr_info("trace event string verifier disabled\n");
3813 static_branch_inc(&trace_no_verify);
3818 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3819 * @iter: The iterator that holds the seq buffer and the event being printed
3820 * @fmt: The format used to print the event
3821 * @ap: The va_list holding the data to print from @fmt.
3823 * This writes the data into the @iter->seq buffer using the data from
3824 * @fmt and @ap. If the format has a %s, then the source of the string
3825 * is examined to make sure it is safe to print, otherwise it will
3826 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string.
3829 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3832 const char *p = fmt;
3836 if (WARN_ON_ONCE(!fmt))
3839 if (static_branch_unlikely(&trace_no_verify))
3842 /* Don't bother checking when doing a ftrace_dump() */
3843 if (iter->fmt == static_fmt_buf)
3852 /* We only care about %s and variants */
3853 for (i = 0; p[i]; i++) {
3854 if (i + 1 >= iter->fmt_size) {
3856 * If we can't expand the copy buffer, just print the format as-is.
3859 if (!trace_iter_expand_format(iter))
3863 if (p[i] == '\\' && p[i+1]) {
3868 /* Need to test cases like %08.*s */
3869 for (j = 1; p[i+j]; j++) {
3870 if (isdigit(p[i+j]) ||
3873 if (p[i+j] == '*') {
3885 /* If no %s found then just print normally */
3889 /* Copy up to the %s, and print that */
3890 strncpy(iter->fmt, p, i);
3891 iter->fmt[i] = '\0';
3892 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3895 * If iter->seq is full, the above call no longer guarantees
3896 * that ap is in sync with fmt processing, and further calls
3897 * to va_arg() can return wrong positional arguments.
3899 * Ensure that ap is no longer used in this case.
3901 if (iter->seq.full) {
3907 len = va_arg(ap, int);
3909 /* The ap now points to the string data of the %s */
3910 str = va_arg(ap, const char *);
3913 * If you hit this warning, it is likely that the
3914 * trace event in question used %s on a string that
3915 * was saved at the time of the event, but may not be
3916 * around when the trace is read. Use __string(),
3917 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3918 * instead. See samples/trace_events/trace-events-sample.h
3921 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3922 "fmt: '%s' current_buffer: '%s'",
3923 fmt, show_buffer(&iter->seq))) {
3926 /* Try to safely read the string */
3928 if (len + 1 > iter->fmt_size)
3929 len = iter->fmt_size - 1;
3932 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3936 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3940 trace_seq_printf(&iter->seq, "(0x%px)", str);
3942 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3944 str = "[UNSAFE-MEMORY]";
3945 strcpy(iter->fmt, "%s");
3947 strncpy(iter->fmt, p + i, j + 1);
3948 iter->fmt[j+1] = '\0';
3951 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3953 trace_seq_printf(&iter->seq, iter->fmt, str);
3959 trace_seq_vprintf(&iter->seq, p, ap);
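/*
 * Illustrative sketch of the pattern the warning above recommends, based
 * on samples/trace_events/trace-events-sample.h: copy the string into the
 * event at record time with __string()/__assign_str() and print it with
 * __get_str(), so nothing is dereferenced when the trace is read:
 *
 *	TRACE_EVENT(foo_bar,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name, name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */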
3962 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3964 const char *p, *new_fmt;
3967 if (WARN_ON_ONCE(!fmt))
3970 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3974 new_fmt = q = iter->fmt;
3976 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3977 if (!trace_iter_expand_format(iter))
3980 q += iter->fmt - new_fmt;
3981 new_fmt = iter->fmt;
3986 /* Replace %p with %px */
3990 } else if (p[0] == 'p' && !isalnum(p[1])) {
4001 #define STATIC_TEMP_BUF_SIZE 128
4002 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4004 /* Find the next real entry, without updating the iterator itself */
4005 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4006 int *ent_cpu, u64 *ent_ts)
4008 /* __find_next_entry will reset ent_size */
4009 int ent_size = iter->ent_size;
4010 struct trace_entry *entry;
4013 * If called from ftrace_dump(), then the iter->temp buffer
4014 * will be the static_temp_buf and not created from kmalloc.
4015 * If the entry size is greater than the buffer, we can
4016 * not save it. Just return NULL in that case. This is only
4017 * used to add markers when two consecutive events' time
4018 * stamps have a large delta. See trace_print_lat_context()
4020 if (iter->temp == static_temp_buf &&
4021 STATIC_TEMP_BUF_SIZE < ent_size)
4025 * The __find_next_entry() may call peek_next_entry(), which may
4026 * call ring_buffer_peek() that may make the contents of iter->ent
4027 * undefined. Need to copy iter->ent now.
4029 if (iter->ent && iter->ent != iter->temp) {
4030 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4031 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4033 temp = kmalloc(iter->ent_size, GFP_KERNEL);
4038 iter->temp_size = iter->ent_size;
4040 memcpy(iter->temp, iter->ent, iter->ent_size);
4041 iter->ent = iter->temp;
4043 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4044 /* Put back the original ent_size */
4045 iter->ent_size = ent_size;
4050 /* Find the next real entry, and increment the iterator to the next entry */
4051 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4053 iter->ent = __find_next_entry(iter, &iter->cpu,
4054 &iter->lost_events, &iter->ts);
4057 trace_iterator_increment(iter);
4059 return iter->ent ? iter : NULL;
4062 static void trace_consume(struct trace_iterator *iter)
4064 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4065 &iter->lost_events);
4068 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4070 struct trace_iterator *iter = m->private;
4074 WARN_ON_ONCE(iter->leftover);
4078 /* can't go backwards */
4083 ent = trace_find_next_entry_inc(iter);
4087 while (ent && iter->idx < i)
4088 ent = trace_find_next_entry_inc(iter);
4095 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4097 struct ring_buffer_iter *buf_iter;
4098 unsigned long entries = 0;
4101 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4103 buf_iter = trace_buffer_iter(iter, cpu);
4107 ring_buffer_iter_reset(buf_iter);
4110 * We could have the case with the max latency tracers
4111 * that a reset never took place on a cpu. This is evident
4112 * by the timestamp being before the start of the buffer.
4114 while (ring_buffer_iter_peek(buf_iter, &ts)) {
4115 if (ts >= iter->array_buffer->time_start)
4118 ring_buffer_iter_advance(buf_iter);
4121 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4125 * The current tracer is copied to avoid taking a global lock all around.
4128 static void *s_start(struct seq_file *m, loff_t *pos)
4130 struct trace_iterator *iter = m->private;
4131 struct trace_array *tr = iter->tr;
4132 int cpu_file = iter->cpu_file;
4138 * copy the tracer to avoid using a global lock all around.
4139 * iter->trace is a copy of current_trace, the pointer to the
4140 * name may be used instead of a strcmp(), as iter->trace->name
4141 * will point to the same string as current_trace->name.
4143 mutex_lock(&trace_types_lock);
4144 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4145 *iter->trace = *tr->current_trace;
4146 mutex_unlock(&trace_types_lock);
4148 #ifdef CONFIG_TRACER_MAX_TRACE
4149 if (iter->snapshot && iter->trace->use_max_tr)
4150 return ERR_PTR(-EBUSY);
4153 if (*pos != iter->pos) {
4158 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4159 for_each_tracing_cpu(cpu)
4160 tracing_iter_reset(iter, cpu);
4162 tracing_iter_reset(iter, cpu_file);
4165 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4170 * If we overflowed the seq_file before, then we want
4171 * to just reuse the trace_seq buffer again.
4177 p = s_next(m, p, &l);
4181 trace_event_read_lock();
4182 trace_access_lock(cpu_file);
4186 static void s_stop(struct seq_file *m, void *p)
4188 struct trace_iterator *iter = m->private;
4190 #ifdef CONFIG_TRACER_MAX_TRACE
4191 if (iter->snapshot && iter->trace->use_max_tr)
4195 trace_access_unlock(iter->cpu_file);
4196 trace_event_read_unlock();
4200 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4201 unsigned long *entries, int cpu)
4203 unsigned long count;
4205 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4207 * If this buffer has skipped entries, then we hold all
4208 * entries for the trace and we need to ignore the
4209 * ones before the time stamp.
4211 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4212 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4213 /* total is the same as the entries */
4217 ring_buffer_overrun_cpu(buf->buffer, cpu);
4222 get_total_entries(struct array_buffer *buf,
4223 unsigned long *total, unsigned long *entries)
4231 for_each_tracing_cpu(cpu) {
4232 get_total_entries_cpu(buf, &t, &e, cpu);
4238 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4240 unsigned long total, entries;
4245 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4250 unsigned long trace_total_entries(struct trace_array *tr)
4252 unsigned long total, entries;
4257 get_total_entries(&tr->array_buffer, &total, &entries);
4262 static void print_lat_help_header(struct seq_file *m)
4264 seq_puts(m, "# _------=> CPU# \n"
4265 "# / _-----=> irqs-off/BH-disabled\n"
4266 "# | / _----=> need-resched \n"
4267 "# || / _---=> hardirq/softirq \n"
4268 "# ||| / _--=> preempt-depth \n"
4269 "# |||| / _-=> migrate-disable \n"
4270 "# ||||| / delay \n"
4271 "# cmd pid |||||| time | caller \n"
4272 "# \\ / |||||| \\ | / \n");
4275 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4277 unsigned long total;
4278 unsigned long entries;
4280 get_total_entries(buf, &total, &entries);
4281 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4282 entries, total, num_online_cpus());
4286 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4289 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4291 print_event_info(buf, m);
4293 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4294 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4297 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4300 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4301 static const char space[] = " ";
4302 int prec = tgid ? 12 : 2;
4304 print_event_info(buf, m);
4306 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4307 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4308 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4309 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4310 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4311 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4312 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4313 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4317 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4319 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4320 struct array_buffer *buf = iter->array_buffer;
4321 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4322 struct tracer *type = iter->trace;
4323 unsigned long entries;
4324 unsigned long total;
4325 const char *name = type->name;
4327 get_total_entries(buf, &total, &entries);
4329 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4331 seq_puts(m, "# -----------------------------------"
4332 "---------------------------------\n");
4333 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4334 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4335 nsecs_to_usecs(data->saved_latency),
4339 preempt_model_none() ? "server" :
4340 preempt_model_voluntary() ? "desktop" :
4341 preempt_model_full() ? "preempt" :
4342 preempt_model_rt() ? "preempt_rt" :
4344 /* These are reserved for later use */
4347 seq_printf(m, " #P:%d)\n", num_online_cpus());
4351 seq_puts(m, "# -----------------\n");
4352 seq_printf(m, "# | task: %.16s-%d "
4353 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4354 data->comm, data->pid,
4355 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4356 data->policy, data->rt_priority);
4357 seq_puts(m, "# -----------------\n");
4359 if (data->critical_start) {
4360 seq_puts(m, "# => started at: ");
4361 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4362 trace_print_seq(m, &iter->seq);
4363 seq_puts(m, "\n# => ended at: ");
4364 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4365 trace_print_seq(m, &iter->seq);
4366 seq_puts(m, "\n#\n");
4372 static void test_cpu_buff_start(struct trace_iterator *iter)
4374 struct trace_seq *s = &iter->seq;
4375 struct trace_array *tr = iter->tr;
4377 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4380 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4383 if (cpumask_available(iter->started) &&
4384 cpumask_test_cpu(iter->cpu, iter->started))
4387 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4390 if (cpumask_available(iter->started))
4391 cpumask_set_cpu(iter->cpu, iter->started);
4393 /* Don't print started cpu buffer for the first entry of the trace */
4395 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4399 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4401 struct trace_array *tr = iter->tr;
4402 struct trace_seq *s = &iter->seq;
4403 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4404 struct trace_entry *entry;
4405 struct trace_event *event;
4409 test_cpu_buff_start(iter);
4411 event = ftrace_find_event(entry->type);
4413 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4414 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4415 trace_print_lat_context(iter);
4417 trace_print_context(iter);
4420 if (trace_seq_has_overflowed(s))
4421 return TRACE_TYPE_PARTIAL_LINE;
4424 return event->funcs->trace(iter, sym_flags, event);
4426 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4428 return trace_handle_return(s);
4431 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4433 struct trace_array *tr = iter->tr;
4434 struct trace_seq *s = &iter->seq;
4435 struct trace_entry *entry;
4436 struct trace_event *event;
4440 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4441 trace_seq_printf(s, "%d %d %llu ",
4442 entry->pid, iter->cpu, iter->ts);
4444 if (trace_seq_has_overflowed(s))
4445 return TRACE_TYPE_PARTIAL_LINE;
4447 event = ftrace_find_event(entry->type);
4449 return event->funcs->raw(iter, 0, event);
4451 trace_seq_printf(s, "%d ?\n", entry->type);
4453 return trace_handle_return(s);
4456 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4458 struct trace_array *tr = iter->tr;
4459 struct trace_seq *s = &iter->seq;
4460 unsigned char newline = '\n';
4461 struct trace_entry *entry;
4462 struct trace_event *event;
4466 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4467 SEQ_PUT_HEX_FIELD(s, entry->pid);
4468 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4469 SEQ_PUT_HEX_FIELD(s, iter->ts);
4470 if (trace_seq_has_overflowed(s))
4471 return TRACE_TYPE_PARTIAL_LINE;
4474 event = ftrace_find_event(entry->type);
4476 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4477 if (ret != TRACE_TYPE_HANDLED)
4481 SEQ_PUT_FIELD(s, newline);
4483 return trace_handle_return(s);
4486 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4488 struct trace_array *tr = iter->tr;
4489 struct trace_seq *s = &iter->seq;
4490 struct trace_entry *entry;
4491 struct trace_event *event;
4495 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4496 SEQ_PUT_FIELD(s, entry->pid);
4497 SEQ_PUT_FIELD(s, iter->cpu);
4498 SEQ_PUT_FIELD(s, iter->ts);
4499 if (trace_seq_has_overflowed(s))
4500 return TRACE_TYPE_PARTIAL_LINE;
4503 event = ftrace_find_event(entry->type);
4504 return event ? event->funcs->binary(iter, 0, event) :
4508 int trace_empty(struct trace_iterator *iter)
4510 struct ring_buffer_iter *buf_iter;
4513 /* If we are looking at one CPU buffer, only check that one */
4514 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4515 cpu = iter->cpu_file;
4516 buf_iter = trace_buffer_iter(iter, cpu);
4518 if (!ring_buffer_iter_empty(buf_iter))
4521 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4527 for_each_tracing_cpu(cpu) {
4528 buf_iter = trace_buffer_iter(iter, cpu);
4530 if (!ring_buffer_iter_empty(buf_iter))
4533 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4541 /* Called with trace_event_read_lock() held. */
4542 enum print_line_t print_trace_line(struct trace_iterator *iter)
4544 struct trace_array *tr = iter->tr;
4545 unsigned long trace_flags = tr->trace_flags;
4546 enum print_line_t ret;
4548 if (iter->lost_events) {
4549 if (iter->lost_events == (unsigned long)-1)
4550 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4553 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4554 iter->cpu, iter->lost_events);
4555 if (trace_seq_has_overflowed(&iter->seq))
4556 return TRACE_TYPE_PARTIAL_LINE;
4559 if (iter->trace && iter->trace->print_line) {
4560 ret = iter->trace->print_line(iter);
4561 if (ret != TRACE_TYPE_UNHANDLED)
4565 if (iter->ent->type == TRACE_BPUTS &&
4566 trace_flags & TRACE_ITER_PRINTK &&
4567 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4568 return trace_print_bputs_msg_only(iter);
4570 if (iter->ent->type == TRACE_BPRINT &&
4571 trace_flags & TRACE_ITER_PRINTK &&
4572 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4573 return trace_print_bprintk_msg_only(iter);
4575 if (iter->ent->type == TRACE_PRINT &&
4576 trace_flags & TRACE_ITER_PRINTK &&
4577 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4578 return trace_print_printk_msg_only(iter);
4580 if (trace_flags & TRACE_ITER_BIN)
4581 return print_bin_fmt(iter);
4583 if (trace_flags & TRACE_ITER_HEX)
4584 return print_hex_fmt(iter);
4586 if (trace_flags & TRACE_ITER_RAW)
4587 return print_raw_fmt(iter);
4589 return print_trace_fmt(iter);
4592 void trace_latency_header(struct seq_file *m)
4594 struct trace_iterator *iter = m->private;
4595 struct trace_array *tr = iter->tr;
4597 /* print nothing if the buffers are empty */
4598 if (trace_empty(iter))
4601 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4602 print_trace_header(m, iter);
4604 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4605 print_lat_help_header(m);
4608 void trace_default_header(struct seq_file *m)
4610 struct trace_iterator *iter = m->private;
4611 struct trace_array *tr = iter->tr;
4612 unsigned long trace_flags = tr->trace_flags;
4614 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4617 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4618 /* print nothing if the buffers are empty */
4619 if (trace_empty(iter))
4621 print_trace_header(m, iter);
4622 if (!(trace_flags & TRACE_ITER_VERBOSE))
4623 print_lat_help_header(m);
4625 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4626 if (trace_flags & TRACE_ITER_IRQ_INFO)
4627 print_func_help_header_irq(iter->array_buffer,
4630 print_func_help_header(iter->array_buffer, m,
4636 static void test_ftrace_alive(struct seq_file *m)
4638 if (!ftrace_is_dead())
4640 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4641 "# MAY BE MISSING FUNCTION EVENTS\n");
4644 #ifdef CONFIG_TRACER_MAX_TRACE
4645 static void show_snapshot_main_help(struct seq_file *m)
4647 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4648 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4649 "# Takes a snapshot of the main buffer.\n"
4650 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4651 "# (Doesn't have to be '2' works with any number that\n"
4652 "# is not a '0' or '1')\n");
4655 static void show_snapshot_percpu_help(struct seq_file *m)
4657 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4658 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4659 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4660 "# Takes a snapshot of the main buffer for this cpu.\n");
4662 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4663 "# Must use main snapshot file to allocate.\n");
4665 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4666 "# (Doesn't have to be '2' works with any number that\n"
4667 "# is not a '0' or '1')\n");
4670 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4672 if (iter->tr->allocated_snapshot)
4673 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4675 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4677 seq_puts(m, "# Snapshot commands:\n");
4678 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4679 show_snapshot_main_help(m);
4681 show_snapshot_percpu_help(m);
4684 /* Should never be called */
4685 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4688 static int s_show(struct seq_file *m, void *v)
4690 struct trace_iterator *iter = v;
4693 if (iter->ent == NULL) {
4695 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4697 test_ftrace_alive(m);
4699 if (iter->snapshot && trace_empty(iter))
4700 print_snapshot_help(m, iter);
4701 else if (iter->trace && iter->trace->print_header)
4702 iter->trace->print_header(m);
4704 trace_default_header(m);
4706 } else if (iter->leftover) {
4708 * If we filled the seq_file buffer earlier, we
4709 * want to just show it now.
4711 ret = trace_print_seq(m, &iter->seq);
4713 /* ret should this time be zero, but you never know */
4714 iter->leftover = ret;
4717 print_trace_line(iter);
4718 ret = trace_print_seq(m, &iter->seq);
4720 * If we overflow the seq_file buffer, then it will
4721 * ask us for this data again at start up.
4723 * ret is 0 if seq_file write succeeded.
4726 iter->leftover = ret;
4733 * Should be used after trace_array_get(); trace_types_lock
4734 * ensures that i_cdev was already initialized.
4736 static inline int tracing_get_cpu(struct inode *inode)
4738 if (inode->i_cdev) /* See trace_create_cpu_file() */
4739 return (long)inode->i_cdev - 1;
4740 return RING_BUFFER_ALL_CPUS;
4743 static const struct seq_operations tracer_seq_ops = {
4750 static struct trace_iterator *
4751 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4753 struct trace_array *tr = inode->i_private;
4754 struct trace_iterator *iter;
4757 if (tracing_disabled)
4758 return ERR_PTR(-ENODEV);
4760 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4762 return ERR_PTR(-ENOMEM);
4764 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4766 if (!iter->buffer_iter)
4770 * trace_find_next_entry() may need to save off iter->ent.
4771 * It will place it into the iter->temp buffer. As most
4772 * events are less than 128 bytes, allocate a buffer of that size.
4773 * If one is greater, then trace_find_next_entry() will
4774 * allocate a new buffer to adjust for the bigger iter->ent.
4775 * It's not critical if it fails to get allocated here.
4777 iter->temp = kmalloc(128, GFP_KERNEL);
4779 iter->temp_size = 128;
4782 * trace_event_printf() may need to modify given format
4783 * string to replace %p with %px so that it shows real address
4784 * instead of hash value. However, that is only for the event
4785 * tracing; other tracers may not need it. Defer the allocation
4786 * until it is needed.
4792 * We make a copy of the current tracer to avoid concurrent
4793 * changes on it while we are reading.
4795 mutex_lock(&trace_types_lock);
4796 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4800 *iter->trace = *tr->current_trace;
4802 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4807 #ifdef CONFIG_TRACER_MAX_TRACE
4808 /* Currently only the top directory has a snapshot */
4809 if (tr->current_trace->print_max || snapshot)
4810 iter->array_buffer = &tr->max_buffer;
4813 iter->array_buffer = &tr->array_buffer;
4814 iter->snapshot = snapshot;
4816 iter->cpu_file = tracing_get_cpu(inode);
4817 mutex_init(&iter->mutex);
4819 /* Notify the tracer early; before we stop tracing. */
4820 if (iter->trace->open)
4821 iter->trace->open(iter);
4823 /* Annotate start of buffers if we had overruns */
4824 if (ring_buffer_overruns(iter->array_buffer->buffer))
4825 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4827 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4828 if (trace_clocks[tr->clock_id].in_ns)
4829 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4832 * If pause-on-trace is enabled, then stop the trace while
4833 * dumping, unless this is the "snapshot" file
4835 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4836 tracing_stop_tr(tr);
4838 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4839 for_each_tracing_cpu(cpu) {
4840 iter->buffer_iter[cpu] =
4841 ring_buffer_read_prepare(iter->array_buffer->buffer,
4844 ring_buffer_read_prepare_sync();
4845 for_each_tracing_cpu(cpu) {
4846 ring_buffer_read_start(iter->buffer_iter[cpu]);
4847 tracing_iter_reset(iter, cpu);
4850 cpu = iter->cpu_file;
4851 iter->buffer_iter[cpu] =
4852 ring_buffer_read_prepare(iter->array_buffer->buffer,
4854 ring_buffer_read_prepare_sync();
4855 ring_buffer_read_start(iter->buffer_iter[cpu]);
4856 tracing_iter_reset(iter, cpu);
4859 mutex_unlock(&trace_types_lock);
4864 mutex_unlock(&trace_types_lock);
4867 kfree(iter->buffer_iter);
4869 seq_release_private(inode, file);
4870 return ERR_PTR(-ENOMEM);
4873 int tracing_open_generic(struct inode *inode, struct file *filp)
4877 ret = tracing_check_open_get_tr(NULL);
4881 filp->private_data = inode->i_private;
4885 bool tracing_is_disabled(void)
4887 return tracing_disabled ? true : false;
4891 * Open and update trace_array ref count.
4892 * Must have the current trace_array passed to it.
4894 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4896 struct trace_array *tr = inode->i_private;
4899 ret = tracing_check_open_get_tr(tr);
4903 filp->private_data = inode->i_private;
4908 static int tracing_mark_open(struct inode *inode, struct file *filp)
4910 stream_open(inode, filp);
4911 return tracing_open_generic_tr(inode, filp);
4914 static int tracing_release(struct inode *inode, struct file *file)
4916 struct trace_array *tr = inode->i_private;
4917 struct seq_file *m = file->private_data;
4918 struct trace_iterator *iter;
4921 if (!(file->f_mode & FMODE_READ)) {
4922 trace_array_put(tr);
4926 /* Writes do not use seq_file */
4928 mutex_lock(&trace_types_lock);
4930 for_each_tracing_cpu(cpu) {
4931 if (iter->buffer_iter[cpu])
4932 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4935 if (iter->trace && iter->trace->close)
4936 iter->trace->close(iter);
4938 if (!iter->snapshot && tr->stop_count)
4939 /* reenable tracing if it was previously enabled */
4940 tracing_start_tr(tr);
4942 __trace_array_put(tr);
4944 mutex_unlock(&trace_types_lock);
4946 mutex_destroy(&iter->mutex);
4947 free_cpumask_var(iter->started);
4951 kfree(iter->buffer_iter);
4952 seq_release_private(inode, file);
4957 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4959 struct trace_array *tr = inode->i_private;
4961 trace_array_put(tr);
4965 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4967 struct trace_array *tr = inode->i_private;
4969 trace_array_put(tr);
4971 return single_release(inode, file);
4974 static int tracing_open(struct inode *inode, struct file *file)
4976 struct trace_array *tr = inode->i_private;
4977 struct trace_iterator *iter;
4980 ret = tracing_check_open_get_tr(tr);
4984 /* If this file was open for write, then erase contents */
4985 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4986 int cpu = tracing_get_cpu(inode);
4987 struct array_buffer *trace_buf = &tr->array_buffer;
4989 #ifdef CONFIG_TRACER_MAX_TRACE
4990 if (tr->current_trace->print_max)
4991 trace_buf = &tr->max_buffer;
4994 if (cpu == RING_BUFFER_ALL_CPUS)
4995 tracing_reset_online_cpus(trace_buf);
4997 tracing_reset_cpu(trace_buf, cpu);
5000 if (file->f_mode & FMODE_READ) {
5001 iter = __tracing_open(inode, file, false);
5003 ret = PTR_ERR(iter);
5004 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5005 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5009 trace_array_put(tr);
5015 * Some tracers are not suitable for instance buffers.
5016 * A tracer is always available for the global array (toplevel)
5017 * or if it explicitly states that it is.
5020 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5022 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5025 /* Find the next tracer that this trace array may use */
5026 static struct tracer *
5027 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5029 while (t && !trace_ok_for_array(t, tr))
5036 t_next(struct seq_file *m, void *v, loff_t *pos)
5038 struct trace_array *tr = m->private;
5039 struct tracer *t = v;
5044 t = get_tracer_for_array(tr, t->next);
5049 static void *t_start(struct seq_file *m, loff_t *pos)
5051 struct trace_array *tr = m->private;
5055 mutex_lock(&trace_types_lock);
5057 t = get_tracer_for_array(tr, trace_types);
5058 for (; t && l < *pos; t = t_next(m, t, &l))
5064 static void t_stop(struct seq_file *m, void *p)
5066 mutex_unlock(&trace_types_lock);
5069 static int t_show(struct seq_file *m, void *v)
5071 struct tracer *t = v;
5076 seq_puts(m, t->name);
5085 static const struct seq_operations show_traces_seq_ops = {
5092 static int show_traces_open(struct inode *inode, struct file *file)
5094 struct trace_array *tr = inode->i_private;
5098 ret = tracing_check_open_get_tr(tr);
5102 ret = seq_open(file, &show_traces_seq_ops);
5104 trace_array_put(tr);
5108 m = file->private_data;
5114 static int show_traces_release(struct inode *inode, struct file *file)
5116 struct trace_array *tr = inode->i_private;
5118 trace_array_put(tr);
5119 return seq_release(inode, file);
5123 tracing_write_stub(struct file *filp, const char __user *ubuf,
5124 size_t count, loff_t *ppos)
5129 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5133 if (file->f_mode & FMODE_READ)
5134 ret = seq_lseek(file, offset, whence);
5136 file->f_pos = ret = 0;
5141 static const struct file_operations tracing_fops = {
5142 .open = tracing_open,
5144 .write = tracing_write_stub,
5145 .llseek = tracing_lseek,
5146 .release = tracing_release,
5149 static const struct file_operations show_traces_fops = {
5150 .open = show_traces_open,
5152 .llseek = seq_lseek,
5153 .release = show_traces_release,
5157 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5158 size_t count, loff_t *ppos)
5160 struct trace_array *tr = file_inode(filp)->i_private;
5164 len = snprintf(NULL, 0, "%*pb\n",
5165 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5166 mask_str = kmalloc(len, GFP_KERNEL);
5170 len = snprintf(mask_str, len, "%*pb\n",
5171 cpumask_pr_args(tr->tracing_cpumask));
5176 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5184 int tracing_set_cpumask(struct trace_array *tr,
5185 cpumask_var_t tracing_cpumask_new)
5192 local_irq_disable();
5193 arch_spin_lock(&tr->max_lock);
5194 for_each_tracing_cpu(cpu) {
5196 * Increase/decrease the disabled counter if we are
5197 * about to flip a bit in the cpumask:
5199 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5200 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5201 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5202 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5204 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5205 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5206 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5207 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5210 arch_spin_unlock(&tr->max_lock);
5213 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5219 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5220 size_t count, loff_t *ppos)
5222 struct trace_array *tr = file_inode(filp)->i_private;
5223 cpumask_var_t tracing_cpumask_new;
5226 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5229 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5233 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5237 free_cpumask_var(tracing_cpumask_new);
5242 free_cpumask_var(tracing_cpumask_new);
5247 static const struct file_operations tracing_cpumask_fops = {
5248 .open = tracing_open_generic_tr,
5249 .read = tracing_cpumask_read,
5250 .write = tracing_cpumask_write,
5251 .release = tracing_release_generic_tr,
5252 .llseek = generic_file_llseek,
5255 static int tracing_trace_options_show(struct seq_file *m, void *v)
5257 struct tracer_opt *trace_opts;
5258 struct trace_array *tr = m->private;
5262 mutex_lock(&trace_types_lock);
5263 tracer_flags = tr->current_trace->flags->val;
5264 trace_opts = tr->current_trace->flags->opts;
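	/*
	 * Core trace flags are listed first, then any tracer-specific
	 * options; each is printed one per line, prefixed with "no" when
	 * the option is currently disabled.
	 */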
5266 for (i = 0; trace_options[i]; i++) {
5267 if (tr->trace_flags & (1 << i))
5268 seq_printf(m, "%s\n", trace_options[i]);
5270 seq_printf(m, "no%s\n", trace_options[i]);
5273 for (i = 0; trace_opts[i].name; i++) {
5274 if (tracer_flags & trace_opts[i].bit)
5275 seq_printf(m, "%s\n", trace_opts[i].name);
5277 seq_printf(m, "no%s\n", trace_opts[i].name);
5279 mutex_unlock(&trace_types_lock);
5284 static int __set_tracer_option(struct trace_array *tr,
5285 struct tracer_flags *tracer_flags,
5286 struct tracer_opt *opts, int neg)
5288 struct tracer *trace = tracer_flags->trace;
5291 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5296 tracer_flags->val &= ~opts->bit;
5298 tracer_flags->val |= opts->bit;
5302 /* Try to assign a tracer specific option */
5303 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5305 struct tracer *trace = tr->current_trace;
5306 struct tracer_flags *tracer_flags = trace->flags;
5307 struct tracer_opt *opts = NULL;
5310 for (i = 0; tracer_flags->opts[i].name; i++) {
5311 opts = &tracer_flags->opts[i];
5313 if (strcmp(cmp, opts->name) == 0)
5314 return __set_tracer_option(tr, trace->flags, opts, neg);
5320 /* Some tracers require overwrite to stay enabled */
5321 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5323 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5329 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5333 if ((mask == TRACE_ITER_RECORD_TGID) ||
5334 (mask == TRACE_ITER_RECORD_CMD))
5335 lockdep_assert_held(&event_mutex);
5337 /* do nothing if flag is already set */
5338 if (!!(tr->trace_flags & mask) == !!enabled)
5341 /* Give the tracer a chance to approve the change */
5342 if (tr->current_trace->flag_changed)
5343 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5347 tr->trace_flags |= mask;
5349 tr->trace_flags &= ~mask;
5351 if (mask == TRACE_ITER_RECORD_CMD)
5352 trace_event_enable_cmd_record(enabled);
5354 if (mask == TRACE_ITER_RECORD_TGID) {
5356 tgid_map_max = pid_max;
5357 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5361 * Pairs with smp_load_acquire() in
5362 * trace_find_tgid_ptr() to ensure that if it observes
5363 * the tgid_map we just allocated then it also observes
5364 * the corresponding tgid_map_max value.
5366 smp_store_release(&tgid_map, map);
5369 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5373 trace_event_enable_tgid_record(enabled);
5376 if (mask == TRACE_ITER_EVENT_FORK)
5377 trace_event_follow_fork(tr, enabled);
5379 if (mask == TRACE_ITER_FUNC_FORK)
5380 ftrace_pid_follow_fork(tr, enabled);
5382 if (mask == TRACE_ITER_OVERWRITE) {
5383 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5384 #ifdef CONFIG_TRACER_MAX_TRACE
5385 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5389 if (mask == TRACE_ITER_PRINTK) {
5390 trace_printk_start_stop_comm(enabled);
5391 trace_printk_control(enabled);
5397 int trace_set_options(struct trace_array *tr, char *option)
5402 size_t orig_len = strlen(option);
5405 cmp = strstrip(option);
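	/* A leading "no" negates an option, e.g. "noprint-parent" disables print-parent */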
5407 len = str_has_prefix(cmp, "no");
5413 mutex_lock(&event_mutex);
5414 mutex_lock(&trace_types_lock);
5416 ret = match_string(trace_options, -1, cmp);
5417 /* If no option could be set, test the specific tracer options */
5419 ret = set_tracer_option(tr, cmp, neg);
5421 ret = set_tracer_flag(tr, 1 << ret, !neg);
5423 mutex_unlock(&trace_types_lock);
5424 mutex_unlock(&event_mutex);
5427 * If the first trailing whitespace is replaced with '\0' by strstrip,
5428 * turn it back into a space.
5430 if (orig_len > strlen(option))
5431 option[strlen(option)] = ' ';
5436 static void __init apply_trace_boot_options(void)
5438 char *buf = trace_boot_options_buf;
5442 option = strsep(&buf, ",");
5448 trace_set_options(&global_trace, option);
5450 /* Put back the comma to allow this to be called again */
5457 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5458 size_t cnt, loff_t *ppos)
5460 struct seq_file *m = filp->private_data;
5461 struct trace_array *tr = m->private;
5465 if (cnt >= sizeof(buf))
5468 if (copy_from_user(buf, ubuf, cnt))
5473 ret = trace_set_options(tr, buf);
5482 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5484 struct trace_array *tr = inode->i_private;
5487 ret = tracing_check_open_get_tr(tr);
5491 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5493 trace_array_put(tr);
5498 static const struct file_operations tracing_iter_fops = {
5499 .open = tracing_trace_options_open,
5501 .llseek = seq_lseek,
5502 .release = tracing_single_release_tr,
5503 .write = tracing_trace_options_write,
5506 static const char readme_msg[] =
5507 "tracing mini-HOWTO:\n\n"
5508 "# echo 0 > tracing_on : quick way to disable tracing\n"
5509 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5510 " Important files:\n"
5511 " trace\t\t\t- The static contents of the buffer\n"
5512 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5513 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5514 " current_tracer\t- function and latency tracers\n"
5515 " available_tracers\t- list of configured tracers for current_tracer\n"
5516 " error_log\t- error log for failed commands (that support it)\n"
5517 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5518 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5519 " trace_clock\t\t- change the clock used to order events\n"
5520 " local: Per cpu clock but may not be synced across CPUs\n"
5521 " global: Synced across CPUs but slows tracing down.\n"
5522 " counter: Not a clock, but just an increment\n"
5523 " uptime: Jiffy counter from time of boot\n"
5524 " perf: Same clock that perf events use\n"
5525 #ifdef CONFIG_X86_64
5526 " x86-tsc: TSC cycle counter\n"
5528 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5529 " delta: Delta difference against a buffer-wide timestamp\n"
5530 " absolute: Absolute (standalone) timestamp\n"
5531 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5532 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5533 " tracing_cpumask\t- Limit which CPUs to trace\n"
5534 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5535 "\t\t\t Remove sub-buffer with rmdir\n"
5536 " trace_options\t\t- Set format or modify how tracing happens\n"
5537 "\t\t\t Disable an option by prefixing 'no' to the\n"
5538 "\t\t\t option name\n"
5539 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5540 #ifdef CONFIG_DYNAMIC_FTRACE
5541 "\n available_filter_functions - list of functions that can be filtered on\n"
5542 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5543 "\t\t\t functions\n"
5544 "\t accepts: func_full_name or glob-matching-pattern\n"
5545 "\t modules: Can select a group via module\n"
5546 "\t Format: :mod:<module-name>\n"
5547 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5548 "\t triggers: a command to perform when function is hit\n"
5549 "\t Format: <function>:<trigger>[:count]\n"
5550 "\t trigger: traceon, traceoff\n"
5551 "\t\t enable_event:<system>:<event>\n"
5552 "\t\t disable_event:<system>:<event>\n"
5553 #ifdef CONFIG_STACKTRACE
5556 #ifdef CONFIG_TRACER_SNAPSHOT
5561 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5562 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5563 "\t The first one will disable tracing every time do_fault is hit\n"
5564 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5565 "\t The first time do trap is hit and it disables tracing, the\n"
5566 "\t counter will decrement to 2. If tracing is already disabled,\n"
5567 "\t the counter will not decrement. It only decrements when the\n"
5568 "\t trigger did work\n"
5569 "\t To remove trigger without count:\n"
5570 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5571 "\t To remove trigger with a count:\n"
5572 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5573 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5574 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5575 "\t modules: Can select a group via module command :mod:\n"
5576 "\t Does not accept triggers\n"
5577 #endif /* CONFIG_DYNAMIC_FTRACE */
5578 #ifdef CONFIG_FUNCTION_TRACER
5579 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5581 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5584 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5585 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5586 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5587 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5589 #ifdef CONFIG_TRACER_SNAPSHOT
5590 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5591 "\t\t\t snapshot buffer. Read the contents for more\n"
5592 "\t\t\t information\n"
5594 #ifdef CONFIG_STACK_TRACER
5595 " stack_trace\t\t- Shows the max stack trace when active\n"
5596 " stack_max_size\t- Shows current max stack size that was traced\n"
5597 "\t\t\t Write into this file to reset the max size (trigger a\n"
5598 "\t\t\t new trace)\n"
5599 #ifdef CONFIG_DYNAMIC_FTRACE
5600 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5603 #endif /* CONFIG_STACK_TRACER */
5604 #ifdef CONFIG_DYNAMIC_EVENTS
5605 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5606 "\t\t\t Write into this file to define/undefine new trace events.\n"
5608 #ifdef CONFIG_KPROBE_EVENTS
5609 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5610 "\t\t\t Write into this file to define/undefine new trace events.\n"
5612 #ifdef CONFIG_UPROBE_EVENTS
5613 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5614 "\t\t\t Write into this file to define/undefine new trace events.\n"
5616 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5617 "\t accepts: event-definitions (one definition per line)\n"
5618 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5619 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621 "\t s:[synthetic/]<event> <field> [<field>]\n"
5623 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5624 "\t -:[<group>/][<event>]\n"
5625 #ifdef CONFIG_KPROBE_EVENTS
5626 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5627 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5629 #ifdef CONFIG_UPROBE_EVENTS
5630 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5632 "\t args: <name>=fetcharg[:type]\n"
5633 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5634 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5635 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5637 "\t $stack<index>, $stack, $retval, $comm,\n"
5639 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5640 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5641 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5642 "\t symstr, <type>\\[<array-size>\\]\n"
5643 #ifdef CONFIG_HIST_TRIGGERS
5644 "\t field: <stype> <name>;\n"
5645 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5646 "\t [unsigned] char/int/long\n"
5648 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5649 "\t of the <attached-group>/<attached-event>.\n"
5651 " events/\t\t- Directory containing all trace event subsystems:\n"
5652 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5653 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5654 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5656 " filter\t\t- If set, only events passing filter are traced\n"
5657 " events/<system>/<event>/\t- Directory containing control files for\n"
5659 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5660 " filter\t\t- If set, only events passing filter are traced\n"
5661 " trigger\t\t- If set, a command to perform when event is hit\n"
5662 "\t Format: <trigger>[:count][if <filter>]\n"
5663 "\t trigger: traceon, traceoff\n"
5664 "\t enable_event:<system>:<event>\n"
5665 "\t disable_event:<system>:<event>\n"
5666 #ifdef CONFIG_HIST_TRIGGERS
5667 "\t enable_hist:<system>:<event>\n"
5668 "\t disable_hist:<system>:<event>\n"
5670 #ifdef CONFIG_STACKTRACE
5673 #ifdef CONFIG_TRACER_SNAPSHOT
5676 #ifdef CONFIG_HIST_TRIGGERS
5677 "\t\t hist (see below)\n"
5679 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5680 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5681 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5682 "\t events/block/block_unplug/trigger\n"
5683 "\t The first disables tracing every time block_unplug is hit.\n"
5684 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5685 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5686 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5687 "\t Like function triggers, the counter is only decremented if it\n"
5688 "\t enabled or disabled tracing.\n"
5689 "\t To remove a trigger without a count:\n"
5690 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5691 "\t To remove a trigger with a count:\n"
5692 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5693 "\t Filters can be ignored when removing a trigger.\n"
5694 #ifdef CONFIG_HIST_TRIGGERS
5695 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5696 "\t Format: hist:keys=<field1[,field2,...]>\n"
5697 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5698 "\t [:values=<field1[,field2,...]>]\n"
5699 "\t [:sort=<field1[,field2,...]>]\n"
5700 "\t [:size=#entries]\n"
5701 "\t [:pause][:continue][:clear]\n"
5702 "\t [:name=histname1]\n"
5703 "\t [:nohitcount]\n"
5704 "\t [:<handler>.<action>]\n"
5705 "\t [if <filter>]\n\n"
5706 "\t Note, special fields can be used as well:\n"
5707 "\t common_timestamp - to record current timestamp\n"
5708 "\t common_cpu - to record the CPU the event happened on\n"
5710 "\t A hist trigger variable can be:\n"
5711 "\t - a reference to a field e.g. x=current_timestamp,\n"
5712 "\t - a reference to another variable e.g. y=$x,\n"
5713 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5714 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5716 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5717 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5718 "\t variable reference, field or numeric literal.\n"
5720 "\t When a matching event is hit, an entry is added to a hash\n"
5721 "\t table using the key(s) and value(s) named, and the value of a\n"
5722 "\t sum called 'hitcount' is incremented. Keys and values\n"
5723 "\t correspond to fields in the event's format description. Keys\n"
5724 "\t can be any field, or the special string 'stacktrace'.\n"
5725 "\t Compound keys consisting of up to two fields can be specified\n"
5726 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5727 "\t fields. Sort keys consisting of up to two fields can be\n"
5728 "\t specified using the 'sort' keyword. The sort direction can\n"
5729 "\t be modified by appending '.descending' or '.ascending' to a\n"
5730 "\t sort field. The 'size' parameter can be used to specify more\n"
5731 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5732 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5733 "\t its histogram data will be shared with other triggers of the\n"
5734 "\t same name, and trigger hits will update this common data.\n\n"
5735 "\t Reading the 'hist' file for the event will dump the hash\n"
5736 "\t table in its entirety to stdout. If there are multiple hist\n"
5737 "\t triggers attached to an event, there will be a table for each\n"
5738 "\t trigger in the output. The table displayed for a named\n"
5739 "\t trigger will be the same as any other instance having the\n"
5740 "\t same name. The default format used to display a given field\n"
5741 "\t can be modified by appending any of the following modifiers\n"
5742 "\t to the field name, as applicable:\n\n"
5743 "\t .hex display a number as a hex value\n"
5744 "\t .sym display an address as a symbol\n"
5745 "\t .sym-offset display an address as a symbol and offset\n"
5746 "\t .execname display a common_pid as a program name\n"
5747 "\t .syscall display a syscall id as a syscall name\n"
5748 "\t .log2 display log2 value rather than raw number\n"
5749 "\t .buckets=size display values in groups of size rather than raw number\n"
5750 "\t .usecs display a common_timestamp in microseconds\n"
5751 "\t .percent display a number of percentage value\n"
5752 "\t .graph display a bar-graph of a value\n\n"
5753 "\t The 'pause' parameter can be used to pause an existing hist\n"
5754 "\t trigger or to start a hist trigger but not log any events\n"
5755 "\t until told to do so. 'continue' can be used to start or\n"
5756 "\t restart a paused hist trigger.\n\n"
5757 "\t The 'clear' parameter will clear the contents of a running\n"
5758 "\t hist trigger and leave its current paused/active state\n"
5760 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5761 "\t raw hitcount in the histogram.\n\n"
5762 "\t The enable_hist and disable_hist triggers can be used to\n"
5763 "\t have one event conditionally start and stop another event's\n"
5764 "\t already-attached hist trigger. The syntax is analogous to\n"
5765 "\t the enable_event and disable_event triggers.\n\n"
5766 "\t Hist trigger handlers and actions are executed whenever a\n"
5767 "\t a histogram entry is added or updated. They take the form:\n\n"
5768 "\t <handler>.<action>\n\n"
5769 "\t The available handlers are:\n\n"
5770 "\t onmatch(matching.event) - invoke on addition or update\n"
5771 "\t onmax(var) - invoke if var exceeds current max\n"
5772 "\t onchange(var) - invoke action if var changes\n\n"
5773 "\t The available actions are:\n\n"
5774 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5775 "\t save(field,...) - save current event fields\n"
5776 #ifdef CONFIG_TRACER_SNAPSHOT
5777 "\t snapshot() - snapshot the trace buffer\n\n"
5779 #ifdef CONFIG_SYNTH_EVENTS
5780 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5781 "\t Write into this file to define/undefine new synthetic events.\n"
5782 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5788 tracing_readme_read(struct file *filp, char __user *ubuf,
5789 size_t cnt, loff_t *ppos)
5791 return simple_read_from_buffer(ubuf, cnt, ppos,
5792 readme_msg, strlen(readme_msg));
5795 static const struct file_operations tracing_readme_fops = {
5796 .open = tracing_open_generic,
5797 .read = tracing_readme_read,
5798 .llseek = generic_file_llseek,
5801 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5805 return trace_find_tgid_ptr(pid);
5808 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5812 return trace_find_tgid_ptr(pid);
5815 static void saved_tgids_stop(struct seq_file *m, void *v)
5819 static int saved_tgids_show(struct seq_file *m, void *v)
5821 int *entry = (int *)v;
5822 int pid = entry - tgid_map;
5828 seq_printf(m, "%d %d\n", pid, tgid);
5832 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5833 .start = saved_tgids_start,
5834 .stop = saved_tgids_stop,
5835 .next = saved_tgids_next,
5836 .show = saved_tgids_show,
5839 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5843 ret = tracing_check_open_get_tr(NULL);
5847 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5851 static const struct file_operations tracing_saved_tgids_fops = {
5852 .open = tracing_saved_tgids_open,
5854 .llseek = seq_lseek,
5855 .release = seq_release,
5858 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5860 unsigned int *ptr = v;
5862 if (*pos || m->count)
5867 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5869 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5878 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5884 arch_spin_lock(&trace_cmdline_lock);
5886 v = &savedcmd->map_cmdline_to_pid[0];
5888 v = saved_cmdlines_next(m, v, &l);
5896 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5898 arch_spin_unlock(&trace_cmdline_lock);
5902 static int saved_cmdlines_show(struct seq_file *m, void *v)
5904 char buf[TASK_COMM_LEN];
5905 unsigned int *pid = v;
5907 __trace_find_cmdline(*pid, buf);
5908 seq_printf(m, "%d %s\n", *pid, buf);
5912 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5913 .start = saved_cmdlines_start,
5914 .next = saved_cmdlines_next,
5915 .stop = saved_cmdlines_stop,
5916 .show = saved_cmdlines_show,
5919 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5923 ret = tracing_check_open_get_tr(NULL);
5927 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5930 static const struct file_operations tracing_saved_cmdlines_fops = {
5931 .open = tracing_saved_cmdlines_open,
5933 .llseek = seq_lseek,
5934 .release = seq_release,
5938 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5939 size_t cnt, loff_t *ppos)
5945 arch_spin_lock(&trace_cmdline_lock);
5946 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5947 arch_spin_unlock(&trace_cmdline_lock);
5950 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5953 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5955 kfree(s->saved_cmdlines);
5956 kfree(s->map_cmdline_to_pid);
5960 static int tracing_resize_saved_cmdlines(unsigned int val)
5962 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5964 s = kmalloc(sizeof(*s), GFP_KERNEL);
5968 if (allocate_cmdlines_buffer(val, s) < 0) {
5974 arch_spin_lock(&trace_cmdline_lock);
5975 savedcmd_temp = savedcmd;
5977 arch_spin_unlock(&trace_cmdline_lock);
5979 free_saved_cmdlines_buffer(savedcmd_temp);
5985 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5986 size_t cnt, loff_t *ppos)
5991 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5995 /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5996 if (!val || val > PID_MAX_DEFAULT)
5999 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6008 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6009 .open = tracing_open_generic,
6010 .read = tracing_saved_cmdlines_size_read,
6011 .write = tracing_saved_cmdlines_size_write,
6014 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6015 static union trace_eval_map_item *
6016 update_eval_map(union trace_eval_map_item *ptr)
6018 if (!ptr->map.eval_string) {
6019 if (ptr->tail.next) {
6020 ptr = ptr->tail.next;
6021 /* Set ptr to the next real item (skip head) */
6029 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6031 union trace_eval_map_item *ptr = v;
6034 * Paranoid! If ptr points to end, we don't want to increment past it.
6035 * This really should never happen.
6038 ptr = update_eval_map(ptr);
6039 if (WARN_ON_ONCE(!ptr))
6043 ptr = update_eval_map(ptr);
6048 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6050 union trace_eval_map_item *v;
6053 mutex_lock(&trace_eval_mutex);
6055 v = trace_eval_maps;
6059 while (v && l < *pos) {
6060 v = eval_map_next(m, v, &l);
6066 static void eval_map_stop(struct seq_file *m, void *v)
6068 mutex_unlock(&trace_eval_mutex);
6071 static int eval_map_show(struct seq_file *m, void *v)
6073 union trace_eval_map_item *ptr = v;
6075 seq_printf(m, "%s %ld (%s)\n",
6076 ptr->map.eval_string, ptr->map.eval_value,
6082 static const struct seq_operations tracing_eval_map_seq_ops = {
6083 .start = eval_map_start,
6084 .next = eval_map_next,
6085 .stop = eval_map_stop,
6086 .show = eval_map_show,
6089 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6093 ret = tracing_check_open_get_tr(NULL);
6097 return seq_open(filp, &tracing_eval_map_seq_ops);
6100 static const struct file_operations tracing_eval_map_fops = {
6101 .open = tracing_eval_map_open,
6103 .llseek = seq_lseek,
6104 .release = seq_release,
6107 static inline union trace_eval_map_item *
6108 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6110 /* Return tail of array given the head */
6111 return ptr + ptr->head.length + 1;
6115 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6118 struct trace_eval_map **stop;
6119 struct trace_eval_map **map;
6120 union trace_eval_map_item *map_array;
6121 union trace_eval_map_item *ptr;
6126 * The trace_eval_maps contains the map plus a head and tail item,
6127 * where the head holds the module and length of array, and the
6128 * tail holds a pointer to the next list.
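 *
 * The allocation below therefore holds len + 2 items laid out as:
 *
 *   [ head ][ map 0 ] ... [ map len-1 ][ tail ]
 *
 * which is why trace_eval_jmp_to_tail() finds the tail at
 * ptr + ptr->head.length + 1.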
6130 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6132 pr_warn("Unable to allocate trace eval mapping\n");
6136 mutex_lock(&trace_eval_mutex);
6138 if (!trace_eval_maps)
6139 trace_eval_maps = map_array;
6141 ptr = trace_eval_maps;
6143 ptr = trace_eval_jmp_to_tail(ptr);
6144 if (!ptr->tail.next)
6146 ptr = ptr->tail.next;
6149 ptr->tail.next = map_array;
6151 map_array->head.mod = mod;
6152 map_array->head.length = len;
6155 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6156 map_array->map = **map;
6159 memset(map_array, 0, sizeof(*map_array));
6161 mutex_unlock(&trace_eval_mutex);
6164 static void trace_create_eval_file(struct dentry *d_tracer)
6166 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6167 NULL, &tracing_eval_map_fops);
6170 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6171 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6172 static inline void trace_insert_eval_map_file(struct module *mod,
6173 struct trace_eval_map **start, int len) { }
6174 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6176 static void trace_insert_eval_map(struct module *mod,
6177 struct trace_eval_map **start, int len)
6179 struct trace_eval_map **map;
6186 trace_event_eval_update(map, len);
6188 trace_insert_eval_map_file(mod, start, len);
6192 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6193 size_t cnt, loff_t *ppos)
6195 struct trace_array *tr = filp->private_data;
6196 char buf[MAX_TRACER_SIZE+2];
6199 mutex_lock(&trace_types_lock);
6200 r = sprintf(buf, "%s\n", tr->current_trace->name);
6201 mutex_unlock(&trace_types_lock);
6203 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6206 int tracer_init(struct tracer *t, struct trace_array *tr)
6208 tracing_reset_online_cpus(&tr->array_buffer);
6212 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6216 for_each_tracing_cpu(cpu)
6217 per_cpu_ptr(buf->data, cpu)->entries = val;
6220 #ifdef CONFIG_TRACER_MAX_TRACE
6221 /* resize @trace_buf to the size of @size_buf's entries */
6222 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6223 struct array_buffer *size_buf, int cpu_id)
6227 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6228 for_each_tracing_cpu(cpu) {
6229 ret = ring_buffer_resize(trace_buf->buffer,
6230 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6233 per_cpu_ptr(trace_buf->data, cpu)->entries =
6234 per_cpu_ptr(size_buf->data, cpu)->entries;
6237 ret = ring_buffer_resize(trace_buf->buffer,
6238 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6240 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6241 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6246 #endif /* CONFIG_TRACER_MAX_TRACE */
6248 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6249 unsigned long size, int cpu)
6254 * If kernel or user changes the size of the ring buffer
6255 * we use the size that was given, and we can forget about
6256 * expanding it later.
6258 ring_buffer_expanded = true;
6260 /* May be called before buffers are initialized */
6261 if (!tr->array_buffer.buffer)
6264 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6268 #ifdef CONFIG_TRACER_MAX_TRACE
6269 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6270 !tr->current_trace->use_max_tr)
6273 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6275 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6276 &tr->array_buffer, cpu);
6279 * AARGH! We are left with different
6280 * size max buffer!!!!
6281 * The max buffer is our "snapshot" buffer.
6282 * When a tracer needs a snapshot (one of the
6283 * latency tracers), it swaps the max buffer
6284 * with the saved snapshot. We succeeded in updating
6285 * the size of the main buffer, but failed to
6286 * update the size of the max buffer. But when we tried
6287 * to reset the main buffer to the original size, we
6288 * failed there too. This is very unlikely to
6289 * happen, but if it does, warn and kill all
6293 tracing_disabled = 1;
6298 if (cpu == RING_BUFFER_ALL_CPUS)
6299 set_buffer_entries(&tr->max_buffer, size);
6301 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6304 #endif /* CONFIG_TRACER_MAX_TRACE */
6306 if (cpu == RING_BUFFER_ALL_CPUS)
6307 set_buffer_entries(&tr->array_buffer, size);
6309 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6314 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6315 unsigned long size, int cpu_id)
6319 mutex_lock(&trace_types_lock);
6321 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6322 /* make sure this cpu is enabled in the mask */
6323 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6329 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6334 mutex_unlock(&trace_types_lock);
6341 * tracing_update_buffers - used by tracing facility to expand ring buffers
6343 * To save memory when tracing is never used on a system that has it
6344 * configured in, the ring buffers are set to a minimum size. But once
6345 * a user starts to use the tracing facility, then they need to grow
6346 * to their default size.
6348 * This function is to be called when a tracer is about to be used.
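 *
 * Returns 0 on success (including when the buffers were already
 * expanded) or a negative error code if the resize fails.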
6350 int tracing_update_buffers(void)
6354 mutex_lock(&trace_types_lock);
6355 if (!ring_buffer_expanded)
6356 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6357 RING_BUFFER_ALL_CPUS);
6358 mutex_unlock(&trace_types_lock);
6363 struct trace_option_dentry;
6366 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6369 * Used to clear out the tracer before deletion of an instance.
6370 * Must have trace_types_lock held.
6372 static void tracing_set_nop(struct trace_array *tr)
6374 if (tr->current_trace == &nop_trace)
6377 tr->current_trace->enabled--;
6379 if (tr->current_trace->reset)
6380 tr->current_trace->reset(tr);
6382 tr->current_trace = &nop_trace;
6385 static bool tracer_options_updated;
6387 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6389 /* Only enable if the directory has been created already. */
6393 /* Only create trace option files after update_tracer_options finishes */
6394 if (!tracer_options_updated)
6397 create_trace_option_files(tr, t);
6400 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6403 #ifdef CONFIG_TRACER_MAX_TRACE
6408 mutex_lock(&trace_types_lock);
6410 if (!ring_buffer_expanded) {
6411 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6412 RING_BUFFER_ALL_CPUS);
6418 for (t = trace_types; t; t = t->next) {
6419 if (strcmp(t->name, buf) == 0)
6426 if (t == tr->current_trace)
6429 #ifdef CONFIG_TRACER_SNAPSHOT
6430 if (t->use_max_tr) {
6431 local_irq_disable();
6432 arch_spin_lock(&tr->max_lock);
6433 if (tr->cond_snapshot)
6435 arch_spin_unlock(&tr->max_lock);
6441 /* Some tracers won't work on kernel command line */
6442 if (system_state < SYSTEM_RUNNING && t->noboot) {
6443 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6448 /* Some tracers are only allowed for the top level buffer */
6449 if (!trace_ok_for_array(t, tr)) {
6454 /* If trace pipe files are being read, we can't change the tracer */
6455 if (tr->trace_ref) {
6460 trace_branch_disable();
6462 tr->current_trace->enabled--;
6464 if (tr->current_trace->reset)
6465 tr->current_trace->reset(tr);
6467 #ifdef CONFIG_TRACER_MAX_TRACE
6468 had_max_tr = tr->current_trace->use_max_tr;
6470 /* Current trace needs to be nop_trace before synchronize_rcu */
6471 tr->current_trace = &nop_trace;
6473 if (had_max_tr && !t->use_max_tr) {
6475 * We need to make sure that the update_max_tr sees that
6476 * current_trace changed to nop_trace to keep it from
6477 * swapping the buffers after we resize it.
6478 * The update_max_tr is called with interrupts disabled,
6479 * so a synchronize_rcu() is sufficient.
6485 if (t->use_max_tr && !tr->allocated_snapshot) {
6486 ret = tracing_alloc_snapshot_instance(tr);
6491 tr->current_trace = &nop_trace;
6495 ret = tracer_init(t, tr);
6500 tr->current_trace = t;
6501 tr->current_trace->enabled++;
6502 trace_branch_enable(tr);
6504 mutex_unlock(&trace_types_lock);
6510 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6511 size_t cnt, loff_t *ppos)
6513 struct trace_array *tr = filp->private_data;
6514 char buf[MAX_TRACER_SIZE+1];
6521 if (cnt > MAX_TRACER_SIZE)
6522 cnt = MAX_TRACER_SIZE;
6524 if (copy_from_user(buf, ubuf, cnt))
6531 err = tracing_set_tracer(tr, name);
6541 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6542 size_t cnt, loff_t *ppos)
6547 r = snprintf(buf, sizeof(buf), "%ld\n",
6548 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6549 if (r > sizeof(buf))
6551 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6556 size_t cnt, loff_t *ppos)
6561 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6571 tracing_thresh_read(struct file *filp, char __user *ubuf,
6572 size_t cnt, loff_t *ppos)
6574 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6578 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6579 size_t cnt, loff_t *ppos)
6581 struct trace_array *tr = filp->private_data;
6584 mutex_lock(&trace_types_lock);
6585 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6589 if (tr->current_trace->update_thresh) {
6590 ret = tr->current_trace->update_thresh(tr);
6597 mutex_unlock(&trace_types_lock);
6602 #ifdef CONFIG_TRACER_MAX_TRACE
6605 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6606 size_t cnt, loff_t *ppos)
6608 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6612 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6613 size_t cnt, loff_t *ppos)
6615 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6620 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6622 struct trace_array *tr = inode->i_private;
6623 struct trace_iterator *iter;
6626 ret = tracing_check_open_get_tr(tr);
6630 mutex_lock(&trace_types_lock);
6632 /* create a buffer to store the information to pass to userspace */
6633 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6636 __trace_array_put(tr);
6640 trace_seq_init(&iter->seq);
6641 iter->trace = tr->current_trace;
6643 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6648 /* trace pipe does not show start of buffer */
6649 cpumask_setall(iter->started);
6651 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6652 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6654 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6655 if (trace_clocks[tr->clock_id].in_ns)
6656 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6659 iter->array_buffer = &tr->array_buffer;
6660 iter->cpu_file = tracing_get_cpu(inode);
6661 mutex_init(&iter->mutex);
6662 filp->private_data = iter;
6664 if (iter->trace->pipe_open)
6665 iter->trace->pipe_open(iter);
6667 nonseekable_open(inode, filp);
6671 mutex_unlock(&trace_types_lock);
6676 __trace_array_put(tr);
6677 mutex_unlock(&trace_types_lock);
6681 static int tracing_release_pipe(struct inode *inode, struct file *file)
6683 struct trace_iterator *iter = file->private_data;
6684 struct trace_array *tr = inode->i_private;
6686 mutex_lock(&trace_types_lock);
6690 if (iter->trace->pipe_close)
6691 iter->trace->pipe_close(iter);
6693 mutex_unlock(&trace_types_lock);
6695 free_cpumask_var(iter->started);
6697 mutex_destroy(&iter->mutex);
6700 trace_array_put(tr);
6706 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6708 struct trace_array *tr = iter->tr;
6710 /* Iterators are static; they are either filled or empty */
6711 if (trace_buffer_iter(iter, iter->cpu_file))
6712 return EPOLLIN | EPOLLRDNORM;
6714 if (tr->trace_flags & TRACE_ITER_BLOCK)
6716 * Always select as readable when in blocking mode
6718 return EPOLLIN | EPOLLRDNORM;
6720 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6721 filp, poll_table, iter->tr->buffer_percent);
6725 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6727 struct trace_iterator *iter = filp->private_data;
6729 return trace_poll(iter, filp, poll_table);
6732 /* Must be called with iter->mutex held. */
6733 static int tracing_wait_pipe(struct file *filp)
6735 struct trace_iterator *iter = filp->private_data;
6738 while (trace_empty(iter)) {
6740 if ((filp->f_flags & O_NONBLOCK)) {
6745 * We block until we read something and tracing is disabled.
6746 * We still block if tracing is disabled, but we have never
6747 * read anything. This allows a user to cat this file, and
6748 * then enable tracing. But after we have read something,
6749 * we give an EOF when tracing is again disabled.
6751 * iter->pos will be 0 if we haven't read anything.
6753 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6756 mutex_unlock(&iter->mutex);
6758 ret = wait_on_pipe(iter, 0);
6760 mutex_lock(&iter->mutex);
6773 tracing_read_pipe(struct file *filp, char __user *ubuf,
6774 size_t cnt, loff_t *ppos)
6776 struct trace_iterator *iter = filp->private_data;
6780 * Avoid more than one consumer on a single file descriptor
6781 * This is just a matter of trace coherency; the ring buffer itself
6784 mutex_lock(&iter->mutex);
6786 /* return any leftover data */
6787 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6791 trace_seq_init(&iter->seq);
6793 if (iter->trace->read) {
6794 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6800 sret = tracing_wait_pipe(filp);
6804 /* stop when tracing is finished */
6805 if (trace_empty(iter)) {
6810 if (cnt >= PAGE_SIZE)
6811 cnt = PAGE_SIZE - 1;
6813 /* reset all but tr, trace, and overruns */
6814 trace_iterator_reset(iter);
6815 cpumask_clear(iter->started);
6816 trace_seq_init(&iter->seq);
6818 trace_event_read_lock();
6819 trace_access_lock(iter->cpu_file);
6820 while (trace_find_next_entry_inc(iter) != NULL) {
6821 enum print_line_t ret;
6822 int save_len = iter->seq.seq.len;
6824 ret = print_trace_line(iter);
6825 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6827 * If one print_trace_line() fills entire trace_seq in one shot,
6828 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6829 * In this case, we need to consume it, otherwise the loop will peek
6830 * this event next time, resulting in an infinite loop.
6832 if (save_len == 0) {
6834 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6835 trace_consume(iter);
6839 /* In other cases, don't print partial lines */
6840 iter->seq.seq.len = save_len;
6843 if (ret != TRACE_TYPE_NO_CONSUME)
6844 trace_consume(iter);
6846 if (trace_seq_used(&iter->seq) >= cnt)
6850 * Setting the full flag means we reached the trace_seq buffer
6851 * size and we should have left via the partial output condition above.
6852 * One of the trace_seq_* functions is not used properly.
6854 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6857 trace_access_unlock(iter->cpu_file);
6858 trace_event_read_unlock();
6860 /* Now copy what we have to the user */
6861 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6862 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6863 trace_seq_init(&iter->seq);
6866 * If there was nothing to send to user, in spite of consuming trace
6867 * entries, go back to wait for more entries.
6873 mutex_unlock(&iter->mutex);
6878 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6881 __free_page(spd->pages[idx]);
6885 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6891 /* Seq buffer is page-sized, exactly what we need. */
6893 save_len = iter->seq.seq.len;
6894 ret = print_trace_line(iter);
6896 if (trace_seq_has_overflowed(&iter->seq)) {
6897 iter->seq.seq.len = save_len;
6902 * This should not be hit, because it should only
6903 * be set if the iter->seq overflowed. But check it
6904 * anyway to be safe.
6906 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6907 iter->seq.seq.len = save_len;
6911 count = trace_seq_used(&iter->seq) - save_len;
6914 iter->seq.seq.len = save_len;
6918 if (ret != TRACE_TYPE_NO_CONSUME)
6919 trace_consume(iter);
6921 if (!trace_find_next_entry_inc(iter)) {
6931 static ssize_t tracing_splice_read_pipe(struct file *filp,
6933 struct pipe_inode_info *pipe,
6937 struct page *pages_def[PIPE_DEF_BUFFERS];
6938 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6939 struct trace_iterator *iter = filp->private_data;
6940 struct splice_pipe_desc spd = {
6942 .partial = partial_def,
6943 .nr_pages = 0, /* This gets updated below. */
6944 .nr_pages_max = PIPE_DEF_BUFFERS,
6945 .ops = &default_pipe_buf_ops,
6946 .spd_release = tracing_spd_release_pipe,
6952 if (splice_grow_spd(pipe, &spd))
6955 mutex_lock(&iter->mutex);
6957 if (iter->trace->splice_read) {
6958 ret = iter->trace->splice_read(iter, filp,
6959 ppos, pipe, len, flags);
6964 ret = tracing_wait_pipe(filp);
6968 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6973 trace_event_read_lock();
6974 trace_access_lock(iter->cpu_file);
6976 /* Fill as many pages as possible. */
6977 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6978 spd.pages[i] = alloc_page(GFP_KERNEL);
6982 rem = tracing_fill_pipe_page(rem, iter);
6984 /* Copy the data into the page, so we can start over. */
6985 ret = trace_seq_to_buffer(&iter->seq,
6986 page_address(spd.pages[i]),
6987 trace_seq_used(&iter->seq));
6989 __free_page(spd.pages[i]);
6992 spd.partial[i].offset = 0;
6993 spd.partial[i].len = trace_seq_used(&iter->seq);
6995 trace_seq_init(&iter->seq);
6998 trace_access_unlock(iter->cpu_file);
6999 trace_event_read_unlock();
7000 mutex_unlock(&iter->mutex);
7005 ret = splice_to_pipe(pipe, &spd);
7009 splice_shrink_spd(&spd);
7013 mutex_unlock(&iter->mutex);
7018 tracing_entries_read(struct file *filp, char __user *ubuf,
7019 size_t cnt, loff_t *ppos)
7021 struct inode *inode = file_inode(filp);
7022 struct trace_array *tr = inode->i_private;
7023 int cpu = tracing_get_cpu(inode);
7028 mutex_lock(&trace_types_lock);
7030 if (cpu == RING_BUFFER_ALL_CPUS) {
7031 int cpu, buf_size_same;
7036 /* check if all cpu sizes are same */
7037 for_each_tracing_cpu(cpu) {
7038 /* fill in the size from first enabled cpu */
7040 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7041 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7047 if (buf_size_same) {
7048 if (!ring_buffer_expanded)
7049 r = sprintf(buf, "%lu (expanded: %lu)\n",
7051 trace_buf_size >> 10);
7053 r = sprintf(buf, "%lu\n", size >> 10);
7055 r = sprintf(buf, "X\n");
7057 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7059 mutex_unlock(&trace_types_lock);
7061 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7066 tracing_entries_write(struct file *filp, const char __user *ubuf,
7067 size_t cnt, loff_t *ppos)
7069 struct inode *inode = file_inode(filp);
7070 struct trace_array *tr = inode->i_private;
7074 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7078 /* must have at least 1 entry */
7082 /* value is in KB */
7084 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7094 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7095 size_t cnt, loff_t *ppos)
7097 struct trace_array *tr = filp->private_data;
7100 unsigned long size = 0, expanded_size = 0;
7102 mutex_lock(&trace_types_lock);
7103 for_each_tracing_cpu(cpu) {
7104 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7105 if (!ring_buffer_expanded)
7106 expanded_size += trace_buf_size >> 10;
7108 if (ring_buffer_expanded)
7109 r = sprintf(buf, "%lu\n", size);
7111 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7112 mutex_unlock(&trace_types_lock);
7114 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7118 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7119 size_t cnt, loff_t *ppos)
7122 * There is no need to read what the user has written; this function
7123 * is just to make sure that there is no error when "echo" is used
7132 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7134 struct trace_array *tr = inode->i_private;
7136 /* disable tracing ? */
7137 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7138 tracer_tracing_off(tr);
7139 /* resize the ring buffer to 0 */
7140 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7142 trace_array_put(tr);
7148 tracing_mark_write(struct file *filp, const char __user *ubuf,
7149 size_t cnt, loff_t *fpos)
7151 struct trace_array *tr = filp->private_data;
7152 struct ring_buffer_event *event;
7153 enum event_trigger_type tt = ETT_NONE;
7154 struct trace_buffer *buffer;
7155 struct print_entry *entry;
7160 /* Used in tracing_mark_raw_write() as well */
7161 #define FAULTED_STR "<faulted>"
7162 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7164 if (tracing_disabled)
7167 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7170 if (cnt > TRACE_BUF_SIZE)
7171 cnt = TRACE_BUF_SIZE;
7173 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7175 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7177 /* If less than "<faulted>", then make sure we can still add that */
7178 if (cnt < FAULTED_SIZE)
7179 size += FAULTED_SIZE - cnt;
7181 buffer = tr->array_buffer.buffer;
7182 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7184 if (unlikely(!event))
7185 /* Ring buffer disabled, return as if not open for write */
7188 entry = ring_buffer_event_data(event);
7189 entry->ip = _THIS_IP_;
7191 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7193 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7199 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7200 /* do not add \n before testing triggers, but add \0 */
7201 entry->buf[cnt] = '\0';
7202 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7205 if (entry->buf[cnt - 1] != '\n') {
7206 entry->buf[cnt] = '\n';
7207 entry->buf[cnt + 1] = '\0';
7209 entry->buf[cnt] = '\0';
7211 if (static_branch_unlikely(&trace_marker_exports_enabled))
7212 ftrace_exports(event, TRACE_EXPORT_MARKER);
7213 __buffer_unlock_commit(buffer, event);
7216 event_triggers_post_call(tr->trace_marker_file, tt);
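/*
 * Illustrative user-space usage (assuming the default tracefs mount
 * point): writing text to trace_marker injects a TRACE_PRINT event
 * into the ring buffer:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello from user space\n", 22);
 *	close(fd);
 */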
7221 /* Limit it for now to 3K (including tag) */
7222 #define RAW_DATA_MAX_SIZE (1024*3)
7225 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7226 size_t cnt, loff_t *fpos)
7228 struct trace_array *tr = filp->private_data;
7229 struct ring_buffer_event *event;
7230 struct trace_buffer *buffer;
7231 struct raw_data_entry *entry;
7236 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7238 if (tracing_disabled)
7241 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7244 /* The marker must at least have a tag id */
7245 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7248 if (cnt > TRACE_BUF_SIZE)
7249 cnt = TRACE_BUF_SIZE;
7251 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7253 size = sizeof(*entry) + cnt;
7254 if (cnt < FAULT_SIZE_ID)
7255 size += FAULT_SIZE_ID - cnt;
7257 buffer = tr->array_buffer.buffer;
7258 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7261 /* Ring buffer disabled, return as if not open for write */
7264 entry = ring_buffer_event_data(event);
7266 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7269 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7274 __buffer_unlock_commit(buffer, event);
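/*
 * Illustrative user-space usage: trace_marker_raw takes a binary
 * payload that must start with an unsigned int tag id (a sketch only;
 * the record layout past the id is up to the tool reading it back):
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "rawdata" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *	write(fd, &rec, sizeof(rec));
 *	close(fd);
 */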
7279 static int tracing_clock_show(struct seq_file *m, void *v)
7281 struct trace_array *tr = m->private;
7284 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7286 "%s%s%s%s", i ? " " : "",
7287 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7288 i == tr->clock_id ? "]" : "");
7294 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7298 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7299 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7302 if (i == ARRAY_SIZE(trace_clocks))
7305 mutex_lock(&trace_types_lock);
7309 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7312 * New clock may not be consistent with the previous clock.
7313 * Reset the buffer so that it doesn't have incomparable timestamps.
7315 tracing_reset_online_cpus(&tr->array_buffer);
7317 #ifdef CONFIG_TRACER_MAX_TRACE
7318 if (tr->max_buffer.buffer)
7319 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7320 tracing_reset_online_cpus(&tr->max_buffer);
7323 mutex_unlock(&trace_types_lock);
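/*
 * Illustrative usage: reading trace_clock lists the available clocks
 * with the current one in brackets (the exact list depends on the
 * architecture); writing a name switches clocks and resets the buffer
 * as described above:
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot x86-tsc
 *	echo global > /sys/kernel/tracing/trace_clock
 */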
7328 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7329 size_t cnt, loff_t *fpos)
7331 struct seq_file *m = filp->private_data;
7332 struct trace_array *tr = m->private;
7334 const char *clockstr;
7337 if (cnt >= sizeof(buf))
7340 if (copy_from_user(buf, ubuf, cnt))
7345 clockstr = strstrip(buf);
7347 ret = tracing_set_clock(tr, clockstr);
7356 static int tracing_clock_open(struct inode *inode, struct file *file)
7358 struct trace_array *tr = inode->i_private;
7361 ret = tracing_check_open_get_tr(tr);
7365 ret = single_open(file, tracing_clock_show, inode->i_private);
7367 trace_array_put(tr);
7372 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7374 struct trace_array *tr = m->private;
7376 mutex_lock(&trace_types_lock);
7378 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7379 seq_puts(m, "delta [absolute]\n");
7381 seq_puts(m, "[delta] absolute\n");
7383 mutex_unlock(&trace_types_lock);
7388 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7390 struct trace_array *tr = inode->i_private;
7393 ret = tracing_check_open_get_tr(tr);
7397 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7399 trace_array_put(tr);
7404 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7406 if (rbe == this_cpu_read(trace_buffered_event))
7407 return ring_buffer_time_stamp(buffer);
7409 return ring_buffer_event_time_stamp(buffer, rbe);
7413 * Set or disable using the per CPU trace_buffered_event when possible.
7415 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7419 mutex_lock(&trace_types_lock);
7421 if (set && tr->no_filter_buffering_ref++)
7425 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7430 --tr->no_filter_buffering_ref;
7433 mutex_unlock(&trace_types_lock);
7438 struct ftrace_buffer_info {
7439 struct trace_iterator iter;
7441 unsigned int spare_cpu;
7445 #ifdef CONFIG_TRACER_SNAPSHOT
7446 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7448 struct trace_array *tr = inode->i_private;
7449 struct trace_iterator *iter;
7453 ret = tracing_check_open_get_tr(tr);
7457 if (file->f_mode & FMODE_READ) {
7458 iter = __tracing_open(inode, file, true);
7460 ret = PTR_ERR(iter);
7462 /* Writes still need the seq_file to hold the private data */
7464 m = kzalloc(sizeof(*m), GFP_KERNEL);
7467 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7475 iter->array_buffer = &tr->max_buffer;
7476 iter->cpu_file = tracing_get_cpu(inode);
7478 file->private_data = m;
7482 trace_array_put(tr);
7488 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7491 struct seq_file *m = filp->private_data;
7492 struct trace_iterator *iter = m->private;
7493 struct trace_array *tr = iter->tr;
7497 ret = tracing_update_buffers();
7501 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7505 mutex_lock(&trace_types_lock);
7507 if (tr->current_trace->use_max_tr) {
7512 local_irq_disable();
7513 arch_spin_lock(&tr->max_lock);
7514 if (tr->cond_snapshot)
7516 arch_spin_unlock(&tr->max_lock);
7523 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7527 if (tr->allocated_snapshot)
7531 /* Only allow per-cpu swap if the ring buffer supports it */
7532 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7533 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7538 if (tr->allocated_snapshot)
7539 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7540 &tr->array_buffer, iter->cpu_file);
7542 ret = tracing_alloc_snapshot_instance(tr);
7545 local_irq_disable();
7546 /* Now, we're going to swap */
7547 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7548 update_max_tr(tr, current, smp_processor_id(), NULL);
7550 update_max_tr_single(tr, current, iter->cpu_file);
7554 if (tr->allocated_snapshot) {
7555 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7556 tracing_reset_online_cpus(&tr->max_buffer);
7558 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7568 mutex_unlock(&trace_types_lock);
7572 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7574 struct seq_file *m = file->private_data;
7577 ret = tracing_release(inode, file);
7579 if (file->f_mode & FMODE_READ)
7582 /* If write only, the seq_file is just a stub */
7590 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7591 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7592 size_t count, loff_t *ppos);
7593 static int tracing_buffers_release(struct inode *inode, struct file *file);
7594 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7595 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7597 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7599 struct ftrace_buffer_info *info;
7602 /* The following checks for tracefs lockdown */
7603 ret = tracing_buffers_open(inode, filp);
7607 info = filp->private_data;
7609 if (info->iter.trace->use_max_tr) {
7610 tracing_buffers_release(inode, filp);
7614 info->iter.snapshot = true;
7615 info->iter.array_buffer = &info->iter.tr->max_buffer;
7620 #endif /* CONFIG_TRACER_SNAPSHOT */
7623 static const struct file_operations tracing_thresh_fops = {
7624 .open = tracing_open_generic,
7625 .read = tracing_thresh_read,
7626 .write = tracing_thresh_write,
7627 .llseek = generic_file_llseek,
7630 #ifdef CONFIG_TRACER_MAX_TRACE
7631 static const struct file_operations tracing_max_lat_fops = {
7632 .open = tracing_open_generic,
7633 .read = tracing_max_lat_read,
7634 .write = tracing_max_lat_write,
7635 .llseek = generic_file_llseek,
7639 static const struct file_operations set_tracer_fops = {
7640 .open = tracing_open_generic,
7641 .read = tracing_set_trace_read,
7642 .write = tracing_set_trace_write,
7643 .llseek = generic_file_llseek,
7646 static const struct file_operations tracing_pipe_fops = {
7647 .open = tracing_open_pipe,
7648 .poll = tracing_poll_pipe,
7649 .read = tracing_read_pipe,
7650 .splice_read = tracing_splice_read_pipe,
7651 .release = tracing_release_pipe,
7652 .llseek = no_llseek,
7655 static const struct file_operations tracing_entries_fops = {
7656 .open = tracing_open_generic_tr,
7657 .read = tracing_entries_read,
7658 .write = tracing_entries_write,
7659 .llseek = generic_file_llseek,
7660 .release = tracing_release_generic_tr,
7663 static const struct file_operations tracing_total_entries_fops = {
7664 .open = tracing_open_generic_tr,
7665 .read = tracing_total_entries_read,
7666 .llseek = generic_file_llseek,
7667 .release = tracing_release_generic_tr,
7670 static const struct file_operations tracing_free_buffer_fops = {
7671 .open = tracing_open_generic_tr,
7672 .write = tracing_free_buffer_write,
7673 .release = tracing_free_buffer_release,
7676 static const struct file_operations tracing_mark_fops = {
7677 .open = tracing_mark_open,
7678 .write = tracing_mark_write,
7679 .release = tracing_release_generic_tr,
7682 static const struct file_operations tracing_mark_raw_fops = {
7683 .open = tracing_mark_open,
7684 .write = tracing_mark_raw_write,
7685 .release = tracing_release_generic_tr,
7688 static const struct file_operations trace_clock_fops = {
7689 .open = tracing_clock_open,
7691 .llseek = seq_lseek,
7692 .release = tracing_single_release_tr,
7693 .write = tracing_clock_write,
7696 static const struct file_operations trace_time_stamp_mode_fops = {
7697 .open = tracing_time_stamp_mode_open,
7699 .llseek = seq_lseek,
7700 .release = tracing_single_release_tr,
7703 #ifdef CONFIG_TRACER_SNAPSHOT
7704 static const struct file_operations snapshot_fops = {
7705 .open = tracing_snapshot_open,
7707 .write = tracing_snapshot_write,
7708 .llseek = tracing_lseek,
7709 .release = tracing_snapshot_release,
7712 static const struct file_operations snapshot_raw_fops = {
7713 .open = snapshot_raw_open,
7714 .read = tracing_buffers_read,
7715 .release = tracing_buffers_release,
7716 .splice_read = tracing_buffers_splice_read,
7717 .llseek = no_llseek,
7720 #endif /* CONFIG_TRACER_SNAPSHOT */
7723 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7724 * @filp: The active open file structure
7725 * @ubuf: The userspace buffer holding the value to be written
7726 * @cnt: The number of bytes to read from @ubuf
7727 * @ppos: The current "file" position
7729 * This function implements the write interface for a struct trace_min_max_param.
7730 * The filp->private_data must point to a trace_min_max_param structure that
7731 * defines where to write the value, the min and the max acceptable values,
7732 * and a lock to protect the write.
7735 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7737 struct trace_min_max_param *param = filp->private_data;
7744 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7749 mutex_lock(param->lock);
7751 if (param->min && val < *param->min)
7754 if (param->max && val > *param->max)
7761 mutex_unlock(param->lock);
7770 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7771 * @filp: The active open file structure
7772 * @ubuf: The userspace provided buffer to read value into
7773 * @cnt: The maximum number of bytes to read
7774 * @ppos: The current "file" position
7776 * This function implements the read interface for a struct trace_min_max_param.
7777 * The filp->private_data must point to a trace_min_max_param struct with valid
7781 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7783 struct trace_min_max_param *param = filp->private_data;
7784 char buf[U64_STR_SIZE];
7793 if (cnt > sizeof(buf))
7796 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7798 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
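/*
 * Hypothetical caller sketch (names are illustrative): a user of
 * trace_min_max_fops points filp->private_data at a
 * struct trace_min_max_param describing the value and its bounds:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */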
7801 const struct file_operations trace_min_max_fops = {
7802 .open = tracing_open_generic,
7803 .read = trace_min_max_read,
7804 .write = trace_min_max_write,
7807 #define TRACING_LOG_ERRS_MAX 8
7808 #define TRACING_LOG_LOC_MAX 128
7810 #define CMD_PREFIX " Command: "
7813 const char **errs; /* ptr to loc-specific array of err strings */
7814 u8 type; /* index into errs -> specific err string */
7815 u16 pos; /* caret position */
7819 struct tracing_log_err {
7820 struct list_head list;
7821 struct err_info info;
7822 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7823 char *cmd; /* what caused err */
7826 static DEFINE_MUTEX(tracing_err_log_lock);
7828 static struct tracing_log_err *alloc_tracing_log_err(int len)
7830 struct tracing_log_err *err;
7832 err = kzalloc(sizeof(*err), GFP_KERNEL);
7834 return ERR_PTR(-ENOMEM);
7836 err->cmd = kzalloc(len, GFP_KERNEL);
7839 return ERR_PTR(-ENOMEM);
7845 static void free_tracing_log_err(struct tracing_log_err *err)
7851 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7854 struct tracing_log_err *err;
7857 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7858 err = alloc_tracing_log_err(len);
7859 if (PTR_ERR(err) != -ENOMEM)
7860 tr->n_err_log_entries++;
7864 cmd = kzalloc(len, GFP_KERNEL);
7866 return ERR_PTR(-ENOMEM);
7867 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7870 list_del(&err->list);
7876 * err_pos - find the position of a string within a command for error careting
7877 * @cmd: The tracing command that caused the error
7878 * @str: The string to position the caret at within @cmd
7880 * Finds the position of the first occurrence of @str within @cmd. The
7881 * return value can be passed to tracing_log_err() for caret placement
7884 * Returns the index within @cmd of the first occurrence of @str or 0
7885 * if @str was not found.
7887 unsigned int err_pos(char *cmd, const char *str)
7891 if (WARN_ON(!strlen(cmd)))
7894 found = strstr(cmd, str);
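/*
 * Illustrative example: for the (hypothetical) command
 * "hist:keys=bogus", err_pos(cmd, "bogus") returns 10, which can be
 * passed as @pos to tracing_log_err() so the caret lands under
 * "bogus" in the logged command.
 */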
7902 * tracing_log_err - write an error to the tracing error log
7903 * @tr: The associated trace array for the error (NULL for top level array)
7904 * @loc: A string describing where the error occurred
7905 * @cmd: The tracing command that caused the error
7906 * @errs: The array of loc-specific static error strings
7907 * @type: The index into errs[], which produces the specific static err string
7908 * @pos: The position the caret should be placed in the cmd
7910 * Writes an error into tracing/error_log of the form:
7912 * <loc>: error: <text>
7916 * tracing/error_log is a small log file containing the last
7917 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7918 * unless there has been a tracing error, and the error log can be
7919 * cleared and have its memory freed by writing the empty string in
7920 * truncation mode to it, i.e. echo > tracing/error_log.
7922 * NOTE: the @errs array along with the @type param are used to
7923 * produce a static error string - this string is not copied and saved
7924 * when the error is logged - only a pointer to it is saved. See
7925 * existing callers for examples of how static strings are typically
7926 * defined for use with tracing_log_err().
7928 void tracing_log_err(struct trace_array *tr,
7929 const char *loc, const char *cmd,
7930 const char **errs, u8 type, u16 pos)
7932 struct tracing_log_err *err;
7938 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7940 mutex_lock(&tracing_err_log_lock);
7941 err = get_tracing_log_err(tr, len);
7942 if (PTR_ERR(err) == -ENOMEM) {
7943 mutex_unlock(&tracing_err_log_lock);
7947 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7948 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7950 err->info.errs = errs;
7951 err->info.type = type;
7952 err->info.pos = pos;
7953 err->info.ts = local_clock();
7955 list_add_tail(&err->list, &tr->err_log);
7956 mutex_unlock(&tracing_err_log_lock);
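/*
 * Hypothetical caller sketch (names are illustrative): the static
 * error strings are indexed by @type, so a subsystem typically keeps
 * an array and a matching enum:
 *
 *	static const char *my_errs[] = { "Bad key", "Bad value" };
 *	enum { MY_ERR_BAD_KEY, MY_ERR_BAD_VALUE };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			MY_ERR_BAD_KEY, err_pos(cmd, key));
 */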
7959 static void clear_tracing_err_log(struct trace_array *tr)
7961 struct tracing_log_err *err, *next;
7963 mutex_lock(&tracing_err_log_lock);
7964 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7965 list_del(&err->list);
7966 free_tracing_log_err(err);
7969 tr->n_err_log_entries = 0;
7970 mutex_unlock(&tracing_err_log_lock);
7973 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7975 struct trace_array *tr = m->private;
7977 mutex_lock(&tracing_err_log_lock);
7979 return seq_list_start(&tr->err_log, *pos);
7982 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7984 struct trace_array *tr = m->private;
7986 return seq_list_next(v, &tr->err_log, pos);
7989 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7991 mutex_unlock(&tracing_err_log_lock);
7994 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7998 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8000 for (i = 0; i < pos; i++)
8005 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8007 struct tracing_log_err *err = v;
8010 const char *err_text = err->info.errs[err->info.type];
8011 u64 sec = err->info.ts;
8014 nsec = do_div(sec, NSEC_PER_SEC);
8015 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8016 err->loc, err_text);
8017 seq_printf(m, "%s", err->cmd);
8018 tracing_err_log_show_pos(m, err->info.pos);
8024 static const struct seq_operations tracing_err_log_seq_ops = {
8025 .start = tracing_err_log_seq_start,
8026 .next = tracing_err_log_seq_next,
8027 .stop = tracing_err_log_seq_stop,
8028 .show = tracing_err_log_seq_show
8031 static int tracing_err_log_open(struct inode *inode, struct file *file)
8033 struct trace_array *tr = inode->i_private;
8036 ret = tracing_check_open_get_tr(tr);
8040 /* If this file was opened for write, then erase contents */
8041 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8042 clear_tracing_err_log(tr);
8044 if (file->f_mode & FMODE_READ) {
8045 ret = seq_open(file, &tracing_err_log_seq_ops);
8047 struct seq_file *m = file->private_data;
8050 trace_array_put(tr);
8056 static ssize_t tracing_err_log_write(struct file *file,
8057 const char __user *buffer,
8058 size_t count, loff_t *ppos)
8063 static int tracing_err_log_release(struct inode *inode, struct file *file)
8065 struct trace_array *tr = inode->i_private;
8067 trace_array_put(tr);
8069 if (file->f_mode & FMODE_READ)
8070 seq_release(inode, file);
8075 static const struct file_operations tracing_err_log_fops = {
8076 .open = tracing_err_log_open,
8077 .write = tracing_err_log_write,
8079 .llseek = seq_lseek,
8080 .release = tracing_err_log_release,
8083 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8085 struct trace_array *tr = inode->i_private;
8086 struct ftrace_buffer_info *info;
8089 ret = tracing_check_open_get_tr(tr);
8093 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8095 trace_array_put(tr);
8099 mutex_lock(&trace_types_lock);
8102 info->iter.cpu_file = tracing_get_cpu(inode);
8103 info->iter.trace = tr->current_trace;
8104 info->iter.array_buffer = &tr->array_buffer;
8106 /* Force reading ring buffer for first read */
8107 info->read = (unsigned int)-1;
8109 filp->private_data = info;
8113 mutex_unlock(&trace_types_lock);
8115 ret = nonseekable_open(inode, filp);
8117 trace_array_put(tr);
8123 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8125 struct ftrace_buffer_info *info = filp->private_data;
8126 struct trace_iterator *iter = &info->iter;
8128 return trace_poll(iter, filp, poll_table);
8132 tracing_buffers_read(struct file *filp, char __user *ubuf,
8133 size_t count, loff_t *ppos)
8135 struct ftrace_buffer_info *info = filp->private_data;
8136 struct trace_iterator *iter = &info->iter;
8143 #ifdef CONFIG_TRACER_MAX_TRACE
8144 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8149 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8151 if (IS_ERR(info->spare)) {
8152 ret = PTR_ERR(info->spare);
8155 info->spare_cpu = iter->cpu_file;
8161 /* Do we have previous read data to read? */
8162 if (info->read < PAGE_SIZE)
8166 trace_access_lock(iter->cpu_file);
8167 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8171 trace_access_unlock(iter->cpu_file);
8174 if (trace_empty(iter)) {
8175 if ((filp->f_flags & O_NONBLOCK))
8178 ret = wait_on_pipe(iter, 0);
8189 size = PAGE_SIZE - info->read;
8193 ret = copy_to_user(ubuf, info->spare + info->read, size);
8205 static int tracing_buffers_release(struct inode *inode, struct file *file)
8207 struct ftrace_buffer_info *info = file->private_data;
8208 struct trace_iterator *iter = &info->iter;
8210 mutex_lock(&trace_types_lock);
8212 iter->tr->trace_ref--;
8214 __trace_array_put(iter->tr);
8217 /* Make sure the waiters see the new wait_index */
8220 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8223 ring_buffer_free_read_page(iter->array_buffer->buffer,
8224 info->spare_cpu, info->spare);
8227 mutex_unlock(&trace_types_lock);
8233 struct trace_buffer *buffer;
8236 refcount_t refcount;
8239 static void buffer_ref_release(struct buffer_ref *ref)
8241 if (!refcount_dec_and_test(&ref->refcount))
8243 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8247 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8248 struct pipe_buffer *buf)
8250 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8252 buffer_ref_release(ref);
8256 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8257 struct pipe_buffer *buf)
8259 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8261 if (refcount_read(&ref->refcount) > INT_MAX/2)
8264 refcount_inc(&ref->refcount);
8268 /* Pipe buffer operations for a buffer. */
8269 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8270 .release = buffer_pipe_buf_release,
8271 .get = buffer_pipe_buf_get,
8275 * Callback from splice_to_pipe(), used to release the pages held in
8276 * the spd in case we errored out while filling the pipe.
8278 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8280 struct buffer_ref *ref =
8281 (struct buffer_ref *)spd->partial[i].private;
8283 buffer_ref_release(ref);
8284 spd->partial[i].private = 0;
8288 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8289 struct pipe_inode_info *pipe, size_t len,
8292 struct ftrace_buffer_info *info = file->private_data;
8293 struct trace_iterator *iter = &info->iter;
8294 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8295 struct page *pages_def[PIPE_DEF_BUFFERS];
8296 struct splice_pipe_desc spd = {
8298 .partial = partial_def,
8299 .nr_pages_max = PIPE_DEF_BUFFERS,
8300 .ops = &buffer_pipe_buf_ops,
8301 .spd_release = buffer_spd_release,
8303 struct buffer_ref *ref;
8307 #ifdef CONFIG_TRACER_MAX_TRACE
8308 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8312 if (*ppos & (PAGE_SIZE - 1))
8315 if (len & (PAGE_SIZE - 1)) {
8316 if (len < PAGE_SIZE)
8321 if (splice_grow_spd(pipe, &spd))
8325 trace_access_lock(iter->cpu_file);
8326 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8328 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8332 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8338 refcount_set(&ref->refcount, 1);
8339 ref->buffer = iter->array_buffer->buffer;
8340 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8341 if (IS_ERR(ref->page)) {
8342 ret = PTR_ERR(ref->page);
8347 ref->cpu = iter->cpu_file;
8349 r = ring_buffer_read_page(ref->buffer, &ref->page,
8350 len, iter->cpu_file, 1);
8352 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8358 page = virt_to_page(ref->page);
8360 spd.pages[i] = page;
8361 spd.partial[i].len = PAGE_SIZE;
8362 spd.partial[i].offset = 0;
8363 spd.partial[i].private = (unsigned long)ref;
8367 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8370 trace_access_unlock(iter->cpu_file);
8373 /* did we read anything? */
8374 if (!spd.nr_pages) {
8381 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8384 wait_index = READ_ONCE(iter->wait_index);
8386 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8390 /* No need to wait after waking up when tracing is off */
8391 if (!tracer_tracing_is_on(iter->tr))
8394 /* Make sure we see the new wait_index */
8396 if (wait_index != iter->wait_index)
8402 ret = splice_to_pipe(pipe, &spd);
8404 splice_shrink_spd(&spd);
8409 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8410 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8412 struct ftrace_buffer_info *info = file->private_data;
8413 struct trace_iterator *iter = &info->iter;
8416 return -ENOIOCTLCMD;
8418 mutex_lock(&trace_types_lock);
8421 /* Make sure the waiters see the new wait_index */
8424 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8426 mutex_unlock(&trace_types_lock);
8430 static const struct file_operations tracing_buffers_fops = {
8431 .open = tracing_buffers_open,
8432 .read = tracing_buffers_read,
8433 .poll = tracing_buffers_poll,
8434 .release = tracing_buffers_release,
8435 .splice_read = tracing_buffers_splice_read,
8436 .unlocked_ioctl = tracing_buffers_ioctl,
8437 .llseek = no_llseek,
8441 tracing_stats_read(struct file *filp, char __user *ubuf,
8442 size_t count, loff_t *ppos)
8444 struct inode *inode = file_inode(filp);
8445 struct trace_array *tr = inode->i_private;
8446 struct array_buffer *trace_buf = &tr->array_buffer;
8447 int cpu = tracing_get_cpu(inode);
8448 struct trace_seq *s;
8450 unsigned long long t;
8451 unsigned long usec_rem;
8453 s = kmalloc(sizeof(*s), GFP_KERNEL);
8459 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8460 trace_seq_printf(s, "entries: %ld\n", cnt);
8462 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8463 trace_seq_printf(s, "overrun: %ld\n", cnt);
8465 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8466 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8468 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8469 trace_seq_printf(s, "bytes: %ld\n", cnt);
8471 if (trace_clocks[tr->clock_id].in_ns) {
8472 /* local or global for trace_clock */
8473 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8474 usec_rem = do_div(t, USEC_PER_SEC);
8475 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8478 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8479 usec_rem = do_div(t, USEC_PER_SEC);
8480 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8482 /* counter or tsc mode for trace_clock */
8483 trace_seq_printf(s, "oldest event ts: %llu\n",
8484 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8486 trace_seq_printf(s, "now ts: %llu\n",
8487 ring_buffer_time_stamp(trace_buf->buffer));
8490 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8491 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8493 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8494 trace_seq_printf(s, "read events: %ld\n", cnt);
8496 count = simple_read_from_buffer(ubuf, count, ppos,
8497 s->buffer, trace_seq_used(s));
8504 static const struct file_operations tracing_stats_fops = {
8505 .open = tracing_open_generic_tr,
8506 .read = tracing_stats_read,
8507 .llseek = generic_file_llseek,
8508 .release = tracing_release_generic_tr,
8511 #ifdef CONFIG_DYNAMIC_FTRACE
8514 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8515 size_t cnt, loff_t *ppos)
8521 /* 256 should be plenty to hold the amount needed */
8522 buf = kmalloc(256, GFP_KERNEL);
8526 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8527 ftrace_update_tot_cnt,
8528 ftrace_number_of_pages,
8529 ftrace_number_of_groups);
8531 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8536 static const struct file_operations tracing_dyn_info_fops = {
8537 .open = tracing_open_generic,
8538 .read = tracing_read_dyn_info,
8539 .llseek = generic_file_llseek,
8541 #endif /* CONFIG_DYNAMIC_FTRACE */
8543 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8545 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8546 struct trace_array *tr, struct ftrace_probe_ops *ops,
8549 tracing_snapshot_instance(tr);
8553 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8554 struct trace_array *tr, struct ftrace_probe_ops *ops,
8557 struct ftrace_func_mapper *mapper = data;
8561 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8571 tracing_snapshot_instance(tr);
8575 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8576 struct ftrace_probe_ops *ops, void *data)
8578 struct ftrace_func_mapper *mapper = data;
8581 seq_printf(m, "%ps:", (void *)ip);
8583 seq_puts(m, "snapshot");
8586 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8589 seq_printf(m, ":count=%ld\n", *count);
8591 seq_puts(m, ":unlimited\n");
8597 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8598 unsigned long ip, void *init_data, void **data)
8600 struct ftrace_func_mapper *mapper = *data;
8603 mapper = allocate_ftrace_func_mapper();
8609 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8613 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8614 unsigned long ip, void *data)
8616 struct ftrace_func_mapper *mapper = data;
8621 free_ftrace_func_mapper(mapper, NULL);
8625 ftrace_func_mapper_remove_ip(mapper, ip);
8628 static struct ftrace_probe_ops snapshot_probe_ops = {
8629 .func = ftrace_snapshot,
8630 .print = ftrace_snapshot_print,
8633 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8634 .func = ftrace_count_snapshot,
8635 .print = ftrace_snapshot_print,
8636 .init = ftrace_snapshot_init,
8637 .free = ftrace_snapshot_free,
8641 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8642 char *glob, char *cmd, char *param, int enable)
8644 struct ftrace_probe_ops *ops;
8645 void *count = (void *)-1;
8652 /* hash funcs only work with set_ftrace_filter */
8656 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8659 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8664 number = strsep(¶m, ":");
8666 if (!strlen(number))
8670 * We use the callback data field (which is a pointer)
8673 ret = kstrtoul(number, 0, (unsigned long *)&count);
8678 ret = tracing_alloc_snapshot_instance(tr);
8682 ret = register_ftrace_function_probe(glob, tr, ops, count);
8685 return ret < 0 ? ret : 0;
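/*
 * Illustrative usage of the resulting "snapshot" command (the traced
 * function name is made up): take a snapshot whenever the function is
 * hit, optionally limited to a count, and remove the probe with '!':
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' >> set_ftrace_filter
 *	echo '!do_sys_open:snapshot' >> set_ftrace_filter
 */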
8688 static struct ftrace_func_command ftrace_snapshot_cmd = {
8690 .func = ftrace_trace_snapshot_callback,
8693 static __init int register_snapshot_cmd(void)
8695 return register_ftrace_command(&ftrace_snapshot_cmd);
8698 static inline __init int register_snapshot_cmd(void) { return 0; }
8699 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8701 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8703 if (WARN_ON(!tr->dir))
8704 return ERR_PTR(-ENODEV);
8706 /* Top directory uses NULL as the parent */
8707 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8710 /* All sub buffers have a descriptor */
8714 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8716 struct dentry *d_tracer;
8719 return tr->percpu_dir;
8721 d_tracer = tracing_get_dentry(tr);
8722 if (IS_ERR(d_tracer))
8725 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8727 MEM_FAIL(!tr->percpu_dir,
8728 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8730 return tr->percpu_dir;
8733 static struct dentry *
8734 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8735 void *data, long cpu, const struct file_operations *fops)
8737 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8739 if (ret) /* See tracing_get_cpu() */
8740 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8745 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8747 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8748 struct dentry *d_cpu;
8749 char cpu_dir[30]; /* 30 characters should be more than enough */
8754 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8755 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8757 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8761 /* per cpu trace_pipe */
8762 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8763 tr, cpu, &tracing_pipe_fops);
8766 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8767 tr, cpu, &tracing_fops);
8769 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8770 tr, cpu, &tracing_buffers_fops);
8772 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8773 tr, cpu, &tracing_stats_fops);
8775 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8776 tr, cpu, &tracing_entries_fops);
8778 #ifdef CONFIG_TRACER_SNAPSHOT
8779 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8780 tr, cpu, &snapshot_fops);
8782 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8783 tr, cpu, &snapshot_raw_fops);
8787 #ifdef CONFIG_FTRACE_SELFTEST
8788 /* Let selftest have access to static functions in this file */
8789 #include "trace_selftest.c"
8793 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8796 struct trace_option_dentry *topt = filp->private_data;
8799 if (topt->flags->val & topt->opt->bit)
8804 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8808 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8811 struct trace_option_dentry *topt = filp->private_data;
8815 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8819 if (val != 0 && val != 1)
8822 if (!!(topt->flags->val & topt->opt->bit) != val) {
8823 mutex_lock(&trace_types_lock);
8824 ret = __set_tracer_option(topt->tr, topt->flags,
8826 mutex_unlock(&trace_types_lock);
8837 static const struct file_operations trace_options_fops = {
8838 .open = tracing_open_generic,
8839 .read = trace_options_read,
8840 .write = trace_options_write,
8841 .llseek = generic_file_llseek,
8845 * In order to pass in both the trace_array descriptor as well as the index
8846 * to the flag that the trace option file represents, the trace_array
8847 * has a character array of trace_flags_index[], which holds the index
8848 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8849 * The address of this character array is passed to the flag option file
8850 * read/write callbacks.
8852 * In order to extract both the index and the trace_array descriptor,
8853 * get_tr_index() uses the following algorithm.
8857 * As the pointer itself contains the address of the index (remember
8860 * Then to get the trace_array descriptor, by subtracting that index
8861 * from the ptr, we get to the start of the index itself.
8863 * ptr - idx == &index[0]
8865 * Then a simple container_of() from that pointer gets us to the
8866 * trace_array descriptor.
8868 static void get_tr_index(void *data, struct trace_array **ptr,
8869 unsigned int *pindex)
8871 *pindex = *(unsigned char *)data;
8873 *ptr = container_of(data - *pindex, struct trace_array,
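/*
 * Worked example of the scheme above: if data points at
 * tr->trace_flags_index[3], then *pindex == 3 and
 * data - 3 == &tr->trace_flags_index[0], so the container_of() above
 * recovers the enclosing trace_array.
 */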
8878 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8881 void *tr_index = filp->private_data;
8882 struct trace_array *tr;
8886 get_tr_index(tr_index, &tr, &index);
8888 if (tr->trace_flags & (1 << index))
8893 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8897 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8900 void *tr_index = filp->private_data;
8901 struct trace_array *tr;
8906 get_tr_index(tr_index, &tr, &index);
8908 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8912 if (val != 0 && val != 1)
8915 mutex_lock(&event_mutex);
8916 mutex_lock(&trace_types_lock);
8917 ret = set_tracer_flag(tr, 1 << index, val);
8918 mutex_unlock(&trace_types_lock);
8919 mutex_unlock(&event_mutex);
8929 static const struct file_operations trace_options_core_fops = {
8930 .open = tracing_open_generic,
8931 .read = trace_options_core_read,
8932 .write = trace_options_core_write,
8933 .llseek = generic_file_llseek,
8936 struct dentry *trace_create_file(const char *name,
8938 struct dentry *parent,
8940 const struct file_operations *fops)
8944 ret = tracefs_create_file(name, mode, parent, data, fops);
8946 pr_warn("Could not create tracefs '%s' entry\n", name);
8952 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8954 struct dentry *d_tracer;
8959 d_tracer = tracing_get_dentry(tr);
8960 if (IS_ERR(d_tracer))
8963 tr->options = tracefs_create_dir("options", d_tracer);
8965 pr_warn("Could not create tracefs directory 'options'\n");
8973 create_trace_option_file(struct trace_array *tr,
8974 struct trace_option_dentry *topt,
8975 struct tracer_flags *flags,
8976 struct tracer_opt *opt)
8978 struct dentry *t_options;
8980 t_options = trace_options_init_dentry(tr);
8984 topt->flags = flags;
8988 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8989 t_options, topt, &trace_options_fops);
8994 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8996 struct trace_option_dentry *topts;
8997 struct trace_options *tr_topts;
8998 struct tracer_flags *flags;
8999 struct tracer_opt *opts;
9006 flags = tracer->flags;
9008 if (!flags || !flags->opts)
9012 * If this is an instance, only create flags for tracers
9013 * the instance may have.
9015 if (!trace_ok_for_array(tracer, tr))
9018 for (i = 0; i < tr->nr_topts; i++) {
9019 /* Make sure there are no duplicate flags. */
9020 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9026 for (cnt = 0; opts[cnt].name; cnt++)
9029 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9033 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9040 tr->topts = tr_topts;
9041 tr->topts[tr->nr_topts].tracer = tracer;
9042 tr->topts[tr->nr_topts].topts = topts;
9045 for (cnt = 0; opts[cnt].name; cnt++) {
9046 create_trace_option_file(tr, &topts[cnt], flags,
9048 MEM_FAIL(topts[cnt].entry == NULL,
9049 "Failed to create trace option: %s",
9054 static struct dentry *
9055 create_trace_option_core_file(struct trace_array *tr,
9056 const char *option, long index)
9058 struct dentry *t_options;
9060 t_options = trace_options_init_dentry(tr);
9064 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9065 (void *)&tr->trace_flags_index[index],
9066 &trace_options_core_fops);
9069 static void create_trace_options_dir(struct trace_array *tr)
9071 struct dentry *t_options;
9072 bool top_level = tr == &global_trace;
9075 t_options = trace_options_init_dentry(tr);
9079 for (i = 0; trace_options[i]; i++) {
9081 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9082 create_trace_option_core_file(tr, trace_options[i], i);
9087 rb_simple_read(struct file *filp, char __user *ubuf,
9088 size_t cnt, loff_t *ppos)
9090 struct trace_array *tr = filp->private_data;
9094 r = tracer_tracing_is_on(tr);
9095 r = sprintf(buf, "%d\n", r);
9097 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9101 rb_simple_write(struct file *filp, const char __user *ubuf,
9102 size_t cnt, loff_t *ppos)
9104 struct trace_array *tr = filp->private_data;
9105 struct trace_buffer *buffer = tr->array_buffer.buffer;
9109 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9114 mutex_lock(&trace_types_lock);
9115 if (!!val == tracer_tracing_is_on(tr)) {
9116 val = 0; /* do nothing */
9118 tracer_tracing_on(tr);
9119 if (tr->current_trace->start)
9120 tr->current_trace->start(tr);
9122 tracer_tracing_off(tr);
9123 if (tr->current_trace->stop)
9124 tr->current_trace->stop(tr);
9125 /* Wake up any waiters */
9126 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9128 mutex_unlock(&trace_types_lock);
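/*
 * Illustrative usage: tracing_on gates recording into the ring buffer
 * without tearing the current tracer down:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on	# pause recording
 *	echo 1 > /sys/kernel/tracing/tracing_on	# resume recording
 */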
9136 static const struct file_operations rb_simple_fops = {
9137 .open = tracing_open_generic_tr,
9138 .read = rb_simple_read,
9139 .write = rb_simple_write,
9140 .release = tracing_release_generic_tr,
9141 .llseek = default_llseek,
9145 buffer_percent_read(struct file *filp, char __user *ubuf,
9146 size_t cnt, loff_t *ppos)
9148 struct trace_array *tr = filp->private_data;
9152 r = tr->buffer_percent;
9153 r = sprintf(buf, "%d\n", r);
9155 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9159 buffer_percent_write(struct file *filp, const char __user *ubuf,
9160 size_t cnt, loff_t *ppos)
9162 struct trace_array *tr = filp->private_data;
9166 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9176 tr->buffer_percent = val;
9183 static const struct file_operations buffer_percent_fops = {
9184 .open = tracing_open_generic_tr,
9185 .read = buffer_percent_read,
9186 .write = buffer_percent_write,
9187 .release = tracing_release_generic_tr,
9188 .llseek = default_llseek,
9191 static struct dentry *trace_instance_dir;
9194 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9197 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9199 enum ring_buffer_flags rb_flags;
9201 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9205 buf->buffer = ring_buffer_alloc(size, rb_flags);
9209 buf->data = alloc_percpu(struct trace_array_cpu);
9211 ring_buffer_free(buf->buffer);
9216 /* Allocate the first page for all buffers */
9217 set_buffer_entries(&tr->array_buffer,
9218 ring_buffer_size(tr->array_buffer.buffer, 0));
9223 static void free_trace_buffer(struct array_buffer *buf)
9226 ring_buffer_free(buf->buffer);
9228 free_percpu(buf->data);
9233 static int allocate_trace_buffers(struct trace_array *tr, int size)
9237 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9241 #ifdef CONFIG_TRACER_MAX_TRACE
9242 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9243 allocate_snapshot ? size : 1);
9244 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9245 free_trace_buffer(&tr->array_buffer);
9248 tr->allocated_snapshot = allocate_snapshot;
9251 * Only the top level trace array gets its snapshot allocated
9252 * from the kernel command line.
9254 allocate_snapshot = false;
9260 static void free_trace_buffers(struct trace_array *tr)
9265 free_trace_buffer(&tr->array_buffer);
9267 #ifdef CONFIG_TRACER_MAX_TRACE
9268 free_trace_buffer(&tr->max_buffer);
9272 static void init_trace_flags_index(struct trace_array *tr)
9276 /* Used by the trace options files */
9277 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9278 tr->trace_flags_index[i] = i;
9281 static void __update_tracer_options(struct trace_array *tr)
9285 for (t = trace_types; t; t = t->next)
9286 add_tracer_options(tr, t);
9289 static void update_tracer_options(struct trace_array *tr)
9291 mutex_lock(&trace_types_lock);
9292 tracer_options_updated = true;
9293 __update_tracer_options(tr);
9294 mutex_unlock(&trace_types_lock);
9297 /* Must have trace_types_lock held */
9298 struct trace_array *trace_array_find(const char *instance)
9300 struct trace_array *tr, *found = NULL;
9302 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9303 if (tr->name && strcmp(tr->name, instance) == 0) {
9312 struct trace_array *trace_array_find_get(const char *instance)
9314 struct trace_array *tr;
9316 mutex_lock(&trace_types_lock);
9317 tr = trace_array_find(instance);
9320 mutex_unlock(&trace_types_lock);
9325 static int trace_array_create_dir(struct trace_array *tr)
9329 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9333 ret = event_trace_add_tracer(tr->dir, tr);
9335 tracefs_remove(tr->dir);
9339 init_tracer_tracefs(tr, tr->dir);
9340 __update_tracer_options(tr);
9345 static struct trace_array *trace_array_create(const char *name)
9347 struct trace_array *tr;
9351 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9353 return ERR_PTR(ret);
9355 tr->name = kstrdup(name, GFP_KERNEL);
9359 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9362 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9364 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9366 raw_spin_lock_init(&tr->start_lock);
9368 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9370 tr->current_trace = &nop_trace;
9372 INIT_LIST_HEAD(&tr->systems);
9373 INIT_LIST_HEAD(&tr->events);
9374 INIT_LIST_HEAD(&tr->hist_vars);
9375 INIT_LIST_HEAD(&tr->err_log);
9377 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9380 if (ftrace_allocate_ftrace_ops(tr) < 0)
9383 ftrace_init_trace_array(tr);
9385 init_trace_flags_index(tr);
9387 if (trace_instance_dir) {
9388 ret = trace_array_create_dir(tr);
9392 __trace_early_add_events(tr);
9394 list_add(&tr->list, &ftrace_trace_arrays);
9401 ftrace_free_ftrace_ops(tr);
9402 free_trace_buffers(tr);
9403 free_cpumask_var(tr->tracing_cpumask);
9407 return ERR_PTR(ret);
9410 static int instance_mkdir(const char *name)
9412 struct trace_array *tr;
9415 mutex_lock(&event_mutex);
9416 mutex_lock(&trace_types_lock);
9419 if (trace_array_find(name))
9422 tr = trace_array_create(name);
9424 ret = PTR_ERR_OR_ZERO(tr);
9427 mutex_unlock(&trace_types_lock);
9428 mutex_unlock(&event_mutex);
9433 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9434 * @name: The name of the trace array to be looked up/created.
9436 * Returns pointer to trace array with given name.
9437 * NULL, if it cannot be created.
9439 * NOTE: This function increments the reference counter associated with the
9440 * trace array returned. This makes sure it cannot be freed while in use.
9441 * Use trace_array_put() once the trace array is no longer needed.
9442 * If the trace_array is to be freed, trace_array_destroy() needs to
9443 * be called after the trace_array_put(), or simply let user space delete
9444 * it from the tracefs instances directory. But until the
9445 * trace_array_put() is called, user space can not delete it.
9448 struct trace_array *trace_array_get_by_name(const char *name)
9450 struct trace_array *tr;
9452 mutex_lock(&event_mutex);
9453 mutex_lock(&trace_types_lock);
9455 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9456 if (tr->name && strcmp(tr->name, name) == 0)
9460 tr = trace_array_create(name);
9468 mutex_unlock(&trace_types_lock);
9469 mutex_unlock(&event_mutex);
9472 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9474 static int __remove_instance(struct trace_array *tr)
9478 /* Reference counter for a newly created trace array = 1. */
9479 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9482 list_del(&tr->list);
9484 /* Disable all the flags that were enabled coming in */
9485 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9486 if ((1 << i) & ZEROED_TRACE_FLAGS)
9487 set_tracer_flag(tr, 1 << i, 0);
9490 tracing_set_nop(tr);
9491 clear_ftrace_function_probes(tr);
9492 event_trace_del_tracer(tr);
9493 ftrace_clear_pids(tr);
9494 ftrace_destroy_function_files(tr);
9495 tracefs_remove(tr->dir);
9496 free_percpu(tr->last_func_repeats);
9497 free_trace_buffers(tr);
9499 for (i = 0; i < tr->nr_topts; i++) {
9500 kfree(tr->topts[i].topts);
9504 free_cpumask_var(tr->tracing_cpumask);
9511 int trace_array_destroy(struct trace_array *this_tr)
9513 struct trace_array *tr;
9519 mutex_lock(&event_mutex);
9520 mutex_lock(&trace_types_lock);
9524 /* Make sure the trace array exists before destroying it. */
9525 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9526 if (tr == this_tr) {
9527 ret = __remove_instance(tr);
9532 mutex_unlock(&trace_types_lock);
9533 mutex_unlock(&event_mutex);
9537 EXPORT_SYMBOL_GPL(trace_array_destroy);
9539 static int instance_rmdir(const char *name)
9541 struct trace_array *tr;
9544 mutex_lock(&event_mutex);
9545 mutex_lock(&trace_types_lock);
9548 tr = trace_array_find(name);
9550 ret = __remove_instance(tr);
9552 mutex_unlock(&trace_types_lock);
9553 mutex_unlock(&event_mutex);
9558 static __init void create_trace_instances(struct dentry *d_tracer)
9560 struct trace_array *tr;
9562 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9565 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9568 mutex_lock(&event_mutex);
9569 mutex_lock(&trace_types_lock);
9571 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9574 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9575 "Failed to create instance directory\n"))
9579 mutex_unlock(&trace_types_lock);
9580 mutex_unlock(&event_mutex);
9584 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9586 struct trace_event_file *file;
9589 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9590 tr, &show_traces_fops);
9592 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9593 tr, &set_tracer_fops);
9595 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9596 tr, &tracing_cpumask_fops);
9598 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9599 tr, &tracing_iter_fops);
9601 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9604 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9605 tr, &tracing_pipe_fops);
9607 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9608 tr, &tracing_entries_fops);
9610 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9611 tr, &tracing_total_entries_fops);
9613 trace_create_file("free_buffer", 0200, d_tracer,
9614 tr, &tracing_free_buffer_fops);
9616 trace_create_file("trace_marker", 0220, d_tracer,
9617 tr, &tracing_mark_fops);
9619 file = __find_event_file(tr, "ftrace", "print");
9620 if (file && file->dir)
9621 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9622 file, &event_trigger_fops);
9623 tr->trace_marker_file = file;
9625 trace_create_file("trace_marker_raw", 0220, d_tracer,
9626 tr, &tracing_mark_raw_fops);
9628 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9631 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9632 tr, &rb_simple_fops);
9634 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9635 &trace_time_stamp_mode_fops);
9637 tr->buffer_percent = 50;
9639 trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9640 tr, &buffer_percent_fops);
9642 create_trace_options_dir(tr);
9644 #ifdef CONFIG_TRACER_MAX_TRACE
9645 trace_create_maxlat_file(tr, d_tracer);
9648 if (ftrace_create_function_files(tr, d_tracer))
9649 MEM_FAIL(1, "Could not allocate function filter files");
9651 #ifdef CONFIG_TRACER_SNAPSHOT
9652 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9653 tr, &snapshot_fops);
9656 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9657 tr, &tracing_err_log_fops);
9659 for_each_tracing_cpu(cpu)
9660 tracing_init_tracefs_percpu(tr, cpu);
9662 ftrace_init_tracefs(tr, d_tracer);
9665 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9667 struct vfsmount *mnt;
9668 struct file_system_type *type;
9671 * To maintain backward compatibility for tools that mount
9672 * debugfs to get to the tracing facility, tracefs is automatically
9673 * mounted to the debugfs/tracing directory.
9675 type = get_fs_type("tracefs");
9678 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9679 put_filesystem(type);
9688 * tracing_init_dentry - initialize top level trace array
9690 * This is called when creating files or directories in the tracing
9691 * directory. It is called via fs_initcall() by any of the boot up code
9692 * and expects to return the dentry of the top level tracing directory.
9694 int tracing_init_dentry(void)
9696 struct trace_array *tr = &global_trace;
9698 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9699 pr_warn("Tracing disabled due to lockdown\n");
9703 /* The top level trace array uses NULL as parent */
9707 if (WARN_ON(!tracefs_initialized()))
9711 * As there may still be users that expect the tracing
9712 * files to exist in debugfs/tracing, we must automount
9713 * the tracefs file system there, so older tools still
9714 * work with the newer kernel.
9716 tr->dir = debugfs_create_automount("tracing", NULL,
9717 trace_automount, NULL);
9722 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9723 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9725 static struct workqueue_struct *eval_map_wq __initdata;
9726 static struct work_struct eval_map_work __initdata;
9727 static struct work_struct tracerfs_init_work __initdata;
9729 static void __init eval_map_work_func(struct work_struct *work)
9733 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9734 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9737 static int __init trace_eval_init(void)
9739 INIT_WORK(&eval_map_work, eval_map_work_func);
9741 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9743 pr_err("Unable to allocate eval_map_wq\n");
9745 eval_map_work_func(&eval_map_work);
9749 queue_work(eval_map_wq, &eval_map_work);
9753 subsys_initcall(trace_eval_init);
9755 static int __init trace_eval_sync(void)
9757 /* Make sure the eval map updates are finished */
9759 destroy_workqueue(eval_map_wq);
9763 late_initcall_sync(trace_eval_sync);
9766 #ifdef CONFIG_MODULES
9767 static void trace_module_add_evals(struct module *mod)
9769 if (!mod->num_trace_evals)
9773 * Modules with bad taint do not have events created; do
9774 * not bother with enums either.
9776 if (trace_module_has_bad_taint(mod))
9779 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9782 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9783 static void trace_module_remove_evals(struct module *mod)
9785 union trace_eval_map_item *map;
9786 union trace_eval_map_item **last = &trace_eval_maps;
9788 if (!mod->num_trace_evals)
9791 mutex_lock(&trace_eval_mutex);
9793 map = trace_eval_maps;
9796 if (map->head.mod == mod)
9798 map = trace_eval_jmp_to_tail(map);
9799 last = &map->tail.next;
9800 map = map->tail.next;
9805 *last = trace_eval_jmp_to_tail(map)->tail.next;
9808 mutex_unlock(&trace_eval_mutex);
9811 static inline void trace_module_remove_evals(struct module *mod) { }
9812 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9814 static int trace_module_notify(struct notifier_block *self,
9815 unsigned long val, void *data)
9817 struct module *mod = data;
9820 case MODULE_STATE_COMING:
9821 trace_module_add_evals(mod);
9823 case MODULE_STATE_GOING:
9824 trace_module_remove_evals(mod);
9831 static struct notifier_block trace_module_nb = {
9832 .notifier_call = trace_module_notify,
9835 #endif /* CONFIG_MODULES */
9837 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9842 init_tracer_tracefs(&global_trace, NULL);
9843 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9845 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9846 &global_trace, &tracing_thresh_fops);
9848 trace_create_file("README", TRACE_MODE_READ, NULL,
9849 NULL, &tracing_readme_fops);
9851 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9852 NULL, &tracing_saved_cmdlines_fops);
9854 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9855 NULL, &tracing_saved_cmdlines_size_fops);
9857 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9858 NULL, &tracing_saved_tgids_fops);
9860 trace_create_eval_file(NULL);
9862 #ifdef CONFIG_MODULES
9863 register_module_notifier(&trace_module_nb);
9866 #ifdef CONFIG_DYNAMIC_FTRACE
9867 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9868 NULL, &tracing_dyn_info_fops);
9871 create_trace_instances(NULL);
9873 update_tracer_options(&global_trace);
9876 static __init int tracer_init_tracefs(void)
9880 trace_access_lock_init();
9882 ret = tracing_init_dentry();
9887 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9888 queue_work(eval_map_wq, &tracerfs_init_work);
9890 tracer_init_tracefs_work_func(NULL);
9893 rv_init_interface();
9898 fs_initcall(tracer_init_tracefs);
9900 static int trace_die_panic_handler(struct notifier_block *self,
9901 unsigned long ev, void *unused);
9903 static struct notifier_block trace_panic_notifier = {
9904 .notifier_call = trace_die_panic_handler,
9905 .priority = INT_MAX - 1,
9908 static struct notifier_block trace_die_notifier = {
9909 .notifier_call = trace_die_panic_handler,
9910 .priority = INT_MAX - 1,
9914 * The idea is to execute the following die/panic callback early, in order
9915 * to avoid showing irrelevant information in the trace (like other panic
9916 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9917 * warnings get disabled (to prevent potential log flooding).
9919 static int trace_die_panic_handler(struct notifier_block *self,
9920 unsigned long ev, void *unused)
9922 if (!ftrace_dump_on_oops)
9925 /* The die notifier requires DIE_OOPS to trigger */
9926 if (self == &trace_die_notifier && ev != DIE_OOPS)
9929 ftrace_dump(ftrace_dump_on_oops);
9935 * printk is set to a max of 1024, but we really don't need it that big.
9936 * Nothing should be printing 1000 characters anyway.
9938 #define TRACE_MAX_PRINT 1000
9941 * Define here KERN_TRACE so that we have one place to modify
9942 * it if we decide to change what log level the ftrace dump
9945 #define KERN_TRACE KERN_EMERG
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoia: although the buffer size is set to PAGE_SIZE and
	 * TRACE_MAX_PRINT is 1000, this is just an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* Should already be NUL-terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate the start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Cannot use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did the function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * Tracing must be stopped on all CPUs while the buffers are read.
	 * This is a bit expensive, but it is not done often: we read
	 * everything we can and then release again.
	 */
	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
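
/*
 * Illustrative sketch (editorial, not part of the original source): since
 * ftrace_dump() is exported, a (GPL) module can dump the trace buffers from
 * its own fatal-error path. The function and dump modes are real; the
 * surrounding driver code is hypothetical. Kept under #if 0 so it is never
 * built.
 */
#if 0
static void my_driver_fatal_error(void)
{
	/* Dump every CPU's buffer; DUMP_ORIG would dump only this CPU. */
	ftrace_dump(DUMP_ALL);
}
#endif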
#define WRITE_BUFSIZE		4096
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');
			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;
		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);
	return ret;
}
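
/*
 * Illustrative sketch (editorial, not part of the original source): a
 * tracefs ->write() handler built on trace_parse_run_command() only needs
 * to supply the per-line callback; the helper takes care of chunking,
 * newline splitting and '#' comment stripping. The names my_create_cmd()
 * and my_cmd_write() below are hypothetical. Kept under #if 0 so it is
 * never built.
 */
#if 0
static int my_create_cmd(const char *raw_command)
{
	/* Called once per trimmed, comment-stripped line. */
	pr_info("got command: %s\n", raw_command);
	return 0;
}

static ssize_t my_cmd_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       my_create_cmd);
}
#endif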
__init static void enable_instances(void)
{
	struct trace_array *tr;
	char *curr_str, *str, *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {

		tok = strsep(&curr_str, ",");

		tr = trace_array_get_by_name(tok);
		if (!tr) {
			pr_warn("Failed to create instance buffer %s\n", curr_str);
			continue;
		}
		/* Allow user space to delete it */
		trace_array_put(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}
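
/*
 * Illustrative note (editorial, not part of the original source): each
 * "trace_instance=" kernel command-line option appends one tab-separated
 * entry to boot_instance_info; the first comma-separated token is the
 * instance name and any remaining tokens are events to enable, e.g.:
 *
 *	trace_instance=foo,sched:sched_switch,sched:sched_wakeup
 */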
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
void __init ftrace_boot_snapshot(void)
{
	if (snapshot_at_boot) {
		tracing_snapshot();
		internal_trace_puts("** Boot snapshot taken **\n");
	}
}
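
/*
 * Illustrative note (editorial, not part of the original source):
 * snapshot_at_boot is set by the "ftrace_boot_snapshot" kernel
 * command-line option, so a snapshot of the boot-time trace can be
 * taken automatically once the system has finished booting.
 */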
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
}
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
__init static void clear_boot_tracer(void)
{
	/*
	 * The default boot-up tracer name sits in an init section and will
	 * be freed after boot. This function is called at late_initcall
	 * time; if the boot tracer was never registered, clear it out so
	 * that a later registration does not access the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);