1 // SPDX-License-Identifier: GPL-2.0
3 * ring buffer based function tracer
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
53 #include "trace_output.h"
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
59 bool ring_buffer_expanded;
62 * We need to change this state when a selftest is running.
63 * A selftest will lurk into the ring-buffer to count the
64 * entries inserted during the selftest although some concurrent
65 * insertions into the ring-buffer such as trace_printk could occur
66 * at the same time, giving false positive or negative results.
68 static bool __read_mostly tracing_selftest_running;
71 * If a tracer is running, we do not want to run SELFTEST.
73 bool __read_mostly tracing_selftest_disabled;
75 /* Pipe tracepoints to printk */
76 struct trace_iterator *tracepoint_print_iter;
77 int tracepoint_printk;
78 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
80 /* For tracers that don't implement custom flags */
81 static struct tracer_opt dummy_tracer_opt[] = {
86 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
92 * To prevent the comm cache from being overwritten when no
93 * tracing is active, only save the comm when a trace event
96 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
99 * Kill all tracing for good (never come back).
100 * It is initialized to 1 but will turn to zero if the initialization
101 * of the tracer is successful. But that is the only place that sets
104 static int tracing_disabled = 1;
106 cpumask_var_t __read_mostly tracing_buffer_mask;
109 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
111 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
112 * is set, then ftrace_dump is called. This will output the contents
113 * of the ftrace buffers to the console. This is very useful for
114 * capturing traces that lead to crashes and outputting them to a
117 * It is default off, but you can enable it with either specifying
118 * "ftrace_dump_on_oops" in the kernel command line, or setting
119 * /proc/sys/kernel/ftrace_dump_on_oops
120 * Set 1 if you want to dump buffers of all CPUs
121 * Set 2 if you want to dump the buffer of the CPU that triggered oops
124 enum ftrace_dump_mode ftrace_dump_on_oops;
126 /* When set, tracing will stop when a WARN*() is hit */
127 int __disable_trace_on_warning;
129 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
130 /* Map of enums to their values, for "eval_map" file */
131 struct trace_eval_map_head {
133 unsigned long length;
136 union trace_eval_map_item;
138 struct trace_eval_map_tail {
140 * "end" is first and points to NULL as it must be different
141 * than "mod" or "eval_string"
143 union trace_eval_map_item *next;
144 const char *end; /* points to NULL */
147 static DEFINE_MUTEX(trace_eval_mutex);
150 * The trace_eval_maps are saved in an array with two extra elements,
151 * one at the beginning, and one at the end. The beginning item contains
152 * the count of the saved maps (head.length), and the module they
153 * belong to if not built in (head.mod). The ending item contains a
154 * pointer to the next array of saved eval_map items.
156 union trace_eval_map_item {
157 struct trace_eval_map map;
158 struct trace_eval_map_head head;
159 struct trace_eval_map_tail tail;
162 static union trace_eval_map_item *trace_eval_maps;
163 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
165 int tracing_set_tracer(struct trace_array *tr, const char *buf);
166 static void ftrace_trace_userstack(struct trace_buffer *buffer,
167 unsigned long flags, int pc);
169 #define MAX_TRACER_SIZE 100
170 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
171 static char *default_bootup_tracer;
173 static bool allocate_snapshot;
175 static int __init set_cmdline_ftrace(char *str)
177 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
178 default_bootup_tracer = bootup_tracer_buf;
179 /* We are using ftrace early, expand it */
180 ring_buffer_expanded = true;
183 __setup("ftrace=", set_cmdline_ftrace);
185 static int __init set_ftrace_dump_on_oops(char *str)
187 if (*str++ != '=' || !*str) {
188 ftrace_dump_on_oops = DUMP_ALL;
192 if (!strcmp("orig_cpu", str)) {
193 ftrace_dump_on_oops = DUMP_ORIG;
199 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
201 static int __init stop_trace_on_warning(char *str)
203 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
204 __disable_trace_on_warning = 1;
207 __setup("traceoff_on_warning", stop_trace_on_warning);
209 static int __init boot_alloc_snapshot(char *str)
211 allocate_snapshot = true;
212 /* We also need the main ring buffer expanded */
213 ring_buffer_expanded = true;
216 __setup("alloc_snapshot", boot_alloc_snapshot);
219 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
221 static int __init set_trace_boot_options(char *str)
223 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
226 __setup("trace_options=", set_trace_boot_options);
228 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
229 static char *trace_boot_clock __initdata;
231 static int __init set_trace_boot_clock(char *str)
233 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
234 trace_boot_clock = trace_boot_clock_buf;
237 __setup("trace_clock=", set_trace_boot_clock);
239 static int __init set_tracepoint_printk(char *str)
241 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
242 tracepoint_printk = 1;
245 __setup("tp_printk", set_tracepoint_printk);
247 unsigned long long ns2usecs(u64 nsec)
254 /* trace_flags holds trace_options default values */
255 #define TRACE_DEFAULT_FLAGS \
256 (FUNCTION_DEFAULT_FLAGS | \
257 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
259 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
260 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
262 /* trace_options that are only supported by global_trace */
263 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
264 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
266 /* trace_flags that are default zero for instances */
267 #define ZEROED_TRACE_FLAGS \
268 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
271 * The global_trace is the descriptor that holds the top-level tracing
272 * buffers for the live tracing.
274 static struct trace_array global_trace = {
275 .trace_flags = TRACE_DEFAULT_FLAGS,
278 LIST_HEAD(ftrace_trace_arrays);
280 int trace_array_get(struct trace_array *this_tr)
282 struct trace_array *tr;
285 mutex_lock(&trace_types_lock);
286 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
293 mutex_unlock(&trace_types_lock);
298 static void __trace_array_put(struct trace_array *this_tr)
300 WARN_ON(!this_tr->ref);
305 * trace_array_put - Decrement the reference counter for this trace array.
307 * NOTE: Use this when we no longer need the trace array returned by
308 * trace_array_get_by_name(). This ensures the trace array can be later
312 void trace_array_put(struct trace_array *this_tr)
317 mutex_lock(&trace_types_lock);
318 __trace_array_put(this_tr);
319 mutex_unlock(&trace_types_lock);
321 EXPORT_SYMBOL_GPL(trace_array_put);
323 int tracing_check_open_get_tr(struct trace_array *tr)
327 ret = security_locked_down(LOCKDOWN_TRACEFS);
331 if (tracing_disabled)
334 if (tr && trace_array_get(tr) < 0)
340 int call_filter_check_discard(struct trace_event_call *call, void *rec,
341 struct trace_buffer *buffer,
342 struct ring_buffer_event *event)
344 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
345 !filter_match_preds(call->filter, rec)) {
346 __trace_event_discard_commit(buffer, event);
353 void trace_free_pid_list(struct trace_pid_list *pid_list)
355 vfree(pid_list->pids);
360 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
361 * @filtered_pids: The list of pids to check
362 * @search_pid: The PID to find in @filtered_pids
364 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
367 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
370 * If pid_max changed after filtered_pids was created, we
371 * by default ignore all pids greater than the previous pid_max.
373 if (search_pid >= filtered_pids->pid_max)
376 return test_bit(search_pid, filtered_pids->pids);
380 * trace_ignore_this_task - should a task be ignored for tracing
381 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids that should not be traced
382 * @task: The task that should be ignored if not filtered
384 * Checks if @task should be traced or not from @filtered_pids.
385 * Returns true if @task should *NOT* be traced.
386 * Returns false if @task should be traced.
389 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
390 struct trace_pid_list *filtered_no_pids,
391 struct task_struct *task)
394 * If filtered_no_pids is not empty, and the task's pid is listed
395 * in filtered_no_pids, then return true.
396 * Otherwise, if filtered_pids is empty, that means we can
397 * trace all tasks. If it has content, then only trace pids
398 * within filtered_pids.
401 return (filtered_pids &&
402 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
404 trace_find_filtered_pid(filtered_no_pids, task->pid));
408 * trace_filter_add_remove_task - Add or remove a task from a pid_list
409 * @pid_list: The list to modify
410 * @self: The current task for fork or NULL for exit
411 * @task: The task to add or remove
413 * If adding a task, if @self is defined, the task is only added if @self
414 * is also included in @pid_list. This happens on fork and tasks should
415 * only be added when the parent is listed. If @self is NULL, then the
416 * @task pid will be removed from the list, which would happen on exit
419 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
420 struct task_struct *self,
421 struct task_struct *task)
426 /* For forks, we only add if the forking task is listed */
428 if (!trace_find_filtered_pid(pid_list, self->pid))
432 /* Sorry, but we don't support pid_max changing after setting */
433 if (task->pid >= pid_list->pid_max)
436 /* "self" is set for forks, and NULL for exits */
438 set_bit(task->pid, pid_list->pids);
440 clear_bit(task->pid, pid_list->pids);
444 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
445 * @pid_list: The pid list to show
446 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
447 * @pos: The position of the file
449 * This is used by the seq_file "next" operation to iterate the pids
450 * listed in a trace_pid_list structure.
452 * Returns the pid+1 as we want to display pid of zero, but NULL would
453 * stop the iteration.
455 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
457 unsigned long pid = (unsigned long)v;
461 /* pid already is +1 of the actual previous bit */
462 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
464 /* Return pid + 1 to allow zero to be represented */
465 if (pid < pid_list->pid_max)
466 return (void *)(pid + 1);
472 * trace_pid_start - Used for seq_file to start reading pid lists
473 * @pid_list: The pid list to show
474 * @pos: The position of the file
476 * This is used by seq_file "start" operation to start the iteration
479 * Returns the pid+1 as we want to display pid of zero, but NULL would
480 * stop the iteration.
482 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
487 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
488 if (pid >= pid_list->pid_max)
491 /* Return pid + 1 so that zero can be the exit value */
492 for (pid++; pid && l < *pos;
493 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
499 * trace_pid_show - show the current pid in seq_file processing
500 * @m: The seq_file structure to write into
501 * @v: A void pointer of the pid (+1) value to display
503 * Can be directly used by seq_file operations to display the current
506 int trace_pid_show(struct seq_file *m, void *v)
508 unsigned long pid = (unsigned long)v - 1;
510 seq_printf(m, "%lu\n", pid);
514 /* 128 should be much more than enough */
515 #define PID_BUF_SIZE 127
/*
 * trace_pid_write - parse a user-supplied list of pids into a new pid list
 * @filtered_pids: the current pid list (may be NULL); its set bits are
 *                 copied into the replacement list before parsing
 * @new_pid_list: output; on success points to the newly allocated list
 * @ubuf: user-space buffer holding the pid numbers to parse
 * @cnt: number of bytes available in @ubuf
 *
 * A new list is always allocated: the write is an all-or-nothing
 * operation, so on failure the previously installed list is untouched.
 */
517 int trace_pid_write(struct trace_pid_list *filtered_pids,
518 struct trace_pid_list **new_pid_list,
519 const char __user *ubuf, size_t cnt)
521 struct trace_pid_list *pid_list;
522 struct trace_parser parser;
530 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
534 * Always recreate a new array. The write is an all or nothing
535 * operation. Always create a new array when adding new pids by
536 * the user. If the operation fails, then the current list is
539 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
541 trace_parser_put(&parser);
545 pid_list->pid_max = READ_ONCE(pid_max);
547 /* Only truncating will shrink pid_max */
548 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
549 pid_list->pid_max = filtered_pids->pid_max;
/* One bit per pid, rounded up to whole bytes. */
551 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
552 if (!pid_list->pids) {
553 trace_parser_put(&parser);
559 /* copy the current bits to the new max */
560 for_each_set_bit(pid, filtered_pids->pids,
561 filtered_pids->pid_max) {
562 set_bit(pid, pid_list->pids);
/* Parse one pid token at a time from the user buffer. */
571 ret = trace_get_user(&parser, ubuf, cnt, &pos);
572 if (ret < 0 || !trace_parser_loaded(&parser))
580 if (kstrtoul(parser.buffer, 0, &val))
582 if (val >= pid_list->pid_max)
587 set_bit(pid, pid_list->pids);
590 trace_parser_clear(&parser);
593 trace_parser_put(&parser);
/* Error path: the partially built list is discarded. */
596 trace_free_pid_list(pid_list);
601 /* Cleared the list of pids */
602 trace_free_pid_list(pid_list);
607 *new_pid_list = pid_list;
/*
 * Return the (normalized) timestamp of @buf's ring buffer for @cpu,
 * falling back to the local trace clock before the buffer exists.
 */
612 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
616 /* Early boot up does not have a buffer yet */
618 return trace_clock_local();
620 ts = ring_buffer_time_stamp(buf->buffer, cpu);
621 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
/* Current trace time for @cpu on the global trace buffer. */
626 u64 ftrace_now(int cpu)
628 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
632 * tracing_is_enabled - Show if global_trace has been disabled
634 * Shows if the global trace has been enabled or not. It uses the
635 * mirror flag "buffer_disabled" to be used in fast paths such as for
636 * the irqsoff tracer. But it may be inaccurate due to races. If you
637 * need to know the accurate state, use tracing_is_on() which is a little
638 * slower, but accurate.
640 int tracing_is_enabled(void)
643 * For quick access (irqsoff uses this in fast path), just
644 * return the mirror variable of the state of the ring buffer.
645 * It's a little racy, but we don't really care.
648 return !global_trace.buffer_disabled;
652 * trace_buf_size is the size in bytes that is allocated
653 * for a buffer. Note, the number of bytes is always rounded
656 * This number is purposely set to a low number of 16384.
657 * If the dump on oops happens, it will be much appreciated
658 * to not have to wait for all that output. Anyway this can be
659 * boot time and run time configurable.
661 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
663 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
665 /* trace_types holds a link list of available tracers. */
666 static struct tracer *trace_types __read_mostly;
669 * trace_types_lock is used to protect the trace_types list.
671 DEFINE_MUTEX(trace_types_lock);
674 * serialize the access of the ring buffer
676 * ring buffer serializes readers, but it is low level protection.
677 * The validity of the events (which returns by ring_buffer_peek() ..etc)
678 * are not protected by ring buffer.
680 * The content of events may become garbage if we allow other process consumes
681 * these events concurrently:
682 * A) the page of the consumed events may become a normal page
683 * (not reader page) in ring buffer, and this page will be rewritten
684 * by events producer.
685 * B) The page of the consumed events may become a page for splice_read,
686 * and this page will be returned to system.
688 * These primitives allow multi process access to different cpu ring buffer
691 * These primitives don't distinguish read-only and read-consume access.
692 * Multi read-only access are also serialized.
696 static DECLARE_RWSEM(all_cpu_access_lock);
697 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
/*
 * Reader serialization: RING_BUFFER_ALL_CPUS takes the rwsem for write
 * (excluding all per-cpu readers); a single cpu takes it for read plus
 * that cpu's mutex.
 */
699 static inline void trace_access_lock(int cpu)
701 if (cpu == RING_BUFFER_ALL_CPUS) {
702 /* gain it for accessing the whole ring buffer. */
703 down_write(&all_cpu_access_lock);
705 /* gain it for accessing a cpu ring buffer. */
707 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
708 down_read(&all_cpu_access_lock);
710 /* Secondly block other access to this @cpu ring buffer. */
711 mutex_lock(&per_cpu(cpu_access_lock, cpu));
/* Release in the reverse order of trace_access_lock(). */
715 static inline void trace_access_unlock(int cpu)
717 if (cpu == RING_BUFFER_ALL_CPUS) {
718 up_write(&all_cpu_access_lock);
720 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
721 up_read(&all_cpu_access_lock);
725 static inline void trace_access_lock_init(void)
729 for_each_possible_cpu(cpu)
730 mutex_init(&per_cpu(cpu_access_lock, cpu));
/*
 * NOTE(review): the following looks like the non-SMP fallback using a
 * single global mutex — the #ifdef/#else lines are not visible in this
 * sampled view; confirm against the full source.
 */
735 static DEFINE_MUTEX(access_lock);
737 static inline void trace_access_lock(int cpu)
740 mutex_lock(&access_lock);
743 static inline void trace_access_unlock(int cpu)
746 mutex_unlock(&access_lock);
749 static inline void trace_access_lock_init(void)
755 #ifdef CONFIG_STACKTRACE
756 static void __ftrace_trace_stack(struct trace_buffer *buffer,
758 int skip, int pc, struct pt_regs *regs);
759 static inline void ftrace_trace_stack(struct trace_array *tr,
760 struct trace_buffer *buffer,
762 int skip, int pc, struct pt_regs *regs);
765 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
767 int skip, int pc, struct pt_regs *regs)
770 static inline void ftrace_trace_stack(struct trace_array *tr,
771 struct trace_buffer *buffer,
773 int skip, int pc, struct pt_regs *regs)
/* Fill the generic trace_entry header of a freshly reserved event. */
779 static __always_inline void
780 trace_event_setup(struct ring_buffer_event *event,
781 int type, unsigned long flags, int pc)
783 struct trace_entry *ent = ring_buffer_event_data(event);
785 tracing_generic_entry_update(ent, type, flags, pc);
/* Reserve an event on @buffer and initialize its header. */
788 static __always_inline struct ring_buffer_event *
789 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
792 unsigned long flags, int pc)
794 struct ring_buffer_event *event;
796 event = ring_buffer_lock_reserve(buffer, len);
798 trace_event_setup(event, type, flags, pc);
/* Re-enable recording on @tr's buffer and clear the mirror flag. */
803 void tracer_tracing_on(struct trace_array *tr)
805 if (tr->array_buffer.buffer)
806 ring_buffer_record_on(tr->array_buffer.buffer);
808 * This flag is looked at when buffers haven't been allocated
809 * yet, or by some tracers (like irqsoff), that just want to
810 * know if the ring buffer has been disabled, but it can handle
811 * races of where it gets disabled but we still do a record.
812 * As the check is in the fast path of the tracers, it is more
813 * important to be fast than accurate.
815 tr->buffer_disabled = 0;
816 /* Make the flag seen by readers */
821 * tracing_on - enable tracing buffers
823 * This function enables tracing buffers that may have been
824 * disabled with tracing_off.
826 void tracing_on(void)
828 tracer_tracing_on(&global_trace);
830 EXPORT_SYMBOL_GPL(tracing_on);
/* Commit an event; the per-cpu temp buffer needs a full write instead. */
833 static __always_inline void
834 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
836 __this_cpu_write(trace_taskinfo_save, true);
838 /* If this is the temp buffer, we need to commit fully */
839 if (this_cpu_read(trace_buffered_event) == event) {
840 /* Length is in event->array[0] */
841 ring_buffer_write(buffer, event->array[0], &event->array[1]);
842 /* Release the temp buffer */
843 this_cpu_dec(trace_buffered_event_cnt);
845 ring_buffer_unlock_commit(buffer, event);
849 * __trace_puts - write a constant string into the trace buffer.
850 * @ip: The address of the caller
851 * @str: The constant string to write
852 * @size: The size of the string.
854 int __trace_puts(unsigned long ip, const char *str, int size)
856 struct ring_buffer_event *event;
857 struct trace_buffer *buffer;
858 struct print_entry *entry;
859 unsigned long irq_flags;
863 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
866 pc = preempt_count();
868 if (unlikely(tracing_selftest_running || tracing_disabled))
/* Reserve room for the string plus a possible '\n' and the '\0'. */
871 alloc = sizeof(*entry) + size + 2; /* possible \n added */
873 local_save_flags(irq_flags);
874 buffer = global_trace.array_buffer.buffer;
875 ring_buffer_nest_start(buffer);
876 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
883 entry = ring_buffer_event_data(event);
886 memcpy(&entry->buf, str, size);
888 /* Add a newline if necessary */
889 if (entry->buf[size - 1] != '\n') {
890 entry->buf[size] = '\n';
891 entry->buf[size + 1] = '\0';
893 entry->buf[size] = '\0';
895 __buffer_unlock_commit(buffer, event);
896 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
898 ring_buffer_nest_end(buffer);
901 EXPORT_SYMBOL_GPL(__trace_puts);
904 * __trace_bputs - write the pointer to a constant string into trace buffer
905 * @ip: The address of the caller
906 * @str: The constant string to write to the buffer to
908 int __trace_bputs(unsigned long ip, const char *str)
910 struct ring_buffer_event *event;
911 struct trace_buffer *buffer;
912 struct bputs_entry *entry;
913 unsigned long irq_flags;
/* Only the pointer is recorded, so the entry size is fixed. */
914 int size = sizeof(struct bputs_entry);
918 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
921 pc = preempt_count();
923 if (unlikely(tracing_selftest_running || tracing_disabled))
926 local_save_flags(irq_flags);
927 buffer = global_trace.array_buffer.buffer;
929 ring_buffer_nest_start(buffer);
930 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
935 entry = ring_buffer_event_data(event);
939 __buffer_unlock_commit(buffer, event);
940 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
944 ring_buffer_nest_end(buffer);
947 EXPORT_SYMBOL_GPL(__trace_bputs);
949 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Swap the live buffer with the snapshot (max) buffer, passing
 * @cond_data through to update_max_tr(). Refuses to run from NMI
 * context, without an allocated snapshot, or while a latency tracer
 * owns the max buffer.
 */
950 static void tracing_snapshot_instance_cond(struct trace_array *tr,
953 struct tracer *tracer = tr->current_trace;
957 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
958 internal_trace_puts("*** snapshot is being ignored ***\n");
962 if (!tr->allocated_snapshot) {
963 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
964 internal_trace_puts("*** stopping trace here! ***\n");
969 /* Note, snapshot can not be used when the tracer uses it */
970 if (tracer->use_max_tr) {
971 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
972 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
976 local_irq_save(flags);
977 update_max_tr(tr, current, smp_processor_id(), cond_data);
978 local_irq_restore(flags);
/* Unconditional snapshot of @tr (no cond_data). */
981 void tracing_snapshot_instance(struct trace_array *tr)
983 tracing_snapshot_instance_cond(tr, NULL);
987 * tracing_snapshot - take a snapshot of the current buffer.
989 * This causes a swap between the snapshot buffer and the current live
990 * tracing buffer. You can use this to take snapshots of the live
991 * trace when some condition is triggered, but continue to trace.
993 * Note, make sure to allocate the snapshot with either
994 * a tracing_snapshot_alloc(), or by doing it manually
995 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
997 * If the snapshot buffer is not allocated, it will stop tracing.
998 * Basically making a permanent snapshot.
1000 void tracing_snapshot(void)
1002 struct trace_array *tr = &global_trace;
1004 tracing_snapshot_instance(tr);
1006 EXPORT_SYMBOL_GPL(tracing_snapshot);
1009 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1010 * @tr: The tracing instance to snapshot
1011 * @cond_data: The data to be tested conditionally, and possibly saved
1013 * This is the same as tracing_snapshot() except that the snapshot is
1014 * conditional - the snapshot will only happen if the
1015 * cond_snapshot.update() implementation receiving the cond_data
1016 * returns true, which means that the trace array's cond_snapshot
1017 * update() operation used the cond_data to determine whether the
1018 * snapshot should be taken, and if it was, presumably saved it along
1019 * with the snapshot.
1021 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1023 tracing_snapshot_instance_cond(tr, cond_data);
1025 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1028 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1029 * @tr: The tracing instance
1031 * When the user enables a conditional snapshot using
1032 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1033 * with the snapshot. This accessor is used to retrieve it.
1035 * Should not be called from cond_snapshot.update(), since it takes
1036 * the tr->max_lock lock, which the code calling
1037 * cond_snapshot.update() has already done.
1039 * Returns the cond_data associated with the trace array's snapshot.
1041 void *tracing_cond_snapshot_data(struct trace_array *tr)
1043 void *cond_data = NULL;
1045 arch_spin_lock(&tr->max_lock);
1047 if (tr->cond_snapshot)
1048 cond_data = tr->cond_snapshot->cond_data;
1050 arch_spin_unlock(&tr->max_lock);
1054 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1056 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1057 struct array_buffer *size_buf, int cpu_id);
1058 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
/*
 * Allocate @tr's snapshot (max) buffer, sized to match the live
 * buffer, if it has not been allocated already.
 */
1060 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1064 if (!tr->allocated_snapshot) {
1066 /* allocate spare buffer */
1067 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1068 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1072 tr->allocated_snapshot = true;
1078 static void free_snapshot(struct trace_array *tr)
1081 * We don't free the ring buffer. Instead, resize it because
1082 * The max_tr ring buffer has some state (e.g. ring->clock) and
1083 * we want to preserve it.
1085 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1086 set_buffer_entries(&tr->max_buffer, 1);
1087 tracing_reset_online_cpus(&tr->max_buffer);
1088 tr->allocated_snapshot = false;
1092 * tracing_alloc_snapshot - allocate snapshot buffer.
1094 * This only allocates the snapshot buffer if it isn't already
1095 * allocated - it doesn't also take a snapshot.
1097 * This is meant to be used in cases where the snapshot buffer needs
1098 * to be set up for events that can't sleep but need to be able to
1099 * trigger a snapshot.
1101 int tracing_alloc_snapshot(void)
1103 struct trace_array *tr = &global_trace;
1106 ret = tracing_alloc_snapshot_instance(tr);
1111 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1114 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1116 * This is similar to tracing_snapshot(), but it will allocate the
1117 * snapshot buffer if it isn't already allocated. Use this only
1118 * where it is safe to sleep, as the allocation may sleep.
1120 * This causes a swap between the snapshot buffer and the current live
1121 * tracing buffer. You can use this to take snapshots of the live
1122 * trace when some condition is triggered, but continue to trace.
1124 void tracing_snapshot_alloc(void)
1128 ret = tracing_alloc_snapshot();
1134 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1137 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1138 * @tr: The tracing instance
1139 * @cond_data: User data to associate with the snapshot
1140 * @update: Implementation of the cond_snapshot update function
1142 * Check whether the conditional snapshot for the given instance has
1143 * already been enabled, or if the current tracer is already using a
1144 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1145 * save the cond_data and update function inside.
1147 * Returns 0 if successful, error otherwise.
1149 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1150 cond_update_fn_t update)
1152 struct cond_snapshot *cond_snapshot;
1155 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1159 cond_snapshot->cond_data = cond_data;
1160 cond_snapshot->update = update;
1162 mutex_lock(&trace_types_lock);
/* Make sure the snapshot buffer exists before publishing cond_snapshot. */
1164 ret = tracing_alloc_snapshot_instance(tr);
1168 if (tr->current_trace->use_max_tr) {
1174 * The cond_snapshot can only change to NULL without the
1175 * trace_types_lock. We don't care if we race with it going
1176 * to NULL, but we want to make sure that it's not set to
1177 * something other than NULL when we get here, which we can
1178 * do safely with only holding the trace_types_lock and not
1179 * having to take the max_lock.
1181 if (tr->cond_snapshot) {
1186 arch_spin_lock(&tr->max_lock);
1187 tr->cond_snapshot = cond_snapshot;
1188 arch_spin_unlock(&tr->max_lock);
1190 mutex_unlock(&trace_types_lock);
/* Error path: drop the lock and free the unpublished cond_snapshot. */
1195 mutex_unlock(&trace_types_lock);
1196 kfree(cond_snapshot);
1199 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1202 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1203 * @tr: The tracing instance
1205 * Check whether the conditional snapshot for the given instance is
1206 * enabled; if so, free the cond_snapshot associated with it,
1207 * otherwise return -EINVAL.
1209 * Returns 0 if successful, error otherwise.
1211 int tracing_snapshot_cond_disable(struct trace_array *tr)
1215 arch_spin_lock(&tr->max_lock);
1217 if (!tr->cond_snapshot)
1220 kfree(tr->cond_snapshot);
1221 tr->cond_snapshot = NULL;
1224 arch_spin_unlock(&tr->max_lock);
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
/*
 * !CONFIG_TRACER_SNAPSHOT stubs: warn (where meaningful) when the
 * snapshot API is used without the feature compiled in.
 */
1230 void tracing_snapshot(void)
1232 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1234 EXPORT_SYMBOL_GPL(tracing_snapshot);
1235 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1237 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1239 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1240 int tracing_alloc_snapshot(void)
1242 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1245 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1246 void tracing_snapshot_alloc(void)
1251 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1252 void *tracing_cond_snapshot_data(struct trace_array *tr)
1256 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1257 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1261 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1262 int tracing_snapshot_cond_disable(struct trace_array *tr)
1266 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1267 #endif /* CONFIG_TRACER_SNAPSHOT */
/* Stop recording on @tr's buffer and set the mirror flag. */
1269 void tracer_tracing_off(struct trace_array *tr)
1271 if (tr->array_buffer.buffer)
1272 ring_buffer_record_off(tr->array_buffer.buffer);
1274 * This flag is looked at when buffers haven't been allocated
1275 * yet, or by some tracers (like irqsoff), that just want to
1276 * know if the ring buffer has been disabled, but it can handle
1277 * races of where it gets disabled but we still do a record.
1278 * As the check is in the fast path of the tracers, it is more
1279 * important to be fast than accurate.
1281 tr->buffer_disabled = 1;
1282 /* Make the flag seen by readers */
1287 * tracing_off - turn off tracing buffers
1289 * This function stops the tracing buffers from recording data.
1290 * It does not disable any overhead the tracers themselves may
1291 * be causing. This function simply causes all recording to
1292 * the ring buffers to fail.
1294 void tracing_off(void)
1296 tracer_tracing_off(&global_trace);
1298 EXPORT_SYMBOL_GPL(tracing_off);
/* Called from the WARN path when "traceoff_on_warning" is set. */
1300 void disable_trace_on_warning(void)
1302 if (__disable_trace_on_warning) {
1303 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1304 "Disabling tracing due to warning\n");
1310 * tracer_tracing_is_on - show real state of ring buffer enabled
1311 * @tr : the trace array to know if ring buffer is enabled
1313 * Shows real state of the ring buffer if it is enabled or not.
1315 bool tracer_tracing_is_on(struct trace_array *tr)
1317 if (tr->array_buffer.buffer)
1318 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1319 return !tr->buffer_disabled;
1323 * tracing_is_on - show state of ring buffers enabled
1325 int tracing_is_on(void)
1327 return tracer_tracing_is_on(&global_trace);
1329 EXPORT_SYMBOL_GPL(tracing_is_on);
/* "trace_buf_size=" boot parameter: accepts memparse suffixes (k/m/g). */
1331 static int __init set_buf_size(char *str)
1333 unsigned long buf_size;
1337 buf_size = memparse(str, &str);
1338 /* nr_entries can not be zero */
1341 trace_buf_size = buf_size;
1344 __setup("trace_buf_size=", set_buf_size);
/* "tracing_thresh=" boot parameter, given in usecs, stored as nsecs. */
1346 static int __init set_tracing_thresh(char *str)
1348 unsigned long threshold;
1353 ret = kstrtoul(str, 0, &threshold);
1356 tracing_thresh = threshold * 1000;
1359 __setup("tracing_thresh=", set_tracing_thresh);
/*
 * nsecs_to_usecs - convert nanoseconds to whole microseconds.
 * Integer division truncates; sub-microsecond remainders are dropped.
 */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}
1367 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1368 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1369 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1370 * of strings in the order that the evals (enum) were defined.
1375 /* These must match the bit postions in trace_iterator_flags */
1376 static const char *trace_options[] = {
1384 int in_ns; /* is this clock in nanoseconds? */
1385 } trace_clocks[] = {
1386 { trace_clock_local, "local", 1 },
1387 { trace_clock_global, "global", 1 },
1388 { trace_clock_counter, "counter", 0 },
1389 { trace_clock_jiffies, "uptime", 0 },
1390 { trace_clock, "perf", 1 },
1391 { ktime_get_mono_fast_ns, "mono", 1 },
1392 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1393 { ktime_get_boot_fast_ns, "boot", 1 },
1397 bool trace_clock_in_ns(struct trace_array *tr)
1399 if (trace_clocks[tr->clock_id].in_ns)
1406 * trace_parser_get_init - gets the buffer for trace parser
1408 int trace_parser_get_init(struct trace_parser *parser, int size)
1410 memset(parser, 0, sizeof(*parser));
1412 parser->buffer = kmalloc(size, GFP_KERNEL);
1413 if (!parser->buffer)
1416 parser->size = size;
1421 * trace_parser_put - frees the buffer for trace parser
1423 void trace_parser_put(struct trace_parser *parser)
1425 kfree(parser->buffer);
1426 parser->buffer = NULL;
1430 * trace_get_user - reads the user input string separated by space
1431 * (matched by isspace(ch))
1433 * For each string found the 'struct trace_parser' is updated,
1434 * and the function returns.
1436 * Returns number of bytes read.
1438 * See kernel/trace/trace.h for 'struct trace_parser' details.
1440 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1441 size_t cnt, loff_t *ppos)
1448 trace_parser_clear(parser);
1450 ret = get_user(ch, ubuf++);
1458 * The parser is not finished with the last write,
1459 * continue reading the user input without skipping spaces.
1461 if (!parser->cont) {
1462 /* skip white space */
1463 while (cnt && isspace(ch)) {
1464 ret = get_user(ch, ubuf++);
1473 /* only spaces were written */
1474 if (isspace(ch) || !ch) {
1481 /* read the non-space input */
1482 while (cnt && !isspace(ch) && ch) {
1483 if (parser->idx < parser->size - 1)
1484 parser->buffer[parser->idx++] = ch;
1489 ret = get_user(ch, ubuf++);
1496 /* We either got finished input or we have to wait for another call. */
1497 if (isspace(ch) || !ch) {
1498 parser->buffer[parser->idx] = 0;
1499 parser->cont = false;
1500 } else if (parser->idx < parser->size - 1) {
1501 parser->cont = true;
1502 parser->buffer[parser->idx++] = ch;
1503 /* Make sure the parsed string always terminates with '\0'. */
1504 parser->buffer[parser->idx] = 0;
1517 /* TODO add a seq_buf_to_buffer() */
1518 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1522 if (trace_seq_used(s) <= s->seq.readpos)
1525 len = trace_seq_used(s) - s->seq.readpos;
1528 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1530 s->seq.readpos += cnt;
1534 unsigned long __read_mostly tracing_thresh;
1535 static const struct file_operations tracing_max_lat_fops;
1537 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1538 defined(CONFIG_FSNOTIFY)
1540 static struct workqueue_struct *fsnotify_wq;
1542 static void latency_fsnotify_workfn(struct work_struct *work)
1544 struct trace_array *tr = container_of(work, struct trace_array,
1546 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1549 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1551 struct trace_array *tr = container_of(iwork, struct trace_array,
1553 queue_work(fsnotify_wq, &tr->fsnotify_work);
1556 static void trace_create_maxlat_file(struct trace_array *tr,
1557 struct dentry *d_tracer)
1559 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1560 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1561 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1562 d_tracer, &tr->max_latency,
1563 &tracing_max_lat_fops);
1566 __init static int latency_fsnotify_init(void)
1568 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1569 WQ_UNBOUND | WQ_HIGHPRI, 0);
1571 pr_err("Unable to allocate tr_max_lat_wq\n");
1577 late_initcall_sync(latency_fsnotify_init);
1579 void latency_fsnotify(struct trace_array *tr)
1584 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1585 * possible that we are called from __schedule() or do_idle(), which
1586 * could cause a deadlock.
1588 irq_work_queue(&tr->fsnotify_irqwork);
1592 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1593 * defined(CONFIG_FSNOTIFY)
1597 #define trace_create_maxlat_file(tr, d_tracer) \
1598 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1599 &tr->max_latency, &tracing_max_lat_fops)
1603 #ifdef CONFIG_TRACER_MAX_TRACE
1605 * Copy the new maximum trace into the separate maximum-trace
1606 * structure. (this way the maximum trace is permanently saved,
1607 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1610 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1612 struct array_buffer *trace_buf = &tr->array_buffer;
1613 struct array_buffer *max_buf = &tr->max_buffer;
1614 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1615 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1618 max_buf->time_start = data->preempt_timestamp;
1620 max_data->saved_latency = tr->max_latency;
1621 max_data->critical_start = data->critical_start;
1622 max_data->critical_end = data->critical_end;
1624 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1625 max_data->pid = tsk->pid;
1627 * If tsk == current, then use current_uid(), as that does not use
1628 * RCU. The irq tracer can be called out of RCU scope.
1631 max_data->uid = current_uid();
1633 max_data->uid = task_uid(tsk);
1635 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1636 max_data->policy = tsk->policy;
1637 max_data->rt_priority = tsk->rt_priority;
1639 /* record this tasks comm */
1640 tracing_record_cmdline(tsk);
1641 latency_fsnotify(tr);
1645 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1647 * @tsk: the task with the latency
1648 * @cpu: The cpu that initiated the trace.
1649 * @cond_data: User data associated with a conditional snapshot
1651 * Flip the buffers between the @tr and the max_tr and record information
1652 * about which task was the cause of this latency.
1655 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1661 WARN_ON_ONCE(!irqs_disabled());
1663 if (!tr->allocated_snapshot) {
1664 /* Only the nop tracer should hit this when disabling */
1665 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1669 arch_spin_lock(&tr->max_lock);
1671 /* Inherit the recordable setting from array_buffer */
1672 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1673 ring_buffer_record_on(tr->max_buffer.buffer);
1675 ring_buffer_record_off(tr->max_buffer.buffer);
1677 #ifdef CONFIG_TRACER_SNAPSHOT
1678 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1681 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1683 __update_max_tr(tr, tsk, cpu);
1686 arch_spin_unlock(&tr->max_lock);
1690 * update_max_tr_single - only copy one trace over, and reset the rest
1692 * @tsk: task with the latency
1693 * @cpu: the cpu of the buffer to copy.
1695 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1698 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1705 WARN_ON_ONCE(!irqs_disabled());
1706 if (!tr->allocated_snapshot) {
1707 /* Only the nop tracer should hit this when disabling */
1708 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1712 arch_spin_lock(&tr->max_lock);
1714 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1716 if (ret == -EBUSY) {
1718 * We failed to swap the buffer due to a commit taking
1719 * place on this CPU. We fail to record, but we reset
1720 * the max trace buffer (no one writes directly to it)
1721 * and flag that it failed.
1723 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1724 "Failed to swap buffers due to commit in progress\n");
1727 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1729 __update_max_tr(tr, tsk, cpu);
1730 arch_spin_unlock(&tr->max_lock);
1732 #endif /* CONFIG_TRACER_MAX_TRACE */
1734 static int wait_on_pipe(struct trace_iterator *iter, int full)
1736 /* Iterators are static, they should be filled or empty */
1737 if (trace_buffer_iter(iter, iter->cpu_file))
1740 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1744 #ifdef CONFIG_FTRACE_STARTUP_TEST
1745 static bool selftests_can_run;
1747 struct trace_selftests {
1748 struct list_head list;
1749 struct tracer *type;
1752 static LIST_HEAD(postponed_selftests);
1754 static int save_selftest(struct tracer *type)
1756 struct trace_selftests *selftest;
1758 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1762 selftest->type = type;
1763 list_add(&selftest->list, &postponed_selftests);
1767 static int run_tracer_selftest(struct tracer *type)
1769 struct trace_array *tr = &global_trace;
1770 struct tracer *saved_tracer = tr->current_trace;
1773 if (!type->selftest || tracing_selftest_disabled)
1777 * If a tracer registers early in boot up (before scheduling is
1778 * initialized and such), then do not run its selftests yet.
1779 * Instead, run it a little later in the boot process.
1781 if (!selftests_can_run)
1782 return save_selftest(type);
1785 * Run a selftest on this tracer.
1786 * Here we reset the trace buffer, and set the current
1787 * tracer to be this tracer. The tracer can then run some
1788 * internal tracing to verify that everything is in order.
1789 * If we fail, we do not register this tracer.
1791 tracing_reset_online_cpus(&tr->array_buffer);
1793 tr->current_trace = type;
1795 #ifdef CONFIG_TRACER_MAX_TRACE
1796 if (type->use_max_tr) {
1797 /* If we expanded the buffers, make sure the max is expanded too */
1798 if (ring_buffer_expanded)
1799 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1800 RING_BUFFER_ALL_CPUS);
1801 tr->allocated_snapshot = true;
1805 /* the test is responsible for initializing and enabling */
1806 pr_info("Testing tracer %s: ", type->name);
1807 ret = type->selftest(type, tr);
1808 /* the test is responsible for resetting too */
1809 tr->current_trace = saved_tracer;
1811 printk(KERN_CONT "FAILED!\n");
1812 /* Add the warning after printing 'FAILED' */
1816 /* Only reset on passing, to avoid touching corrupted buffers */
1817 tracing_reset_online_cpus(&tr->array_buffer);
1819 #ifdef CONFIG_TRACER_MAX_TRACE
1820 if (type->use_max_tr) {
1821 tr->allocated_snapshot = false;
1823 /* Shrink the max buffer again */
1824 if (ring_buffer_expanded)
1825 ring_buffer_resize(tr->max_buffer.buffer, 1,
1826 RING_BUFFER_ALL_CPUS);
1830 printk(KERN_CONT "PASSED\n");
1834 static __init int init_trace_selftests(void)
1836 struct trace_selftests *p, *n;
1837 struct tracer *t, **last;
1840 selftests_can_run = true;
1842 mutex_lock(&trace_types_lock);
1844 if (list_empty(&postponed_selftests))
1847 pr_info("Running postponed tracer tests:\n");
1849 tracing_selftest_running = true;
1850 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1851 /* This loop can take minutes when sanitizers are enabled, so
1852 * lets make sure we allow RCU processing.
1855 ret = run_tracer_selftest(p->type);
1856 /* If the test fails, then warn and remove from available_tracers */
1858 WARN(1, "tracer: %s failed selftest, disabling\n",
1860 last = &trace_types;
1861 for (t = trace_types; t; t = t->next) {
1872 tracing_selftest_running = false;
1875 mutex_unlock(&trace_types_lock);
1879 core_initcall(init_trace_selftests);
1881 static inline int run_tracer_selftest(struct tracer *type)
1885 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1887 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1889 static void __init apply_trace_boot_options(void);
1892 * register_tracer - register a tracer with the ftrace system.
1893 * @type: the plugin for the tracer
1895 * Register a new plugin tracer.
1897 int __init register_tracer(struct tracer *type)
1903 pr_info("Tracer must have a name\n");
1907 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1908 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1912 if (security_locked_down(LOCKDOWN_TRACEFS)) {
1913 pr_warn("Can not register tracer %s due to lockdown\n",
1918 mutex_lock(&trace_types_lock);
1920 tracing_selftest_running = true;
1922 for (t = trace_types; t; t = t->next) {
1923 if (strcmp(type->name, t->name) == 0) {
1925 pr_info("Tracer %s already registered\n",
1932 if (!type->set_flag)
1933 type->set_flag = &dummy_set_flag;
1935 /*allocate a dummy tracer_flags*/
1936 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1941 type->flags->val = 0;
1942 type->flags->opts = dummy_tracer_opt;
1944 if (!type->flags->opts)
1945 type->flags->opts = dummy_tracer_opt;
1947 /* store the tracer for __set_tracer_option */
1948 type->flags->trace = type;
1950 ret = run_tracer_selftest(type);
1954 type->next = trace_types;
1956 add_tracer_options(&global_trace, type);
1959 tracing_selftest_running = false;
1960 mutex_unlock(&trace_types_lock);
1962 if (ret || !default_bootup_tracer)
1965 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1968 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1969 /* Do we want this tracer to start on bootup? */
1970 tracing_set_tracer(&global_trace, type->name);
1971 default_bootup_tracer = NULL;
1973 apply_trace_boot_options();
1975 /* disable other selftests, since this will break it. */
1976 tracing_selftest_disabled = true;
1977 #ifdef CONFIG_FTRACE_STARTUP_TEST
1978 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1986 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
1988 struct trace_buffer *buffer = buf->buffer;
1993 ring_buffer_record_disable(buffer);
1995 /* Make sure all commits have finished */
1997 ring_buffer_reset_cpu(buffer, cpu);
1999 ring_buffer_record_enable(buffer);
2002 void tracing_reset_online_cpus(struct array_buffer *buf)
2004 struct trace_buffer *buffer = buf->buffer;
2009 ring_buffer_record_disable(buffer);
2011 /* Make sure all commits have finished */
2014 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2016 ring_buffer_reset_online_cpus(buffer);
2018 ring_buffer_record_enable(buffer);
2021 /* Must have trace_types_lock held */
2022 void tracing_reset_all_online_cpus(void)
2024 struct trace_array *tr;
2026 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2027 if (!tr->clear_trace)
2029 tr->clear_trace = false;
2030 tracing_reset_online_cpus(&tr->array_buffer);
2031 #ifdef CONFIG_TRACER_MAX_TRACE
2032 tracing_reset_online_cpus(&tr->max_buffer);
2037 static int *tgid_map;
2039 #define SAVED_CMDLINES_DEFAULT 128
2040 #define NO_CMDLINE_MAP UINT_MAX
2041 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2042 struct saved_cmdlines_buffer {
2043 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2044 unsigned *map_cmdline_to_pid;
2045 unsigned cmdline_num;
2047 char *saved_cmdlines;
2049 static struct saved_cmdlines_buffer *savedcmd;
2051 /* temporary disable recording */
2052 static atomic_t trace_record_taskinfo_disabled __read_mostly;
2054 static inline char *get_saved_cmdlines(int idx)
2056 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2059 static inline void set_cmdline(int idx, const char *cmdline)
2061 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2064 static int allocate_cmdlines_buffer(unsigned int val,
2065 struct saved_cmdlines_buffer *s)
2067 s->map_cmdline_to_pid = kmalloc_array(val,
2068 sizeof(*s->map_cmdline_to_pid),
2070 if (!s->map_cmdline_to_pid)
2073 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2074 if (!s->saved_cmdlines) {
2075 kfree(s->map_cmdline_to_pid);
2080 s->cmdline_num = val;
2081 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2082 sizeof(s->map_pid_to_cmdline));
2083 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2084 val * sizeof(*s->map_cmdline_to_pid));
2089 static int trace_create_savedcmd(void)
2093 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2097 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2107 int is_tracing_stopped(void)
2109 return global_trace.stop_count;
2113 * tracing_start - quick start of the tracer
2115 * If tracing is enabled but was stopped by tracing_stop,
2116 * this will start the tracer back up.
2118 void tracing_start(void)
2120 struct trace_buffer *buffer;
2121 unsigned long flags;
2123 if (tracing_disabled)
2126 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2127 if (--global_trace.stop_count) {
2128 if (global_trace.stop_count < 0) {
2129 /* Someone screwed up their debugging */
2131 global_trace.stop_count = 0;
2136 /* Prevent the buffers from switching */
2137 arch_spin_lock(&global_trace.max_lock);
2139 buffer = global_trace.array_buffer.buffer;
2141 ring_buffer_record_enable(buffer);
2143 #ifdef CONFIG_TRACER_MAX_TRACE
2144 buffer = global_trace.max_buffer.buffer;
2146 ring_buffer_record_enable(buffer);
2149 arch_spin_unlock(&global_trace.max_lock);
2152 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2155 static void tracing_start_tr(struct trace_array *tr)
2157 struct trace_buffer *buffer;
2158 unsigned long flags;
2160 if (tracing_disabled)
2163 /* If global, we need to also start the max tracer */
2164 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2165 return tracing_start();
2167 raw_spin_lock_irqsave(&tr->start_lock, flags);
2169 if (--tr->stop_count) {
2170 if (tr->stop_count < 0) {
2171 /* Someone screwed up their debugging */
2178 buffer = tr->array_buffer.buffer;
2180 ring_buffer_record_enable(buffer);
2183 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2187 * tracing_stop - quick stop of the tracer
2189 * Light weight way to stop tracing. Use in conjunction with
2192 void tracing_stop(void)
2194 struct trace_buffer *buffer;
2195 unsigned long flags;
2197 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2198 if (global_trace.stop_count++)
2201 /* Prevent the buffers from switching */
2202 arch_spin_lock(&global_trace.max_lock);
2204 buffer = global_trace.array_buffer.buffer;
2206 ring_buffer_record_disable(buffer);
2208 #ifdef CONFIG_TRACER_MAX_TRACE
2209 buffer = global_trace.max_buffer.buffer;
2211 ring_buffer_record_disable(buffer);
2214 arch_spin_unlock(&global_trace.max_lock);
2217 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2220 static void tracing_stop_tr(struct trace_array *tr)
2222 struct trace_buffer *buffer;
2223 unsigned long flags;
2225 /* If global, we need to also stop the max tracer */
2226 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2227 return tracing_stop();
2229 raw_spin_lock_irqsave(&tr->start_lock, flags);
2230 if (tr->stop_count++)
2233 buffer = tr->array_buffer.buffer;
2235 ring_buffer_record_disable(buffer);
2238 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2241 static int trace_save_cmdline(struct task_struct *tsk)
2245 /* treat recording of idle task as a success */
2249 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2253 * It's not the end of the world if we don't get
2254 * the lock, but we also don't want to spin
2255 * nor do we want to disable interrupts,
2256 * so if we miss here, then better luck next time.
2258 if (!arch_spin_trylock(&trace_cmdline_lock))
2261 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2262 if (idx == NO_CMDLINE_MAP) {
2263 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2266 * Check whether the cmdline buffer at idx has a pid
2267 * mapped. We are going to overwrite that entry so we
2268 * need to clear the map_pid_to_cmdline. Otherwise we
2269 * would read the new comm for the old pid.
2271 pid = savedcmd->map_cmdline_to_pid[idx];
2272 if (pid != NO_CMDLINE_MAP)
2273 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2275 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2276 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2278 savedcmd->cmdline_idx = idx;
2281 set_cmdline(idx, tsk->comm);
2283 arch_spin_unlock(&trace_cmdline_lock);
2288 static void __trace_find_cmdline(int pid, char comm[])
2293 strcpy(comm, "<idle>");
2297 if (WARN_ON_ONCE(pid < 0)) {
2298 strcpy(comm, "<XXX>");
2302 if (pid > PID_MAX_DEFAULT) {
2303 strcpy(comm, "<...>");
2307 map = savedcmd->map_pid_to_cmdline[pid];
2308 if (map != NO_CMDLINE_MAP)
2309 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2311 strcpy(comm, "<...>");
2314 void trace_find_cmdline(int pid, char comm[])
2317 arch_spin_lock(&trace_cmdline_lock);
2319 __trace_find_cmdline(pid, comm);
2321 arch_spin_unlock(&trace_cmdline_lock);
2325 int trace_find_tgid(int pid)
2327 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2330 return tgid_map[pid];
2333 static int trace_save_tgid(struct task_struct *tsk)
2335 /* treat recording of idle task as a success */
2339 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2342 tgid_map[tsk->pid] = tsk->tgid;
2346 static bool tracing_record_taskinfo_skip(int flags)
2348 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2350 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2352 if (!__this_cpu_read(trace_taskinfo_save))
2358 * tracing_record_taskinfo - record the task info of a task
2360 * @task: task to record
2361 * @flags: TRACE_RECORD_CMDLINE for recording comm
2362 * TRACE_RECORD_TGID for recording tgid
2364 void tracing_record_taskinfo(struct task_struct *task, int flags)
2368 if (tracing_record_taskinfo_skip(flags))
2372 * Record as much task information as possible. If some fail, continue
2373 * to try to record the others.
2375 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2376 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2378 /* If recording any information failed, retry again soon. */
2382 __this_cpu_write(trace_taskinfo_save, false);
2386 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2388 * @prev: previous task during sched_switch
2389 * @next: next task during sched_switch
2390 * @flags: TRACE_RECORD_CMDLINE for recording comm
2391 * TRACE_RECORD_TGID for recording tgid
2393 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2394 struct task_struct *next, int flags)
2398 if (tracing_record_taskinfo_skip(flags))
2402 * Record as much task information as possible. If some fail, continue
2403 * to try to record the others.
2405 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2406 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2407 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2408 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2410 /* If recording any information failed, retry again soon. */
2414 __this_cpu_write(trace_taskinfo_save, false);
2417 /* Helpers to record a specific task information */
2418 void tracing_record_cmdline(struct task_struct *task)
2420 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2423 void tracing_record_tgid(struct task_struct *task)
2425 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2429 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2430 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2431 * simplifies those functions and keeps them in sync.
2433 enum print_line_t trace_handle_return(struct trace_seq *s)
2435 return trace_seq_has_overflowed(s) ?
2436 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2438 EXPORT_SYMBOL_GPL(trace_handle_return);
2441 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2442 unsigned long flags, int pc)
2444 struct task_struct *tsk = current;
2446 entry->preempt_count = pc & 0xff;
2447 entry->pid = (tsk) ? tsk->pid : 0;
2450 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2451 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2453 TRACE_FLAG_IRQS_NOSUPPORT |
2455 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2456 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2457 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2458 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2459 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2461 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2463 struct ring_buffer_event *
2464 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2467 unsigned long flags, int pc)
2469 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2472 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2473 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2474 static int trace_buffered_event_ref;
2477 * trace_buffered_event_enable - enable buffering events
2479 * When events are being filtered, it is quicker to use a temporary
2480 * buffer to write the event data into if there's a likely chance
2481 * that it will not be committed. The discard of the ring buffer
2482 * is not as fast as committing, and is much slower than copying
2485 * When an event is to be filtered, allocate per cpu buffers to
2486 * write the event data into, and if the event is filtered and discarded
2487 * it is simply dropped, otherwise, the entire data is to be committed
2490 void trace_buffered_event_enable(void)
2492 struct ring_buffer_event *event;
2496 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2498 if (trace_buffered_event_ref++)
2501 for_each_tracing_cpu(cpu) {
2502 page = alloc_pages_node(cpu_to_node(cpu),
2503 GFP_KERNEL | __GFP_NORETRY, 0);
2507 event = page_address(page);
2508 memset(event, 0, sizeof(*event));
2510 per_cpu(trace_buffered_event, cpu) = event;
2513 if (cpu == smp_processor_id() &&
2514 __this_cpu_read(trace_buffered_event) !=
2515 per_cpu(trace_buffered_event, cpu))
2522 trace_buffered_event_disable();
2525 static void enable_trace_buffered_event(void *data)
2527 /* Probably not needed, but do it anyway */
2529 this_cpu_dec(trace_buffered_event_cnt);
2532 static void disable_trace_buffered_event(void *data)
2534 this_cpu_inc(trace_buffered_event_cnt);
2538 * trace_buffered_event_disable - disable buffering events
2540 * When a filter is removed, it is faster to not use the buffered
2541 * events, and to commit directly into the ring buffer. Free up
2542 * the temp buffers when there are no more users. This requires
2543 * special synchronization with current events.
2545 void trace_buffered_event_disable(void)
2549 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2551 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2554 if (--trace_buffered_event_ref)
2558 /* For each CPU, set the buffer as used. */
2559 smp_call_function_many(tracing_buffer_mask,
2560 disable_trace_buffered_event, NULL, 1);
2563 /* Wait for all current users to finish */
2566 for_each_tracing_cpu(cpu) {
2567 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2568 per_cpu(trace_buffered_event, cpu) = NULL;
2571 * Make sure trace_buffered_event is NULL before clearing
2572 * trace_buffered_event_cnt.
2577 /* Do the work on each cpu */
2578 smp_call_function_many(tracing_buffer_mask,
2579 enable_trace_buffered_event, NULL, 1);
2583 static struct trace_buffer *temp_buffer;
2585 struct ring_buffer_event *
2586 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2587 struct trace_event_file *trace_file,
2588 int type, unsigned long len,
2589 unsigned long flags, int pc)
2591 struct ring_buffer_event *entry;
2594 *current_rb = trace_file->tr->array_buffer.buffer;
2596 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2597 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2598 (entry = this_cpu_read(trace_buffered_event))) {
2599 /* Try to use the per cpu buffer first */
2600 val = this_cpu_inc_return(trace_buffered_event_cnt);
2602 trace_event_setup(entry, type, flags, pc);
2603 entry->array[0] = len;
2606 this_cpu_dec(trace_buffered_event_cnt);
2609 entry = __trace_buffer_lock_reserve(*current_rb,
2610 type, len, flags, pc);
2612 * If tracing is off, but we have triggers enabled
2613 * we still need to look at the event data. Use the temp_buffer
2614 * to store the trace event for the tigger to use. It's recusive
2615 * safe and will not be recorded anywhere.
2617 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2618 *current_rb = temp_buffer;
2619 entry = __trace_buffer_lock_reserve(*current_rb,
2620 type, len, flags, pc);
2624 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2626 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2627 static DEFINE_MUTEX(tracepoint_printk_mutex);
2629 static void output_printk(struct trace_event_buffer *fbuffer)
2631 struct trace_event_call *event_call;
2632 struct trace_event_file *file;
2633 struct trace_event *event;
2634 unsigned long flags;
2635 struct trace_iterator *iter = tracepoint_print_iter;
2637 /* We should never get here if iter is NULL */
2638 if (WARN_ON_ONCE(!iter))
2641 event_call = fbuffer->trace_file->event_call;
2642 if (!event_call || !event_call->event.funcs ||
2643 !event_call->event.funcs->trace)
2646 file = fbuffer->trace_file;
2647 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2648 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2649 !filter_match_preds(file->filter, fbuffer->entry)))
2652 event = &fbuffer->trace_file->event_call->event;
2654 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2655 trace_seq_init(&iter->seq);
2656 iter->ent = fbuffer->entry;
2657 event_call->event.funcs->trace(iter, 0, event);
2658 trace_seq_putc(&iter->seq, 0);
2659 printk("%s", iter->seq.buffer);
2661 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
/*
 * Sysctl handler for "tracepoint_printk". Flips the static key that
 * routes tracepoint output to the console, but only when the value
 * actually changed and the print iterator exists.
 */
2664 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2665 void *buffer, size_t *lenp,
2668 int save_tracepoint_printk;
2671 mutex_lock(&tracepoint_printk_mutex);
/* Remember the old value so we only toggle the key on a real change. */
2672 save_tracepoint_printk = tracepoint_printk;
2674 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2677 * This will force exiting early, as tracepoint_printk
2678 * is always zero when tracepoint_printk_iter is not allocated
2680 if (!tracepoint_print_iter)
2681 tracepoint_printk = 0;
2683 if (save_tracepoint_printk == tracepoint_printk)
2686 if (tracepoint_printk)
2687 static_key_enable(&tracepoint_printk_key.key);
2689 static_key_disable(&tracepoint_printk_key.key);
2692 mutex_unlock(&tracepoint_printk_mutex);
/*
 * Commit a reserved trace event. Optionally echoes it to the console
 * (tracepoint_printk), then commits via the trigger-aware unlock path.
 */
2697 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2699 if (static_key_false(&tracepoint_printk_key.key))
2700 output_printk(fbuffer);
2702 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2703 fbuffer->event, fbuffer->entry,
2704 fbuffer->flags, fbuffer->pc, fbuffer->regs);
2706 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
/*
 * Number of stack frames to skip so the recorded stack starts at the
 * caller of the commit path rather than inside it (call chain below).
 */
2711 * trace_buffer_unlock_commit_regs()
2712 * trace_event_buffer_commit()
2713 * trace_event_raw_event_xxx()
2715 # define STACK_SKIP 3
/*
 * Commit @event to @buffer, then record kernel and user stack traces
 * (subject to the trace flags checked inside the helpers).
 */
2717 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2718 struct trace_buffer *buffer,
2719 struct ring_buffer_event *event,
2720 unsigned long flags, int pc,
2721 struct pt_regs *regs)
2723 __buffer_unlock_commit(buffer, event);
2726 * If regs is not set, then skip the necessary functions.
2727 * Note, we can still get here via blktrace, wakeup tracer
2728 * and mmiotrace, but that's ok if they lose a function or
2729 * two. They are not that meaningful.
2731 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2732 ftrace_trace_userstack(buffer, flags, pc);
2736 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2739 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2740 struct ring_buffer_event *event)
2742 __buffer_unlock_commit(buffer, event);
/* Hand one ring-buffer event's payload to an export's write() callback. */
2746 trace_process_export(struct trace_export *export,
2747 struct ring_buffer_event *event)
2749 struct trace_entry *entry;
2750 unsigned int size = 0;
2752 entry = ring_buffer_event_data(event);
2753 size = ring_buffer_event_length(event);
2754 export->write(export, entry, size);
/* Serializes registration/removal on the RCU-protected exports list. */
2757 static DEFINE_MUTEX(ftrace_export_lock);
2759 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
/* Static branch: cheap fast-path check for "any exports registered?". */
2761 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2763 static inline void ftrace_exports_enable(void)
2765 static_branch_enable(&ftrace_exports_enabled);
2768 static inline void ftrace_exports_disable(void)
2770 static_branch_disable(&ftrace_exports_enabled);
/*
 * Walk the exports list under preemption-disabled RCU-style protection
 * and feed @event to each registered export.
 */
2773 static void ftrace_exports(struct ring_buffer_event *event)
2775 struct trace_export *export;
2777 preempt_disable_notrace();
2779 export = rcu_dereference_raw_check(ftrace_exports_list);
2781 trace_process_export(export, event);
2782 export = rcu_dereference_raw_check(export->next);
2785 preempt_enable_notrace();
/* Push @export onto the head of the RCU list; publication is ordered. */
2789 add_trace_export(struct trace_export **list, struct trace_export *export)
2791 rcu_assign_pointer(export->next, *list);
2793 * We are entering export into the list but another
2794 * CPU might be walking that list. We need to make sure
2795 * the export->next pointer is valid before another CPU sees
2796 * the export pointer included into the list.
2798 rcu_assign_pointer(*list, export);
/* Find @export in the list and unlink it (RCU-safe pointer swap). */
2802 rm_trace_export(struct trace_export **list, struct trace_export *export)
2804 struct trace_export **p;
2806 for (p = list; *p != NULL; p = &(*p)->next)
2813 rcu_assign_pointer(*p, (*p)->next);
/* Add an export and flip the static branch on so events get exported. */
2819 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2822 ftrace_exports_enable();
2824 add_trace_export(list, export);
/* Remove an export; the static branch is disabled afterwards. */
2828 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2832 ret = rm_trace_export(list, export);
2834 ftrace_exports_disable();
/*
 * Public API: register a trace export. A write() callback is mandatory.
 * The exports mutex serializes against concurrent (un)registration.
 */
2839 int register_ftrace_export(struct trace_export *export)
2841 if (WARN_ON_ONCE(!export->write))
2844 mutex_lock(&ftrace_export_lock);
2846 add_ftrace_export(&ftrace_exports_list, export);
2848 mutex_unlock(&ftrace_export_lock);
2852 EXPORT_SYMBOL_GPL(register_ftrace_export);
/* Public API: remove a previously registered trace export. */
2854 int unregister_ftrace_export(struct trace_export *export)
2858 mutex_lock(&ftrace_export_lock);
2860 ret = rm_ftrace_export(&ftrace_exports_list, export);
2862 mutex_unlock(&ftrace_export_lock);
2866 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
/*
 * Record a TRACE_FN entry (ip + parent_ip) into @tr's ring buffer,
 * run it past the event filter, hand it to any exports, and commit.
 */
2869 trace_function(struct trace_array *tr,
2870 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2873 struct trace_event_call *call = &event_function;
2874 struct trace_buffer *buffer = tr->array_buffer.buffer;
2875 struct ring_buffer_event *event;
2876 struct ftrace_entry *entry;
2878 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2882 entry = ring_buffer_event_data(event);
2884 entry->parent_ip = parent_ip;
/* Only commit (and export) if the filter does not discard the event. */
2886 if (!call_filter_check_discard(call, entry, buffer, event)) {
2887 if (static_branch_unlikely(&ftrace_exports_enabled))
2888 ftrace_exports(event);
2889 __buffer_unlock_commit(buffer, event);
2893 #ifdef CONFIG_STACKTRACE
2895 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2896 #define FTRACE_KSTACK_NESTING 4
/* Split one page of stack-entry storage evenly across nesting levels. */
2898 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2900 struct ftrace_stack {
2901 unsigned long calls[FTRACE_KSTACK_ENTRIES];
/* One stack scratch buffer per nesting context, per CPU. */
2905 struct ftrace_stacks {
2906 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2909 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
/* Per-CPU nesting depth for reserving a slot in ftrace_stacks. */
2910 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
/*
 * Capture a kernel stack trace into a per-CPU, per-nesting-level scratch
 * buffer and record it as a TRACE_STACK event in @buffer.
 *
 * Preemption is disabled while a scratch slot is reserved; the per-CPU
 * ftrace_stack_reserve counter picks which of the FTRACE_KSTACK_NESTING
 * slots this context may use.
 */
2912 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2913 unsigned long flags,
2914 int skip, int pc, struct pt_regs *regs)
2916 struct trace_event_call *call = &event_kernel_stack;
2917 struct ring_buffer_event *event;
2918 unsigned int size, nr_entries;
2919 struct ftrace_stack *fstack;
2920 struct stack_entry *entry;
2924 * Add one, for this function and the call to save_stack_trace()
2925 * If regs is set, then these functions will not be in the way.
2927 #ifndef CONFIG_UNWINDER_ORC
2932 preempt_disable_notrace();
2934 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2936 /* This should never happen. If it does, yell once and skip */
/*
 * FIX: valid slot indices are 0..FTRACE_KSTACK_NESTING-1.  The old
 * check used '>' which let stackidx == FTRACE_KSTACK_NESTING through,
 * indexing one element past stacks[] (out-of-bounds write).  Use '>='.
 */
2937 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2941 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2942 * interrupt will either see the value pre increment or post
2943 * increment. If the interrupt happens pre increment it will have
2944 * restored the counter when it returns. We just need a barrier to
2945 * keep gcc from moving things around.
2949 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2950 size = ARRAY_SIZE(fstack->calls);
2953 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2956 nr_entries = stack_trace_save(fstack->calls, size, skip);
2959 size = nr_entries * sizeof(unsigned long);
2960 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2961 sizeof(*entry) + size, flags, pc);
2964 entry = ring_buffer_event_data(event);
2966 memcpy(&entry->caller, fstack->calls, size);
2967 entry->size = nr_entries;
2969 if (!call_filter_check_discard(call, entry, buffer, event))
2970 __buffer_unlock_commit(buffer, event);
2973 /* Again, don't let gcc optimize things here */
2975 __this_cpu_dec(ftrace_stack_reserve);
2976 preempt_enable_notrace();
/* Record a kernel stack trace only if the STACKTRACE trace flag is set. */
2980 static inline void ftrace_trace_stack(struct trace_array *tr,
2981 struct trace_buffer *buffer,
2982 unsigned long flags,
2983 int skip, int pc, struct pt_regs *regs)
2985 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2988 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
/*
 * Record a stack trace into @tr's buffer, handling the case where RCU
 * is not watching (e.g. idle/NMI entry paths) by entering an RCU
 * irq section first — except from NMI, where that is not allowed.
 */
2991 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2994 struct trace_buffer *buffer = tr->array_buffer.buffer;
2996 if (rcu_is_watching()) {
2997 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3002 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3003 * but if the above rcu_is_watching() failed, then the NMI
3004 * triggered someplace critical, and rcu_irq_enter() should
3005 * not be called from NMI.
3007 if (unlikely(in_nmi()))
3010 rcu_irq_enter_irqson();
3011 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3012 rcu_irq_exit_irqson();
3016 * trace_dump_stack - record a stack back trace in the trace buffer
3017 * @skip: Number of functions to skip (helper handlers)
3019 void trace_dump_stack(int skip)
3021 unsigned long flags;
3023 if (tracing_disabled || tracing_selftest_running)
3026 local_save_flags(flags);
3028 #ifndef CONFIG_UNWINDER_ORC
3029 /* Skip 1 to skip this function. */
/* Writes into the global (top-level) trace buffer at current pc/flags. */
3032 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3033 flags, skip, preempt_count(), NULL);
3035 EXPORT_SYMBOL_GPL(trace_dump_stack);
3037 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU recursion guard: user stack saving can itself trigger events. */
3038 static DEFINE_PER_CPU(int, user_stack_count);
/*
 * Record the current task's user-space stack as a TRACE_USER_STACK
 * event. Skipped in NMI context (user stack walks can fault) and when
 * already recording on this CPU (recursion guard).
 */
3041 ftrace_trace_userstack(struct trace_buffer *buffer, unsigned long flags, int pc)
3043 struct trace_event_call *call = &event_user_stack;
3044 struct ring_buffer_event *event;
3045 struct userstack_entry *entry;
3047 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
3051 * NMIs can not handle page faults, even with fix ups.
3052 * The save user stack can (and often does) fault.
3054 if (unlikely(in_nmi()))
3058 * prevent recursion, since the user stack tracing may
3059 * trigger other kernel events.
3062 if (__this_cpu_read(user_stack_count))
3065 __this_cpu_inc(user_stack_count);
3067 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3068 sizeof(*entry), flags, pc);
3070 goto out_drop_count;
3071 entry = ring_buffer_event_data(event);
3073 entry->tgid = current->tgid;
/* Zero first: stack_trace_save_user() may fill fewer than all slots. */
3074 memset(&entry->caller, 0, sizeof(entry->caller));
3076 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3077 if (!call_filter_check_discard(call, entry, buffer, event))
3078 __buffer_unlock_commit(buffer, event);
3081 __this_cpu_dec(user_stack_count);
3085 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No user stacktrace support configured: compile to a no-op stub. */
3086 static void ftrace_trace_userstack(struct trace_buffer *buffer,
3087 unsigned long flags, int pc)
3090 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3092 #endif /* CONFIG_STACKTRACE */
3094 /* created for use with alloc_percpu */
3095 struct trace_buffer_struct {
/* One scratch buffer per nesting level (normal/softirq/irq/NMI). */
3097 char buffer[4][TRACE_BUF_SIZE];
3100 static struct trace_buffer_struct *trace_percpu_buffer;
3103 * This allows for lockless recording. If we're nested too deeply, then
3104 * this returns NULL.
3106 static char *get_trace_buf(void)
3108 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
/* NULL when buffers are not allocated yet or all 4 levels are in use. */
3110 if (!buffer || buffer->nesting >= 4)
3115 /* Interrupts must see nesting incremented before we use the buffer */
3117 return &buffer->buffer[buffer->nesting][0];
/* Release the scratch buffer taken by get_trace_buf() (drop one nesting). */
3120 static void put_trace_buf(void)
3122 /* Don't let the decrement of nesting leak before this */
3124 this_cpu_dec(trace_percpu_buffer->nesting);
/* Allocate the per-CPU trace_printk scratch buffers (idempotent). */
3127 static int alloc_percpu_trace_buffer(void)
3129 struct trace_buffer_struct *buffers;
3131 if (trace_percpu_buffer)
3134 buffers = alloc_percpu(struct trace_buffer_struct);
3135 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3138 trace_percpu_buffer = buffers;
3142 static int buffers_allocated;
/*
 * Set up the buffers trace_printk() needs. Loudly warns that
 * trace_printk() is present, since it is debug-only, and expands
 * the ring buffer to its configured size.
 */
3144 void trace_printk_init_buffers(void)
3146 if (buffers_allocated)
3149 if (alloc_percpu_trace_buffer())
3152 /* trace_printk() is for debug use only. Don't use it in production. */
3155 pr_warn("**********************************************************\n");
3156 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3158 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3160 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3161 pr_warn("** unsafe for production use. **\n");
3163 pr_warn("** If you see this message and you are not debugging **\n");
3164 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3166 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3167 pr_warn("**********************************************************\n");
3169 /* Expand the buffers to set size */
3170 tracing_update_buffers();
3172 buffers_allocated = 1;
3175 * trace_printk_init_buffers() can be called by modules.
3176 * If that happens, then we need to start cmdline recording
3177 * directly here. If the global_trace.buffer is already
3178 * allocated here, then this was called by module code.
3180 if (global_trace.array_buffer.buffer)
3181 tracing_start_cmdline_record();
3183 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
/* Begin cmdline (comm) recording if trace_printk buffers exist. */
3185 void trace_printk_start_comm(void)
3187 /* Start tracing comms if trace printk is set */
3188 if (!buffers_allocated)
3190 tracing_start_cmdline_record();
/* Toggle cmdline recording to follow the trace_printk enable state. */
3193 static void trace_printk_start_stop_comm(int enabled)
3195 if (!buffers_allocated)
3199 tracing_start_cmdline_record();
3201 tracing_stop_cmdline_record();
3205 * trace_vbprintk - write binary msg to tracing buffer
3206 * @ip: The address of the caller
3207 * @fmt: The string format to write to the buffer
3208 * @args: Arguments for @fmt
3210 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3212 struct trace_event_call *call = &event_bprint;
3213 struct ring_buffer_event *event;
3214 struct trace_buffer *buffer;
3215 struct trace_array *tr = &global_trace;
3216 struct bprint_entry *entry;
3217 unsigned long flags;
3219 int len = 0, size, pc;
3221 if (unlikely(tracing_selftest_running || tracing_disabled))
3224 /* Don't pollute graph traces with trace_vprintk internals */
3225 pause_graph_tracing();
3227 pc = preempt_count();
3228 preempt_disable_notrace();
/* Grab a per-CPU scratch buffer; bails if nested too deeply. */
3230 tbuffer = get_trace_buf();
/* Binary-encode the format arguments rather than formatting text now. */
3236 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3238 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3241 local_save_flags(flags);
3242 size = sizeof(*entry) + sizeof(u32) * len;
3243 buffer = tr->array_buffer.buffer;
3244 ring_buffer_nest_start(buffer);
3245 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3249 entry = ring_buffer_event_data(event);
3253 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3254 if (!call_filter_check_discard(call, entry, buffer, event)) {
3255 __buffer_unlock_commit(buffer, event);
/* skip=6 hides the trace_printk plumbing from the recorded stack. */
3256 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3260 ring_buffer_nest_end(buffer);
3265 preempt_enable_notrace();
3266 unpause_graph_tracing();
3270 EXPORT_SYMBOL_GPL(trace_vbprintk);
/*
 * Format @fmt/@args into a per-CPU scratch buffer and record the result
 * as a TRACE_PRINT event in @buffer (text form, unlike trace_vbprintk).
 */
3274 __trace_array_vprintk(struct trace_buffer *buffer,
3275 unsigned long ip, const char *fmt, va_list args)
3277 struct trace_event_call *call = &event_print;
3278 struct ring_buffer_event *event;
3279 int len = 0, size, pc;
3280 struct print_entry *entry;
3281 unsigned long flags;
3284 if (tracing_disabled || tracing_selftest_running)
3287 /* Don't pollute graph traces with trace_vprintk internals */
3288 pause_graph_tracing();
3290 pc = preempt_count();
3291 preempt_disable_notrace();
3294 tbuffer = get_trace_buf();
3300 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3302 local_save_flags(flags);
/* +1 for the terminating NUL copied along with the text. */
3303 size = sizeof(*entry) + len + 1;
3304 ring_buffer_nest_start(buffer);
3305 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3309 entry = ring_buffer_event_data(event);
3312 memcpy(&entry->buf, tbuffer, len + 1);
3313 if (!call_filter_check_discard(call, entry, buffer, event)) {
3314 __buffer_unlock_commit(buffer, event);
3315 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3319 ring_buffer_nest_end(buffer);
3323 preempt_enable_notrace();
3324 unpause_graph_tracing();
/* vprintf-style write into a specific trace_array's main buffer. */
3330 int trace_array_vprintk(struct trace_array *tr,
3331 unsigned long ip, const char *fmt, va_list args)
3333 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3337 * trace_array_printk - Print a message to a specific instance
3338 * @tr: The instance trace_array descriptor
3339 * @ip: The instruction pointer that this is called from.
3340 * @fmt: The format to print (printf format)
3342 * If a subsystem sets up its own instance, they have the right to
3343 * printk strings into their tracing instance buffer using this
3344 * function. Note, this function will not write into the top level
3345 * buffer (use trace_printk() for that), as writing into the top level
3346 * buffer should only have events that can be individually disabled.
3347 * trace_printk() is only used for debugging a kernel, and should not
3348 * be ever incorporated in normal use.
3350 * trace_array_printk() can be used, as it will not add noise to the
3351 * top level tracing buffer.
3353 * Note, trace_array_init_printk() must be called on @tr before this
3357 int trace_array_printk(struct trace_array *tr,
3358 unsigned long ip, const char *fmt, ...)
3366 /* This is only allowed for created instances */
3367 if (tr == &global_trace)
3370 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3374 ret = trace_array_vprintk(tr, ip, fmt, ap);
3378 EXPORT_SYMBOL_GPL(trace_array_printk);
3381 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3382 * @tr: The trace array to initialize the buffers for
3384 * As trace_array_printk() only writes into instances, they are OK to
3385 * have in the kernel (unlike trace_printk()). This needs to be called
3386 * before trace_array_printk() can be used on a trace_array.
3388 int trace_array_init_printk(struct trace_array *tr)
3393 /* This is only allowed for created instances */
3394 if (tr == &global_trace)
3397 return alloc_percpu_trace_buffer();
3399 EXPORT_SYMBOL_GPL(trace_array_init_printk);
/* printf-style write into an arbitrary trace buffer (gated by PRINTK flag). */
3402 int trace_array_printk_buf(struct trace_buffer *buffer,
3403 unsigned long ip, const char *fmt, ...)
3408 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3412 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
/* vprintf into the top-level (global) trace buffer. */
3418 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3420 return trace_array_vprintk(&global_trace, ip, fmt, args);
3422 EXPORT_SYMBOL_GPL(trace_vprintk);
/* Advance the per-CPU ring-buffer iterator for the iterator's current CPU. */
3424 static void trace_iterator_increment(struct trace_iterator *iter)
3426 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3430 ring_buffer_iter_advance(buf_iter);
/*
 * Peek at the next event on @cpu without consuming it. Uses the
 * iterator when one exists, otherwise peeks the live buffer; fills
 * @ts and (optionally) @lost_events, and records the entry size.
 */
3433 static struct trace_entry *
3434 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3435 unsigned long *lost_events)
3437 struct ring_buffer_event *event;
3438 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3441 event = ring_buffer_iter_peek(buf_iter, ts);
/* (unsigned long)-1 flags "events were dropped but count unknown". */
3443 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3444 (unsigned long)-1 : 0;
3446 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3451 iter->ent_size = ring_buffer_event_length(event);
3452 return ring_buffer_event_data(event);
/*
 * Find the next entry to display: either from the single selected CPU,
 * or the entry with the smallest timestamp across all tracing CPUs.
 * Returns the entry and reports its CPU, timestamp and lost events.
 */
3458 static struct trace_entry *
3459 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3460 unsigned long *missing_events, u64 *ent_ts)
3462 struct trace_buffer *buffer = iter->array_buffer->buffer;
3463 struct trace_entry *ent, *next = NULL;
3464 unsigned long lost_events = 0, next_lost = 0;
3465 int cpu_file = iter->cpu_file;
3466 u64 next_ts = 0, ts;
3472 * If we are in a per_cpu trace file, don't bother by iterating over
3473 * all cpu and peek directly.
3475 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3476 if (ring_buffer_empty_cpu(buffer, cpu_file))
3478 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3480 *ent_cpu = cpu_file;
3485 for_each_tracing_cpu(cpu) {
3487 if (ring_buffer_empty_cpu(buffer, cpu))
3490 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3493 * Pick the entry with the smallest timestamp:
3495 if (ent && (!next || ts < next_ts)) {
3499 next_lost = lost_events;
3500 next_size = iter->ent_size;
3504 iter->ent_size = next_size;
3507 *ent_cpu = next_cpu;
3513 *missing_events = next_lost;
/* Small static scratch entry used when ftrace_dump() cannot kmalloc. */
3518 #define STATIC_TEMP_BUF_SIZE 128
3519 static char static_temp_buf[STATIC_TEMP_BUF_SIZE];
3521 /* Find the next real entry, without updating the iterator itself */
3522 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3523 int *ent_cpu, u64 *ent_ts)
3525 /* __find_next_entry will reset ent_size */
3526 int ent_size = iter->ent_size;
3527 struct trace_entry *entry;
3530 * If called from ftrace_dump(), then the iter->temp buffer
3531 * will be the static_temp_buf and not created from kmalloc.
3532 * If the entry size is greater than the buffer, we can
3533 * not save it. Just return NULL in that case. This is only
3534 * used to add markers when two consecutive events' time
3535 * stamps have a large delta. See trace_print_lat_context()
3537 if (iter->temp == static_temp_buf &&
3538 STATIC_TEMP_BUF_SIZE < ent_size)
3542 * The __find_next_entry() may call peek_next_entry(), which may
3543 * call ring_buffer_peek() that may make the contents of iter->ent
3544 * undefined. Need to copy iter->ent now.
3546 if (iter->ent && iter->ent != iter->temp) {
/* Grow the temp buffer if the current entry does not fit. */
3547 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3548 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3550 iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
3554 memcpy(iter->temp, iter->ent, iter->ent_size);
3555 iter->temp_size = iter->ent_size;
3556 iter->ent = iter->temp;
3558 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3559 /* Put back the original ent_size */
3560 iter->ent_size = ent_size;
3565 /* Find the next real entry, and increment the iterator to the next entry */
3566 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3568 iter->ent = __find_next_entry(iter, &iter->cpu,
3569 &iter->lost_events, &iter->ts);
3572 trace_iterator_increment(iter);
/* Returns the iterator itself on success, NULL when exhausted. */
3574 return iter->ent ? iter : NULL;
/* Consume (destructively read) the next event on the iterator's CPU. */
3577 static void trace_consume(struct trace_iterator *iter)
3579 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3580 &iter->lost_events);
/* seq_file ->next: step the trace iterator forward to position *pos. */
3583 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3585 struct trace_iterator *iter = m->private;
3589 WARN_ON_ONCE(iter->leftover);
3593 /* can't go backwards */
3598 ent = trace_find_next_entry_inc(iter);
/* Skip forward until the iterator index catches up with the target. */
3602 while (ent && iter->idx < i)
3603 ent = trace_find_next_entry_inc(iter);
/*
 * Rewind the per-CPU buffer iterator and skip any entries whose
 * timestamp predates the buffer's recorded start time, counting them
 * as skipped so entry totals stay accurate.
 */
3610 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3612 struct ring_buffer_iter *buf_iter;
3613 unsigned long entries = 0;
3616 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3618 buf_iter = trace_buffer_iter(iter, cpu);
3622 ring_buffer_iter_reset(buf_iter);
3625 * We could have the case with the max latency tracers
3626 * that a reset never took place on a cpu. This is evident
3627 * by the timestamp being before the start of the buffer.
3629 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3630 if (ts >= iter->array_buffer->time_start)
3633 ring_buffer_iter_advance(buf_iter);
3636 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3640 * The current tracer is copied to avoid a global locking
/* seq_file ->start: position the iterator for a read at offset *pos. */
3643 static void *s_start(struct seq_file *m, loff_t *pos)
3645 struct trace_iterator *iter = m->private;
3646 struct trace_array *tr = iter->tr;
3647 int cpu_file = iter->cpu_file;
3653 * copy the tracer to avoid using a global lock all around.
3654 * iter->trace is a copy of current_trace, the pointer to the
3655 * name may be used instead of a strcmp(), as iter->trace->name
3656 * will point to the same string as current_trace->name.
3658 mutex_lock(&trace_types_lock);
3659 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3660 *iter->trace = *tr->current_trace;
3661 mutex_unlock(&trace_types_lock);
3663 #ifdef CONFIG_TRACER_MAX_TRACE
3664 if (iter->snapshot && iter->trace->use_max_tr)
3665 return ERR_PTR(-EBUSY);
3668 if (!iter->snapshot)
/* Freeze task comm/tgid recording while a non-snapshot read is active. */
3669 atomic_inc(&trace_record_taskinfo_disabled);
/* Position changed since last read: rewind and re-walk to *pos. */
3671 if (*pos != iter->pos) {
3676 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3677 for_each_tracing_cpu(cpu)
3678 tracing_iter_reset(iter, cpu);
3680 tracing_iter_reset(iter, cpu_file);
3683 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3688 * If we overflowed the seq_file before, then we want
3689 * to just reuse the trace_seq buffer again.
3695 p = s_next(m, p, &l);
3699 trace_event_read_lock();
3700 trace_access_lock(cpu_file);
/* seq_file ->stop: drop the locks/refs taken in s_start(). */
3704 static void s_stop(struct seq_file *m, void *p)
3706 struct trace_iterator *iter = m->private;
3708 #ifdef CONFIG_TRACER_MAX_TRACE
3709 if (iter->snapshot && iter->trace->use_max_tr)
3713 if (!iter->snapshot)
3714 atomic_dec(&trace_record_taskinfo_disabled);
3716 trace_access_unlock(iter->cpu_file);
3717 trace_event_read_unlock();
/* Accumulate entry/total counts for one CPU, honoring skipped entries. */
3721 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3722 unsigned long *entries, int cpu)
3724 unsigned long count;
3726 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3728 * If this buffer has skipped entries, then we hold all
3729 * entries for the trace and we need to ignore the
3730 * ones before the time stamp.
3732 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3733 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3734 /* total is the same as the entries */
/* Otherwise total also includes events lost to buffer overruns. */
3738 ring_buffer_overrun_cpu(buf->buffer, cpu);
/* Sum per-CPU counts across all tracing CPUs. */
3743 get_total_entries(struct array_buffer *buf,
3744 unsigned long *total, unsigned long *entries)
3752 for_each_tracing_cpu(cpu) {
3753 get_total_entries_cpu(buf, &t, &e, cpu);
/* Public wrapper: entry count for a single CPU of @tr's main buffer. */
3759 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3761 unsigned long total, entries;
3766 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
/* Public wrapper: entry count across all CPUs of @tr's main buffer. */
3771 unsigned long trace_total_entries(struct trace_array *tr)
3773 unsigned long total, entries;
3778 get_total_entries(&tr->array_buffer, &total, &entries);
/* Emit the column legend used by the latency-format trace output. */
3783 static void print_lat_help_header(struct seq_file *m)
3785 seq_puts(m, "# _------=> CPU# \n"
3786 "# / _-----=> irqs-off \n"
3787 "# | / _----=> need-resched \n"
3788 "# || / _---=> hardirq/softirq \n"
3789 "# ||| / _--=> preempt-depth \n"
3791 "# cmd pid ||||| time | caller \n"
3792 "# \\ / ||||| \\ | / \n");
/* Print the entries-written summary line for a buffer. */
3795 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3797 unsigned long total;
3798 unsigned long entries;
3800 get_total_entries(buf, &total, &entries);
3801 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3802 entries, total, num_online_cpus());
/* Basic (non-irq-info) column header; widens for TGID when recorded. */
3806 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3809 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3811 print_event_info(buf, m);
3813 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3814 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
/* Column header including the irqs-off/need-resched/preempt legend. */
3817 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3820 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3821 const char *space = " ";
/* Column padding widens from 2 to 12 when TGID is shown. */
3822 int prec = tgid ? 12 : 2;
3824 print_event_info(buf, m);
3826 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3827 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3828 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3829 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3830 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3831 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3832 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
/*
 * Print the verbose latency-trace header: version, latency summary,
 * preemption model, task identity and the critical section span.
 */
3836 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3838 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3839 struct array_buffer *buf = iter->array_buffer;
3840 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3841 struct tracer *type = iter->trace;
3842 unsigned long entries;
3843 unsigned long total;
3844 const char *name = "preemption";
3848 get_total_entries(buf, &total, &entries);
3850 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3852 seq_puts(m, "# -----------------------------------"
3853 "---------------------------------\n");
3854 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3855 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3856 nsecs_to_usecs(data->saved_latency),
/* Report the kernel's compiled-in preemption model. */
3860 #if defined(CONFIG_PREEMPT_NONE)
3862 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3864 #elif defined(CONFIG_PREEMPT)
3866 #elif defined(CONFIG_PREEMPT_RT)
3871 /* These are reserved for later use */
3874 seq_printf(m, " #P:%d)\n", num_online_cpus())
3878 seq_puts(m, "# -----------------\n");
3879 seq_printf(m, "# | task: %.16s-%d "
3880 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3881 data->comm, data->pid,
3882 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3883 data->policy, data->rt_priority);
3884 seq_puts(m, "# -----------------\n");
3886 if (data->critical_start) {
3887 seq_puts(m, "# => started at: ");
3888 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3889 trace_print_seq(m, &iter->seq);
3890 seq_puts(m, "\n# => ended at: ");
3891 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3892 trace_print_seq(m, &iter->seq);
3893 seq_puts(m, "\n#\n");
/*
 * Emit a "CPU N buffer started" annotation the first time output is
 * produced for a CPU (when the annotate flag is enabled).
 */
3899 static void test_cpu_buff_start(struct trace_iterator *iter)
3901 struct trace_seq *s = &iter->seq;
3902 struct trace_array *tr = iter->tr;
3904 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3907 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
/* Already announced this CPU. */
3910 if (cpumask_available(iter->started) &&
3911 cpumask_test_cpu(iter->cpu, iter->started))
3914 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3917 if (cpumask_available(iter->started))
3918 cpumask_set_cpu(iter->cpu, iter->started);
3920 /* Don't print started cpu buffer for the first entry of the trace */
3922 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
/* Default (human-readable) formatting of the current trace entry. */
3926 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3928 struct trace_array *tr = iter->tr;
3929 struct trace_seq *s = &iter->seq;
3930 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3931 struct trace_entry *entry;
3932 struct trace_event *event;
3936 test_cpu_buff_start(iter);
3938 event = ftrace_find_event(entry->type);
3940 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3941 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3942 trace_print_lat_context(iter);
3944 trace_print_context(iter);
3947 if (trace_seq_has_overflowed(s))
3948 return TRACE_TYPE_PARTIAL_LINE;
/* Delegate body formatting to the event's registered trace() callback. */
3951 return event->funcs->trace(iter, sym_flags, event);
3953 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3955 return trace_handle_return(s);
/* Raw formatting: pid/cpu/timestamp plus the event's raw() callback. */
3958 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3960 struct trace_array *tr = iter->tr;
3961 struct trace_seq *s = &iter->seq;
3962 struct trace_entry *entry;
3963 struct trace_event *event;
3967 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3968 trace_seq_printf(s, "%d %d %llu ",
3969 entry->pid, iter->cpu, iter->ts);
3971 if (trace_seq_has_overflowed(s))
3972 return TRACE_TYPE_PARTIAL_LINE;
3974 event = ftrace_find_event(entry->type);
3976 return event->funcs->raw(iter, 0, event);
/* No formatter registered for this type: print type id and '?'. */
3978 trace_seq_printf(s, "%d ?\n", entry->type);
3980 return trace_handle_return(s);
/* Hex formatting: context fields then the event's hex() callback. */
3983 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3985 struct trace_array *tr = iter->tr;
3986 struct trace_seq *s = &iter->seq;
3987 unsigned char newline = '\n';
3988 struct trace_entry *entry;
3989 struct trace_event *event;
3993 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3994 SEQ_PUT_HEX_FIELD(s, entry->pid);
3995 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3996 SEQ_PUT_HEX_FIELD(s, iter->ts);
3997 if (trace_seq_has_overflowed(s))
3998 return TRACE_TYPE_PARTIAL_LINE;
4001 event = ftrace_find_event(entry->type);
4003 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4004 if (ret != TRACE_TYPE_HANDLED)
/* Terminate each hex record with a newline byte. */
4008 SEQ_PUT_FIELD(s, newline);
4010 return trace_handle_return(s);
/* Binary formatting: raw field bytes then the event's binary() callback. */
4013 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4015 struct trace_array *tr = iter->tr;
4016 struct trace_seq *s = &iter->seq;
4017 struct trace_entry *entry;
4018 struct trace_event *event;
4022 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4023 SEQ_PUT_FIELD(s, entry->pid);
4024 SEQ_PUT_FIELD(s, iter->cpu);
4025 SEQ_PUT_FIELD(s, iter->ts);
4026 if (trace_seq_has_overflowed(s))
4027 return TRACE_TYPE_PARTIAL_LINE;
4030 event = ftrace_find_event(entry->type);
4031 return event ? event->funcs->binary(iter, 0, event) :
/* Return non-zero when the iterator has no entries left to show. */
4035 int trace_empty(struct trace_iterator *iter)
4037 struct ring_buffer_iter *buf_iter;
4040 /* If we are looking at one CPU buffer, only check that one */
4041 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4042 cpu = iter->cpu_file;
4043 buf_iter = trace_buffer_iter(iter, cpu);
4045 if (!ring_buffer_iter_empty(buf_iter))
4048 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
/* Otherwise any non-empty CPU buffer means not empty. */
4054 for_each_tracing_cpu(cpu) {
4055 buf_iter = trace_buffer_iter(iter, cpu);
4057 if (!ring_buffer_iter_empty(buf_iter))
4060 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4068 /* Called with trace_event_read_lock() held. */
/*
 * Format one trace line into iter->seq.  Order of precedence:
 * lost-event annotation, tracer-specific print_line hook, the
 * printk-msgonly shortcuts, then bin/hex/raw/default formats
 * selected by the trace_flags.
 */
4069 enum print_line_t print_trace_line(struct trace_iterator *iter)
4071 	struct trace_array *tr = iter->tr;
4072 	unsigned long trace_flags = tr->trace_flags;
4073 	enum print_line_t ret;
/* (unsigned long)-1 is the sentinel for "events lost, count unknown". */
4075 	if (iter->lost_events) {
4076 		if (iter->lost_events == (unsigned long)-1)
4077 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4080 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4081 					 iter->cpu, iter->lost_events);
4082 		if (trace_seq_has_overflowed(&iter->seq))
4083 			return TRACE_TYPE_PARTIAL_LINE;
/* Let the current tracer override the default line formatting. */
4086 	if (iter->trace && iter->trace->print_line) {
4087 		ret = iter->trace->print_line(iter);
4088 		if (ret != TRACE_TYPE_UNHANDLED)
/* trace_bputs/bprintk/printk records may print only their message. */
4092 	if (iter->ent->type == TRACE_BPUTS &&
4093 			trace_flags & TRACE_ITER_PRINTK &&
4094 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4095 		return trace_print_bputs_msg_only(iter);
4097 	if (iter->ent->type == TRACE_BPRINT &&
4098 			trace_flags & TRACE_ITER_PRINTK &&
4099 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4100 		return trace_print_bprintk_msg_only(iter);
4102 	if (iter->ent->type == TRACE_PRINT &&
4103 			trace_flags & TRACE_ITER_PRINTK &&
4104 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4105 		return trace_print_printk_msg_only(iter);
/* Output format chosen by flag precedence: bin > hex > raw > default. */
4107 	if (trace_flags & TRACE_ITER_BIN)
4108 		return print_bin_fmt(iter);
4110 	if (trace_flags & TRACE_ITER_HEX)
4111 		return print_hex_fmt(iter);
4113 	if (trace_flags & TRACE_ITER_RAW)
4114 		return print_raw_fmt(iter);
4116 	return print_trace_fmt(iter);
/*
 * Print the latency-format header for a trace seq_file, unless the
 * buffers are empty.  Full header only in latency mode; the short
 * help header is skipped in verbose mode.
 */
4119 void trace_latency_header(struct seq_file *m)
4121 	struct trace_iterator *iter = m->private;
4122 	struct trace_array *tr = iter->tr;
4124 	/* print nothing if the buffers are empty */
4125 	if (trace_empty(iter))
4128 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4129 		print_trace_header(m, iter);
4131 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4132 		print_lat_help_header(m);
/*
 * Print the default trace file header.  Nothing is printed without
 * context info; latency mode gets the latency header, otherwise the
 * function help header (irq variant when IRQ_INFO is enabled).
 */
4135 void trace_default_header(struct seq_file *m)
4137 	struct trace_iterator *iter = m->private;
4138 	struct trace_array *tr = iter->tr;
4139 	unsigned long trace_flags = tr->trace_flags;
/* Headers describe the context columns; without them there is
 * nothing to explain. */
4141 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4144 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4145 		/* print nothing if the buffers are empty */
4146 		if (trace_empty(iter))
4148 		print_trace_header(m, iter);
4149 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4150 			print_lat_help_header(m);
4152 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4153 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4154 				print_func_help_header_irq(iter->array_buffer,
4157 				print_func_help_header(iter->array_buffer, m,
/* Warn in the trace output when function tracing has been disabled
 * due to corruption (ftrace_is_dead()). */
4163 static void test_ftrace_alive(struct seq_file *m)
4165 	if (!ftrace_is_dead())
4167 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4168 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4171 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level "snapshot" file (all CPUs). */
4172 static void show_snapshot_main_help(struct seq_file *m)
4174 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4175 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4176 		    "#                      Takes a snapshot of the main buffer.\n"
4177 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4178 		    "#                      (Doesn't have to be '2' works with any number that\n"
4179 		    "#                       is not a '0' or '1')\n");
/* Usage text for a per-CPU "snapshot" file; echo 1 is only supported
 * when the ring buffer allows per-CPU swap. */
4182 static void show_snapshot_percpu_help(struct seq_file *m)
4184 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4185 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4186 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4187 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4189 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4190 		    "#                     Must use main snapshot file to allocate.\n");
4192 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4193 		    "#                      (Doesn't have to be '2' works with any number that\n"
4194 		    "#                       is not a '0' or '1')\n");
/* Print snapshot state and the matching command help (main vs per-CPU). */
4197 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4199 	if (iter->tr->allocated_snapshot)
4200 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4202 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4204 	seq_puts(m, "# Snapshot commands:\n");
4205 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4206 		show_snapshot_main_help(m);
4208 		show_snapshot_percpu_help(m);
4211 /* Should never be called */
/* Stub for builds without CONFIG_TRACER_MAX_TRACE. */
4212 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
/*
 * seq_file ->show() for the trace file.  Three cases: no entry yet
 * (print headers), leftover seq data from a previous overflow, or a
 * normal entry to format via print_trace_line().
 */
4215 static int s_show(struct seq_file *m, void *v)
4217 	struct trace_iterator *iter = v;
/* Start of output: emit the tracer name and any header/help text. */
4220 	if (iter->ent == NULL) {
4222 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4224 		test_ftrace_alive(m);
4226 		if (iter->snapshot && trace_empty(iter))
4227 			print_snapshot_help(m, iter);
4228 		else if (iter->trace && iter->trace->print_header)
4229 			iter->trace->print_header(m);
4231 			trace_default_header(m);
4233 	} else if (iter->leftover) {
4235 		 * If we filled the seq_file buffer earlier, we
4236 		 * want to just show it now.
4238 		ret = trace_print_seq(m, &iter->seq);
4240 		/* ret should this time be zero, but you never know */
4241 		iter->leftover = ret;
4244 		print_trace_line(iter);
4245 		ret = trace_print_seq(m, &iter->seq);
4247 		 * If we overflow the seq_file buffer, then it will
4248 		 * ask us for this data again at start up.
4250 		 * ret is 0 if seq_file write succeeded.
4253 		iter->leftover = ret;
4260  * Should be used after trace_array_get(), trace_types_lock
4261  * ensures that i_cdev was already initialized.
/* Decode the CPU number stashed in i_cdev (stored off-by-one so that
 * 0 can mean "not set"); RING_BUFFER_ALL_CPUS when unset. */
4263 static inline int tracing_get_cpu(struct inode *inode)
4265 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4266 		return (long)inode->i_cdev - 1;
4267 	return RING_BUFFER_ALL_CPUS;
/* seq_file operations for the trace file (initializer fields not
 * visible in this view). */
4270 static const struct seq_operations tracer_seq_ops = {
/*
 * Common open path for the trace file.  Allocates and initializes a
 * trace_iterator: per-CPU buffer iterators, a temp entry buffer, a
 * private copy of the current tracer, and the cpumask of started
 * CPUs.  Optionally pauses tracing while the file is open.  Returns
 * the iterator or an ERR_PTR on failure.
 */
4277 static struct trace_iterator *
4278 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4280 	struct trace_array *tr = inode->i_private;
4281 	struct trace_iterator *iter;
4284 	if (tracing_disabled)
4285 		return ERR_PTR(-ENODEV);
/* seq_file private data holds the iterator itself. */
4287 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4289 		return ERR_PTR(-ENOMEM);
4291 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4293 	if (!iter->buffer_iter)
4297 	 * trace_find_next_entry() may need to save off iter->ent.
4298 	 * It will place it into the iter->temp buffer. As most
4299 	 * events are less than 128, allocate a buffer of that size.
4300 	 * If one is greater, then trace_find_next_entry() will
4301 	 * allocate a new buffer to adjust for the bigger iter->ent.
4302 	 * It's not critical if it fails to get allocated here.
4304 	iter->temp = kmalloc(128, GFP_KERNEL);
4306 		iter->temp_size = 128;
4309 	 * We make a copy of the current tracer to avoid concurrent
4310 	 * changes on it while we are reading.
4312 	mutex_lock(&trace_types_lock);
4313 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4317 	*iter->trace = *tr->current_trace;
4319 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4324 #ifdef CONFIG_TRACER_MAX_TRACE
4325 	/* Currently only the top directory has a snapshot */
4326 	if (tr->current_trace->print_max || snapshot)
4327 		iter->array_buffer = &tr->max_buffer;
4330 		iter->array_buffer = &tr->array_buffer;
4331 	iter->snapshot = snapshot;
4333 	iter->cpu_file = tracing_get_cpu(inode);
4334 	mutex_init(&iter->mutex);
4336 	/* Notify the tracer early; before we stop tracing. */
4337 	if (iter->trace->open)
4338 		iter->trace->open(iter);
4340 	/* Annotate start of buffers if we had overruns */
4341 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4342 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4344 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4345 	if (trace_clocks[tr->clock_id].in_ns)
4346 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4349 	 * If pause-on-trace is enabled, then stop the trace while
4350 	 * dumping, unless this is the "snapshot" file
4352 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4353 		tracing_stop_tr(tr);
/* Two-phase read setup: prepare iterators for all CPUs first, sync,
 * then start them — same pattern for the single-CPU case. */
4355 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4356 		for_each_tracing_cpu(cpu) {
4357 			iter->buffer_iter[cpu] =
4358 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4361 		ring_buffer_read_prepare_sync();
4362 		for_each_tracing_cpu(cpu) {
4363 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4364 			tracing_iter_reset(iter, cpu);
4367 		cpu = iter->cpu_file;
4368 		iter->buffer_iter[cpu] =
4369 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4371 		ring_buffer_read_prepare_sync();
4372 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4373 		tracing_iter_reset(iter, cpu);
4376 	mutex_unlock(&trace_types_lock);
/* Error path: undo allocations and the seq_file open. */
4381 	mutex_unlock(&trace_types_lock);
4384 	kfree(iter->buffer_iter);
4386 	seq_release_private(inode, file);
4387 	return ERR_PTR(-ENOMEM);
/* Generic open: check tracing availability (no trace_array refcount)
 * and stash inode private data on the file. */
4390 int tracing_open_generic(struct inode *inode, struct file *filp)
4394 	ret = tracing_check_open_get_tr(NULL);
4398 	filp->private_data = inode->i_private;
/* Report whether tracing has been globally disabled. */
4402 bool tracing_is_disabled(void)
4404 	return (tracing_disabled) ? true: false;
4408  * Open and update trace_array ref count.
4409  * Must have the current trace_array passed to it.
/* Like tracing_open_generic() but takes a reference on the
 * trace_array; pair with tracing_release_generic_tr(). */
4411 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4413 	struct trace_array *tr = inode->i_private;
4416 	ret = tracing_check_open_get_tr(tr);
4420 	filp->private_data = inode->i_private;
/*
 * Release path for the trace file: tear down per-CPU ring buffer
 * iterators, notify the tracer, restart tracing if it was paused by
 * open, drop the trace_array reference and free the iterator.
 * Write-only opens have no seq_file/iterator to clean up.
 */
4425 static int tracing_release(struct inode *inode, struct file *file)
4427 	struct trace_array *tr = inode->i_private;
4428 	struct seq_file *m = file->private_data;
4429 	struct trace_iterator *iter;
4432 	if (!(file->f_mode & FMODE_READ)) {
4433 		trace_array_put(tr);
4437 	/* Writes do not use seq_file */
4439 	mutex_lock(&trace_types_lock);
4441 	for_each_tracing_cpu(cpu) {
4442 		if (iter->buffer_iter[cpu])
4443 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4446 	if (iter->trace && iter->trace->close)
4447 		iter->trace->close(iter);
4449 	if (!iter->snapshot && tr->stop_count)
4450 		/* reenable tracing if it was previously enabled */
4451 		tracing_start_tr(tr);
4453 	__trace_array_put(tr);
4455 	mutex_unlock(&trace_types_lock);
4457 	mutex_destroy(&iter->mutex);
4458 	free_cpumask_var(iter->started);
4461 	kfree(iter->buffer_iter);
4462 	seq_release_private(inode, file);
/* Release pairing tracing_open_generic_tr(): drop the trace_array ref. */
4467 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4469 	struct trace_array *tr = inode->i_private;
4471 	trace_array_put(tr);
/* Release for single_open()-based files: drop the trace_array ref
 * then let seq_file's single_release() free the rest. */
4475 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4477 	struct trace_array *tr = inode->i_private;
4479 	trace_array_put(tr);
4481 	return single_release(inode, file);
/*
 * Open the "trace" file.  O_TRUNC writes erase the buffer contents
 * (all CPUs or the one selected by the inode); reads set up the full
 * iterator via __tracing_open().
 */
4484 static int tracing_open(struct inode *inode, struct file *file)
4486 	struct trace_array *tr = inode->i_private;
4487 	struct trace_iterator *iter;
4490 	ret = tracing_check_open_get_tr(tr);
4494 	/* If this file was open for write, then erase contents */
4495 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4496 		int cpu = tracing_get_cpu(inode);
4497 		struct array_buffer *trace_buf = &tr->array_buffer;
4499 #ifdef CONFIG_TRACER_MAX_TRACE
4500 		if (tr->current_trace->print_max)
4501 			trace_buf = &tr->max_buffer;
4504 		if (cpu == RING_BUFFER_ALL_CPUS)
4505 			tracing_reset_online_cpus(trace_buf);
4507 			tracing_reset_cpu(trace_buf, cpu);
4510 	if (file->f_mode & FMODE_READ) {
4511 		iter = __tracing_open(inode, file, false);
4513 			ret = PTR_ERR(iter);
4514 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4515 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
/* Error path: drop the reference taken by tracing_check_open_get_tr(). */
4519 		trace_array_put(tr);
4525  * Some tracers are not suitable for instance buffers.
4526  * A tracer is always available for the global array (toplevel)
4527  * or if it explicitly states that it is.
4530 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4532 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4535 /* Find the next tracer that this trace array may use */
/* Walk the tracer list starting at @t, skipping tracers not allowed
 * for this array. */
4536 static struct tracer *
4537 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4539 	while (t && !trace_ok_for_array(t, tr))
/* seq_file ->next() for available_tracers: advance to the next
 * tracer usable by this trace array. */
4546 t_next(struct seq_file *m, void *v, loff_t *pos)
4548 	struct trace_array *tr = m->private;
4549 	struct tracer *t = v;
4554 	t = get_tracer_for_array(tr, t->next);
/* seq_file ->start(): take trace_types_lock (released in t_stop())
 * and seek to position *pos in the tracer list. */
4559 static void *t_start(struct seq_file *m, loff_t *pos)
4561 	struct trace_array *tr = m->private;
4565 	mutex_lock(&trace_types_lock);
4567 	t = get_tracer_for_array(tr, trace_types);
4568 	for (; t && l < *pos; t = t_next(m, t, &l))
/* seq_file ->stop(): drop the lock taken in t_start(). */
4574 static void t_stop(struct seq_file *m, void *p)
4576 	mutex_unlock(&trace_types_lock);
/* seq_file ->show(): print one tracer's name. */
4579 static int t_show(struct seq_file *m, void *v)
4581 	struct tracer *t = v;
4586 	seq_puts(m, t->name);
/* seq_file ops for available_tracers (initializer fields not visible
 * in this view; presumably t_start/t_next/t_stop/t_show). */
4595 static const struct seq_operations show_traces_seq_ops = {
/* Open available_tracers: take a trace_array ref, start the seq_file
 * iteration, and drop the ref again on seq_open() failure. */
4602 static int show_traces_open(struct inode *inode, struct file *file)
4604 	struct trace_array *tr = inode->i_private;
4608 	ret = tracing_check_open_get_tr(tr);
4612 	ret = seq_open(file, &show_traces_seq_ops);
4614 		trace_array_put(tr);
4618 	m = file->private_data;
/* Release for available_tracers: drop the array ref, free seq state. */
4624 static int show_traces_release(struct inode *inode, struct file *file)
4626 	struct trace_array *tr = inode->i_private;
4628 	trace_array_put(tr);
4629 	return seq_release(inode, file);
/* No-op write handler; the trace file accepts writes only so that
 * O_TRUNC can clear the buffer on open. */
4633 tracing_write_stub(struct file *filp, const char __user *ubuf,
4634 		   size_t count, loff_t *ppos)
/* lseek for tracing files: defer to seq_lseek() when readable,
 * otherwise pin the position at 0. */
4639 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4643 	if (file->f_mode & FMODE_READ)
4644 		ret = seq_lseek(file, offset, whence);
4646 		file->f_pos = ret = 0;
/* File operations for the "trace" file. */
4651 static const struct file_operations tracing_fops = {
4652 	.open		= tracing_open,
4654 	.write		= tracing_write_stub,
4655 	.llseek		= tracing_lseek,
4656 	.release	= tracing_release,
/* File operations for "available_tracers". */
4659 static const struct file_operations show_traces_fops = {
4660 	.open		= show_traces_open,
4662 	.llseek		= seq_lseek,
4663 	.release	= show_traces_release,
/* Read handler for tracing_cpumask: format the mask with %*pb into a
 * buffer sized by a dry-run snprintf, then copy it to userspace. */
4667 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4668 		     size_t count, loff_t *ppos)
4670 	struct trace_array *tr = file_inode(filp)->i_private;
/* First snprintf(NULL, 0, ...) computes the needed length (+1 NUL). */
4674 	len = snprintf(NULL, 0, "%*pb\n",
4675 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4676 	mask_str = kmalloc(len, GFP_KERNEL);
4680 	len = snprintf(mask_str, len, "%*pb\n",
4681 		       cpumask_pr_args(tr->tracing_cpumask));
4686 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
/*
 * Apply a new tracing cpumask.  For every CPU whose bit flips,
 * adjust the per-CPU disabled counter and enable/disable ring
 * buffer recording for that CPU, under tr->max_lock with IRQs off.
 */
4694 int tracing_set_cpumask(struct trace_array *tr,
4695 			cpumask_var_t tracing_cpumask_new)
4702 	local_irq_disable();
4703 	arch_spin_lock(&tr->max_lock);
4704 	for_each_tracing_cpu(cpu) {
4706 		 * Increase/decrease the disabled counter if we are
4707 		 * about to flip a bit in the cpumask:
/* Bit going 1 -> 0: disable recording on this CPU. */
4709 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4710 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4711 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4712 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
/* Bit going 0 -> 1: re-enable recording on this CPU. */
4714 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4715 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4716 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4717 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4720 	arch_spin_unlock(&tr->max_lock);
4723 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
/* Write handler for tracing_cpumask: parse the user-supplied mask
 * and apply it via tracing_set_cpumask(). */
4729 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4730 		      size_t count, loff_t *ppos)
4732 	struct trace_array *tr = file_inode(filp)->i_private;
4733 	cpumask_var_t tracing_cpumask_new;
4736 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4739 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4743 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4747 	free_cpumask_var(tracing_cpumask_new);
/* Error path also frees the temporary cpumask. */
4752 	free_cpumask_var(tracing_cpumask_new);
/* File operations for "tracing_cpumask". */
4757 static const struct file_operations tracing_cpumask_fops = {
4758 	.open		= tracing_open_generic_tr,
4759 	.read		= tracing_cpumask_read,
4760 	.write		= tracing_cpumask_write,
4761 	.release	= tracing_release_generic_tr,
4762 	.llseek		= generic_file_llseek,
/*
 * Show all trace options: global flags first, then the current
 * tracer's private options.  Disabled options get a "no" prefix.
 */
4765 static int tracing_trace_options_show(struct seq_file *m, void *v)
4767 	struct tracer_opt *trace_opts;
4768 	struct trace_array *tr = m->private;
4772 	mutex_lock(&trace_types_lock);
4773 	tracer_flags = tr->current_trace->flags->val;
4774 	trace_opts = tr->current_trace->flags->opts;
/* Global options: trace_options[] is indexed by flag bit. */
4776 	for (i = 0; trace_options[i]; i++) {
4777 		if (tr->trace_flags & (1 << i))
4778 			seq_printf(m, "%s\n", trace_options[i]);
4780 			seq_printf(m, "no%s\n", trace_options[i]);
/* Tracer-specific options carry their own bit in tracer_flags. */
4783 	for (i = 0; trace_opts[i].name; i++) {
4784 		if (tracer_flags & trace_opts[i].bit)
4785 			seq_printf(m, "%s\n", trace_opts[i].name);
4787 			seq_printf(m, "no%s\n", trace_opts[i].name);
4789 	mutex_unlock(&trace_types_lock);
/* Apply one tracer-private option: let the tracer veto via
 * ->set_flag(), then update the flag bit. */
4794 static int __set_tracer_option(struct trace_array *tr,
4795 			       struct tracer_flags *tracer_flags,
4796 			       struct tracer_opt *opts, int neg)
4798 	struct tracer *trace = tracer_flags->trace;
4801 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4806 		tracer_flags->val &= ~opts->bit;
4808 		tracer_flags->val |= opts->bit;
4812 /* Try to assign a tracer specific option */
/* Look up @cmp in the current tracer's option table and apply it;
 * behavior when no option matches is outside this view. */
4813 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4815 	struct tracer *trace = tr->current_trace;
4816 	struct tracer_flags *tracer_flags = trace->flags;
4817 	struct tracer_opt *opts = NULL;
4820 	for (i = 0; tracer_flags->opts[i].name; i++) {
4821 		opts = &tracer_flags->opts[i];
4823 		if (strcmp(cmp, opts->name) == 0)
4824 			return __set_tracer_option(tr, trace->flags, opts, neg);
4830 /* Some tracers require overwrite to stay enabled */
/* flag_changed callback: reject clearing TRACE_ITER_OVERWRITE while
 * the tracer is enabled. */
4831 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4833 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
/*
 * Set or clear one global trace flag, after the current tracer has
 * had a chance to veto, and propagate side effects (cmdline/tgid
 * recording, fork following, overwrite mode, printk control).
 * RECORD_TGID/RECORD_CMD require event_mutex to be held.
 */
4839 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4841 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4842 	    (mask == TRACE_ITER_RECORD_CMD))
4843 		lockdep_assert_held(&event_mutex);
4845 	/* do nothing if flag is already set */
4846 	if (!!(tr->trace_flags & mask) == !!enabled)
4849 	/* Give the tracer a chance to approve the change */
4850 	if (tr->current_trace->flag_changed)
4851 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4855 		tr->trace_flags |= mask;
4857 		tr->trace_flags &= ~mask;
4859 	if (mask == TRACE_ITER_RECORD_CMD)
4860 		trace_event_enable_cmd_record(enabled);
/* RECORD_TGID lazily allocates the pid->tgid map; on allocation
 * failure the flag is cleared again. */
4862 	if (mask == TRACE_ITER_RECORD_TGID) {
4864 		tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
4868 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4872 		trace_event_enable_tgid_record(enabled);
4875 	if (mask == TRACE_ITER_EVENT_FORK)
4876 		trace_event_follow_fork(tr, enabled);
4878 	if (mask == TRACE_ITER_FUNC_FORK)
4879 		ftrace_pid_follow_fork(tr, enabled);
/* Overwrite mode must be mirrored into the max/snapshot buffer too. */
4881 	if (mask == TRACE_ITER_OVERWRITE) {
4882 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4883 #ifdef CONFIG_TRACER_MAX_TRACE
4884 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4888 	if (mask == TRACE_ITER_PRINTK) {
4889 		trace_printk_start_stop_comm(enabled);
4890 		trace_printk_control(enabled);
/*
 * Parse a single option string ("opt" or "noopt"), try the global
 * flag table first, then the tracer-specific options.  The input is
 * stripped in place and the trailing whitespace is restored before
 * returning so callers can reuse the buffer.
 */
4896 int trace_set_options(struct trace_array *tr, char *option)
4901 	size_t orig_len = strlen(option);
4904 	cmp = strstrip(option);
/* A "no" prefix means the option is being cleared. */
4906 	len = str_has_prefix(cmp, "no");
4912 	mutex_lock(&event_mutex);
4913 	mutex_lock(&trace_types_lock);
4915 	ret = match_string(trace_options, -1, cmp);
4916 	/* If no option could be set, test the specific tracer options */
4918 		ret = set_tracer_option(tr, cmp, neg);
4920 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4922 	mutex_unlock(&trace_types_lock);
4923 	mutex_unlock(&event_mutex);
4926 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4927 	 * turn it back into a space.
4929 	if (orig_len > strlen(option))
4930 		option[strlen(option)] = ' ';
/* Apply comma-separated options from the trace_options= boot
 * parameter to the global trace array. */
4935 static void __init apply_trace_boot_options(void)
4937 	char *buf = trace_boot_options_buf;
4941 		option = strsep(&buf, ",");
4947 		trace_set_options(&global_trace, option);
4949 	/* Put back the comma to allow this to be called again */
/* Write handler for trace_options: copy the user string into a
 * bounded on-stack buffer and hand it to trace_set_options(). */
4956 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4957 			size_t cnt, loff_t *ppos)
4959 	struct seq_file *m = filp->private_data;
4960 	struct trace_array *tr = m->private;
/* Reject writes that would not fit (including the NUL terminator). */
4964 	if (cnt >= sizeof(buf))
4967 	if (copy_from_user(buf, ubuf, cnt))
4972 	ret = trace_set_options(tr, buf);
/* Open trace_options as a single_open seq_file; drop the array ref
 * if single_open() fails. */
4981 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4983 	struct trace_array *tr = inode->i_private;
4986 	ret = tracing_check_open_get_tr(tr);
4990 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4992 		trace_array_put(tr);
/* File operations for "trace_options". */
4997 static const struct file_operations tracing_iter_fops = {
4998 	.open		= tracing_trace_options_open,
5000 	.llseek		= seq_lseek,
5001 	.release	= tracing_single_release_tr,
5002 	.write		= tracing_trace_options_write,
/*
 * Contents of the tracefs "README" file: a mini-HOWTO covering the
 * tracing control files, with sections conditionally compiled in
 * based on the enabled tracing features.  This is user-visible text;
 * every byte below is emitted verbatim to readers of the file.
 */
5005 static const char readme_msg[] =
5006 	"tracing mini-HOWTO:\n\n"
5007 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5008 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5009 	" Important files:\n"
5010 	"  trace\t\t\t- The static contents of the buffer\n"
5011 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5012 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5013 	"  current_tracer\t- function and latency tracers\n"
5014 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5015 	"  error_log\t- error log for failed commands (that support it)\n"
5016 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5017 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5018 	"  trace_clock\t\t-change the clock used to order events\n"
5019 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5020 	"      global:   Synced across CPUs but slows tracing down.\n"
5021 	"     counter:   Not a clock, but just an increment\n"
5022 	"      uptime:   Jiffy counter from time of boot\n"
5023 	"        perf:   Same clock that perf events use\n"
5024 #ifdef CONFIG_X86_64
5025 	"     x86-tsc:   TSC cycle counter\n"
5027 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5028 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5029 	"    absolute:   Absolute (standalone) timestamp\n"
5030 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5031 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5032 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5033 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5034 	"\t\t\t  Remove sub-buffer with rmdir\n"
5035 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5036 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5037 	"\t\t\t  option name\n"
5038 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5039 #ifdef CONFIG_DYNAMIC_FTRACE
5040 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5041 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5042 	"\t\t\t  functions\n"
5043 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5044 	"\t     modules: Can select a group via module\n"
5045 	"\t      Format: :mod:<module-name>\n"
5046 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5047 	"\t    triggers: a command to perform when function is hit\n"
5048 	"\t      Format: <function>:<trigger>[:count]\n"
5049 	"\t     trigger: traceon, traceoff\n"
5050 	"\t\t      enable_event:<system>:<event>\n"
5051 	"\t\t      disable_event:<system>:<event>\n"
5052 #ifdef CONFIG_STACKTRACE
5055 #ifdef CONFIG_TRACER_SNAPSHOT
5060 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5061 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5062 	"\t     The first one will disable tracing every time do_fault is hit\n"
5063 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5064 	"\t       The first time do trap is hit and it disables tracing, the\n"
5065 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5066 	"\t       the counter will not decrement. It only decrements when the\n"
5067 	"\t       trigger did work\n"
5068 	"\t     To remove trigger without count:\n"
5069 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5070 	"\t     To remove trigger with a count:\n"
5071 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5072 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5073 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5074 	"\t    modules: Can select a group via module command :mod:\n"
5075 	"\t    Does not accept triggers\n"
5076 #endif /* CONFIG_DYNAMIC_FTRACE */
5077 #ifdef CONFIG_FUNCTION_TRACER
5078 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5080 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5083 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5084 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5085 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5086 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5088 #ifdef CONFIG_TRACER_SNAPSHOT
5089 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5090 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5091 	"\t\t\t  information\n"
5093 #ifdef CONFIG_STACK_TRACER
5094 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5095 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5096 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5097 	"\t\t\t  new trace)\n"
5098 #ifdef CONFIG_DYNAMIC_FTRACE
5099 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5102 #endif /* CONFIG_STACK_TRACER */
5103 #ifdef CONFIG_DYNAMIC_EVENTS
5104 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5105 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5107 #ifdef CONFIG_KPROBE_EVENTS
5108 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5109 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5111 #ifdef CONFIG_UPROBE_EVENTS
5112 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5113 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5115 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5116 	"\t  accepts: event-definitions (one definition per line)\n"
5117 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5118 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5119 #ifdef CONFIG_HIST_TRIGGERS
5120 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5122 	"\t           -:[<group>/]<event>\n"
5123 #ifdef CONFIG_KPROBE_EVENTS
5124 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5125 	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5127 #ifdef CONFIG_UPROBE_EVENTS
5128 	"   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5130 	"\t     args: <name>=fetcharg[:type]\n"
5131 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5132 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5133 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5135 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5137 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5138 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5139 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5140 	"\t           <type>\\[<array-size>\\]\n"
5141 #ifdef CONFIG_HIST_TRIGGERS
5142 	"\t    field: <stype> <name>;\n"
5143 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5144 	"\t           [unsigned] char/int/long\n"
5147 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5148 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5149 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5150 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5152 	"      filter\t\t- If set, only events passing filter are traced\n"
5153 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5155 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5156 	"      filter\t\t- If set, only events passing filter are traced\n"
5157 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5158 	"\t    Format: <trigger>[:count][if <filter>]\n"
5159 	"\t   trigger: traceon, traceoff\n"
5160 	"\t            enable_event:<system>:<event>\n"
5161 	"\t            disable_event:<system>:<event>\n"
5162 #ifdef CONFIG_HIST_TRIGGERS
5163 	"\t            enable_hist:<system>:<event>\n"
5164 	"\t            disable_hist:<system>:<event>\n"
5166 #ifdef CONFIG_STACKTRACE
5169 #ifdef CONFIG_TRACER_SNAPSHOT
5172 #ifdef CONFIG_HIST_TRIGGERS
5173 	"\t\t    hist (see below)\n"
5175 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5176 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5177 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5178 	"\t                  events/block/block_unplug/trigger\n"
5179 	"\t   The first disables tracing every time block_unplug is hit.\n"
5180 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5181 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5182 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5183 	"\t   Like function triggers, the counter is only decremented if it\n"
5184 	"\t    enabled or disabled tracing.\n"
5185 	"\t   To remove a trigger without a count:\n"
5186 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5187 	"\t   To remove a trigger with a count:\n"
5188 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5189 	"\t   Filters can be ignored when removing a trigger.\n"
5190 #ifdef CONFIG_HIST_TRIGGERS
5191 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5192 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5193 	"\t            [:values=<field1[,field2,...]>]\n"
5194 	"\t            [:sort=<field1[,field2,...]>]\n"
5195 	"\t            [:size=#entries]\n"
5196 	"\t            [:pause][:continue][:clear]\n"
5197 	"\t            [:name=histname1]\n"
5198 	"\t            [:<handler>.<action>]\n"
5199 	"\t            [if <filter>]\n\n"
5200 	"\t    When a matching event is hit, an entry is added to a hash\n"
5201 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5202 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5203 	"\t    correspond to fields in the event's format description.  Keys\n"
5204 	"\t    can be any field, or the special string 'stacktrace'.\n"
5205 	"\t    Compound keys consisting of up to two fields can be specified\n"
5206 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5207 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5208 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5209 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5210 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5211 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5212 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5213 	"\t    its histogram data will be shared with other triggers of the\n"
5214 	"\t    same name, and trigger hits will update this common data.\n\n"
5215 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5216 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5217 	"\t    triggers attached to an event, there will be a table for each\n"
5218 	"\t    trigger in the output.  The table displayed for a named\n"
5219 	"\t    trigger will be the same as any other instance having the\n"
5220 	"\t    same name.  The default format used to display a given field\n"
5221 	"\t    can be modified by appending any of the following modifiers\n"
5222 	"\t    to the field name, as applicable:\n\n"
5223 	"\t            .hex        display a number as a hex value\n"
5224 	"\t            .sym        display an address as a symbol\n"
5225 	"\t            .sym-offset display an address as a symbol and offset\n"
5226 	"\t            .execname   display a common_pid as a program name\n"
5227 	"\t            .syscall    display a syscall id as a syscall name\n"
5228 	"\t            .log2       display log2 value rather than raw number\n"
5229 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5230 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5231 	"\t    trigger or to start a hist trigger but not log any events\n"
5232 	"\t    until told to do so.  'continue' can be used to start or\n"
5233 	"\t    restart a paused hist trigger.\n\n"
5234 	"\t    The 'clear' parameter will clear the contents of a running\n"
5235 	"\t    hist trigger and leave its current paused/active state\n"
5237 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5238 	"\t    have one event conditionally start and stop another event's\n"
5239 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5240 	"\t    the enable_event and disable_event triggers.\n\n"
5241 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5242 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5243 	"\t        <handler>.<action>\n\n"
5244 	"\t    The available handlers are:\n\n"
5245 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5246 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5247 	"\t        onchange(var)            - invoke action if var changes\n\n"
5248 	"\t    The available actions are:\n\n"
5249 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5250 	"\t        save(field,...)                      - save current event fields\n"
5251 #ifdef CONFIG_TRACER_SNAPSHOT
5252 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5254 #ifdef CONFIG_SYNTH_EVENTS
5255 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5256 	"\t  Write into this file to define/undefine new synthetic events.\n"
5257 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
/*
 * Read handler for the tracefs "README" file: copy the static readme_msg
 * help text to userspace with simple_read_from_buffer().
 * NOTE(review): this excerpt is elided (return type, braces and some lines
 * are missing); code left byte-identical.
 */
5263 tracing_readme_read(struct file *filp, char __user *ubuf,
5264 size_t cnt, loff_t *ppos)
5266 return simple_read_from_buffer(ubuf, cnt, ppos,
5267 readme_msg, strlen(readme_msg));
/* file_operations for the read-only "README" help file. */
5270 static const struct file_operations tracing_readme_fops = {
5271 .open = tracing_open_generic,
5272 .read = tracing_readme_read,
5273 .llseek = generic_file_llseek,
/*
 * seq_file iterator over the tgid_map[] pid->tgid table backing the
 * "saved_tgids" tracefs file.  (Excerpt elided; code byte-identical.)
 */
/* Advance to the next pid that has a recorded tgid; skips empty slots. */
5276 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5280 if (*pos || m->count)
5285 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5286 if (trace_find_tgid(*ptr))
/* Position the iterator at offset *pos by stepping with saved_tgids_next(). */
5293 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5303 v = saved_tgids_next(m, v, &l);
5311 static void saved_tgids_stop(struct seq_file *m, void *v)
/* Emit one "pid tgid" line; pid is recovered from the slot's array index. */
5315 static int saved_tgids_show(struct seq_file *m, void *v)
5317 int pid = (int *)v - tgid_map;
5319 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5323 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5324 .start = saved_tgids_start,
5325 .stop = saved_tgids_stop,
5326 .next = saved_tgids_next,
5327 .show = saved_tgids_show,
/* Open: security/availability check (NULL = global check), then seq_open(). */
5330 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5334 ret = tracing_check_open_get_tr(NULL);
5338 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5342 static const struct file_operations tracing_saved_tgids_fops = {
5343 .open = tracing_saved_tgids_open,
5345 .llseek = seq_lseek,
5346 .release = seq_release,
/*
 * seq_file iterator over savedcmd->map_cmdline_to_pid[] backing the
 * "saved_cmdlines" tracefs file.  (Excerpt elided; code byte-identical.)
 */
/* Advance past unused slots (-1 / NO_CMDLINE_MAP) to the next saved pid. */
5349 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5351 unsigned int *ptr = v;
5353 if (*pos || m->count)
5358 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5360 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
/*
 * Take trace_cmdline_lock for the whole traversal; released in
 * saved_cmdlines_stop().  arch_spin_lock presumably requires preemption
 * already disabled by the caller path — TODO confirm in full source.
 */
5369 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5375 arch_spin_lock(&trace_cmdline_lock);
5377 v = &savedcmd->map_cmdline_to_pid[0];
5379 v = saved_cmdlines_next(m, v, &l);
5387 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5389 arch_spin_unlock(&trace_cmdline_lock);
/* Emit one "pid comm" line, resolving the comm via __trace_find_cmdline(). */
5393 static int saved_cmdlines_show(struct seq_file *m, void *v)
5395 char buf[TASK_COMM_LEN];
5396 unsigned int *pid = v;
5398 __trace_find_cmdline(*pid, buf);
5399 seq_printf(m, "%d %s\n", *pid, buf);
5403 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5404 .start = saved_cmdlines_start,
5405 .next = saved_cmdlines_next,
5406 .stop = saved_cmdlines_stop,
5407 .show = saved_cmdlines_show,
5410 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5414 ret = tracing_check_open_get_tr(NULL);
5418 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5421 static const struct file_operations tracing_saved_cmdlines_fops = {
5422 .open = tracing_saved_cmdlines_open,
5424 .llseek = seq_lseek,
5425 .release = seq_release,
/*
 * "saved_cmdlines_size" tracefs file: read/report and resize the number of
 * cmdline slots in savedcmd.  (Excerpt elided; code byte-identical.)
 */
/* Report current cmdline_num under trace_cmdline_lock. */
5429 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5430 size_t cnt, loff_t *ppos)
5435 arch_spin_lock(&trace_cmdline_lock);
5436 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5437 arch_spin_unlock(&trace_cmdline_lock);
5439 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
/* Free a saved_cmdlines_buffer and its sub-allocations. */
5442 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5444 kfree(s->saved_cmdlines);
5445 kfree(s->map_cmdline_to_pid);
/*
 * Allocate a new buffer of 'val' entries, swap it in under
 * trace_cmdline_lock, then free the old one outside the lock.
 */
5449 static int tracing_resize_saved_cmdlines(unsigned int val)
5451 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5453 s = kmalloc(sizeof(*s), GFP_KERNEL);
5457 if (allocate_cmdlines_buffer(val, s) < 0) {
5462 arch_spin_lock(&trace_cmdline_lock);
5463 savedcmd_temp = savedcmd;
5465 arch_spin_unlock(&trace_cmdline_lock);
5466 free_saved_cmdlines_buffer(savedcmd_temp);
/* Parse the user value, bound it to (0, PID_MAX_DEFAULT], then resize. */
5472 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5473 size_t cnt, loff_t *ppos)
5478 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5482 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
5483 if (!val || val > PID_MAX_DEFAULT)
5486 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5495 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5496 .open = tracing_open_generic,
5497 .read = tracing_saved_cmdlines_size_read,
5498 .write = tracing_saved_cmdlines_size_write,
5501 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * seq_file iterator over the trace_eval_maps list backing the "eval_map"
 * tracefs file.  The list is chains of map arrays, each bracketed by a
 * head item and a tail item.  (Excerpt elided; code byte-identical.)
 */
/* Skip head/tail bookkeeping items and follow tail.next to the next chunk. */
5502 static union trace_eval_map_item *
5503 update_eval_map(union trace_eval_map_item *ptr)
5505 if (!ptr->map.eval_string) {
5506 if (ptr->tail.next) {
5507 ptr = ptr->tail.next;
5508 /* Set ptr to the next real item (skip head) */
5516 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5518 union trace_eval_map_item *ptr = v;
5521 * Paranoid! If ptr points to end, we don't want to increment past it.
5522 * This really should never happen.
5525 ptr = update_eval_map(ptr);
5526 if (WARN_ON_ONCE(!ptr))
5530 ptr = update_eval_map(ptr);
/* Hold trace_eval_mutex across the traversal; released in eval_map_stop(). */
5535 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5537 union trace_eval_map_item *v;
5540 mutex_lock(&trace_eval_mutex);
5542 v = trace_eval_maps;
5546 while (v && l < *pos) {
5547 v = eval_map_next(m, v, &l);
5553 static void eval_map_stop(struct seq_file *m, void *v)
5555 mutex_unlock(&trace_eval_mutex);
/* Emit one "string value (system)" mapping line. */
5558 static int eval_map_show(struct seq_file *m, void *v)
5560 union trace_eval_map_item *ptr = v;
5562 seq_printf(m, "%s %ld (%s)\n",
5563 ptr->map.eval_string, ptr->map.eval_value,
5569 static const struct seq_operations tracing_eval_map_seq_ops = {
5570 .start = eval_map_start,
5571 .next = eval_map_next,
5572 .stop = eval_map_stop,
5573 .show = eval_map_show,
5576 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5580 ret = tracing_check_open_get_tr(NULL);
5584 return seq_open(filp, &tracing_eval_map_seq_ops);
5587 static const struct file_operations tracing_eval_map_fops = {
5588 .open = tracing_eval_map_open,
5590 .llseek = seq_lseek,
5591 .release = seq_release,
/* Return tail bookkeeping item of a map array, given its head item. */
5594 static inline union trace_eval_map_item *
5595 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5597 /* Return tail of array given the head */
5598 return ptr + ptr->head.length + 1;
/*
 * Append a module's eval maps to the trace_eval_maps list for the
 * "eval_map" file.  Allocates len+2 items (head + maps + tail), links the
 * new array at the end of the chain under trace_eval_mutex.
 * (Excerpt elided; code byte-identical.)
 */
5602 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5605 struct trace_eval_map **stop;
5606 struct trace_eval_map **map;
5607 union trace_eval_map_item *map_array;
5608 union trace_eval_map_item *ptr;
5613 * The trace_eval_maps contains the map plus a head and tail item,
5614 * where the head holds the module and length of array, and the
5615 * tail holds a pointer to the next list.
5617 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5619 pr_warn("Unable to allocate trace eval mapping\n");
5623 mutex_lock(&trace_eval_mutex);
5625 if (!trace_eval_maps)
5626 trace_eval_maps = map_array;
5628 ptr = trace_eval_maps;
/* Walk to the end of the chain: jump head->tail, follow tail.next. */
5630 ptr = trace_eval_jmp_to_tail(ptr);
5631 if (!ptr->tail.next)
5633 ptr = ptr->tail.next;
5636 ptr->tail.next = map_array;
5638 map_array->head.mod = mod;
5639 map_array->head.length = len;
/* Copy each map entry, then zero the tail item as the chain terminator. */
5642 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5643 map_array->map = **map;
5646 memset(map_array, 0, sizeof(*map_array));
5648 mutex_unlock(&trace_eval_mutex);
/* Create the read-only (0444) "eval_map" tracefs file. */
5651 static void trace_create_eval_file(struct dentry *d_tracer)
5653 trace_create_file("eval_map", 0444, d_tracer,
5654 NULL, &tracing_eval_map_fops);
5657 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* No-op stubs when the eval_map file is not configured in. */
5658 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5659 static inline void trace_insert_eval_map_file(struct module *mod,
5660 struct trace_eval_map **start, int len) { }
5661 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
/*
 * Update event eval maps, then (optionally, per config) record them in the
 * eval_map file list.
 */
5663 static void trace_insert_eval_map(struct module *mod,
5664 struct trace_eval_map **start, int len)
5666 struct trace_eval_map **map;
5673 trace_event_eval_update(map, len);
5675 trace_insert_eval_map_file(mod, start, len);
/*
 * Read handler for "current_tracer": report the active tracer's name,
 * sampled under trace_types_lock.  (Excerpt elided; code byte-identical.)
 */
5679 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5680 size_t cnt, loff_t *ppos)
5682 struct trace_array *tr = filp->private_data;
5683 char buf[MAX_TRACER_SIZE+2];
5686 mutex_lock(&trace_types_lock);
5687 r = sprintf(buf, "%s\n", tr->current_trace->name);
5688 mutex_unlock(&trace_types_lock);
5690 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
/* Reset the trace buffer, then initialize tracer t for trace array tr. */
5693 int tracer_init(struct tracer *t, struct trace_array *tr)
5695 tracing_reset_online_cpus(&tr->array_buffer);
/* Record 'val' as the per-cpu entry count for every tracing CPU. */
5699 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5703 for_each_tracing_cpu(cpu)
5704 per_cpu_ptr(buf->data, cpu)->entries = val;
5707 #ifdef CONFIG_TRACER_MAX_TRACE
5708 /* resize @tr's buffer to the size of @size_tr's entries */
/*
 * For RING_BUFFER_ALL_CPUS, resize each CPU's buffer to match size_buf's
 * per-cpu entry count; otherwise resize only cpu_id.  The recorded
 * 'entries' values are updated on success.  (Excerpt elided.)
 */
5709 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5710 struct array_buffer *size_buf, int cpu_id)
5714 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5715 for_each_tracing_cpu(cpu) {
5716 ret = ring_buffer_resize(trace_buf->buffer,
5717 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5720 per_cpu_ptr(trace_buf->data, cpu)->entries =
5721 per_cpu_ptr(size_buf->data, cpu)->entries;
5724 ret = ring_buffer_resize(trace_buf->buffer,
5725 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5727 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5728 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5733 #endif /* CONFIG_TRACER_MAX_TRACE */
/*
 * Resize tr's ring buffer (and, when the current tracer uses a max/snapshot
 * buffer on the global array, the max buffer too) to 'size' entries for
 * 'cpu' (or RING_BUFFER_ALL_CPUS).  Caller holds trace_types_lock —
 * presumably; confirm against full source.  (Excerpt elided.)
 */
5735 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5736 unsigned long size, int cpu)
5741 * If kernel or user changes the size of the ring buffer
5742 * we use the size that was given, and we can forget about
5743 * expanding it later.
5745 ring_buffer_expanded = true;
5747 /* May be called before buffers are initialized */
5748 if (!tr->array_buffer.buffer)
5751 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5755 #ifdef CONFIG_TRACER_MAX_TRACE
/* Max buffer tracked only for the global array with a use_max_tr tracer. */
5756 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5757 !tr->current_trace->use_max_tr)
5760 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
/* Max resize failed: try to shrink the main buffer back to its old size. */
5762 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5763 &tr->array_buffer, cpu);
5766 * AARGH! We are left with different
5767 * size max buffer!!!!
5768 * The max buffer is our "snapshot" buffer.
5769 * When a tracer needs a snapshot (one of the
5770 * latency tracers), it swaps the max buffer
5771 * with the saved snap shot. We succeeded to
5772 * update the size of the main buffer, but failed to
5773 * update the size of the max buffer. But when we tried
5774 * to reset the main buffer to the original size, we
5775 * failed there too. This is very unlikely to
5776 * happen, but if it does, warn and kill all
5780 tracing_disabled = 1;
5785 if (cpu == RING_BUFFER_ALL_CPUS)
5786 set_buffer_entries(&tr->max_buffer, size);
5788 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5791 #endif /* CONFIG_TRACER_MAX_TRACE */
5793 if (cpu == RING_BUFFER_ALL_CPUS)
5794 set_buffer_entries(&tr->array_buffer, size);
5796 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
/*
 * Public wrapper: validate cpu_id against tracing_buffer_mask, then resize
 * under trace_types_lock.
 */
5801 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5802 unsigned long size, int cpu_id)
5806 mutex_lock(&trace_types_lock);
5808 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5809 /* make sure, this cpu is enabled in the mask */
5810 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5816 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5821 mutex_unlock(&trace_types_lock);
5828 * tracing_update_buffers - used by tracing facility to expand ring buffers
5830 * To save on memory when the tracing is never used on a system with it
5831 * configured in. The ring buffers are set to a minimum size. But once
5832 * a user starts to use the tracing facility, then they need to grow
5833 * to their default size.
5835 * This function is to be called when a tracer is about to be used.
/* Expand global_trace's buffers to trace_buf_size once, on first real use. */
5837 int tracing_update_buffers(void)
5841 mutex_lock(&trace_types_lock);
5842 if (!ring_buffer_expanded)
5843 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5844 RING_BUFFER_ALL_CPUS);
5845 mutex_unlock(&trace_types_lock);
5850 struct trace_option_dentry;
5853 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5856 * Used to clear out the tracer before deletion of an instance.
5857 * Must have trace_types_lock held.
/* Swap the instance's tracer to nop_trace, calling the old tracer's reset. */
5859 static void tracing_set_nop(struct trace_array *tr)
5861 if (tr->current_trace == &nop_trace)
5864 tr->current_trace->enabled--;
5866 if (tr->current_trace->reset)
5867 tr->current_trace->reset(tr);
5869 tr->current_trace = &nop_trace;
/* Create per-tracer option files, but only after the tracefs dir exists. */
5872 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5874 /* Only enable if the directory has been created already. */
5878 create_trace_option_files(tr, t);
/*
 * Switch trace array 'tr' to the tracer named 'buf'.  Expands buffers on
 * first use, looks the tracer up in trace_types, validates it against the
 * array and current state, tears down the old tracer, and installs the new
 * one.  (Excerpt heavily elided — many error paths not visible; code left
 * byte-identical.)
 */
5881 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5884 #ifdef CONFIG_TRACER_MAX_TRACE
5889 mutex_lock(&trace_types_lock);
5891 if (!ring_buffer_expanded) {
5892 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5893 RING_BUFFER_ALL_CPUS);
/* Find the requested tracer by name in the registered trace_types list. */
5899 for (t = trace_types; t; t = t->next) {
5900 if (strcmp(t->name, buf) == 0)
5907 if (t == tr->current_trace)
5910 #ifdef CONFIG_TRACER_SNAPSHOT
/* A conditional snapshot in flight blocks switching to a max_tr tracer. */
5911 if (t->use_max_tr) {
5912 arch_spin_lock(&tr->max_lock);
5913 if (tr->cond_snapshot)
5915 arch_spin_unlock(&tr->max_lock);
5920 /* Some tracers won't work on kernel command line */
5921 if (system_state < SYSTEM_RUNNING && t->noboot) {
5922 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5927 /* Some tracers are only allowed for the top level buffer */
5928 if (!trace_ok_for_array(t, tr)) {
5933 /* If trace pipe files are being read, we can't change the tracer */
5934 if (tr->trace_ref) {
5939 trace_branch_disable();
/* Tear down the outgoing tracer before installing the new one. */
5941 tr->current_trace->enabled--;
5943 if (tr->current_trace->reset)
5944 tr->current_trace->reset(tr);
5946 /* Current trace needs to be nop_trace before synchronize_rcu */
5947 tr->current_trace = &nop_trace;
5949 #ifdef CONFIG_TRACER_MAX_TRACE
5950 had_max_tr = tr->allocated_snapshot;
5952 if (had_max_tr && !t->use_max_tr) {
5954 * We need to make sure that the update_max_tr sees that
5955 * current_trace changed to nop_trace to keep it from
5956 * swapping the buffers after we resize it.
5957 * The update_max_tr is called from interrupts disabled
5958 * so a synchronized_sched() is sufficient.
5965 #ifdef CONFIG_TRACER_MAX_TRACE
/* New tracer needs a snapshot buffer that isn't allocated yet. */
5966 if (t->use_max_tr && !had_max_tr) {
5967 ret = tracing_alloc_snapshot_instance(tr);
5974 ret = tracer_init(t, tr);
5979 tr->current_trace = t;
5980 tr->current_trace->enabled++;
5981 trace_branch_enable(tr);
5983 mutex_unlock(&trace_types_lock);
/*
 * Write handler for "current_tracer": copy the user string (bounded to
 * MAX_TRACER_SIZE), strip trailing whitespace, and switch tracers.
 * (Excerpt elided; code byte-identical.)
 */
5989 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5990 size_t cnt, loff_t *ppos)
5992 struct trace_array *tr = filp->private_data;
5993 char buf[MAX_TRACER_SIZE+1];
6000 if (cnt > MAX_TRACER_SIZE)
6001 cnt = MAX_TRACER_SIZE;
6003 if (copy_from_user(buf, ubuf, cnt))
6008 /* strip ending whitespace. */
6009 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6012 err = tracing_set_tracer(tr, buf);
/*
 * Shared read/write helpers for nanosecond-valued tunables, displayed in
 * microseconds ((unsigned long)-1 prints as -1).  Used by the threshold
 * and max-latency files below.  (Excerpt elided; code byte-identical.)
 */
6022 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6023 size_t cnt, loff_t *ppos)
6028 r = snprintf(buf, sizeof(buf), "%ld\n",
6029 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6030 if (r > sizeof(buf))
6032 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
/* Parse a decimal microsecond value from userspace and store it in *ptr. */
6036 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6037 size_t cnt, loff_t *ppos)
6042 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
/* "tracing_thresh" file: read the global threshold. */
6052 tracing_thresh_read(struct file *filp, char __user *ubuf,
6053 size_t cnt, loff_t *ppos)
6055 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
/*
 * "tracing_thresh" write: update the threshold under trace_types_lock and
 * let the current tracer react via its update_thresh() hook, if any.
 */
6059 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6060 size_t cnt, loff_t *ppos)
6062 struct trace_array *tr = filp->private_data;
6065 mutex_lock(&trace_types_lock);
6066 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6070 if (tr->current_trace->update_thresh) {
6071 ret = tr->current_trace->update_thresh(tr);
6078 mutex_unlock(&trace_types_lock);
6083 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
/* "tracing_max_latency" file: thin wrappers over the nsecs helpers. */
6086 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6087 size_t cnt, loff_t *ppos)
6089 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6093 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6094 size_t cnt, loff_t *ppos)
6096 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
/*
 * Open handler for "trace_pipe": allocate and initialize a consuming
 * trace_iterator for this reader.  (Excerpt elided; code byte-identical.)
 */
6101 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6103 struct trace_array *tr = inode->i_private;
6104 struct trace_iterator *iter;
6107 ret = tracing_check_open_get_tr(tr);
6111 mutex_lock(&trace_types_lock);
6113 /* create a buffer to store the information to pass to userspace */
6114 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6117 __trace_array_put(tr);
6121 trace_seq_init(&iter->seq);
6122 iter->trace = tr->current_trace;
6124 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6129 /* trace pipe does not show start of buffer */
6130 cpumask_setall(iter->started);
6132 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6133 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6135 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6136 if (trace_clocks[tr->clock_id].in_ns)
6137 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6140 iter->array_buffer = &tr->array_buffer;
6141 iter->cpu_file = tracing_get_cpu(inode);
6142 mutex_init(&iter->mutex);
6143 filp->private_data = iter;
6145 if (iter->trace->pipe_open)
6146 iter->trace->pipe_open(iter);
6148 nonseekable_open(inode, filp);
6152 mutex_unlock(&trace_types_lock);
6157 __trace_array_put(tr);
6158 mutex_unlock(&trace_types_lock);
/*
 * Release handler for "trace_pipe": undo tracing_open_pipe() — notify the
 * tracer, free the cpumask and iterator state, drop the tr reference.
 */
6162 static int tracing_release_pipe(struct inode *inode, struct file *file)
6164 struct trace_iterator *iter = file->private_data;
6165 struct trace_array *tr = inode->i_private;
6167 mutex_lock(&trace_types_lock);
6171 if (iter->trace->pipe_close)
6172 iter->trace->pipe_close(iter);
6174 mutex_unlock(&trace_types_lock);
6176 free_cpumask_var(iter->started);
6177 mutex_destroy(&iter->mutex);
6180 trace_array_put(tr);
/*
 * poll() support for trace readers: static iterators are always readable;
 * TRACE_ITER_BLOCK forces readable; otherwise defer to the ring buffer's
 * poll_wait.  (Excerpt elided; code byte-identical.)
 */
6186 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6188 struct trace_array *tr = iter->tr;
6190 /* Iterators are static, they should be filled or empty */
6191 if (trace_buffer_iter(iter, iter->cpu_file))
6192 return EPOLLIN | EPOLLRDNORM;
6194 if (tr->trace_flags & TRACE_ITER_BLOCK)
6196 * Always select as readable when in blocking mode
6198 return EPOLLIN | EPOLLRDNORM;
6200 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
/* poll() entry point for "trace_pipe". */
6205 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6207 struct trace_iterator *iter = filp->private_data;
6209 return trace_poll(iter, filp, poll_table);
6212 /* Must be called with iter->mutex held. */
/*
 * Block until the pipe has data (or O_NONBLOCK / tracing-off-after-read
 * ends the wait).  Drops iter->mutex around the sleep in wait_on_pipe().
 */
6213 static int tracing_wait_pipe(struct file *filp)
6215 struct trace_iterator *iter = filp->private_data;
6218 while (trace_empty(iter)) {
6220 if ((filp->f_flags & O_NONBLOCK)) {
6225 * We block until we read something and tracing is disabled.
6226 * We still block if tracing is disabled, but we have never
6227 * read anything. This allows a user to cat this file, and
6228 * then enable tracing. But after we have read something,
6229 * we give an EOF when tracing is again disabled.
6231 * iter->pos will be 0 if we haven't read anything.
6233 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6236 mutex_unlock(&iter->mutex);
6238 ret = wait_on_pipe(iter, 0);
6240 mutex_lock(&iter->mutex);
/*
 * Consuming read for "trace_pipe": drain leftover seq data, wait for
 * entries, then format and consume events until 'cnt' bytes are produced.
 * iter->mutex serializes concurrent readers on one fd.  (Excerpt elided;
 * code byte-identical.)
 */
6253 tracing_read_pipe(struct file *filp, char __user *ubuf,
6254 size_t cnt, loff_t *ppos)
6256 struct trace_iterator *iter = filp->private_data;
6260 * Avoid more than one consumer on a single file descriptor
6261 * This is just a matter of traces coherency, the ring buffer itself
6264 mutex_lock(&iter->mutex);
6266 /* return any leftover data */
6267 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6271 trace_seq_init(&iter->seq);
/* A tracer may override the read path entirely (e.g. for special modes). */
6273 if (iter->trace->read) {
6274 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6280 sret = tracing_wait_pipe(filp);
6284 /* stop when tracing is finished */
6285 if (trace_empty(iter)) {
/* Cap a single read to one page: the seq buffer is page sized. */
6290 if (cnt >= PAGE_SIZE)
6291 cnt = PAGE_SIZE - 1;
6293 /* reset all but tr, trace, and overruns */
6294 memset(&iter->seq, 0,
6295 sizeof(struct trace_iterator) -
6296 offsetof(struct trace_iterator, seq));
6297 cpumask_clear(iter->started);
6298 trace_seq_init(&iter->seq);
6301 trace_event_read_lock();
6302 trace_access_lock(iter->cpu_file);
6303 while (trace_find_next_entry_inc(iter) != NULL) {
6304 enum print_line_t ret;
6305 int save_len = iter->seq.seq.len;
6307 ret = print_trace_line(iter);
6308 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6309 /* don't print partial lines */
6310 iter->seq.seq.len = save_len;
6313 if (ret != TRACE_TYPE_NO_CONSUME)
6314 trace_consume(iter);
6316 if (trace_seq_used(&iter->seq) >= cnt)
6320 * Setting the full flag means we reached the trace_seq buffer
6321 * size and we should leave by partial output condition above.
6322 * One of the trace_seq_* functions is not used properly.
6324 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6327 trace_access_unlock(iter->cpu_file);
6328 trace_event_read_unlock();
6330 /* Now copy what we have to the user */
6331 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6332 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6333 trace_seq_init(&iter->seq);
6336 * If there was nothing to send to user, in spite of consuming trace
6337 * entries, go back to wait for more entries.
6343 mutex_unlock(&iter->mutex);
/* splice_pipe_desc release hook: free one spd page. */
6348 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6351 __free_page(spd->pages[idx]);
/*
 * Fill one page's worth of formatted trace output into iter->seq,
 * consuming entries; returns remaining byte budget.  Truncated/overflowed
 * lines are rolled back via the saved seq length.  (Excerpt elided.)
 */
6355 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6361 /* Seq buffer is page-sized, exactly what we need. */
6363 save_len = iter->seq.seq.len;
6364 ret = print_trace_line(iter);
6366 if (trace_seq_has_overflowed(&iter->seq)) {
6367 iter->seq.seq.len = save_len;
6372 * This should not be hit, because it should only
6373 * be set if the iter->seq overflowed. But check it
6374 * anyway to be safe.
6376 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6377 iter->seq.seq.len = save_len;
6381 count = trace_seq_used(&iter->seq) - save_len;
6384 iter->seq.seq.len = save_len;
6388 if (ret != TRACE_TYPE_NO_CONSUME)
6389 trace_consume(iter);
6391 if (!trace_find_next_entry_inc(iter)) {
/*
 * splice() support for "trace_pipe": format trace lines page by page and
 * hand the pages to the pipe via splice_to_pipe().  (Excerpt elided; code
 * byte-identical.)
 */
6401 static ssize_t tracing_splice_read_pipe(struct file *filp,
6403 struct pipe_inode_info *pipe,
6407 struct page *pages_def[PIPE_DEF_BUFFERS];
6408 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6409 struct trace_iterator *iter = filp->private_data;
6410 struct splice_pipe_desc spd = {
6412 .partial = partial_def,
6413 .nr_pages = 0, /* This gets updated below. */
6414 .nr_pages_max = PIPE_DEF_BUFFERS,
6415 .ops = &default_pipe_buf_ops,
6416 .spd_release = tracing_spd_release_pipe,
6422 if (splice_grow_spd(pipe, &spd))
6425 mutex_lock(&iter->mutex);
6427 if (iter->trace->splice_read) {
6428 ret = iter->trace->splice_read(iter, filp,
6429 ppos, pipe, len, flags);
6434 ret = tracing_wait_pipe(filp);
6438 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6443 trace_event_read_lock();
6444 trace_access_lock(iter->cpu_file);
6446 /* Fill as many pages as possible. */
6447 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6448 spd.pages[i] = alloc_page(GFP_KERNEL);
6452 rem = tracing_fill_pipe_page(rem, iter);
6454 /* Copy the data into the page, so we can start over. */
6455 ret = trace_seq_to_buffer(&iter->seq,
6456 page_address(spd.pages[i]),
6457 trace_seq_used(&iter->seq));
6459 __free_page(spd.pages[i]);
6462 spd.partial[i].offset = 0;
6463 spd.partial[i].len = trace_seq_used(&iter->seq);
6465 trace_seq_init(&iter->seq);
6468 trace_access_unlock(iter->cpu_file);
6469 trace_event_read_unlock();
6470 mutex_unlock(&iter->mutex);
6475 ret = splice_to_pipe(pipe, &spd);
6479 splice_shrink_spd(&spd);
6483 mutex_unlock(&iter->mutex);
/*
 * Read handler for "buffer_size_kb": report per-cpu (or common) entry
 * counts in KB; "X" when per-cpu sizes differ, "(expanded: ...)" while
 * buffers are still at boot-minimum size.  (Excerpt elided.)
 */
6488 tracing_entries_read(struct file *filp, char __user *ubuf,
6489 size_t cnt, loff_t *ppos)
6491 struct inode *inode = file_inode(filp);
6492 struct trace_array *tr = inode->i_private;
6493 int cpu = tracing_get_cpu(inode);
6498 mutex_lock(&trace_types_lock);
6500 if (cpu == RING_BUFFER_ALL_CPUS) {
6501 int cpu, buf_size_same;
6506 /* check if all cpu sizes are same */
6507 for_each_tracing_cpu(cpu) {
6508 /* fill in the size from first enabled cpu */
6510 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6511 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6517 if (buf_size_same) {
6518 if (!ring_buffer_expanded)
6519 r = sprintf(buf, "%lu (expanded: %lu)\n",
6521 trace_buf_size >> 10);
6523 r = sprintf(buf, "%lu\n", size >> 10);
6525 r = sprintf(buf, "X\n");
6527 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6529 mutex_unlock(&trace_types_lock);
6531 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
/* Write handler for "buffer_size_kb": value is in KB; resize the buffer. */
6536 tracing_entries_write(struct file *filp, const char __user *ubuf,
6537 size_t cnt, loff_t *ppos)
6539 struct inode *inode = file_inode(filp);
6540 struct trace_array *tr = inode->i_private;
6544 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6548 /* must have at least 1 entry */
6552 /* value is in KB */
6554 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
/*
 * "buffer_total_size_kb": sum of all per-cpu sizes; also shows the size
 * buffers would expand to, when not yet expanded.
 */
6564 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6565 size_t cnt, loff_t *ppos)
6567 struct trace_array *tr = filp->private_data;
6570 unsigned long size = 0, expanded_size = 0;
6572 mutex_lock(&trace_types_lock);
6573 for_each_tracing_cpu(cpu) {
6574 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6575 if (!ring_buffer_expanded)
6576 expanded_size += trace_buf_size >> 10;
6578 if (ring_buffer_expanded)
6579 r = sprintf(buf, "%lu\n", size);
6581 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6582 mutex_unlock(&trace_types_lock);
6584 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
/*
 * "free_buffer" file: writes are accepted but ignored (so shell 'echo'
 * works); the real action happens on close below.
 */
6588 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6589 size_t cnt, loff_t *ppos)
6592 * There is no need to read what the user has written, this function
6593 * is just to make sure that there is no error when "echo" is used
/*
 * On release: optionally stop tracing (TRACE_ITER_STOP_ON_FREE), then
 * shrink the ring buffer to zero to free its memory.
 */
6602 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6604 struct trace_array *tr = inode->i_private;
6606 /* disable tracing ? */
6607 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6608 tracer_tracing_off(tr);
6609 /* resize the ring buffer to 0 */
6610 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6612 trace_array_put(tr);
/*
 * Write handler for "trace_marker": inject a user-supplied string into the
 * ring buffer as a TRACE_PRINT event.  A faulting copy is replaced by the
 * "<faulted>" placeholder; a trailing '\n' is guaranteed.  (Excerpt
 * elided; code byte-identical.)
 */
6618 tracing_mark_write(struct file *filp, const char __user *ubuf,
6619 size_t cnt, loff_t *fpos)
6621 struct trace_array *tr = filp->private_data;
6622 struct ring_buffer_event *event;
6623 enum event_trigger_type tt = ETT_NONE;
6624 struct trace_buffer *buffer;
6625 struct print_entry *entry;
6626 unsigned long irq_flags;
6631 /* Used in tracing_mark_raw_write() as well */
6632 #define FAULTED_STR "<faulted>"
6633 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6635 if (tracing_disabled)
6638 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6641 if (cnt > TRACE_BUF_SIZE)
6642 cnt = TRACE_BUF_SIZE;
6644 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6646 local_save_flags(irq_flags);
6647 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6649 /* If less than "<faulted>", then make sure we can still add that */
6650 if (cnt < FAULTED_SIZE)
6651 size += FAULTED_SIZE - cnt;
6653 buffer = tr->array_buffer.buffer;
6654 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6655 irq_flags, preempt_count());
6656 if (unlikely(!event))
6657 /* Ring buffer disabled, return as if not open for write */
6660 entry = ring_buffer_event_data(event);
6661 entry->ip = _THIS_IP_;
/* Inatomic copy: cannot fault with the buffer reservation held. */
6663 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6665 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6672 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6673 /* do not add \n before testing triggers, but add \0 */
6674 entry->buf[cnt] = '\0';
6675 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6678 if (entry->buf[cnt - 1] != '\n') {
6679 entry->buf[cnt] = '\n';
6680 entry->buf[cnt + 1] = '\0';
6682 entry->buf[cnt] = '\0';
6684 __buffer_unlock_commit(buffer, event);
6687 event_triggers_post_call(tr->trace_marker_file, tt);
6695 /* Limit it for now to 3K (including tag) */
6696 #define RAW_DATA_MAX_SIZE (1024*3)
/*
 * Write handler for "trace_marker_raw": inject raw binary data (leading
 * int tag id + payload) as a TRACE_RAW_DATA event.  (Excerpt elided.)
 */
6699 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6700 size_t cnt, loff_t *fpos)
6702 struct trace_array *tr = filp->private_data;
6703 struct ring_buffer_event *event;
6704 struct trace_buffer *buffer;
6705 struct raw_data_entry *entry;
6706 unsigned long irq_flags;
6711 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6713 if (tracing_disabled)
6716 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6719 /* The marker must at least have a tag id */
6720 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6723 if (cnt > TRACE_BUF_SIZE)
6724 cnt = TRACE_BUF_SIZE;
6726 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6728 local_save_flags(irq_flags);
6729 size = sizeof(*entry) + cnt;
6730 if (cnt < FAULT_SIZE_ID)
6731 size += FAULT_SIZE_ID - cnt;
6733 buffer = tr->array_buffer.buffer;
6734 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6735 irq_flags, preempt_count());
6737 /* Ring buffer disabled, return as if not open for write */
6740 entry = ring_buffer_event_data(event);
6742 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6745 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6750 __buffer_unlock_commit(buffer, event);
/*
 * "trace_clock" file: show available clocks with the active one in
 * brackets, and switch clocks on write.  (Excerpt elided; byte-identical.)
 */
6758 static int tracing_clock_show(struct seq_file *m, void *v)
6760 struct trace_array *tr = m->private;
6763 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6765 "%s%s%s%s", i ? " " : "",
6766 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6767 i == tr->clock_id ? "]" : "");
/*
 * Look up 'clockstr' in trace_clocks[], install the clock on the main
 * (and max, if present) buffer, and reset buffers since timestamps from
 * different clocks are not comparable.
 */
6773 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6777 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6778 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6781 if (i == ARRAY_SIZE(trace_clocks))
6784 mutex_lock(&trace_types_lock);
6788 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6791 * New clock may not be consistent with the previous clock.
6792 * Reset the buffer so that it doesn't have incomparable timestamps.
6794 tracing_reset_online_cpus(&tr->array_buffer);
6796 #ifdef CONFIG_TRACER_MAX_TRACE
6797 if (tr->max_buffer.buffer)
6798 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6799 tracing_reset_online_cpus(&tr->max_buffer);
6802 mutex_unlock(&trace_types_lock);
/* Write handler: copy and strip the clock name, then set it. */
6807 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6808 size_t cnt, loff_t *fpos)
6810 struct seq_file *m = filp->private_data;
6811 struct trace_array *tr = m->private;
6813 const char *clockstr;
6816 if (cnt >= sizeof(buf))
6819 if (copy_from_user(buf, ubuf, cnt))
6824 clockstr = strstrip(buf);
6826 ret = tracing_set_clock(tr, clockstr);
/* Open: check/get the trace array, then single_open() the show routine. */
6835 static int tracing_clock_open(struct inode *inode, struct file *file)
6837 struct trace_array *tr = inode->i_private;
6840 ret = tracing_check_open_get_tr(tr);
6844 ret = single_open(file, tracing_clock_show, inode->i_private);
6846 trace_array_put(tr);
/*
 * tracing_time_stamp_mode_show - show the timestamp mode of the ring buffer.
 * The active mode is bracketed: "delta [absolute]" vs "[delta] absolute".
 */
6851 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6853 struct trace_array *tr = m->private;
6855 mutex_lock(&trace_types_lock);
6857 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6858 seq_puts(m, "delta [absolute]\n");
6860 seq_puts(m, "[delta] absolute\n");
6862 mutex_unlock(&trace_types_lock);
/*
 * tracing_time_stamp_mode_open - open handler for timestamp_mode.
 * Mirrors tracing_clock_open(): lockdown check + tr reference, then
 * single_open(); reference dropped on failure.
 */
6867 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6869 struct trace_array *tr = inode->i_private;
6872 ret = tracing_check_open_get_tr(tr);
6876 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6878 trace_array_put(tr);
/*
 * tracing_set_time_stamp_abs - enable/disable absolute timestamps for @tr.
 * Reference counted (time_stamp_abs_ref) so that multiple users can request
 * absolute mode; the mode only actually flips on the first enable and the
 * last disable.  WARNs (and presumably errors out — the line is elided here)
 * on a disable with a zero refcount.
 */
6883 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6887 mutex_lock(&trace_types_lock);
6889 if (abs && tr->time_stamp_abs_ref++)
6893 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6898 if (--tr->time_stamp_abs_ref)
6902 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6904 #ifdef CONFIG_TRACER_MAX_TRACE
6905 if (tr->max_buffer.buffer)
6906 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6909 mutex_unlock(&trace_types_lock);
/*
 * Per-open state for the trace_pipe_raw / snapshot_raw files.
 * spare_cpu records which CPU the cached spare read page was allocated for.
 * NOTE(review): additional members (e.g. the spare page pointer and read
 * offset used below) are elided in this excerpt.
 */
6914 struct ftrace_buffer_info {
6915 struct trace_iterator iter;
6917 unsigned int spare_cpu;
6921 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * tracing_snapshot_open - open handler for the snapshot file.
 * Read opens get a full iterator via __tracing_open(); write-only opens
 * still allocate a seq_file plus a minimal iterator (pointing at the
 * max_buffer) so tracing_snapshot_write() has private data to work with.
 * The tr reference is dropped on the (elided) error path.
 */
6922 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6924 struct trace_array *tr = inode->i_private;
6925 struct trace_iterator *iter;
6929 ret = tracing_check_open_get_tr(tr);
6933 if (file->f_mode & FMODE_READ) {
6934 iter = __tracing_open(inode, file, true);
6936 ret = PTR_ERR(iter);
6938 /* Writes still need the seq_file to hold the private data */
6940 m = kzalloc(sizeof(*m), GFP_KERNEL);
6943 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6951 iter->array_buffer = &tr->max_buffer;
6952 iter->cpu_file = tracing_get_cpu(inode);
6954 file->private_data = m;
6958 trace_array_put(tr);
/*
 * tracing_snapshot_write - control the snapshot buffer by writing a number.
 * Under trace_types_lock; refused while the current tracer uses the max
 * buffer itself or while a conditional snapshot is active.  Based on the
 * visible branches the written value selects: free/shrink, take a snapshot
 * (swap main <-> max buffer, per-cpu or all-cpu), or clear the snapshot
 * buffer.  Per-cpu swap is only allowed with CONFIG_RING_BUFFER_ALLOW_SWAP.
 * NOTE(review): the switch/case framing for those branches is elided here.
 */
6964 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6967 struct seq_file *m = filp->private_data;
6968 struct trace_iterator *iter = m->private;
6969 struct trace_array *tr = iter->tr;
6973 ret = tracing_update_buffers();
6977 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6981 mutex_lock(&trace_types_lock);
6983 if (tr->current_trace->use_max_tr) {
6988 arch_spin_lock(&tr->max_lock);
6989 if (tr->cond_snapshot)
6991 arch_spin_unlock(&tr->max_lock);
6997 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7001 if (tr->allocated_snapshot)
7005 /* Only allow per-cpu swap if the ring buffer supports it */
7006 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7007 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7012 if (tr->allocated_snapshot)
7013 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7014 &tr->array_buffer, iter->cpu_file);
7016 ret = tracing_alloc_snapshot_instance(tr);
7019 local_irq_disable();
7020 /* Now, we're going to swap */
7021 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7022 update_max_tr(tr, current, smp_processor_id(), NULL);
7024 update_max_tr_single(tr, current, iter->cpu_file);
7028 if (tr->allocated_snapshot) {
7029 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7030 tracing_reset_online_cpus(&tr->max_buffer);
7032 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7042 mutex_unlock(&trace_types_lock);
/*
 * tracing_snapshot_release - release handler for the snapshot file.
 * Delegates to tracing_release() for read opens; for write-only opens the
 * seq_file was just a stub allocated in tracing_snapshot_open() and is
 * freed here (free elided in this excerpt).
 */
7046 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7048 struct seq_file *m = file->private_data;
7051 ret = tracing_release(inode, file);
7053 if (file->f_mode & FMODE_READ)
7056 /* If write only, the seq_file is just a stub */
/*
 * Forward declarations: snapshot_raw_open() below reuses the
 * tracing_buffers_* handlers defined later in this file.
 */
7064 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7065 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7066 size_t count, loff_t *ppos);
7067 static int tracing_buffers_release(struct inode *inode, struct file *file);
7068 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7069 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
/*
 * snapshot_raw_open - open handler for per-cpu snapshot_raw.
 * Piggybacks on tracing_buffers_open() (which also does the tracefs
 * lockdown check), then retargets the iterator at the max/snapshot buffer.
 * Refused when the current tracer itself uses the max buffer.
 */
7071 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7073 struct ftrace_buffer_info *info;
7076 /* The following checks for tracefs lockdown */
7077 ret = tracing_buffers_open(inode, filp);
7081 info = filp->private_data;
7083 if (info->iter.trace->use_max_tr) {
7084 tracing_buffers_release(inode, filp);
7088 info->iter.snapshot = true;
7089 info->iter.array_buffer = &info->iter.tr->max_buffer;
7094 #endif /* CONFIG_TRACER_SNAPSHOT */
/*
 * file_operations tables for the tracefs control files defined above.
 * NOTE(review): closing "};" lines of these initializers are elided in
 * this excerpt.
 */
7097 static const struct file_operations tracing_thresh_fops = {
7098 .open = tracing_open_generic,
7099 .read = tracing_thresh_read,
7100 .write = tracing_thresh_write,
7101 .llseek = generic_file_llseek,
7104 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7105 static const struct file_operations tracing_max_lat_fops = {
7106 .open = tracing_open_generic,
7107 .read = tracing_max_lat_read,
7108 .write = tracing_max_lat_write,
7109 .llseek = generic_file_llseek,
7113 static const struct file_operations set_tracer_fops = {
7114 .open = tracing_open_generic,
7115 .read = tracing_set_trace_read,
7116 .write = tracing_set_trace_write,
7117 .llseek = generic_file_llseek,
/* trace_pipe is a consuming stream: pollable, not seekable. */
7120 static const struct file_operations tracing_pipe_fops = {
7121 .open = tracing_open_pipe,
7122 .poll = tracing_poll_pipe,
7123 .read = tracing_read_pipe,
7124 .splice_read = tracing_splice_read_pipe,
7125 .release = tracing_release_pipe,
7126 .llseek = no_llseek,
7129 static const struct file_operations tracing_entries_fops = {
7130 .open = tracing_open_generic_tr,
7131 .read = tracing_entries_read,
7132 .write = tracing_entries_write,
7133 .llseek = generic_file_llseek,
7134 .release = tracing_release_generic_tr,
7137 static const struct file_operations tracing_total_entries_fops = {
7138 .open = tracing_open_generic_tr,
7139 .read = tracing_total_entries_read,
7140 .llseek = generic_file_llseek,
7141 .release = tracing_release_generic_tr,
7144 static const struct file_operations tracing_free_buffer_fops = {
7145 .open = tracing_open_generic_tr,
7146 .write = tracing_free_buffer_write,
7147 .release = tracing_free_buffer_release,
7150 static const struct file_operations tracing_mark_fops = {
7151 .open = tracing_open_generic_tr,
7152 .write = tracing_mark_write,
7153 .llseek = generic_file_llseek,
7154 .release = tracing_release_generic_tr,
7157 static const struct file_operations tracing_mark_raw_fops = {
7158 .open = tracing_open_generic_tr,
7159 .write = tracing_mark_raw_write,
7160 .llseek = generic_file_llseek,
7161 .release = tracing_release_generic_tr,
7164 static const struct file_operations trace_clock_fops = {
7165 .open = tracing_clock_open,
7167 .llseek = seq_lseek,
7168 .release = tracing_single_release_tr,
7169 .write = tracing_clock_write,
7172 static const struct file_operations trace_time_stamp_mode_fops = {
7173 .open = tracing_time_stamp_mode_open,
7175 .llseek = seq_lseek,
7176 .release = tracing_single_release_tr,
7179 #ifdef CONFIG_TRACER_SNAPSHOT
7180 static const struct file_operations snapshot_fops = {
7181 .open = tracing_snapshot_open,
7183 .write = tracing_snapshot_write,
7184 .llseek = tracing_lseek,
7185 .release = tracing_snapshot_release,
7188 static const struct file_operations snapshot_raw_fops = {
7189 .open = snapshot_raw_open,
7190 .read = tracing_buffers_read,
7191 .release = tracing_buffers_release,
7192 .splice_read = tracing_buffers_splice_read,
7193 .llseek = no_llseek,
7196 #endif /* CONFIG_TRACER_SNAPSHOT */
/*
 * tracing/error_log support: a small, bounded log of the most recent
 * tracing command errors (at most TRACING_LOG_ERRS_MAX entries).
 */
7198 #define TRACING_LOG_ERRS_MAX 8
7199 #define TRACING_LOG_LOC_MAX 128
/* Prefix printed before the failing command; its width also drives caret placement. */
7201 #define CMD_PREFIX " Command: "
7204 const char **errs; /* ptr to loc-specific array of err strings */
7205 u8 type; /* index into errs -> specific err string */
7206 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
/* One logged error: list node, static err info, plus copied loc/cmd text. */
7210 struct tracing_log_err {
7211 struct list_head list;
7212 struct err_info info;
7213 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7214 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
/* Serializes all access to tr->err_log and tr->n_err_log_entries. */
7217 static DEFINE_MUTEX(tracing_err_log_lock);
/*
 * get_tracing_log_err - get an error record to fill in.
 * Allocates a fresh entry until the cap is reached; after that the oldest
 * entry is unlinked and recycled, so the log never exceeds
 * TRACING_LOG_ERRS_MAX entries.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure.  Caller must hold tracing_err_log_lock.
 */
7219 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7221 struct tracing_log_err *err;
7223 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7224 err = kzalloc(sizeof(*err), GFP_KERNEL);
7226 err = ERR_PTR(-ENOMEM);
7227 tr->n_err_log_entries++;
7232 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7233 list_del(&err->list);
7239 * err_pos - find the position of a string within a command for error careting
7240 * @cmd: The tracing command that caused the error
7241 * @str: The string to position the caret at within @cmd
7243 * Finds the position of the first occurrence of @str within @cmd. The
7244 * return value can be passed to tracing_log_err() for caret placement
7247 * Returns the index within @cmd of the first occurrence of @str or 0
7248 * if @str was not found.
7250 unsigned int err_pos(char *cmd, const char *str)
/* WARN (and return 0 — line elided) on an empty command string. */
7254 if (WARN_ON(!strlen(cmd)))
7257 found = strstr(cmd, str);
7265 * tracing_log_err - write an error to the tracing error log
7266 * @tr: The associated trace array for the error (NULL for top level array)
7267 * @loc: A string describing where the error occurred
7268 * @cmd: The tracing command that caused the error
7269 * @errs: The array of loc-specific static error strings
7270 * @type: The index into errs[], which produces the specific static err string
7271 * @pos: The position the caret should be placed in the cmd
7273 * Writes an error into tracing/error_log of the form:
7275 * <loc>: error: <text>
7279 * tracing/error_log is a small log file containing the last
7280 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7281 * unless there has been a tracing error, and the error log can be
7282 * cleared and have its memory freed by writing the empty string in
7283 * truncation mode to it i.e. echo > tracing/error_log.
7285 * NOTE: the @errs array along with the @type param are used to
7286 * produce a static error string - this string is not copied and saved
7287 * when the error is logged - only a pointer to it is saved. See
7288 * existing callers for examples of how static strings are typically
7289 * defined for use with tracing_log_err().
7291 void tracing_log_err(struct trace_array *tr,
7292 const char *loc, const char *cmd,
7293 const char **errs, u8 type, u8 pos)
7295 struct tracing_log_err *err;
7300 mutex_lock(&tracing_err_log_lock);
7301 err = get_tracing_log_err(tr);
/* On -ENOMEM the error is silently dropped; the log is best-effort. */
7302 if (PTR_ERR(err) == -ENOMEM) {
7303 mutex_unlock(&tracing_err_log_lock);
/* snprintf bounds both copies; loc/cmd are truncated rather than overrun. */
7307 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7308 snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7310 err->info.errs = errs;
7311 err->info.type = type;
7312 err->info.pos = pos;
7313 err->info.ts = local_clock();
7315 list_add_tail(&err->list, &tr->err_log);
7316 mutex_unlock(&tracing_err_log_lock);
/*
 * clear_tracing_err_log - empty and free @tr's error log.
 * Invoked when error_log is opened for write with O_TRUNC
 * (see tracing_err_log_open()).
 */
7319 static void clear_tracing_err_log(struct trace_array *tr)
7321 struct tracing_log_err *err, *next;
7323 mutex_lock(&tracing_err_log_lock);
7324 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7325 list_del(&err->list);
7329 tr->n_err_log_entries = 0;
7330 mutex_unlock(&tracing_err_log_lock);
/*
 * seq_file iteration over tr->err_log.  ->start takes
 * tracing_err_log_lock and ->stop drops it, so the lock is held across
 * the whole iteration.
 */
7333 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7335 struct trace_array *tr = m->private;
7337 mutex_lock(&tracing_err_log_lock);
7339 return seq_list_start(&tr->err_log, *pos);
7342 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7344 struct trace_array *tr = m->private;
7346 return seq_list_next(v, &tr->err_log, pos);
7349 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7351 mutex_unlock(&tracing_err_log_lock);
/*
 * tracing_err_log_show_pos - emit the caret line.
 * Pads past the CMD_PREFIX width plus @pos characters so the caret
 * lands under the offending part of the command.
 */
7354 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7358 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7360 for (i = 0; i < pos; i++)
/*
 * tracing_err_log_seq_show - print one error entry:
 * "[sec.usec] <loc>: error: <text>" followed by the command and caret.
 */
7365 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7367 struct tracing_log_err *err = v;
7370 const char *err_text = err->info.errs[err->info.type];
7371 u64 sec = err->info.ts;
7374 nsec = do_div(sec, NSEC_PER_SEC);
7375 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7376 err->loc, err_text);
7377 seq_printf(m, "%s", err->cmd);
7378 tracing_err_log_show_pos(m, err->info.pos);
7384 static const struct seq_operations tracing_err_log_seq_ops = {
7385 .start = tracing_err_log_seq_start,
7386 .next = tracing_err_log_seq_next,
7387 .stop = tracing_err_log_seq_stop,
7388 .show = tracing_err_log_seq_show
/*
 * tracing_err_log_open - open handler for tracing/error_log.
 * O_TRUNC write opens clear the log; read opens set up the seq_file
 * iterator above.  The tr reference is dropped on the (elided) error path.
 */
7391 static int tracing_err_log_open(struct inode *inode, struct file *file)
7393 struct trace_array *tr = inode->i_private;
7396 ret = tracing_check_open_get_tr(tr);
7400 /* If this file was opened for write, then erase contents */
7401 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7402 clear_tracing_err_log(tr);
7404 if (file->f_mode & FMODE_READ) {
7405 ret = seq_open(file, &tracing_err_log_seq_ops);
7407 struct seq_file *m = file->private_data;
7410 trace_array_put(tr);
/*
 * Writes to error_log are accepted but ignored (only the O_TRUNC side
 * effect at open time matters); body elided in this excerpt.
 */
7416 static ssize_t tracing_err_log_write(struct file *file,
7417 const char __user *buffer,
7418 size_t count, loff_t *ppos)
/* Release: drop the tr reference; seq_release only for read opens. */
7423 static int tracing_err_log_release(struct inode *inode, struct file *file)
7425 struct trace_array *tr = inode->i_private;
7427 trace_array_put(tr);
7429 if (file->f_mode & FMODE_READ)
7430 seq_release(inode, file);
7435 static const struct file_operations tracing_err_log_fops = {
7436 .open = tracing_err_log_open,
7437 .write = tracing_err_log_write,
7439 .llseek = seq_lseek,
7440 .release = tracing_err_log_release,
/*
 * tracing_buffers_open - open handler for per-cpu trace_pipe_raw.
 * Allocates the ftrace_buffer_info, binds the iterator to the chosen CPU
 * and the main array buffer, and primes info->read so the first read
 * pulls a fresh page from the ring buffer.
 */
7443 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7445 struct trace_array *tr = inode->i_private;
7446 struct ftrace_buffer_info *info;
7449 ret = tracing_check_open_get_tr(tr);
7453 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7455 trace_array_put(tr);
7459 mutex_lock(&trace_types_lock);
7462 info->iter.cpu_file = tracing_get_cpu(inode);
7463 info->iter.trace = tr->current_trace;
7464 info->iter.array_buffer = &tr->array_buffer;
7466 /* Force reading ring buffer for first read */
7467 info->read = (unsigned int)-1;
7469 filp->private_data = info;
7473 mutex_unlock(&trace_types_lock);
7475 ret = nonseekable_open(inode, filp);
7477 trace_array_put(tr);
/* Poll for data on the per-cpu raw buffer; delegates to trace_poll(). */
7483 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7485 struct ftrace_buffer_info *info = filp->private_data;
7486 struct trace_iterator *iter = &info->iter;
7488 return trace_poll(iter, filp, poll_table);
/*
 * tracing_buffers_read - read raw ring-buffer pages.
 * Lazily allocates a "spare" page, fills it with ring_buffer_read_page(),
 * then copies out of it across reads (info->read tracks the offset within
 * the page).  When the buffer is empty: -EAGAIN for O_NONBLOCK, otherwise
 * block in wait_on_pipe().  Reading a snapshot while the current tracer
 * uses the max buffer is refused (error line elided).
 */
7492 tracing_buffers_read(struct file *filp, char __user *ubuf,
7493 size_t count, loff_t *ppos)
7495 struct ftrace_buffer_info *info = filp->private_data;
7496 struct trace_iterator *iter = &info->iter;
7503 #ifdef CONFIG_TRACER_MAX_TRACE
7504 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7509 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7511 if (IS_ERR(info->spare)) {
7512 ret = PTR_ERR(info->spare);
7515 info->spare_cpu = iter->cpu_file;
7521 /* Do we have previous read data to read? */
7522 if (info->read < PAGE_SIZE)
7526 trace_access_lock(iter->cpu_file);
7527 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7531 trace_access_unlock(iter->cpu_file);
7534 if (trace_empty(iter)) {
7535 if ((filp->f_flags & O_NONBLOCK))
7538 ret = wait_on_pipe(iter, 0);
7549 size = PAGE_SIZE - info->read;
7553 ret = copy_to_user(ubuf, info->spare + info->read, size);
/*
 * tracing_buffers_release - release handler for trace_pipe_raw.
 * Drops the trace_ref count and the tr reference, and returns the cached
 * spare page to the ring buffer (the spare != NULL guard is elided here).
 */
7565 static int tracing_buffers_release(struct inode *inode, struct file *file)
7567 struct ftrace_buffer_info *info = file->private_data;
7568 struct trace_iterator *iter = &info->iter;
7570 mutex_lock(&trace_types_lock);
7572 iter->tr->trace_ref--;
7574 __trace_array_put(iter->tr);
7577 ring_buffer_free_read_page(iter->array_buffer->buffer,
7578 info->spare_cpu, info->spare);
7581 mutex_unlock(&trace_types_lock);
/*
 * buffer_ref: refcounted handle tying a ring-buffer page to the pipe
 * buffers produced by tracing_buffers_splice_read() below.
 * NOTE(review): the struct head and page/cpu members are elided here.
 */
7587 struct trace_buffer *buffer;
7590 refcount_t refcount;
/* Drop one reference; the last one returns the page to the ring buffer. */
7593 static void buffer_ref_release(struct buffer_ref *ref)
7595 if (!refcount_dec_and_test(&ref->refcount))
7597 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
/* pipe_buf_operations ->release: the pipe is done with this page. */
7601 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7602 struct pipe_buffer *buf)
7604 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7606 buffer_ref_release(ref);
/*
 * pipe_buf_operations ->get: take an extra reference, refusing once the
 * count gets implausibly large (overflow hardening).
 */
7610 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7611 struct pipe_buffer *buf)
7613 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7615 if (refcount_read(&ref->refcount) > INT_MAX/2)
7618 refcount_inc(&ref->refcount);
7622 /* Pipe buffer operations for a buffer. */
7623 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7624 .release = buffer_pipe_buf_release,
7625 .get = buffer_pipe_buf_get,
7629 * Callback from splice_to_pipe(), if we need to release some pages
7630 * at the end of the spd in case we error'ed out in filling the pipe.
7632 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7634 struct buffer_ref *ref =
7635 (struct buffer_ref *)spd->partial[i].private;
7637 buffer_ref_release(ref);
7638 spd->partial[i].private = 0;
/*
 * tracing_buffers_splice_read - zero-copy splice of raw ring-buffer pages.
 * Requires page-aligned *ppos and len (sub-page lengths rejected).  For
 * each page: allocate a buffer_ref + read page, fill it via
 * ring_buffer_read_page(), and hand the page to the pipe; the buffer_ref
 * refcount keeps the page alive until the pipe consumer drops it.  If
 * nothing was read, either return (elided -EAGAIN) for nonblocking
 * callers or wait and retry.
 */
7642 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7643 struct pipe_inode_info *pipe, size_t len,
7646 struct ftrace_buffer_info *info = file->private_data;
7647 struct trace_iterator *iter = &info->iter;
7648 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7649 struct page *pages_def[PIPE_DEF_BUFFERS];
7650 struct splice_pipe_desc spd = {
7652 .partial = partial_def,
7653 .nr_pages_max = PIPE_DEF_BUFFERS,
7654 .ops = &buffer_pipe_buf_ops,
7655 .spd_release = buffer_spd_release,
7657 struct buffer_ref *ref;
7661 #ifdef CONFIG_TRACER_MAX_TRACE
7662 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7666 if (*ppos & (PAGE_SIZE - 1))
7669 if (len & (PAGE_SIZE - 1)) {
7670 if (len < PAGE_SIZE)
7675 if (splice_grow_spd(pipe, &spd))
7679 trace_access_lock(iter->cpu_file);
7680 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7682 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7686 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7692 refcount_set(&ref->refcount, 1);
7693 ref->buffer = iter->array_buffer->buffer;
7694 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7695 if (IS_ERR(ref->page)) {
7696 ret = PTR_ERR(ref->page);
7701 ref->cpu = iter->cpu_file;
7703 r = ring_buffer_read_page(ref->buffer, &ref->page,
7704 len, iter->cpu_file, 1);
7706 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7712 page = virt_to_page(ref->page);
7714 spd.pages[i] = page;
7715 spd.partial[i].len = PAGE_SIZE;
7716 spd.partial[i].offset = 0;
7717 spd.partial[i].private = (unsigned long)ref;
7721 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7724 trace_access_unlock(iter->cpu_file);
7727 /* did we read anything? */
7728 if (!spd.nr_pages) {
7733 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7736 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7743 ret = splice_to_pipe(pipe, &spd);
7745 splice_shrink_spd(&spd);
/* file_operations for per-cpu trace_pipe_raw. */
7750 static const struct file_operations tracing_buffers_fops = {
7751 .open = tracing_buffers_open,
7752 .read = tracing_buffers_read,
7753 .poll = tracing_buffers_poll,
7754 .release = tracing_buffers_release,
7755 .splice_read = tracing_buffers_splice_read,
7756 .llseek = no_llseek,
/*
 * tracing_stats_read - read handler for per-cpu "stats".
 * Formats the CPU's ring-buffer counters (entries, overruns, bytes,
 * dropped/read events) plus the oldest-event and current timestamps into a
 * trace_seq and copies it to userspace.  Timestamps are split into
 * sec.usec only when the active clock counts in nanoseconds; counter/tsc
 * clocks are printed raw.
 */
7760 tracing_stats_read(struct file *filp, char __user *ubuf,
7761 size_t count, loff_t *ppos)
7763 struct inode *inode = file_inode(filp);
7764 struct trace_array *tr = inode->i_private;
7765 struct array_buffer *trace_buf = &tr->array_buffer;
7766 int cpu = tracing_get_cpu(inode);
7767 struct trace_seq *s;
7769 unsigned long long t;
7770 unsigned long usec_rem;
7772 s = kmalloc(sizeof(*s), GFP_KERNEL);
7778 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7779 trace_seq_printf(s, "entries: %ld\n", cnt);
7781 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7782 trace_seq_printf(s, "overrun: %ld\n", cnt);
7784 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7785 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7787 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7788 trace_seq_printf(s, "bytes: %ld\n", cnt);
7790 if (trace_clocks[tr->clock_id].in_ns) {
7791 /* local or global for trace_clock */
7792 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7793 usec_rem = do_div(t, USEC_PER_SEC);
7794 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7797 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7798 usec_rem = do_div(t, USEC_PER_SEC);
7799 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7801 /* counter or tsc mode for trace_clock */
7802 trace_seq_printf(s, "oldest event ts: %llu\n",
7803 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7805 trace_seq_printf(s, "now ts: %llu\n",
7806 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7809 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7810 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7812 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7813 trace_seq_printf(s, "read events: %ld\n", cnt);
7815 count = simple_read_from_buffer(ubuf, count, ppos,
7816 s->buffer, trace_seq_used(s));
7823 static const struct file_operations tracing_stats_fops = {
7824 .open = tracing_open_generic_tr,
7825 .read = tracing_stats_read,
7826 .llseek = generic_file_llseek,
7827 .release = tracing_release_generic_tr,
7830 #ifdef CONFIG_DYNAMIC_FTRACE
/*
 * tracing_read_dyn_info - read handler for dyn_ftrace_total_info.
 * Reports the dynamic-ftrace record count and the pages/groups used to
 * hold them.  Uses a heap buffer (kfree elided in this excerpt).
 */
7833 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7834 size_t cnt, loff_t *ppos)
7840 /* 256 should be plenty to hold the amount needed */
7841 buf = kmalloc(256, GFP_KERNEL);
7845 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7846 ftrace_update_tot_cnt,
7847 ftrace_number_of_pages,
7848 ftrace_number_of_groups);
7850 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7855 static const struct file_operations tracing_dyn_info_fops = {
7856 .open = tracing_open_generic,
7857 .read = tracing_read_dyn_info,
7858 .llseek = generic_file_llseek,
7860 #endif /* CONFIG_DYNAMIC_FTRACE */
7862 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/*
 * ftrace function probes implementing "func:snapshot[:count]" in
 * set_ftrace_filter: take a snapshot when the traced function is hit,
 * either unconditionally or up to a per-ip count kept in an
 * ftrace_func_mapper.
 */
7864 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7865 struct trace_array *tr, struct ftrace_probe_ops *ops,
7868 tracing_snapshot_instance(tr);
/* Counted variant: only snapshot while the per-ip counter is non-zero. */
7872 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7873 struct trace_array *tr, struct ftrace_probe_ops *ops,
7876 struct ftrace_func_mapper *mapper = data;
7880 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7890 tracing_snapshot_instance(tr);
/* Print "func:snapshot:count=N" (or ":unlimited") for set_ftrace_filter. */
7894 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7895 struct ftrace_probe_ops *ops, void *data)
7897 struct ftrace_func_mapper *mapper = data;
7900 seq_printf(m, "%ps:", (void *)ip);
7902 seq_puts(m, "snapshot");
7905 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7908 seq_printf(m, ":count=%ld\n", *count);
7910 seq_puts(m, ":unlimited\n");
/* Probe init: lazily allocate the mapper and record the per-ip count. */
7916 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7917 unsigned long ip, void *init_data, void **data)
7919 struct ftrace_func_mapper *mapper = *data;
7922 mapper = allocate_ftrace_func_mapper();
7928 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
/* Probe free: drop one ip mapping, or the whole mapper on teardown. */
7932 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7933 unsigned long ip, void *data)
7935 struct ftrace_func_mapper *mapper = data;
7940 free_ftrace_func_mapper(mapper, NULL);
7944 ftrace_func_mapper_remove_ip(mapper, ip);
7947 static struct ftrace_probe_ops snapshot_probe_ops = {
7948 .func = ftrace_snapshot,
7949 .print = ftrace_snapshot_print,
7952 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7953 .func = ftrace_count_snapshot,
7954 .print = ftrace_snapshot_print,
7955 .init = ftrace_snapshot_init,
7956 .free = ftrace_snapshot_free,
/*
 * Command callback for "snapshot" in set_ftrace_filter.  "!glob" (note
 * glob+1) unregisters; an optional ":count" parameter selects the counted
 * probe ops and is stashed directly in the callback data pointer.
 */
7960 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7961 char *glob, char *cmd, char *param, int enable)
7963 struct ftrace_probe_ops *ops;
7964 void *count = (void *)-1;
7971 /* hash funcs only work with set_ftrace_filter */
7975 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7978 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7983 number = strsep(&param, ":");
7985 if (!strlen(number))
7989 * We use the callback data field (which is a pointer)
7992 ret = kstrtoul(number, 0, (unsigned long *)&count);
7997 ret = tracing_alloc_snapshot_instance(tr);
8001 ret = register_ftrace_function_probe(glob, tr, ops, count);
8004 return ret < 0 ? ret : 0;
8007 static struct ftrace_func_command ftrace_snapshot_cmd = {
8009 .func = ftrace_trace_snapshot_callback,
8012 static __init int register_snapshot_cmd(void)
8014 return register_ftrace_command(&ftrace_snapshot_cmd);
/* Stub when snapshot probes are configured out. */
8017 static inline __init int register_snapshot_cmd(void) { return 0; }
8018 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
/*
 * tracing_get_dentry - resolve the tracefs directory for @tr.
 * The top-level (global) trace array uses NULL as the tracefs parent;
 * instances return their own dir (return lines elided in this excerpt).
 */
8020 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8022 if (WARN_ON(!tr->dir))
8023 return ERR_PTR(-ENODEV);
8025 /* Top directory uses NULL as the parent */
8026 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8029 /* All sub buffers have a descriptor */
/*
 * tracing_dentry_percpu - lazily create and cache the "per_cpu" directory.
 */
8033 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8035 struct dentry *d_tracer;
8038 return tr->percpu_dir;
8040 d_tracer = tracing_get_dentry(tr);
8041 if (IS_ERR(d_tracer))
8044 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8046 MEM_FAIL(!tr->percpu_dir,
8047 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8049 return tr->percpu_dir;
/*
 * trace_create_cpu_file - trace_create_file() plus stashing the cpu
 * number (biased by 1) in i_cdev so tracing_get_cpu() can recover it.
 */
8052 static struct dentry *
8053 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8054 void *data, long cpu, const struct file_operations *fops)
8056 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8058 if (ret) /* See tracing_get_cpu() */
8059 d_inode(ret)->i_cdev = (void *)(cpu + 1);
/*
 * tracing_init_tracefs_percpu - populate per_cpu/cpuN with the per-cpu
 * control files (trace_pipe, trace, trace_pipe_raw, stats, buffer_size_kb,
 * and the snapshot files when CONFIG_TRACER_SNAPSHOT is enabled).
 */
8064 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8066 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8067 struct dentry *d_cpu;
8068 char cpu_dir[30]; /* 30 characters should be more than enough */
8073 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8074 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8076 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8080 /* per cpu trace_pipe */
8081 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8082 tr, cpu, &tracing_pipe_fops);
8085 trace_create_cpu_file("trace", 0644, d_cpu,
8086 tr, cpu, &tracing_fops);
8088 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8089 tr, cpu, &tracing_buffers_fops);
8091 trace_create_cpu_file("stats", 0444, d_cpu,
8092 tr, cpu, &tracing_stats_fops);
8094 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8095 tr, cpu, &tracing_entries_fops);
8097 #ifdef CONFIG_TRACER_SNAPSHOT
8098 trace_create_cpu_file("snapshot", 0644, d_cpu,
8099 tr, cpu, &snapshot_fops);
8101 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8102 tr, cpu, &snapshot_raw_fops);
8106 #ifdef CONFIG_FTRACE_SELFTEST
8107 /* Let selftest have access to static functions in this file */
8108 #include "trace_selftest.c"
/*
 * Tracer-specific option files (options/<opt>): read returns "0\n"/"1\n"
 * depending on whether the option bit is set; write accepts only 0 or 1
 * and flips the bit through __set_tracer_option() under trace_types_lock.
 */
8112 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8115 struct trace_option_dentry *topt = filp->private_data;
8118 if (topt->flags->val & topt->opt->bit)
8123 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8127 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8130 struct trace_option_dentry *topt = filp->private_data;
8134 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8138 if (val != 0 && val != 1)
/* Only touch the tracer flag when the requested value differs. */
8141 if (!!(topt->flags->val & topt->opt->bit) != val) {
8142 mutex_lock(&trace_types_lock);
8143 ret = __set_tracer_option(topt->tr, topt->flags,
8145 mutex_unlock(&trace_types_lock);
8156 static const struct file_operations trace_options_fops = {
8157 .open = tracing_open_generic,
8158 .read = trace_options_read,
8159 .write = trace_options_write,
8160 .llseek = generic_file_llseek,
8164 * In order to pass in both the trace_array descriptor as well as the index
8165 * to the flag that the trace option file represents, the trace_array
8166 * has a character array of trace_flags_index[], which holds the index
8167 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8168 * The address of this character array is passed to the flag option file
8169 * read/write callbacks.
8171 * In order to extract both the index and the trace_array descriptor,
8172 * get_tr_index() uses the following algorithm.
8176 * As the pointer itself contains the address of the index (remember
8179 * Then to get the trace_array descriptor, by subtracting that index
8180 * from the ptr, we get to the start of the index itself.
8182 * ptr - idx == &index[0]
8184 * Then a simple container_of() from that pointer gets us to the
8185 * trace_array descriptor.
8187 static void get_tr_index(void *data, struct trace_array **ptr,
8188 unsigned int *pindex)
8190 *pindex = *(unsigned char *)data;
8192 *ptr = container_of(data - *pindex, struct trace_array,
/*
 * Core (tracer-independent) option files: same 0/1 read/write protocol as
 * trace_options_* above, but the bit lives in tr->trace_flags and is set
 * via set_tracer_flag() under event_mutex + trace_types_lock.
 */
8197 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8200 void *tr_index = filp->private_data;
8201 struct trace_array *tr;
8205 get_tr_index(tr_index, &tr, &index);
8207 if (tr->trace_flags & (1 << index))
8212 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8216 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8219 void *tr_index = filp->private_data;
8220 struct trace_array *tr;
8225 get_tr_index(tr_index, &tr, &index);
8227 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8231 if (val != 0 && val != 1)
8234 mutex_lock(&event_mutex);
8235 mutex_lock(&trace_types_lock);
8236 ret = set_tracer_flag(tr, 1 << index, val);
8237 mutex_unlock(&trace_types_lock);
8238 mutex_unlock(&event_mutex);
8248 static const struct file_operations trace_options_core_fops = {
8249 .open = tracing_open_generic,
8250 .read = trace_options_core_read,
8251 .write = trace_options_core_write,
8252 .llseek = generic_file_llseek,
/*
 * trace_create_file - tracefs_create_file() with a warning on failure
 * (tracing degrades gracefully rather than erroring out).
 */
8255 struct dentry *trace_create_file(const char *name,
8257 struct dentry *parent,
8259 const struct file_operations *fops)
8263 ret = tracefs_create_file(name, mode, parent, data, fops);
8265 pr_warn("Could not create tracefs '%s' entry\n", name);
/* Lazily create and cache the "options" directory for @tr. */
8271 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8273 struct dentry *d_tracer;
8278 d_tracer = tracing_get_dentry(tr);
8279 if (IS_ERR(d_tracer))
8282 tr->options = tracefs_create_dir("options", d_tracer);
8284 pr_warn("Could not create tracefs directory 'options'\n");
8292 create_trace_option_file(struct trace_array *tr,
8293 struct trace_option_dentry *topt,
8294 struct tracer_flags *flags,
8295 struct tracer_opt *opt)
8297 struct dentry *t_options;
8299 t_options = trace_options_init_dentry(tr);
8303 topt->flags = flags;
8307 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8308 &trace_options_fops);
8313 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8315 struct trace_option_dentry *topts;
8316 struct trace_options *tr_topts;
8317 struct tracer_flags *flags;
8318 struct tracer_opt *opts;
8325 flags = tracer->flags;
8327 if (!flags || !flags->opts)
8331 * If this is an instance, only create flags for tracers
8332 * the instance may have.
8334 if (!trace_ok_for_array(tracer, tr))
8337 for (i = 0; i < tr->nr_topts; i++) {
8338 /* Make sure there's no duplicate flags. */
8339 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8345 for (cnt = 0; opts[cnt].name; cnt++)
8348 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8352 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8359 tr->topts = tr_topts;
8360 tr->topts[tr->nr_topts].tracer = tracer;
8361 tr->topts[tr->nr_topts].topts = topts;
8364 for (cnt = 0; opts[cnt].name; cnt++) {
8365 create_trace_option_file(tr, &topts[cnt], flags,
8367 MEM_FAIL(topts[cnt].entry == NULL,
8368 "Failed to create trace option: %s",
/*
 * Create a core (tracer-independent) option file; the file's private data
 * points at tr->trace_flags_index[index] so trace_options_core_fops can
 * recover which bit to flip.
 */
8373 static struct dentry *
8374 create_trace_option_core_file(struct trace_array *tr,
8375 const char *option, long index)
8377 struct dentry *t_options;
8379 t_options = trace_options_init_dentry(tr);
8383 return trace_create_file(option, 0644, t_options,
8384 (void *)&tr->trace_flags_index[index],
8385 &trace_options_core_fops);
/*
 * Populate the "options" directory with one core-option file per entry in
 * trace_options[]; for sub-instances, flags restricted to the top level
 * (TOP_LEVEL_TRACE_FLAGS) are skipped.
 */
8388 static void create_trace_options_dir(struct trace_array *tr)
8390 struct dentry *t_options;
8391 bool top_level = tr == &global_trace;
8394 t_options = trace_options_init_dentry(tr);
8398 for (i = 0; trace_options[i]; i++) {
8400 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8401 create_trace_option_core_file(tr, trace_options[i], i);
/*
 * "tracing_on" file: read reports tracer_tracing_is_on(tr) as "0\n"/"1\n";
 * write parses a base-10 value and, under trace_types_lock, turns the ring
 * buffer on/off and invokes the current tracer's ->start/->stop hooks.
 * A write that matches the current state is treated as a no-op.
 */
8406 rb_simple_read(struct file *filp, char __user *ubuf,
8407 size_t cnt, loff_t *ppos)
8409 struct trace_array *tr = filp->private_data;
8413 r = tracer_tracing_is_on(tr);
8414 r = sprintf(buf, "%d\n", r);
8416 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8420 rb_simple_write(struct file *filp, const char __user *ubuf,
8421 size_t cnt, loff_t *ppos)
8423 struct trace_array *tr = filp->private_data;
8424 struct trace_buffer *buffer = tr->array_buffer.buffer;
8428 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8433 mutex_lock(&trace_types_lock);
8434 if (!!val == tracer_tracing_is_on(tr)) {
8435 val = 0; /* do nothing */
8437 tracer_tracing_on(tr);
8438 if (tr->current_trace->start)
8439 tr->current_trace->start(tr);
8441 tracer_tracing_off(tr);
8442 if (tr->current_trace->stop)
8443 tr->current_trace->stop(tr);
8445 mutex_unlock(&trace_types_lock);
/* file_operations for "tracing_on"; open/release manage the tr refcount. */
8453 static const struct file_operations rb_simple_fops = {
8454 .open = tracing_open_generic_tr,
8455 .read = rb_simple_read,
8456 .write = rb_simple_write,
8457 .release = tracing_release_generic_tr,
8458 .llseek = default_llseek,
/*
 * "buffer_percent" file: read returns tr->buffer_percent; write parses a
 * base-10 value and stores it (range validation lines are elided from this
 * excerpt — presumably 0..100; confirm against full source).
 */
8462 buffer_percent_read(struct file *filp, char __user *ubuf,
8463 size_t cnt, loff_t *ppos)
8465 struct trace_array *tr = filp->private_data;
8469 r = tr->buffer_percent;
8470 r = sprintf(buf, "%d\n", r);
8472 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8476 buffer_percent_write(struct file *filp, const char __user *ubuf,
8477 size_t cnt, loff_t *ppos)
8479 struct trace_array *tr = filp->private_data;
8483 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8493 tr->buffer_percent = val;
8500 static const struct file_operations buffer_percent_fops = {
8501 .open = tracing_open_generic_tr,
8502 .read = buffer_percent_read,
8503 .write = buffer_percent_write,
8504 .release = tracing_release_generic_tr,
8505 .llseek = default_llseek,
8508 static struct dentry *trace_instance_dir;
8511 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
/*
 * Allocate one array_buffer: the ring buffer itself (honouring the
 * per-array OVERWRITE flag) plus the per-CPU trace_array_cpu data.
 * On percpu-allocation failure the ring buffer is freed again.
 */
8514 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8516 enum ring_buffer_flags rb_flags;
8518 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8522 buf->buffer = ring_buffer_alloc(size, rb_flags);
8526 buf->data = alloc_percpu(struct trace_array_cpu);
8528 ring_buffer_free(buf->buffer);
8533 /* Allocate the first page for all buffers */
8534 set_buffer_entries(&tr->array_buffer,
8535 ring_buffer_size(tr->array_buffer.buffer, 0));
/*
 * Allocate the main buffer and, with CONFIG_TRACER_MAX_TRACE, the snapshot
 * (max) buffer — full-sized only when boot requested a snapshot, otherwise
 * minimal (1). If the max buffer fails, the already-allocated main buffer
 * is torn down. allocate_snapshot is consumed here so only the top-level
 * array honours the command-line snapshot request.
 */
8540 static int allocate_trace_buffers(struct trace_array *tr, int size)
8544 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8548 #ifdef CONFIG_TRACER_MAX_TRACE
8549 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8550 allocate_snapshot ? size : 1);
8551 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8552 ring_buffer_free(tr->array_buffer.buffer);
8553 tr->array_buffer.buffer = NULL;
8554 free_percpu(tr->array_buffer.data);
8555 tr->array_buffer.data = NULL;
8558 tr->allocated_snapshot = allocate_snapshot;
8561 * Only the top level trace array gets its snapshot allocated
8562 * from the kernel command line.
8564 allocate_snapshot = false;
/* Free one array_buffer (ring buffer + percpu data). */
8570 static void free_trace_buffer(struct array_buffer *buf)
8573 ring_buffer_free(buf->buffer);
8575 free_percpu(buf->data);
/* Free the main buffer and, if configured, the snapshot buffer. */
8580 static void free_trace_buffers(struct trace_array *tr)
8585 free_trace_buffer(&tr->array_buffer);
8587 #ifdef CONFIG_TRACER_MAX_TRACE
8588 free_trace_buffer(&tr->max_buffer);
/*
 * Identity-initialize trace_flags_index; the per-bit option files index
 * into this array to find which flag they control.
 */
8592 static void init_trace_flags_index(struct trace_array *tr)
8596 /* Used by the trace options files */
8597 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8598 tr->trace_flags_index[i] = i;
/* Add option files for every registered tracer (caller holds trace_types_lock). */
8601 static void __update_tracer_options(struct trace_array *tr)
8605 for (t = trace_types; t; t = t->next)
8606 add_tracer_options(tr, t);
/* Locked wrapper around __update_tracer_options(). */
8609 static void update_tracer_options(struct trace_array *tr)
8611 mutex_lock(&trace_types_lock);
8612 __update_tracer_options(tr);
8613 mutex_unlock(&trace_types_lock);
8616 /* Must have trace_types_lock held */
/* Linear search of ftrace_trace_arrays by instance name; NULL if absent. */
8617 struct trace_array *trace_array_find(const char *instance)
8619 struct trace_array *tr, *found = NULL;
8621 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8622 if (tr->name && strcmp(tr->name, instance) == 0) {
/*
 * Locked lookup; elided lines presumably take a reference on the found
 * array before dropping the lock — confirm against full source.
 */
8631 struct trace_array *trace_array_find_get(const char *instance)
8633 struct trace_array *tr;
8635 mutex_lock(&trace_types_lock);
8636 tr = trace_array_find(instance);
8639 mutex_unlock(&trace_types_lock);
/*
 * Create the instance's tracefs directory, register its events, then
 * populate the standard per-instance files and tracer options.
 * The directory is removed again if event registration fails.
 */
8644 static int trace_array_create_dir(struct trace_array *tr)
8648 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8652 ret = event_trace_add_tracer(tr->dir, tr);
8654 tracefs_remove(tr->dir);
8656 init_tracer_tracefs(tr, tr->dir);
8657 __update_tracer_options(tr);
/*
 * Allocate and initialize a new named trace array (instance): flags
 * inherited from global_trace minus ZEROED_TRACE_FLAGS, all-CPUs mask,
 * locks, nop tracer, list heads, ring buffers and ftrace ops. If the
 * instances directory already exists the tracefs dir is created now;
 * otherwise early events are added for later directory creation. On any
 * failure the goto-cleanup path unwinds and ERR_PTR(ret) is returned.
 * Caller must hold trace_types_lock (and event_mutex).
 */
8662 static struct trace_array *trace_array_create(const char *name)
8664 struct trace_array *tr;
8668 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8670 return ERR_PTR(ret);
8672 tr->name = kstrdup(name, GFP_KERNEL);
8676 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8679 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8681 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8683 raw_spin_lock_init(&tr->start_lock);
8685 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8687 tr->current_trace = &nop_trace;
8689 INIT_LIST_HEAD(&tr->systems);
8690 INIT_LIST_HEAD(&tr->events);
8691 INIT_LIST_HEAD(&tr->hist_vars);
8692 INIT_LIST_HEAD(&tr->err_log);
8694 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8697 if (ftrace_allocate_ftrace_ops(tr) < 0)
8700 ftrace_init_trace_array(tr);
8702 init_trace_flags_index(tr);
8704 if (trace_instance_dir) {
8705 ret = trace_array_create_dir(tr);
8709 __trace_early_add_events(tr);
8711 list_add(&tr->list, &ftrace_trace_arrays);
8718 ftrace_free_ftrace_ops(tr);
8719 free_trace_buffers(tr);
8720 free_cpumask_var(tr->tracing_cpumask);
8724 return ERR_PTR(ret);
/*
 * tracefs mkdir callback for the "instances" directory: under event_mutex
 * + trace_types_lock, refuse duplicates then create the named array.
 */
8727 static int instance_mkdir(const char *name)
8729 struct trace_array *tr;
8732 mutex_lock(&event_mutex);
8733 mutex_lock(&trace_types_lock);
8736 if (trace_array_find(name))
8739 tr = trace_array_create(name);
8741 ret = PTR_ERR_OR_ZERO(tr);
8744 mutex_unlock(&trace_types_lock);
8745 mutex_unlock(&event_mutex);
8750 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8751 * @name: The name of the trace array to be looked up/created.
8753 * Returns pointer to trace array with given name.
8754 * NULL, if it cannot be created.
8756 * NOTE: This function increments the reference counter associated with the
8757 * trace array returned. This makes sure it cannot be freed while in use.
8758 * Use trace_array_put() once the trace array is no longer needed.
8759 * If the trace_array is to be freed, trace_array_destroy() needs to
8760 * be called after the trace_array_put(), or simply let user space delete
8761 * it from the tracefs instances directory. But until the
8762 * trace_array_put() is called, user space can not delete it.
8765 struct trace_array *trace_array_get_by_name(const char *name)
8767 struct trace_array *tr;
8769 mutex_lock(&event_mutex);
8770 mutex_lock(&trace_types_lock);
/* Reuse an existing instance of the same name before creating a new one. */
8772 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8773 if (tr->name && strcmp(tr->name, name) == 0)
8777 tr = trace_array_create(name);
8785 mutex_unlock(&trace_types_lock);
8786 mutex_unlock(&event_mutex);
8789 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
/*
 * Tear down one instance (locks held by caller): refuse if still
 * referenced or busy, unlink it, clear all ZEROED_TRACE_FLAGS options,
 * detach tracers/probes/events/pids/function files, remove its tracefs
 * directory and free buffers, per-tracer option arrays and the cpumask.
 */
8791 static int __remove_instance(struct trace_array *tr)
8795 /* Reference counter for a newly created trace array = 1. */
8796 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8799 list_del(&tr->list);
8801 /* Disable all the flags that were enabled coming in */
8802 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8803 if ((1 << i) & ZEROED_TRACE_FLAGS)
8804 set_tracer_flag(tr, 1 << i, 0);
8807 tracing_set_nop(tr);
8808 clear_ftrace_function_probes(tr);
8809 event_trace_del_tracer(tr);
8810 ftrace_clear_pids(tr);
8811 ftrace_destroy_function_files(tr);
8812 tracefs_remove(tr->dir);
8813 free_trace_buffers(tr);
8815 for (i = 0; i < tr->nr_topts; i++) {
8816 kfree(tr->topts[i].topts);
8820 free_cpumask_var(tr->tracing_cpumask);
/*
 * Public destroy API: verify @this_tr is still on ftrace_trace_arrays
 * (guards against stale pointers) before calling __remove_instance(),
 * all under event_mutex + trace_types_lock.
 */
8827 int trace_array_destroy(struct trace_array *this_tr)
8829 struct trace_array *tr;
8835 mutex_lock(&event_mutex);
8836 mutex_lock(&trace_types_lock);
8840 /* Making sure trace array exists before destroying it. */
8841 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8842 if (tr == this_tr) {
8843 ret = __remove_instance(tr);
8848 mutex_unlock(&trace_types_lock);
8849 mutex_unlock(&event_mutex);
8853 EXPORT_SYMBOL_GPL(trace_array_destroy);
/* tracefs rmdir callback: locked name lookup, then __remove_instance(). */
8855 static int instance_rmdir(const char *name)
8857 struct trace_array *tr;
8860 mutex_lock(&event_mutex);
8861 mutex_lock(&trace_types_lock);
8864 tr = trace_array_find(name);
8866 ret = __remove_instance(tr);
8868 mutex_unlock(&trace_types_lock);
8869 mutex_unlock(&event_mutex);
/*
 * Boot-time setup of the "instances" directory (with mkdir/rmdir hooks
 * elided from this excerpt), then create tracefs directories for any
 * arrays that were registered before the directory existed.
 */
8874 static __init void create_trace_instances(struct dentry *d_tracer)
8876 struct trace_array *tr;
8878 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8881 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8884 mutex_lock(&event_mutex);
8885 mutex_lock(&trace_types_lock);
8887 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8890 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8891 "Failed to create instance directory\n"))
8895 mutex_unlock(&trace_types_lock);
8896 mutex_unlock(&event_mutex);
/*
 * Create the standard per-trace-array control files under @d_tracer
 * (NULL for the top-level directory): tracer selection, cpumask, options,
 * trace/trace_pipe, buffer sizing, markers, clock, tracing_on,
 * buffer_percent (default 50), max-latency/snapshot/error_log where
 * configured, plus per-CPU and ftrace-specific files.
 */
8900 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8902 struct trace_event_file *file;
8905 trace_create_file("available_tracers", 0444, d_tracer,
8906 tr, &show_traces_fops);
8908 trace_create_file("current_tracer", 0644, d_tracer,
8909 tr, &set_tracer_fops);
8911 trace_create_file("tracing_cpumask", 0644, d_tracer,
8912 tr, &tracing_cpumask_fops);
8914 trace_create_file("trace_options", 0644, d_tracer,
8915 tr, &tracing_iter_fops);
8917 trace_create_file("trace", 0644, d_tracer,
8920 trace_create_file("trace_pipe", 0444, d_tracer,
8921 tr, &tracing_pipe_fops);
8923 trace_create_file("buffer_size_kb", 0644, d_tracer,
8924 tr, &tracing_entries_fops);
8926 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8927 tr, &tracing_total_entries_fops);
8929 trace_create_file("free_buffer", 0200, d_tracer,
8930 tr, &tracing_free_buffer_fops);
8932 trace_create_file("trace_marker", 0220, d_tracer,
8933 tr, &tracing_mark_fops);
/* Attach a "trigger" file to the ftrace:print event if it exists. */
8935 file = __find_event_file(tr, "ftrace", "print");
8936 if (file && file->dir)
8937 trace_create_file("trigger", 0644, file->dir, file,
8938 &event_trigger_fops);
8939 tr->trace_marker_file = file;
8941 trace_create_file("trace_marker_raw", 0220, d_tracer,
8942 tr, &tracing_mark_raw_fops);
8944 trace_create_file("trace_clock", 0644, d_tracer, tr,
8947 trace_create_file("tracing_on", 0644, d_tracer,
8948 tr, &rb_simple_fops);
8950 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8951 &trace_time_stamp_mode_fops);
8953 tr->buffer_percent = 50;
8955 trace_create_file("buffer_percent", 0444, d_tracer,
8956 tr, &buffer_percent_fops);
8958 create_trace_options_dir(tr);
8960 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8961 trace_create_maxlat_file(tr, d_tracer);
8964 if (ftrace_create_function_files(tr, d_tracer))
8965 MEM_FAIL(1, "Could not allocate function filter files");
8967 #ifdef CONFIG_TRACER_SNAPSHOT
8968 trace_create_file("snapshot", 0644, d_tracer,
8969 tr, &snapshot_fops);
8972 trace_create_file("error_log", 0644, d_tracer,
8973 tr, &tracing_err_log_fops);
8975 for_each_tracing_cpu(cpu)
8976 tracing_init_tracefs_percpu(tr, cpu);
8978 ftrace_init_tracefs(tr, d_tracer);
/*
 * debugfs automount callback: submount tracefs at debugfs/tracing so old
 * tools that expect the debugfs path keep working.
 */
8981 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8983 struct vfsmount *mnt;
8984 struct file_system_type *type;
8987 * To maintain backward compatibility for tools that mount
8988 * debugfs to get to the tracing facility, tracefs is automatically
8989 * mounted to the debugfs/tracing directory.
8991 type = get_fs_type("tracefs");
8994 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8995 put_filesystem(type);
9004 * tracing_init_dentry - initialize top level trace array
9006 * This is called when creating files or directories in the tracing
9007 * directory. It is called via fs_initcall() by any of the boot up code
9008 * and expects to return the dentry of the top level tracing directory.
9010 int tracing_init_dentry(void)
9012 struct trace_array *tr = &global_trace;
9014 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9015 pr_warn("Tracing disabled due to lockdown\n");
9019 /* The top level trace array uses NULL as parent */
9023 if (WARN_ON(!tracefs_initialized()))
9027 * As there may still be users that expect the tracing
9028 * files to exist in debugfs/tracing, we must automount
9029 * the tracefs file system there, so older tools still
9030 * work with the newer kernel.
9032 tr->dir = debugfs_create_automount("tracing", NULL,
9033 trace_automount, NULL);
/* Linker-provided bounds of the built-in eval (enum/sizeof) map section. */
9038 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9039 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
/* Register all built-in eval maps with the trace eval-map list. */
9041 static void __init trace_eval_init(void)
9045 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9046 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9049 #ifdef CONFIG_MODULES
/* On module load: register the module's eval maps (skipping bad-taint mods). */
9050 static void trace_module_add_evals(struct module *mod)
9052 if (!mod->num_trace_evals)
9056 * Modules with bad taint do not have events created, do
9057 * not bother with enums either.
9059 if (trace_module_has_bad_taint(mod))
9062 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9065 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * On module unload: walk the singly linked eval-map list under
 * trace_eval_mutex and unlink the entry whose head belongs to @mod.
 */
9066 static void trace_module_remove_evals(struct module *mod)
9068 union trace_eval_map_item *map;
9069 union trace_eval_map_item **last = &trace_eval_maps;
9071 if (!mod->num_trace_evals)
9074 mutex_lock(&trace_eval_mutex);
9076 map = trace_eval_maps;
9079 if (map->head.mod == mod)
9081 map = trace_eval_jmp_to_tail(map);
9082 last = &map->tail.next;
9083 map = map->tail.next;
9088 *last = trace_eval_jmp_to_tail(map)->tail.next;
9091 mutex_unlock(&trace_eval_mutex);
9094 static inline void trace_module_remove_evals(struct module *mod) { }
9095 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Module notifier: add eval maps on COMING, remove them on GOING. */
9097 static int trace_module_notify(struct notifier_block *self,
9098 unsigned long val, void *data)
9100 struct module *mod = data;
9103 case MODULE_STATE_COMING:
9104 trace_module_add_evals(mod);
9106 case MODULE_STATE_GOING:
9107 trace_module_remove_evals(mod);
9114 static struct notifier_block trace_module_nb = {
9115 .notifier_call = trace_module_notify,
9118 #endif /* CONFIG_MODULES */
/*
 * fs_initcall: create the top-level tracefs file hierarchy — the global
 * array's files, README/saved_cmdlines/saved_tgids, eval-map file,
 * module notifier, dyn_ftrace info, the instances directory — and then
 * add option files for all tracers registered so far.
 */
9120 static __init int tracer_init_tracefs(void)
9124 trace_access_lock_init();
9126 ret = tracing_init_dentry();
9132 init_tracer_tracefs(&global_trace, NULL);
9133 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9135 trace_create_file("tracing_thresh", 0644, NULL,
9136 &global_trace, &tracing_thresh_fops);
9138 trace_create_file("README", 0444, NULL,
9139 NULL, &tracing_readme_fops);
9141 trace_create_file("saved_cmdlines", 0444, NULL,
9142 NULL, &tracing_saved_cmdlines_fops);
9144 trace_create_file("saved_cmdlines_size", 0644, NULL,
9145 NULL, &tracing_saved_cmdlines_size_fops);
9147 trace_create_file("saved_tgids", 0444, NULL,
9148 NULL, &tracing_saved_tgids_fops);
9152 trace_create_eval_file(NULL);
9154 #ifdef CONFIG_MODULES
9155 register_module_notifier(&trace_module_nb);
9158 #ifdef CONFIG_DYNAMIC_FTRACE
9159 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9160 NULL, &tracing_dyn_info_fops);
9163 create_trace_instances(NULL);
9165 update_tracer_options(&global_trace);
/* Panic notifier: dump the ftrace buffer if ftrace_dump_on_oops is set. */
9170 static int trace_panic_handler(struct notifier_block *this,
9171 unsigned long event, void *unused)
9173 if (ftrace_dump_on_oops)
9174 ftrace_dump(ftrace_dump_on_oops);
9178 static struct notifier_block trace_panic_notifier = {
9179 .notifier_call = trace_panic_handler,
9181 .priority = 150 /* priority: INT_MAX >= x >= 0 */
/* Die (oops) notifier: same dump-on-oops behavior as the panic path. */
9184 static int trace_die_handler(struct notifier_block *self,
9190 if (ftrace_dump_on_oops)
9191 ftrace_dump(ftrace_dump_on_oops);
9199 static struct notifier_block trace_die_notifier = {
9200 .notifier_call = trace_die_handler,
9205 * printk is set to max of 1024, we really don't need it that big.
9206 * Nothing should be printing 1000 characters anyway.
9208 #define TRACE_MAX_PRINT 1000
9211 * Define here KERN_TRACE so that we have one place to modify
9212 * it if we decide to change what log level the ftrace dump
9215 #define KERN_TRACE KERN_EMERG
/*
 * Print a trace_seq to the console at KERN_TRACE level, clamping the
 * length to TRACE_MAX_PRINT and forcing NUL termination first.
 */
9218 trace_printk_seq(struct trace_seq *s)
9220 /* Probably should print a warning here. */
9221 if (s->seq.len >= TRACE_MAX_PRINT)
9222 s->seq.len = TRACE_MAX_PRINT;
9225 * More paranoid code. Although the buffer size is set to
9226 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9227 * an extra layer of protection.
9229 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9230 s->seq.len = s->seq.size - 1;
9232 /* should be zero ended, but we are paranoid. */
9233 s->buffer[s->seq.len] = 0;
9235 printk(KERN_TRACE "%s", s->buffer);
/*
 * Set up @iter to iterate the global trace array across all CPUs,
 * invoking the current tracer's ->open hook and setting the ANNOTATE /
 * TIME_IN_NS iterator flags from the buffer/clock state.
 */
9240 void trace_init_global_iter(struct trace_iterator *iter)
9242 iter->tr = &global_trace;
9243 iter->trace = iter->tr->current_trace;
9244 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9245 iter->array_buffer = &global_trace.array_buffer;
9247 if (iter->trace && iter->trace->open)
9248 iter->trace->open(iter);
9250 /* Annotate start of buffers if we had overruns */
9251 if (ring_buffer_overruns(iter->array_buffer->buffer))
9252 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9254 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9255 if (trace_clocks[iter->tr->clock_id].in_ns)
9256 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
/*
 * Dump the trace buffer to the console, typically from a panic/oops path:
 * single dumper enforced via dump_running, IRQs off, NMI-safe printk,
 * per-CPU recording disabled, user-symbol resolution suppressed, then
 * every entry printed in latency format until the buffer is empty.
 * @oops_dump_mode selects all CPUs vs. the current CPU only.
 */
9259 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9261 /* use static because iter can be a bit big for the stack */
9262 static struct trace_iterator iter;
9263 static atomic_t dump_running;
9264 struct trace_array *tr = &global_trace;
9265 unsigned int old_userobj;
9266 unsigned long flags;
9269 /* Only allow one dump user at a time. */
9270 if (atomic_inc_return(&dump_running) != 1) {
9271 atomic_dec(&dump_running);
9276 * Always turn off tracing when we dump.
9277 * We don't need to show trace output of what happens
9278 * between multiple crashes.
9280 * If the user does a sysrq-z, then they can re-enable
9281 * tracing with echo 1 > tracing_on.
9285 local_irq_save(flags);
9286 printk_nmi_direct_enter();
9288 /* Simulate the iterator */
9289 trace_init_global_iter(&iter);
9290 /* Can not use kmalloc for iter.temp */
9291 iter.temp = static_temp_buf;
9292 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9294 for_each_tracing_cpu(cpu) {
9295 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9298 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9300 /* don't look at user memory in panic mode */
9301 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9303 switch (oops_dump_mode) {
9305 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9308 iter.cpu_file = raw_smp_processor_id();
9313 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9314 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9317 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9319 /* Did function tracer already get disabled? */
9320 if (ftrace_is_dead()) {
9321 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9322 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9326 * We need to stop all tracing on all CPUS to read
9327 * the next buffer. This is a bit expensive, but is
9328 * not done often. We fill all what we can read,
9329 * and then release the locks again.
9332 while (!trace_empty(&iter)) {
9335 printk(KERN_TRACE "---------------------------------\n");
9339 trace_iterator_reset(&iter);
9340 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9342 if (trace_find_next_entry_inc(&iter) != NULL) {
9345 ret = print_trace_line(&iter);
9346 if (ret != TRACE_TYPE_NO_CONSUME)
9347 trace_consume(&iter);
/* Keep the NMI watchdog from firing during a long dump. */
9349 touch_nmi_watchdog();
9351 trace_printk_seq(&iter.seq);
9355 printk(KERN_TRACE " (ftrace buffer empty)\n");
9357 printk(KERN_TRACE "---------------------------------\n");
/* Restore the SYM_USEROBJ flag and re-enable per-CPU recording. */
9360 tr->trace_flags |= old_userobj;
9362 for_each_tracing_cpu(cpu) {
9363 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9365 atomic_dec(&dump_running);
9366 printk_nmi_direct_exit();
9367 local_irq_restore(flags);
9369 EXPORT_SYMBOL_GPL(ftrace_dump);
/*
 * Split @buf into argv with argv_split() and hand the result to
 * @createfn (freeing of argv is elided from this excerpt).
 */
9371 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9378 argv = argv_split(GFP_KERNEL, buf, &argc);
9383 ret = createfn(argc, argv);
9390 #define WRITE_BUFSIZE 4096
/*
 * Parse a user-space write as newline-separated commands: copy up to
 * WRITE_BUFSIZE-1 bytes at a time, reject over-long lines, strip '#'
 * comments, and run each complete line through trace_run_command().
 */
9392 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9393 size_t count, loff_t *ppos,
9394 int (*createfn)(int, char **))
9396 char *kbuf, *buf, *tmp;
9401 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9405 while (done < count) {
9406 size = count - done;
9408 if (size >= WRITE_BUFSIZE)
9409 size = WRITE_BUFSIZE - 1;
9411 if (copy_from_user(kbuf, buffer + done, size)) {
9418 tmp = strchr(buf, '\n');
9421 size = tmp - buf + 1;
9424 if (done + size < count) {
9427 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9428 pr_warn("Line length is too long: Should be less than %d\n",
9436 /* Remove comments */
9437 tmp = strchr(buf, '#');
9442 ret = trace_run_command(buf, createfn);
9447 } while (done < count);
/*
 * Core boot-time tracing bring-up: allocate cpumasks, trace_printk
 * buffers (only if trace_printk is used), CPU-hotplug state, the event
 * trigger temp buffer, saved-cmdlines and the global ring buffer; then
 * install the nop tracer, register panic/die notifiers, add global_trace
 * to the array list and apply boot options. Unwinds via goto labels on
 * failure.
 */
9457 __init static int tracer_alloc_buffers(void)
9463 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9464 pr_warn("Tracing disabled due to lockdown\n");
9469 * Make sure we don't accidently add more trace options
9470 * than we have bits for.
9472 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9474 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9477 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9478 goto out_free_buffer_mask;
9480 /* Only allocate trace_printk buffers if a trace_printk exists */
9481 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9482 /* Must be called before global_trace.buffer is allocated */
9483 trace_printk_init_buffers();
9485 /* To save memory, keep the ring buffer size to its minimum */
9486 if (ring_buffer_expanded)
9487 ring_buf_size = trace_buf_size;
9491 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9492 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9494 raw_spin_lock_init(&global_trace.start_lock);
9497 * The prepare callbacks allocates some memory for the ring buffer. We
9498 * don't free the buffer if the if the CPU goes down. If we were to free
9499 * the buffer, then the user would lose any trace that was in the
9500 * buffer. The memory will be removed once the "instance" is removed.
/* NOTE(review): "preapre" typo below is the actual registered state name;
 * changing it would alter user-visible cpuhp state naming. */
9502 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9503 "trace/RB:preapre", trace_rb_cpu_prepare,
9506 goto out_free_cpumask;
9507 /* Used for event triggers */
9509 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9511 goto out_rm_hp_state;
9513 if (trace_create_savedcmd() < 0)
9514 goto out_free_temp_buffer;
9516 /* TODO: make the number of buffers hot pluggable with CPUS */
9517 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9518 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9519 goto out_free_savedcmd;
9522 if (global_trace.buffer_disabled)
9525 if (trace_boot_clock) {
9526 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9528 pr_warn("Trace clock %s not defined, going back to default\n",
9533 * register_tracer() might reference current_trace, so it
9534 * needs to be set before we register anything. This is
9535 * just a bootstrap of current_trace anyway.
9537 global_trace.current_trace = &nop_trace;
9539 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9541 ftrace_init_global_array_ops(&global_trace);
9543 init_trace_flags_index(&global_trace);
9545 register_tracer(&nop_trace);
9547 /* Function tracing may start here (via kernel command line) */
9548 init_function_trace();
9550 /* All seems OK, enable tracing */
9551 tracing_disabled = 0;
9553 atomic_notifier_chain_register(&panic_notifier_list,
9554 &trace_panic_notifier);
9556 register_die_notifier(&trace_die_notifier);
9558 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9560 INIT_LIST_HEAD(&global_trace.systems);
9561 INIT_LIST_HEAD(&global_trace.events);
9562 INIT_LIST_HEAD(&global_trace.hist_vars);
9563 INIT_LIST_HEAD(&global_trace.err_log);
9564 list_add(&global_trace.list, &ftrace_trace_arrays);
9566 apply_trace_boot_options();
9568 register_snapshot_cmd();
/* Error unwind: free in reverse order of allocation. */
9573 free_saved_cmdlines_buffer(savedcmd);
9574 out_free_temp_buffer:
9575 ring_buffer_free(temp_buffer);
9577 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9579 free_cpumask_var(global_trace.tracing_cpumask);
9580 out_free_buffer_mask:
9581 free_cpumask_var(tracing_buffer_mask);
/*
 * Earliest tracing init: allocate the tracepoint_printk iterator (or
 * disable the feature on allocation failure) and bring up the buffers.
 */
9586 void __init early_trace_init(void)
9588 if (tracepoint_printk) {
9589 tracepoint_print_iter =
9590 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9591 if (MEM_FAIL(!tracepoint_print_iter,
9592 "Failed to allocate trace iterator\n"))
9593 tracepoint_printk = 0;
9595 static_key_enable(&tracepoint_printk_key.key);
9597 tracer_alloc_buffers();
/* Later init stage; body elided from this excerpt. */
9600 void __init trace_init(void)
9605 __init static int clear_boot_tracer(void)
9608 * The default tracer at boot buffer is an init section.
9609 * This function is called in lateinit. If we did not
9610 * find the boot tracer, then clear it out, to prevent
9611 * later registration from accessing the buffer that is
9612 * about to be freed.
9614 if (!default_bootup_tracer)
9617 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9618 default_bootup_tracer);
9619 default_bootup_tracer = NULL;
/* tracefs files at fs_initcall; boot-tracer cleanup after all initcalls. */
9624 fs_initcall(tracer_init_tracefs);
9625 late_initcall_sync(clear_boot_tracer);
9627 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Late-init: if sched_clock turned out unstable and no boot clock was
 * chosen, switch the default trace clock to "global" (unless tracefs is
 * locked down), warning the user how to keep the local clock.
 */
9628 __init static int tracing_set_default_clock(void)
9630 /* sched_clock_stable() is determined in late_initcall */
9631 if (!trace_boot_clock && !sched_clock_stable()) {
9632 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9633 pr_warn("Can not set tracing clock due to lockdown\n");
9638 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9639 "If you want to keep using the local clock, then add:\n"
9640 " \"trace_clock=local\"\n"
9641 "on the kernel command line\n");
9642 tracing_set_clock(&global_trace, "global");
9647 late_initcall_sync(tracing_set_default_clock);