Merge tag 'probes-v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)
diff --combined Documentation/trace/kprobetrace.rst

index 0d10307,ef223b8..651f9ab
--- 1/Documentation/trace/kprobetrace.rst
--- 2/Documentation/trace/kprobetrace.rst
+++ b/Documentation/trace/kprobetrace.rst
@@@ -6,21 -6,21 +6,21 @@@ Kprobe-based Event Tracin
   
   Overview
   --------
- -These events are similar to tracepoint based events. Instead of Tracepoint,
+ +These events are similar to tracepoint-based events. Instead of tracepoints,
   this is based on kprobes (kprobe and kretprobe). So it can probe wherever
   kprobes can probe (this means, all functions except those with
   __kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
- -Unlike the Tracepoint based event, this can be added and removed
+ +Unlike the tracepoint-based event, this can be added and removed
   dynamically, on the fly.
   
   To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
   
- -Similar to the events tracer, this doesn't need to be activated via
+ +Similar to the event tracer, this doesn't need to be activated via
   current_tracer. Instead of that, add probe points via
- -/sys/kernel/debug/tracing/kprobe_events, and enable it via
- -/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enable.
+ +/sys/kernel/tracing/kprobe_events, and enable it via
+ +/sys/kernel/tracing/events/kprobes/<EVENT>/enable.
   
- -You can also use /sys/kernel/debug/tracing/dynamic_events instead of
+ +You can also use /sys/kernel/tracing/dynamic_events instead of
   kprobe_events. That interface will provide unified access to other
   dynamic events too.
   
@@@ -58,7 -58,7 +58,7 @@@ Synopsis of kprobe_event
     NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
     FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
                   (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
-                 (x8/x16/x32/x64), "string", "ustring", "symbol", "symstr"
+                 (x8/x16/x32/x64), "char", "string", "ustring", "symbol", "symstr"
                     and bitfield are supported.
   
     (\*1) only for the probe on function entry (offs == 0).
@@@ -68,25 -68,23 +68,27 @@@
   
   Types
   -----
- -Several types are supported for fetch-args. Kprobe tracer will access memory
+ +Several types are supported for fetchargs. Kprobe tracer will access memory
   by the given type. Prefixes 's' and 'u' mean those types are signed and unsigned
   respectively. 'x' prefix implies it is unsigned. Traced arguments are shown
   in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
   or 'x64' is used depending on the architecture (e.g. x86-32 uses x32, and
   x86-64 uses x64).
+ +
   These value types can be an array. To record array data, you can add '[N]'
   (where N is a fixed number, less than 64) to the base type.
- -E.g. 'x16[4]' means an array of x16 (2bytes hex) with 4 elements.
+ +E.g. 'x16[4]' means an array of x16 (2-byte hex) with 4 elements.
   Note that the array can be applied only to memory type fetchargs; you cannot
   apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is
   wrong, but '+8($stack):x8[8]' is OK.)
+ +
+ Char type can be used to show the character value of traced arguments.
++
   String type is a special type, which fetches a "null-terminated" string from
   kernel space. This means it will fail and store NULL if the string container
   has been paged out. "ustring" type is an alternative of string for user-space.
- -See :ref:`user_mem_access` for more info..
+ +See :ref:`user_mem_access` for more info.
+ +
   The string array type is a bit different from other types. For other base
   types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is the same
   as +0(%di):x32.) But string[1] is not equal to string. The string type itself
@@@ -123,8 -121,8 +125,8 @@@ space. 'ustring' is a shortcut way of p
   
   Note that kprobe-event provides the user-memory access syntax but it doesn't
   use it transparently. This means if you use normal dereference or string type
- -for user memory, it might fail, and may always fail on some archs. The user
- -has to carefully check if the target data is in kernel or user space.
+ +for user memory, it might fail, and may always fail on some architectures. The
+ +user has to carefully check if the target data is in kernel or user space.
   
   Per-Probe Event Filtering
   -------------------------
@@@ -153,7 -151,7 +155,7 @@@ trigger
   Event Profiling
   ---------------
   You can check the total number of probe hits and probe miss-hits via
- -/sys/kernel/debug/tracing/kprobe_profile.
+ +/sys/kernel/tracing/kprobe_profile.
   The first column is event name, the second is the number of probe hits,
   the third is the number of probe miss-hits.
   
@@@ -163,11 -161,11 +165,11 @@@ You can add and enable new kprobe event
   "kprobe_event=" parameter. The parameter accepts a semicolon-delimited list
   of kprobe events, whose format is similar to that of kprobe_events.
   The difference is that the probe definition parameters are comma-delimited
- -instead of space. For example, adding myprobe event on do_sys_open like below
+ +instead of space. For example, adding myprobe event on do_sys_open like below::
   
     p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
   
- -should be below for kernel boot parameter (just replace spaces with comma)
+ +should be below for kernel boot parameter (just replace spaces with comma)::
   
     p:myprobe,do_sys_open,dfd=%ax,filename=%dx,flags=%cx,mode=+4($stack)
   
@@@ -177,7 -175,7 +179,7 @@@ Usage example
   To add a probe as a new event, write a new definition to kprobe_events
   as below::
   
- -  echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
+ +  echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/tracing/kprobe_events
   
   This sets a kprobe on the top of do_sys_open() function with recording
   1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
@@@ -187,15 -185,15 +189,15 @@@ under tools/perf/)
   As this example shows, users can choose more familiar names for each argument.
   ::
   
- -  echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
+ +  echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/tracing/kprobe_events
   
   This sets a kretprobe on the return point of do_sys_open() function with
   recording return value as "myretprobe" event.
   You can see the format of these events via
- -/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
+ +/sys/kernel/tracing/events/kprobes/<EVENT>/format.
   ::
   
- -  cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+ +  cat /sys/kernel/tracing/events/kprobes/myprobe/format
     name: myprobe
     ID: 780
     format:
@@@ -218,7 -216,7 +220,7 @@@
   You can see that the event has 4 arguments as in the expressions you specified.
   ::
   
- -  echo > /sys/kernel/debug/tracing/kprobe_events
+ +  echo > /sys/kernel/tracing/kprobe_events
   
   This clears all probe points.
   
@@@ -233,8 -231,8 +235,8 @@@ Right after definition, each event is d
   events, you need to enable it.
   ::
   
- -  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
- -  echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
+ +  echo 1 > /sys/kernel/tracing/events/kprobes/myprobe/enable
+ +  echo 1 > /sys/kernel/tracing/events/kprobes/myretprobe/enable
   
   Use the following command to start tracing in an interval.
   ::
@@@ -243,10 -241,10 +245,10 @@@
       Open something...
       # echo 0 > tracing_on
   
- -And you can see the traced information via /sys/kernel/debug/tracing/trace.
+ +And you can see the traced information via /sys/kernel/tracing/trace.
   ::
   
- -  cat /sys/kernel/debug/tracing/trace
+ +  cat /sys/kernel/tracing/trace
     # tracer: nop
     #
     #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
diff --combined kernel/trace/trace.c

index 0fa59ac,712ba8d..45551c7
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -49,8 -49,6 +49,8 @@@
   #include <linux/irq_work.h>
   #include <linux/workqueue.h>
   
+ +#include <asm/setup.h> /* COMMAND_LINE_SIZE */
+ +
   #include "trace.h"
   #include "trace_output.h"
   
@@@ -188,12 -186,6 +188,12 @@@ static char *default_bootup_tracer
   static bool allocate_snapshot;
   static bool snapshot_at_boot;
   
+ +static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
+ +static int boot_instance_index;
+ +
+ +static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
+ +static int boot_snapshot_index;
+ +
   static int __init set_cmdline_ftrace(char *str)
   {
         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
@@@ -230,22 -222,9 +230,22 @@@ __setup("traceoff_on_warning", stop_tra
   
   static int __init boot_alloc_snapshot(char *str)
   {
- -      allocate_snapshot = true;
- -      /* We also need the main ring buffer expanded */
- -      ring_buffer_expanded = true;
+ +      char *slot = boot_snapshot_info + boot_snapshot_index;
+ +      int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
+ +      int ret;
+ +
+ +      if (str[0] == '=') {
+ +              str++;
+ +              if (strlen(str) >= left)
+ +                      return -1;
+ +
+ +              ret = snprintf(slot, left, "%s\t", str);
+ +              boot_snapshot_index += ret;
+ +      } else {
+ +              allocate_snapshot = true;
+ +              /* We also need the main ring buffer expanded */
+ +              ring_buffer_expanded = true;
+ +      }
         return 1;
   }
   __setup("alloc_snapshot", boot_alloc_snapshot);
@@@ -260,23 -239,6 +260,23 @@@ static int __init boot_snapshot(char *s
   __setup("ftrace_boot_snapshot", boot_snapshot);
   
   
+ +static int __init boot_instance(char *str)
+ +{
+ +      char *slot = boot_instance_info + boot_instance_index;
+ +      int left = sizeof(boot_instance_info) - boot_instance_index;
+ +      int ret;
+ +
+ +      if (strlen(str) >= left)
+ +              return -1;
+ +
+ +      ret = snprintf(slot, left, "%s\t", str);
+ +      boot_instance_index += ret;
+ +
+ +      return 1;
+ +}
+ +__setup("trace_instance=", boot_instance);
+ +
+ +
   static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
   
   static int __init set_trace_boot_options(char *str)
@@@ -1039,8 -1001,13 +1039,8 @@@ __buffer_unlock_commit(struct trace_buf
                 ring_buffer_unlock_commit(buffer);
   }
   
- -/**
- - * __trace_puts - write a constant string into the trace buffer.
- - * @ip:          The address of the caller
- - * @str:   The constant string to write
- - * @size:  The size of the string.
- - */
- -int __trace_puts(unsigned long ip, const char *str, int size)
+ +int __trace_array_puts(struct trace_array *tr, unsigned long ip,
+ +                     const char *str, int size)
   {
         struct ring_buffer_event *event;
         struct trace_buffer *buffer;
@@@ -1048,7 -1015,7 +1048,7 @@@
         unsigned int trace_ctx;
         int alloc;
   
- -      if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
+ +      if (!(tr->trace_flags & TRACE_ITER_PRINTK))
                 return 0;
   
         if (unlikely(tracing_selftest_running || tracing_disabled))
@@@ -1057,7 -1024,7 +1057,7 @@@
         alloc = sizeof(*entry) + size + 2; /* possible \n added */
   
         trace_ctx = tracing_gen_ctx();
- -      buffer = global_trace.array_buffer.buffer;
+ +      buffer = tr->array_buffer.buffer;
         ring_buffer_nest_start(buffer);
         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
                                             trace_ctx);
@@@ -1079,23 -1046,11 +1079,23 @@@
                 entry->buf[size] = '\0';
   
         __buffer_unlock_commit(buffer, event);
- -      ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
+ +      ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
    out:
         ring_buffer_nest_end(buffer);
         return size;
   }
+ +EXPORT_SYMBOL_GPL(__trace_array_puts);
+ +
+ +/**
+ + * __trace_puts - write a constant string into the trace buffer.
+ + * @ip:          The address of the caller
+ + * @str:   The constant string to write
+ + * @size:  The size of the string.
+ + */
+ +int __trace_puts(unsigned long ip, const char *str, int size)
+ +{
+ +      return __trace_array_puts(&global_trace, ip, str, size);
+ +}
   EXPORT_SYMBOL_GPL(__trace_puts);
   
   /**
@@@ -1187,7 -1142,7 +1187,7 @@@ void tracing_snapshot_instance(struct t
    *
    * Note, make sure to allocate the snapshot with either
    * a tracing_snapshot_alloc(), or by doing it manually
- - * with: echo 1 > /sys/kernel/debug/tracing/snapshot
+ + * with: echo 1 > /sys/kernel/tracing/snapshot
    *
    * If the snapshot buffer is not allocated, it will stop tracing.
    * Basically making a permanent snapshot.
@@@ -3173,9 -3128,6 +3173,9 @@@ void __trace_stack(struct trace_array *
                 return;
         }
   
+ +      if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
+ +              return;
+ +
         /*
          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
          * but if the above rcu_is_watching() failed, then the NMI
@@@ -5646,7 -5598,7 +5646,7 @@@ static const char readme_msg[] 
   #ifdef CONFIG_HIST_TRIGGERS
         "\t           s:[synthetic/]<event> <field> [<field>]\n"
   #endif
-       "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
+       "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
         "\t           -:[<group>/][<event>]\n"
   #ifdef CONFIG_KPROBE_EVENTS
         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
@@@ -5663,7 -5615,7 +5663,7 @@@
         "\t           $stack<index>, $stack, $retval, $comm,\n"
   #endif
         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
-       "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+       "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
         "\t           symstr, <type>\\[<array-size>\\]\n"
   #ifdef CONFIG_HIST_TRIGGERS
@@@ -5805,7 -5757,7 +5805,7 @@@
   #ifdef CONFIG_SYNTH_EVENTS
         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
         "\t  Write into this file to define/undefine new synthetic events.\n"
- -      "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
+ +      "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
   #endif
   #endif
   ;
@@@ -9270,6 -9222,10 +9270,6 @@@ static int allocate_trace_buffers(struc
         }
         tr->allocated_snapshot = allocate_snapshot;
   
- -      /*
- -       * Only the top level trace array gets its snapshot allocated
- -       * from the kernel command line.
- -       */
         allocate_snapshot = false;
   #endif
   
@@@ -10185,79 -10141,6 +10185,79 @@@ out
         return ret;
   }
   
+ +#ifdef CONFIG_TRACER_MAX_TRACE
+ +__init static bool tr_needs_alloc_snapshot(const char *name)
+ +{
+ +      char *test;
+ +      int len = strlen(name);
+ +      bool ret;
+ +
+ +      if (!boot_snapshot_index)
+ +              return false;
+ +
+ +      if (strncmp(name, boot_snapshot_info, len) == 0 &&
+ +          boot_snapshot_info[len] == '\t')
+ +              return true;
+ +
+ +      test = kmalloc(strlen(name) + 3, GFP_KERNEL);
+ +      if (!test)
+ +              return false;
+ +
+ +      sprintf(test, "\t%s\t", name);
+ +      ret = strstr(boot_snapshot_info, test) == NULL;
+ +      kfree(test);
+ +      return ret;
+ +}
+ +
+ +__init static void do_allocate_snapshot(const char *name)
+ +{
+ +      if (!tr_needs_alloc_snapshot(name))
+ +              return;
+ +
+ +      /*
+ +       * When allocate_snapshot is set, the next call to
+ +       * allocate_trace_buffers() (called by trace_array_get_by_name())
+ +       * will allocate the snapshot buffer. That will also clear
+ +       * this flag.
+ +       */
+ +      allocate_snapshot = true;
+ +}
+ +#else
+ +static inline void do_allocate_snapshot(const char *name) { }
+ +#endif
+ +
+ +__init static void enable_instances(void)
+ +{
+ +      struct trace_array *tr;
+ +      char *curr_str;
+ +      char *str;
+ +      char *tok;
+ +
+ +      /* A tab is always appended */
+ +      boot_instance_info[boot_instance_index - 1] = '\0';
+ +      str = boot_instance_info;
+ +
+ +      while ((curr_str = strsep(&str, "\t"))) {
+ +
+ +              tok = strsep(&curr_str, ",");
+ +
+ +              if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+ +                      do_allocate_snapshot(tok);
+ +
+ +              tr = trace_array_get_by_name(tok);
+ +              if (!tr) {
+ +                      pr_warn("Failed to create instance buffer %s\n", curr_str);
+ +                      continue;
+ +              }
+ +              /* Allow user space to delete it */
+ +              trace_array_put(tr);
+ +
+ +              while ((tok = strsep(&curr_str, ","))) {
+ +                      early_enable_events(tr, tok, true);
+ +              }
+ +      }
+ +}
+ +
   __init static int tracer_alloc_buffers(void)
   {
         int ring_buf_size;
@@@ -10391,19 -10274,10 +10391,19 @@@ out
   
   void __init ftrace_boot_snapshot(void)
   {
+ +      struct trace_array *tr;
+ +
         if (snapshot_at_boot) {
                 tracing_snapshot();
                 internal_trace_puts("** Boot snapshot taken **\n");
         }
+ +
+ +      list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ +              if (tr == &global_trace)
+ +                      continue;
+ +              trace_array_puts(tr, "** Boot snapshot taken **\n");
+ +              tracing_snapshot_instance(tr);
+ +      }
   }
   
   void __init early_trace_init(void)
@@@ -10425,9 -10299,6 +10425,9 @@@
   void __init trace_init(void)
   {
         trace_event_init();
+ +
+ +      if (boot_instance_index)
+ +              enable_instances();
   }
   
   __init static void clear_boot_tracer(void)
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Feb 2023 21:03:08 +0000 (13:03 -0800)
		1	2
Documentation/trace/kprobetrace.rst	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/trace/trace.c	patch \|	diff1 \|	diff2 \|	blob \| history