locking/lock_events: Make lock_events available for all archs & other locks
author Waiman Long <longman@redhat.com>
Thu, 4 Apr 2019 17:43:17 +0000 (13:43 -0400)
committer Ingo Molnar <mingo@kernel.org>
Wed, 10 Apr 2019 08:56:04 +0000 (10:56 +0200)
The QUEUED_LOCK_STAT option to report queued spinlock event counts
was previously allowed only on the x86 architecture. To make the locking
event counting code more useful, it is now renamed to the more generic
LOCK_EVENT_COUNTS config option. This new option is available to
all the architectures that currently use qspinlock.

Other locking code can now start to use the generic locking event
counting code by including lock_events.h and putting the new locking
event names into the lock_events_list.h header file.

My experience with lock event counting is that it gives valuable insight
into how the locking code works and what can be done to make it better. I
would like to extend this benefit to other locking code like mutex and
rwsem in the near future.

The PV qspinlock specific code will stay in qspinlock_stat.h. The
locking event counters will now reside in the <debugfs>/lock_event_counts
directory.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/20190404174320.22416-9-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/Kconfig
arch/x86/Kconfig
kernel/locking/Makefile
kernel/locking/lock_events.c [new file with mode: 0644]
kernel/locking/lock_events.h
kernel/locking/qspinlock_stat.h

index 33687dddd86a7e04dfa7e7829788b4a0442ae61a..28c0f1ad80d750fe3d29c96556c05a3dec823e81 100644 (file)
@@ -901,6 +901,16 @@ config HAVE_ARCH_PREL32_RELOCATIONS
 config ARCH_USE_MEMREMAP_PROT
        bool
 
+config LOCK_EVENT_COUNTS
+       bool "Locking event counts collection"
+       depends on DEBUG_FS
+       depends on QUEUED_SPINLOCKS
+       ---help---
+         Enable light-weight counting of various locking related events
+         in the system with minimal performance impact. This reduces
+         the chance of application behavior change because of timing
+         differences. The counts are reported via debugfs.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
index 84b184e016cd47eccf57a7907d50a59ce8088bfc..7d160f58a8f68e566ada7235a394fb983dca2355 100644 (file)
@@ -780,14 +780,6 @@ config PARAVIRT_SPINLOCKS
 
          If you are unsure how to answer this question, answer Y.
 
-config QUEUED_LOCK_STAT
-       bool "Paravirt queued spinlock statistics"
-       depends on PARAVIRT_SPINLOCKS && DEBUG_FS
-       ---help---
-         Enable the collection of statistical data on the slowpath
-         behavior of paravirtualized queued spinlocks and report
-         them on debugfs.
-
 source "arch/x86/xen/Kconfig"
 
 config KVM_GUEST
index 1af83e9ce57d5f4b3c685998e6197ac5141955f5..6fe2f333aecb5a541eb0f78b21263316974161e5 100644 (file)
@@ -28,3 +28,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
 obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
+obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
new file mode 100644 (file)
index 0000000..71c36d1
--- /dev/null
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Waiman Long <waiman.long@hpe.com>
+ */
+
+/*
+ * Collect locking event counts
+ */
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/fs.h>
+
+#include "lock_events.h"
+
+#undef  LOCK_EVENT
+#define LOCK_EVENT(name)       [LOCKEVENT_ ## name] = #name,
+
+#define LOCK_EVENTS_DIR                "lock_event_counts"
+
+/*
+ * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
+ * types of locks will be reported under the <debugfs>/lock_event_counts/
+ * directory. See lock_events_list.h for the list of available locking
+ * events.
+ *
+ * Writing to the special ".reset_counts" file will reset all the above
+ * locking event counts. This is a very slow operation and so should not
+ * be done frequently.
+ *
+ * These event counts are implemented as per-cpu variables which are
+ * summed and computed whenever the corresponding debugfs files are read. This
+ * minimizes added overhead making the counts usable even in a production
+ * environment.
+ *
+ * The table below maps each event ID to its debugfs file name. LOCK_EVENT()
+ * is redefined just above to expand to a designated initializer of the
+ * form [LOCKEVENT_<name>] = "<name>", so including lock_events_list.h
+ * here fills in one entry per event (presumably that header consists
+ * solely of LOCK_EVENT() invocations - confirm against the header).
+ * The table has lockevent_num + 1 slots; the extra last slot, indexed by
+ * LOCKEVENT_reset_cnts, names the reset control file.
+ */
+static const char * const lockevent_names[lockevent_num + 1] = {
+
+#include "lock_events_list.h"
+
+       [LOCKEVENT_reset_cnts] = ".reset_counts",
+};
+
+/*
+ * Per-cpu counts
+ *
+ * Every CPU owns an array of lockevent_num counters, one per event ID.
+ * The definition is not static; presumably lock_events.h carries the
+ * matching declaration used by the lockevent_inc()/lockevent_add()
+ * helpers - confirm against the header.
+ */
+DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
+
+/*
+ * Default read handler for the event count files.
+ *
+ * Adds up the selected event's per-cpu counters over all possible CPUs
+ * and passes the total to user space as a decimal number followed by a
+ * newline. The function is declared __weak so that a strong definition
+ * elsewhere (e.g. the PV qspinlock variant in qspinlock_stat.h) takes
+ * its place.
+ *
+ * Returns -EBADF when the ID is at or beyond lockevent_num (such as the
+ * reset file's ID), otherwise the result of simple_read_from_buffer().
+ */
+ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
+                             size_t count, loff_t *ppos)
+{
+       /* The counter ID is kept in file->f_inode->i_private */
+       int id = (long)file_inode(file)->i_private;
+       u64 total = 0;
+       char buf[64];
+       int cpu, len;
+
+       if (id >= lockevent_num)
+               return -EBADF;
+
+       for_each_possible_cpu(cpu)
+               total += per_cpu(lockevents[id], cpu);
+
+       len = snprintf(buf, sizeof(buf) - 1, "%llu\n", total);
+       return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+/*
+ * Write handler shared by all the event count files.
+ *
+ * Only a write to the file whose ID is LOCKEVENT_reset_cnts has any
+ * effect: it zeroes every event counter on every possible CPU. Writes
+ * to any other file are accepted and ignored. The data written is never
+ * looked at; the full count is always reported as consumed.
+ */
+static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
+                          size_t count, loff_t *ppos)
+{
+       /* The counter ID is kept in file->f_inode->i_private */
+       long id = (long)file_inode(file)->i_private;
+       int cpu, ev;
+
+       if (id == LOCKEVENT_reset_cnts) {
+               for_each_possible_cpu(cpu) {
+                       unsigned long *counts = per_cpu_ptr(lockevents, cpu);
+
+                       for (ev = 0; ev < lockevent_num; ev++)
+                               WRITE_ONCE(counts[ev], 0);
+               }
+       }
+       return count;
+}
+
+/*
+ * File operations shared by every file in the lock event debugfs
+ * directory, i.e. both the per-event count files and the reset file.
+ */
+static const struct file_operations fops_lockevent = {
+       .llseek = default_llseek,
+       .read = lockevent_read,
+       .write = lockevent_write,
+};
+
+/*
+ * Initialize debugfs for the locking event counts.
+ *
+ * Creates the LOCK_EVENTS_DIR directory at the debugfs root, one
+ * read-only file per locking event and the write-only reset file. Each
+ * file is created with its event ID as the data pointer, which the
+ * read/write handlers fetch back from the inode's i_private.
+ *
+ * Returns 0 on success. If any entry cannot be created, whatever was
+ * created so far is removed again, a warning is printed and -ENOMEM is
+ * returned.
+ */
+static int __init init_lockevent_counts(void)
+{
+       struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
+       struct dentry *d;
+       int i;
+
+       /*
+        * The debugfs creation helpers report failure with an ERR_PTR()
+        * value on current kernels (NULL on older ones), so a plain NULL
+        * test would let errors slip through. IS_ERR_OR_NULL() catches
+        * both conventions.
+        */
+       if (IS_ERR_OR_NULL(d_counts))
+               goto out;
+
+       /*
+        * Create the debugfs files
+        *
+        * As reading from and writing to the stat files can be slow, only
+        * root is allowed to do the read/write to limit impact to system
+        * performance.
+        */
+       for (i = 0; i < lockevent_num; i++) {
+               d = debugfs_create_file(lockevent_names[i], 0400, d_counts,
+                                       (void *)(long)i, &fops_lockevent);
+               if (IS_ERR_OR_NULL(d))
+                       goto fail_undo;
+       }
+
+       d = debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
+                               d_counts, (void *)(long)LOCKEVENT_reset_cnts,
+                               &fops_lockevent);
+       if (IS_ERR_OR_NULL(d))
+               goto fail_undo;
+
+       return 0;
+fail_undo:
+       debugfs_remove_recursive(d_counts);
+out:
+       pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
+       return -ENOMEM;
+}
+fs_initcall(init_lockevent_counts);
index 4009e07b474a8916c85431f8fa325dafd26a42b0..feb1acc5461111d3b32fde5d962a11278e82c991 100644 (file)
@@ -13,6 +13,9 @@
  * Authors: Waiman Long <longman@redhat.com>
  */
 
+#ifndef __LOCKING_LOCK_EVENTS_H
+#define __LOCKING_LOCK_EVENTS_H
+
 enum lock_events {
 
 #include "lock_events_list.h"
@@ -21,7 +24,7 @@ enum lock_events {
        LOCKEVENT_reset_cnts = lockevent_num,
 };
 
-#ifdef CONFIG_QUEUED_LOCK_STAT
+#ifdef CONFIG_LOCK_EVENT_COUNTS
 /*
  * Per-cpu counters
  */
@@ -46,10 +49,11 @@ static inline void __lockevent_add(enum lock_events event, int inc)
 
 #define lockevent_add(ev, c)   __lockevent_add(LOCKEVENT_ ##ev, c)
 
-#else  /* CONFIG_QUEUED_LOCK_STAT */
+#else  /* CONFIG_LOCK_EVENT_COUNTS */
 
 #define lockevent_inc(ev)
 #define lockevent_add(ev, c)
 #define lockevent_cond_inc(ev, c)
 
-#endif /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_LOCK_EVENT_COUNTS */
+#endif /* __LOCKING_LOCK_EVENTS_H */
index 1db5b375fcf49e4cb55701cad91ca4efbd5e2b54..54152670ff2489fd7997e6505f26da957c35b492 100644 (file)
@@ -9,76 +9,29 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
  */
 
-/*
- * When queued spinlock statistical counters are enabled, the following
- * debugfs files will be created for reporting the counter values:
- *
- * <debugfs>/qlockstat/
- *   pv_hash_hops      - average # of hops per hashing operation
- *   pv_kick_unlock    - # of vCPU kicks issued at unlock time
- *   pv_kick_wake      - # of vCPU kicks used for computing pv_latency_wake
- *   pv_latency_kick   - average latency (ns) of vCPU kick operation
- *   pv_latency_wake   - average latency (ns) from vCPU kick to wakeup
- *   pv_lock_stealing  - # of lock stealing operations
- *   pv_spurious_wakeup        - # of spurious wakeups in non-head vCPUs
- *   pv_wait_again     - # of wait's after a queue head vCPU kick
- *   pv_wait_early     - # of early vCPU wait's
- *   pv_wait_head      - # of vCPU wait's at the queue head
- *   pv_wait_node      - # of vCPU wait's at a non-head queue node
- *   lock_pending      - # of locking operations via pending code
- *   lock_slowpath     - # of locking operations via MCS lock queue
- *   lock_use_node2    - # of locking operations that use 2nd per-CPU node
- *   lock_use_node3    - # of locking operations that use 3rd per-CPU node
- *   lock_use_node4    - # of locking operations that use 4th per-CPU node
- *   lock_no_node      - # of locking operations without using per-CPU node
- *
- * Subtracting lock_use_node[234] from lock_slowpath will give you
- * lock_use_node1.
- *
- * Writing to the special ".reset_counts" file will reset all the above
- * counter values.
- *
- * These statistical counters are implemented as per-cpu variables which are
- * summed and computed whenever the corresponding debugfs files are read. This
- * minimizes added overhead making the counters usable even in a production
- * environment.
- *
- * There may be slight difference between pv_kick_wake and pv_kick_unlock.
- */
 #include "lock_events.h"
 
-#ifdef CONFIG_QUEUED_LOCK_STAT
+#ifdef CONFIG_LOCK_EVENT_COUNTS
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 /*
- * Collect pvqspinlock statistics
+ * Collect pvqspinlock locking event counts
  */
-#include <linux/debugfs.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/fs.h>
 
 #define EVENT_COUNT(ev)        lockevents[LOCKEVENT_ ## ev]
 
-#undef  LOCK_EVENT
-#define LOCK_EVENT(name)       [LOCKEVENT_ ## name] = #name,
-
-static const char * const lockevent_names[lockevent_num + 1] = {
-
-#include "lock_events_list.h"
-
-       [LOCKEVENT_reset_cnts] = ".reset_counts",
-};
-
 /*
- * Per-cpu counters
+ * PV specific per-cpu counter
  */
-DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
 static DEFINE_PER_CPU(u64, pv_kick_time);
 
 /*
- * Function to read and return the qlock statistical counter values
+ * Function to read and return the PV qspinlock counts.
  *
  * The following counters are handled specially:
  * 1. pv_latency_kick
@@ -88,8 +41,8 @@ static DEFINE_PER_CPU(u64, pv_kick_time);
  * 3. pv_hash_hops
  *    Average hops/hash = pv_hash_hops/pv_kick_unlock
  */
-static ssize_t lockevent_read(struct file *file, char __user *user_buf,
-                             size_t count, loff_t *ppos)
+ssize_t lockevent_read(struct file *file, char __user *user_buf,
+                      size_t count, loff_t *ppos)
 {
        char buf[64];
        int cpu, id, len;
@@ -149,78 +102,6 @@ static ssize_t lockevent_read(struct file *file, char __user *user_buf,
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
-/*
- * Function to handle write request
- *
- * When id = .reset_cnts, reset all the counter values.
- */
-static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
-                          size_t count, loff_t *ppos)
-{
-       int cpu;
-
-       /*
-        * Get the counter ID stored in file->f_inode->i_private
-        */
-       if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
-               return count;
-
-       for_each_possible_cpu(cpu) {
-               int i;
-               unsigned long *ptr = per_cpu_ptr(lockevents, cpu);
-
-               for (i = 0 ; i < lockevent_num; i++)
-                       WRITE_ONCE(ptr[i], 0);
-       }
-       return count;
-}
-
-/*
- * Debugfs data structures
- */
-static const struct file_operations fops_lockevent = {
-       .read = lockevent_read,
-       .write = lockevent_write,
-       .llseek = default_llseek,
-};
-
-/*
- * Initialize debugfs for the qspinlock statistical counters
- */
-static int __init init_qspinlock_stat(void)
-{
-       struct dentry *d_counts = debugfs_create_dir("qlockstat", NULL);
-       int i;
-
-       if (!d_counts)
-               goto out;
-
-       /*
-        * Create the debugfs files
-        *
-        * As reading from and writing to the stat files can be slow, only
-        * root is allowed to do the read/write to limit impact to system
-        * performance.
-        */
-       for (i = 0; i < lockevent_num; i++)
-               if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
-                                        (void *)(long)i, &fops_lockevent))
-                       goto fail_undo;
-
-       if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
-                                d_counts, (void *)(long)LOCKEVENT_reset_cnts,
-                                &fops_lockevent))
-               goto fail_undo;
-
-       return 0;
-fail_undo:
-       debugfs_remove_recursive(d_counts);
-out:
-       pr_warn("Could not create 'qlockstat' debugfs entries\n");
-       return -ENOMEM;
-}
-fs_initcall(init_qspinlock_stat);
-
 /*
  * PV hash hop count
  */
@@ -260,8 +141,10 @@ static inline void __pv_wait(u8 *ptr, u8 val)
 #define pv_kick(c)     __pv_kick(c)
 #define pv_wait(p, v)  __pv_wait(p, v)
 
-#else /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_LOCK_EVENT_COUNTS */
 
 static inline void lockevent_pv_hop(int hopcnt)        { }
 
-#endif /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_LOCK_EVENT_COUNTS */