perf c2c: Add report option to show false sharing in adjacent cachelines

author Feng Tang <feng.tang@intel.com>

Tue, 14 Feb 2023 07:58:23 +0000 (15:58 +0800)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Thu, 16 Feb 2023 12:33:45 +0000 (09:33 -0300)
author Feng Tang <feng.tang@intel.com>
Tue, 14 Feb 2023 07:58:23 +0000 (15:58 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 16 Feb 2023 12:33:45 +0000 (09:33 -0300)
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt

index 4e8c263..856f0df 100644 (file)
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -130,6 +130,12 @@ REPORT OPTIONS
         The known limitations include exception handing such as
         setjmp/longjmp will have calls/returns not match.
  
+--double-cl::
+       Group the detection of shared cacheline events into double cacheline
+       granularity. Some architectures have an Adjacent Cacheline Prefetch
+       feature, which causes cacheline sharing to behave as if the cacheline
+       size were doubled.
+
  C2C RECORD
  ----------
  The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c

index 52d94c7..56974ea 100644 (file)
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -524,7 +524,7 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
         char buf[20];
  
         if (he->mem_info)
-               addr = cl_address(he->mem_info->daddr.addr);
+               addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
  
         return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
  }
@@ -562,7 +562,7 @@ static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
         char buf[20];
  
         if (he->mem_info)
-               addr = cl_offset(he->mem_info->daddr.al_addr);
+               addr = cl_offset(he->mem_info->daddr.al_addr, chk_double_cl);
  
         return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
  }
@@ -574,9 +574,10 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
         uint64_t l = 0, r = 0;
  
         if (left->mem_info)
-               l = cl_offset(left->mem_info->daddr.addr);
+               l = cl_offset(left->mem_info->daddr.addr, chk_double_cl);
+
         if (right->mem_info)
-               r = cl_offset(right->mem_info->daddr.addr);
+               r = cl_offset(right->mem_info->daddr.addr, chk_double_cl);
  
         return (int64_t)(r - l);
  }
@@ -2590,7 +2591,7 @@ perf_c2c_cacheline_browser__title(struct hist_browser *browser,
         he = cl_browser->he;
  
         if (he->mem_info)
-               addr = cl_address(he->mem_info->daddr.addr);
+               addr = cl_address(he->mem_info->daddr.addr, chk_double_cl);
  
         scnprintf(bf, size, "Cacheline 0x%lx", addr);
         return 0;
@@ -2788,15 +2789,16 @@ static int ui_quirks(void)
         if (!c2c.use_stdio) {
                 dim_offset.width  = 5;
                 dim_offset.header = header_offset_tui;
-               nodestr = "CL";
+               nodestr = chk_double_cl ? "Double-CL" : "CL";
         }
  
         dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display];
  
         /* Fix the zero line for dcacheline column. */
-       buf = fill_line("Cacheline", dim_dcacheline.width +
-                                    dim_dcacheline_node.width +
-                                    dim_dcacheline_count.width + 4);
+       buf = fill_line(chk_double_cl ? "Double-Cacheline" : "Cacheline",
+                               dim_dcacheline.width +
+                               dim_dcacheline_node.width +
+                               dim_dcacheline_count.width + 4);
         if (!buf)
                 return -ENOMEM;
  
@@ -3037,6 +3039,7 @@ static int perf_c2c__report(int argc, const char **argv)
         OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
         OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
                     "Enable LBR callgraph stitching approach"),
+       OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"),
         OPT_PARENT(c2c_options),
         OPT_END()
         };
diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h

index dec8c0f..fe6d5b6 100644 (file)
--- a/tools/perf/util/cacheline.h
+++ b/tools/perf/util/cacheline.h
@@ -6,16 +6,31 @@
  
  int __pure cacheline_size(void);
  
-static inline u64 cl_address(u64 address)
+
+/*
+ * Some architectures have an 'Adjacent Cacheline Prefetch' feature,
+ * which behaves as if the cacheline size were doubled.
+ */
+static inline u64 cl_address(u64 address, bool double_cl)
  {
+       u64 size = cacheline_size();
+
+       if (double_cl)
+               size *= 2;
+
         /* return the cacheline of the address */
-       return (address & ~(cacheline_size() - 1));
+       return (address & ~(size - 1));
  }
  
-static inline u64 cl_offset(u64 address)
+static inline u64 cl_offset(u64 address, bool double_cl)
  {
-       /* return the cacheline of the address */
-       return (address & (cacheline_size() - 1));
+       u64 size = cacheline_size();
+
+       if (double_cl)
+               size *= 2;
+
+       /* return the offset inside cacheline */
+       return (address & (size - 1));
  }
  
  #endif // PERF_CACHELINE_H
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c

index 4a64823..093a0c8 100644 (file)
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -54,6 +54,13 @@ static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local
  static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"};
  
  /*
+ * Some architectures have an Adjacent Cacheline Prefetch feature, which
+ * behaves as if the cacheline size were doubled. Enable this flag to
+ * check things in double cacheline granularity.
+ */
+bool chk_double_cl;
+
+/*
   * Replaces all occurrences of a char used with the:
   *
   * -t, --field-separator
@@ -1500,8 +1507,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
  
  addr:
         /* al_addr does all the right addr - start + offset calculations */
-       l = cl_address(left->mem_info->daddr.al_addr);
-       r = cl_address(right->mem_info->daddr.al_addr);
+       l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl);
+       r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl);
  
         if (l > r) return -1;
         if (l < r) return 1;
@@ -1520,7 +1527,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
         if (he->mem_info) {
                 struct map *map = he->mem_info->daddr.ms.map;
  
-               addr = cl_address(he->mem_info->daddr.al_addr);
+               addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl);
                 ms = &he->mem_info->daddr.ms;
  
                 /* print [s] for shared data mmaps */
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h

index 9a91d0d..d79a100 100644 (file)
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -35,6 +35,7 @@ extern struct sort_entry sort_sym_from;
  extern struct sort_entry sort_sym_to;
  extern struct sort_entry sort_srcline;
  extern const char default_mem_sort_order[];
+extern bool chk_double_cl;
  
  struct res_sample {
         u64 time;
author	Feng Tang <feng.tang@intel.com>
	Tue, 14 Feb 2023 07:58:23 +0000 (15:58 +0800)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Thu, 16 Feb 2023 12:33:45 +0000 (09:33 -0300)
tools/perf/Documentation/perf-c2c.txt		patch \| blob \| history
tools/perf/builtin-c2c.c		patch \| blob \| history
tools/perf/util/cacheline.h		patch \| blob \| history
tools/perf/util/sort.c		patch \| blob \| history
tools/perf/util/sort.h		patch \| blob \| history