perf mem: Add statistics for peer snooping
author Leo Yan <leo.yan@linaro.org>
Thu, 11 Aug 2022 06:24:40 +0000 (14:24 +0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 11 Aug 2022 22:12:12 +0000 (19:12 -0300)
The flag PERF_MEM_SNOOPX_PEER has been added to support cache snooping
from a peer cache line; the snooped data can come from a peer core, a
peer cluster, or a remote NUMA node.

This patch adds statistics for the flag PERF_MEM_SNOOPX_PEER.  Note that
PERF_MEM_SNOOPX_PEER is treated as supplementary information that works
together with the cache level statistics.  Therefore, when the flag
PERF_MEM_SNOOPX_PEER is set, load operations are accounted in both the
cache level's metrics (e.g. ld_l2hit, ld_llchit, etc.) and the peer
related metrics.

So three new metrics are introduced: 'lcl_peer' counts local cache
accesses, 'rmt_peer' counts remote accesses (including remote DRAM and
any caches in a remote node), and 'tot_peer' is the sum of 'lcl_peer'
and 'rmt_peer'.

Reviewed-by: Ali Saidi <alisaidi@amazon.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Ali Saidi <alisaidi@amazon.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Like Xu <likexu@tencent.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Timothy Hayes <timothy.hayes@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20220811062451.435810-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h

index 5dca188..7648831 100644 (file)
@@ -525,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
        u64 op     = data_src->mem_op;
        u64 lvl    = data_src->mem_lvl;
        u64 snoop  = data_src->mem_snoop;
+       u64 snoopx = data_src->mem_snoopx;
        u64 lock   = data_src->mem_lock;
        u64 blk    = data_src->mem_blk;
        /*
@@ -544,6 +545,12 @@ do {                               \
        stats->tot_hitm++;      \
 } while (0)
 
+#define PEER_INC(__f)          \
+do {                           \
+       stats->__f++;           \
+       stats->tot_peer++;      \
+} while (0)
+
 #define P(a, b) PERF_MEM_##a##_##b
 
        stats->nr_entries++;
@@ -567,12 +574,20 @@ do {                              \
                        if (lvl & P(LVL, IO))  stats->ld_io++;
                        if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
                        if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
-                       if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+                       if (lvl & P(LVL, L2)) {
+                               stats->ld_l2hit++;
+
+                               if (snoopx & P(SNOOPX, PEER))
+                                       PEER_INC(lcl_peer);
+                       }
                        if (lvl & P(LVL, L3 )) {
                                if (snoop & P(SNOOP, HITM))
                                        HITM_INC(lcl_hitm);
                                else
                                        stats->ld_llchit++;
+
+                               if (snoopx & P(SNOOPX, PEER))
+                                       PEER_INC(lcl_peer);
                        }
 
                        if (lvl & P(LVL, LOC_RAM)) {
@@ -597,10 +612,14 @@ do {                              \
                if ((lvl & P(LVL, REM_CCE1)) ||
                    (lvl & P(LVL, REM_CCE2)) ||
                     mrem) {
-                       if (snoop & P(SNOOP, HIT))
+                       if (snoop & P(SNOOP, HIT)) {
                                stats->rmt_hit++;
-                       else if (snoop & P(SNOOP, HITM))
+                       } else if (snoop & P(SNOOP, HITM)) {
                                HITM_INC(rmt_hitm);
+                       } else if (snoopx & P(SNOOPX, PEER)) {
+                               stats->rmt_hit++;
+                               PEER_INC(rmt_peer);
+                       }
                }
 
                if ((lvl & P(LVL, MISS)))
@@ -664,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
        stats->lcl_hitm         += add->lcl_hitm;
        stats->rmt_hitm         += add->rmt_hitm;
        stats->tot_hitm         += add->tot_hitm;
+       stats->lcl_peer         += add->lcl_peer;
+       stats->rmt_peer         += add->rmt_peer;
+       stats->tot_peer         += add->tot_peer;
        stats->rmt_hit          += add->rmt_hit;
        stats->lcl_dram         += add->lcl_dram;
        stats->rmt_dram         += add->rmt_dram;
index 8a8b568..1237230 100644 (file)
@@ -78,6 +78,9 @@ struct c2c_stats {
        u32     lcl_hitm;            /* count of loads with local HITM  */
        u32     rmt_hitm;            /* count of loads with remote HITM */
        u32     tot_hitm;            /* count of loads with local and remote HITM */
+       u32     lcl_peer;            /* count of loads with local peer cache */
+       u32     rmt_peer;            /* count of loads with remote peer cache */
+       u32     tot_peer;            /* count of loads with local and remote peer cache */
        u32     rmt_hit;             /* count of loads with remote hit clean; */
        u32     lcl_dram;            /* count of loads miss to local DRAM */
        u32     rmt_dram;            /* count of loads miss to remote DRAM */