The Retire Latency field is added in the var3_w of the
PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports the pipeline stall,
in cycles, of this instruction relative to the previous instruction. It is
useful to display this information with perf mem report.
The p_stage_cyc for Power is also from the var3_w. Union the p_stage_cyc
and retire_lat to share the code.
Implement X86-specific code to display the X86-specific header.
Add a new sort key retire_lat for the Retire Latency.
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20230104201349.1451191-8-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
pipeline stage. And currently supported only on powerpc.
- addr: (Full) virtual address of the sampled instruction
+ - retire_lat: On X86, this reports pipeline stall of this instruction compared
+ to the previous instruction in cycles. And currently supported only on X86.
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
else {
data->weight = weight.var1_dw;
data->ins_lat = weight.var2_w;
+ data->retire_lat = weight.var3_w;
}
}
*array |= ((u64)data->ins_lat << 32);
}
}
+/*
+ * Translate a generic sort-entry header into the name displayed on X86.
+ *
+ * The Local Pipeline Stage Cycle and Pipeline Stage Cycle headers are
+ * replaced by Local Retire Latency and Retire Latency respectively. Any
+ * other header is returned unchanged, i.e. the same pointer that was
+ * passed in. The comparison is an exact, case-sensitive string match.
+ */
+const char *arch_perf_header_entry(const char *se_header)
+{
+ if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
+ return "Local Retire Latency";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Retire Latency";
+
+ return se_header;
+}
+/*
+ * Tell the caller whether the given sort key is accepted by this arch code.
+ *
+ * Returns 1 when sort_key is exactly p_stage_cyc or local_p_stage_cyc,
+ * and 0 for every other key.
+ *
+ * NOTE(review): the retire_lat and local_retire_lat keys, which reuse the
+ * p_stage_cyc sort entries, are not matched here -- confirm that callers
+ * only consult this hook for the p_stage_cyc spellings.
+ */
+int arch_support_sort_key(const char *sort_key)
+{
+ if (!strcmp(sort_key, "p_stage_cyc"))
+ return 1;
+ if (!strcmp(sort_key, "local_p_stage_cyc"))
+ return 1;
+ return 0;
+}
u8 cpumode;
u16 misc;
u16 ins_lat;
- u16 p_stage_cyc;
+ union {
+ u16 p_stage_cyc;
+ u16 retire_lat;
+ };
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
DIM(SORT_ADDR, "addr", sort_addr),
+ DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
+ DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
};
#undef DIM
SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC,
SORT_ADDR,
+ SORT_LOCAL_RETIRE_LAT,
+ SORT_GLOBAL_RETIRE_LAT,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,