perf: pmu fine-tune for aarch32/64 of A53/A55/A73 [1/1]
author: Hanjie Lin <hanjie.lin@amlogic.com>
Tue, 27 Aug 2019 08:21:46 +0000 (16:21 +0800)
committer: Jianxin Pan <jianxin.pan@amlogic.com>
Wed, 18 Sep 2019 05:35:56 +0000 (22:35 -0700)
PD#SWPL-13243

Problem:
PMU events are inaccurate or incomplete on A53/A55/A73.

Solution:
1. Modify the event configuration for A53/A55/A73.
2. The perf executable must be compiled from a recent kernel (5.1+).
3. A55 events are the most complete; A73 events are the least complete (e.g. fewer ld_retired/st_retired/stall/prefetch events).
4. Events with the same name have similar meanings on A55 and A53, but differ more on A73 (e.g. the meanings of L1/L2 dcache/icache loads).

sample commands:
a55 arm64:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_refill_rd/,armv8_pmuv3/a55_l1d_cache_refill_inner/,armv8_pmuv3/a55_l1d_cache_refill_outer/,armv8_pmuv3/a55_l1d_cache_refill_prefetch/,armv8_pmuv3/a55_l2d_cache_refill_prefetch/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a55_stall_backend_ld_cache/,armv8_pmuv3/a55_stall_backend_ld_tlb/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls

a53 arm64:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv8_pmuv3/ld_retired/,armv8_pmuv3/st_retired/,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a53_cache_refill_prefetch/,armv8_pmuv3/a53_scu_snooped/,armv8_pmuv3/a5x_stall_frontend_cache/,armv8_pmuv3/a5x_stall_frontend_tlb/,armv8_pmuv3/a5x_stall_backend_ld/,armv8_pmuv3/a5x_stall_backend_st/,armv8_pmuv3/a5x_stall_backend_ilock_agu/,armv8_pmuv3/a5x_stall_backend_ilock_fpu/ ls

a73 arm64: (w400 bind to a73 cpu2)
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv8_pmuv3/l1d_cache/,armv8_pmuv3/l1d_cache_refill/,armv8_pmuv3/a55_l1d_cache_rd/,armv8_pmuv3/a55_l1d_cache_wr/,armv8_pmuv3/a5x_l2d_cache/,armv8_pmuv3/a5x_l2d_cache_refill/,armv8_pmuv3/a55_l2d_cache_rd/,armv8_pmuv3/a55_l2d_cache_wr/ busybox taskset 4 ls

a55 arm:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a55_l1d_cache_refill_wr/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_refill_rd/,armv7_cortex_a15/a55_l1d_cache_refill_inner/,armv7_cortex_a15/a55_l1d_cache_refill_outer/,armv7_cortex_a15/a55_l1d_cache_refill_prefetch/,armv7_cortex_a15/a55_l2d_cache_refill_prefetch/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a55_stall_backend_ld_cache/,armv7_cortex_a15/a55_stall_backend_ld_tlb/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls

a53 arm:
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,armv7_cortex_a15/ld_retired/,armv7_cortex_a15/st_retired/,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,L1-icache-loads,L1-icache-load-misses,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a53_cache_refill_prefetch/,armv7_cortex_a15/a53_scu_snooped/,armv7_cortex_a15/a5x_stall_frontend_cache/,armv7_cortex_a15/a5x_stall_frontend_tlb/,armv7_cortex_a15/a5x_stall_backend_ld/,armv7_cortex_a15/a5x_stall_backend_st/,armv7_cortex_a15/a5x_stall_backend_ilock_agu/,armv7_cortex_a15/a5x_stall_backend_ilock_fpu/ ls

a73 arm: (w400 bind to a73 cpu2)
perf stat -e task-clock,context-switches,cpu-migrations,page-faults,instructions,cycles,branch-loads,branch-load-misses,armv7_cortex_a15/l1d_cache/,armv7_cortex_a15/l1d_cache_refill/,armv7_cortex_a15/a55_l1d_cache_rd/,armv7_cortex_a15/a55_l1d_cache_wr/,armv7_cortex_a15/a5x_l2d_cache/,armv7_cortex_a15/a5x_l2d_cache_refill/,armv7_cortex_a15/a55_l2d_cache_rd/,armv7_cortex_a15/a55_l2d_cache_wr/ busybox taskset 4 ls

Verify:
ac200/u200/w400

Change-Id: I7f11e1480c3c27d016b011d2a84c33e824f69b08
Signed-off-by: Hanjie Lin <hanjie.lin@amlogic.com>
arch/arm/kernel/perf_event_v7.c
arch/arm64/kernel/perf_event.c

index e3a3ebc..a35b803 100644 (file)
@@ -612,6 +612,49 @@ ARMV7_EVENT_ATTR(inst_spec, ARMV7_PERFCTR_INSTR_SPEC);
 ARMV7_EVENT_ATTR(ttbr_write_retired, ARMV7_PERFCTR_TTBR_WRITE);
 ARMV7_EVENT_ATTR(bus_cycles, ARMV7_PERFCTR_BUS_CYCLES);
 
+#ifdef CONFIG_AMLOGIC_MODIFY
+/* a53/a55 common events */
+ARMV7_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1);
+ARMV7_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2);
+ARMV7_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3);
+ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5);
+ARMV7_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6);
+ARMV7_EVENT_ATTR(a5x_stall_backend_ld, 0xe7);
+ARMV7_EVENT_ATTR(a5x_stall_backend_st, 0xe8);
+ARMV7_EVENT_ATTR(a5x_l2d_cache, 0x16);
+ARMV7_EVENT_ATTR(a5x_l2d_cache_refill, 0x17);
+
+/* a55 events */
+ARMV7_EVENT_ATTR(a55_stall_frontend, 0x23);
+ARMV7_EVENT_ATTR(a55_stall_backend, 0x24);
+ARMV7_EVENT_ATTR(a55_stall_backend_ilock, 0xe4);
+ARMV7_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44);
+ARMV7_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45);
+ARMV7_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2);
+ARMV7_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1);
+ARMV7_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0);
+ARMV7_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9);
+ARMV7_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea);
+ARMV7_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb);
+ARMV7_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec);
+ARMV7_EVENT_ATTR(a55_l1d_cache_rd, 0x40);
+ARMV7_EVENT_ATTR(a55_l1d_cache_wr, 0x41);
+ARMV7_EVENT_ATTR(a55_l1d_cache_refill_rd, 0x42);
+ARMV7_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43);
+ARMV7_EVENT_ATTR(a55_l2d_cache_rd, 0x50);
+ARMV7_EVENT_ATTR(a55_l2d_cache_wr, 0x51);
+ARMV7_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52);
+ARMV7_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53);
+ARMV7_EVENT_ATTR(a55_l3d_cache_rd, 0xa0);
+ARMV7_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2);
+
+/* a53 events */
+ARMV7_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2);
+ARMV7_EVENT_ATTR(a53_scu_snooped, 0xc8);
+ARMV7_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7);
+ARMV7_EVENT_ATTR(a53_stall_frontend_other, 0xe0);
+#endif
+
 static struct attribute *armv7_pmuv2_event_attrs[] = {
        &armv7_event_attr_sw_incr.attr.attr,
        &armv7_event_attr_l1i_cache_refill.attr.attr,
@@ -643,6 +686,46 @@ static struct attribute *armv7_pmuv2_event_attrs[] = {
        &armv7_event_attr_inst_spec.attr.attr,
        &armv7_event_attr_ttbr_write_retired.attr.attr,
        &armv7_event_attr_bus_cycles.attr.attr,
+#ifdef CONFIG_AMLOGIC_MODIFY
+       /* a55/a53 common events */
+       &armv7_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1
+       &armv7_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2
+       &armv7_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3
+       &armv7_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5
+       &armv7_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6
+       &armv7_event_attr_a5x_stall_backend_ld.attr.attr,  //0xe7
+       &armv7_event_attr_a5x_stall_backend_st.attr.attr,  //0xe8
+       &armv7_event_attr_a5x_l2d_cache.attr.attr, //0x16
+       &armv7_event_attr_a5x_l2d_cache_refill.attr.attr, //0x17
+       /* a55 events */
+       &armv7_event_attr_a55_stall_frontend.attr.attr, //0x23
+       &armv7_event_attr_a55_stall_backend.attr.attr,  //0x24
+       &armv7_event_attr_a55_stall_backend_ilock.attr.attr,  //0xe4
+       &armv7_event_attr_a55_stall_backend_ld_cache.attr.attr,  //0xe9
+       &armv7_event_attr_a55_stall_backend_ld_tlb.attr.attr,  //0xea
+       &armv7_event_attr_a55_stall_backend_st_stb.attr.attr,  //0xeb
+       &armv7_event_attr_a55_stall_backend_st_tlb.attr.attr,  //0xec
+       &armv7_event_attr_a55_l1d_cache_refill_inner.attr.attr,  //0x44
+       &armv7_event_attr_a55_l1d_cache_refill_outer.attr.attr,  //0x45
+       &armv7_event_attr_a55_l1d_cache_refill_prefetch.attr.attr,  //0xc2
+       &armv7_event_attr_a55_l2d_cache_refill_prefetch.attr.attr,  //0xc1
+       &armv7_event_attr_a55_l3d_cache_refill_prefetch.attr.attr,  //0xc0
+       &armv7_event_attr_a55_l1d_cache_rd.attr.attr, //0x40
+       &armv7_event_attr_a55_l1d_cache_wr.attr.attr, //0x41
+       &armv7_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42
+       &armv7_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43
+       &armv7_event_attr_a55_l2d_cache_rd.attr.attr, //0x50
+       &armv7_event_attr_a55_l2d_cache_wr.attr.attr, //0x51
+       &armv7_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52
+       &armv7_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53
+       &armv7_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0
+       &armv7_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2
+       /* a53 events */
+       &armv7_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2
+       &armv7_event_attr_a53_scu_snooped.attr.attr, //0xc8
+       &armv7_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7
+       &armv7_event_attr_a53_stall_frontend_other.attr.attr, //0xe0
+#endif
        NULL,
 };
 
index 9b3b5dd..0042224 100644 (file)
@@ -449,6 +449,49 @@ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
 ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
 ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
 
+#ifdef CONFIG_AMLOGIC_MODIFY
+/* a53/a55 common */
+ARMV8_EVENT_ATTR(a5x_stall_frontend_cache, 0xe1);
+ARMV8_EVENT_ATTR(a5x_stall_frontend_tlb, 0xe2);
+ARMV8_EVENT_ATTR(a5x_stall_frontend_pderr, 0xe3);
+ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_agu, 0xe5);
+ARMV8_EVENT_ATTR(a5x_stall_backend_ilock_fpu, 0xe6);
+ARMV8_EVENT_ATTR(a5x_stall_backend_ld, 0xe7);
+ARMV8_EVENT_ATTR(a5x_stall_backend_st, 0xe8);
+ARMV8_EVENT_ATTR(a5x_l2d_cache, 0x16);
+ARMV8_EVENT_ATTR(a5x_l2d_cache_refill, 0x17);
+
+/* a55 events */
+ARMV8_EVENT_ATTR(a55_stall_frontend, 0x23);
+ARMV8_EVENT_ATTR(a55_stall_backend, 0x24);
+ARMV8_EVENT_ATTR(a55_stall_backend_ilock, 0xe4);
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_inner, 0x44);
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_outer, 0x45);
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_prefetch, 0xc2);
+ARMV8_EVENT_ATTR(a55_l2d_cache_refill_prefetch, 0xc1);
+ARMV8_EVENT_ATTR(a55_l3d_cache_refill_prefetch, 0xc0);
+ARMV8_EVENT_ATTR(a55_stall_backend_ld_cache, 0xe9);
+ARMV8_EVENT_ATTR(a55_stall_backend_ld_tlb, 0xea);
+ARMV8_EVENT_ATTR(a55_stall_backend_st_stb, 0xeb);
+ARMV8_EVENT_ATTR(a55_stall_backend_st_tlb, 0xec);
+ARMV8_EVENT_ATTR(a55_l1d_cache_rd, 0x40);
+ARMV8_EVENT_ATTR(a55_l1d_cache_wr, 0x41);
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_rd, 0x42);
+ARMV8_EVENT_ATTR(a55_l1d_cache_refill_wr, 0x43);
+ARMV8_EVENT_ATTR(a55_l2d_cache_rd, 0x50);
+ARMV8_EVENT_ATTR(a55_l2d_cache_wr, 0x51);
+ARMV8_EVENT_ATTR(a55_l2d_cache_refill_rd, 0x52);
+ARMV8_EVENT_ATTR(a55_l2d_cache_refill_wr, 0x53);
+ARMV8_EVENT_ATTR(a55_l3d_cache_rd, 0xa0);
+ARMV8_EVENT_ATTR(a55_l3d_cache_refill_rd, 0xa2);
+
+/* a53 events */
+ARMV8_EVENT_ATTR(a53_cache_refill_prefetch, 0xc2);
+ARMV8_EVENT_ATTR(a53_scu_snooped, 0xc8);
+ARMV8_EVENT_ATTR(a53_stall_backend_st_stb, 0xc7);
+ARMV8_EVENT_ATTR(a53_stall_frontend_other, 0xe0);
+#endif
+
 static struct attribute *armv8_pmuv3_event_attrs[] = {
        &armv8_event_attr_sw_incr.attr.attr,
        &armv8_event_attr_l1i_cache_refill.attr.attr,
@@ -498,6 +541,46 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
        &armv8_event_attr_l2i_tlb_refill.attr.attr,
        &armv8_event_attr_l2d_tlb.attr.attr,
        &armv8_event_attr_l2i_tlb.attr.attr,
+#ifdef CONFIG_AMLOGIC_MODIFY
+       /* a55/a53 common events */
+       &armv8_event_attr_a5x_stall_frontend_cache.attr.attr, //0xe1
+       &armv8_event_attr_a5x_stall_frontend_tlb.attr.attr, //0xe2
+       &armv8_event_attr_a5x_stall_frontend_pderr.attr.attr, //0xe3
+       &armv8_event_attr_a5x_stall_backend_ilock_agu.attr.attr, //0xe5
+       &armv8_event_attr_a5x_stall_backend_ilock_fpu.attr.attr, //0xe6
+       &armv8_event_attr_a5x_stall_backend_ld.attr.attr,  //0xe7
+       &armv8_event_attr_a5x_stall_backend_st.attr.attr,  //0xe8
+       &armv8_event_attr_a5x_l2d_cache.attr.attr, //0x16
+       &armv8_event_attr_a5x_l2d_cache_refill.attr.attr, //0x17
+       /* a55 events */
+       &armv8_event_attr_a55_stall_frontend.attr.attr, //0x23
+       &armv8_event_attr_a55_stall_backend.attr.attr,  //0x24
+       &armv8_event_attr_a55_stall_backend_ilock.attr.attr,  //0xe4
+       &armv8_event_attr_a55_stall_backend_ld_cache.attr.attr,  //0xe9
+       &armv8_event_attr_a55_stall_backend_ld_tlb.attr.attr,  //0xea
+       &armv8_event_attr_a55_stall_backend_st_stb.attr.attr,  //0xeb
+       &armv8_event_attr_a55_stall_backend_st_tlb.attr.attr,  //0xec
+       &armv8_event_attr_a55_l1d_cache_refill_inner.attr.attr,  //0x44
+       &armv8_event_attr_a55_l1d_cache_refill_outer.attr.attr,  //0x45
+       &armv8_event_attr_a55_l1d_cache_refill_prefetch.attr.attr,  //0xc2
+       &armv8_event_attr_a55_l2d_cache_refill_prefetch.attr.attr,  //0xc1
+       &armv8_event_attr_a55_l3d_cache_refill_prefetch.attr.attr,  //0xc0
+       &armv8_event_attr_a55_l1d_cache_rd.attr.attr, //0x40
+       &armv8_event_attr_a55_l1d_cache_wr.attr.attr, //0x41
+       &armv8_event_attr_a55_l1d_cache_refill_rd.attr.attr, //0x42
+       &armv8_event_attr_a55_l1d_cache_refill_wr.attr.attr, //0x43
+       &armv8_event_attr_a55_l2d_cache_rd.attr.attr, //0x50
+       &armv8_event_attr_a55_l2d_cache_wr.attr.attr, //0x51
+       &armv8_event_attr_a55_l2d_cache_refill_rd.attr.attr, //0x52
+       &armv8_event_attr_a55_l2d_cache_refill_wr.attr.attr, //0x53
+       &armv8_event_attr_a55_l3d_cache_rd.attr.attr, //0xa0
+       &armv8_event_attr_a55_l3d_cache_refill_rd.attr.attr, //0xa2
+       /* a53 events */
+       &armv8_event_attr_a53_cache_refill_prefetch.attr.attr, //0xc2
+       &armv8_event_attr_a53_scu_snooped.attr.attr, //0xc8
+       &armv8_event_attr_a53_stall_backend_st_stb.attr.attr, //0xc7
+       &armv8_event_attr_a53_stall_frontend_other.attr.attr, //0xe0
+#endif
        NULL,
 };
 
@@ -505,6 +588,9 @@ static umode_t
 armv8pmu_event_attr_is_visible(struct kobject *kobj,
                               struct attribute *attr, int unused)
 {
+#ifdef CONFIG_AMLOGIC_MODIFY
+       return 0444;
+#else
        struct device *dev = kobj_to_dev(kobj);
        struct pmu *pmu = dev_get_drvdata(dev);
        struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
@@ -516,6 +602,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
                return attr->mode;
 
        return 0;
+#endif
 }
 
 static struct attribute_group armv8_pmuv3_events_attr_group = {