bpftool: profile online CPUs instead of possible
author Tonghao Zhang <tong@infragraf.org>
Thu, 2 Feb 2023 13:17:01 +0000 (21:17 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Mar 2023 08:32:58 +0000 (09:32 +0100)
[ Upstream commit 377c16fa3f3c60d21e4b05314c8be034ce37f2eb ]

The number of online CPUs may not be equal to the number of possible CPUs.
"bpftool prog profile" cannot create a PMU event on a CPU that is possible
but not online; events can only be opened on online CPUs.

$ dmidecode -s system-product-name
PowerEdge R620
$ cat /sys/devices/system/cpu/possible
0-47
$ cat /sys/devices/system/cpu/online
0-31

Disable cpu dynamically:
$ echo 0 > /sys/devices/system/cpu/cpuX/online

If a CPU is offline, perf_event_open() returns ENODEV for it.
To fix this issue:
* check the returned error and skip offline CPUs.
* close pmu_fd immediately on the error path to avoid leaking the fd.

Fixes: 47c09d6a9f67 ("bpftool: Introduce "prog profile" command")
Signed-off-by: Tonghao Zhang <tong@infragraf.org>
Cc: Quentin Monnet <quentin@isovalent.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Song Liu <song@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20230202131701.29519-1-tong@infragraf.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
tools/bpf/bpftool/prog.c

index c81362a..41c02b6 100644 (file)
@@ -2166,10 +2166,38 @@ static void profile_close_perf_events(struct profiler_bpf *obj)
        profile_perf_event_cnt = 0;
 }
 
+static int profile_open_perf_event(int mid, int cpu, int map_fd)
+{
+       int pmu_fd;
+
+       pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr,
+                        -1 /*pid*/, cpu, -1 /*group_fd*/, 0);
+       if (pmu_fd < 0) {
+               if (errno == ENODEV) {
+                       p_info("cpu %d may be offline, skip %s profiling.",
+                               cpu, metrics[mid].name);
+                       profile_perf_event_cnt++;
+                       return 0;
+               }
+               return -1;
+       }
+
+       if (bpf_map_update_elem(map_fd,
+                               &profile_perf_event_cnt,
+                               &pmu_fd, BPF_ANY) ||
+           ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+               close(pmu_fd);
+               return -1;
+       }
+
+       profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+       return 0;
+}
+
 static int profile_open_perf_events(struct profiler_bpf *obj)
 {
        unsigned int cpu, m;
-       int map_fd, pmu_fd;
+       int map_fd;
 
        profile_perf_events = calloc(
                sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
@@ -2188,17 +2216,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
                if (!metrics[m].selected)
                        continue;
                for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
-                       pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
-                                        -1/*pid*/, cpu, -1/*group_fd*/, 0);
-                       if (pmu_fd < 0 ||
-                           bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
-                                               &pmu_fd, BPF_ANY) ||
-                           ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+                       if (profile_open_perf_event(m, cpu, map_fd)) {
                                p_err("failed to create event %s on cpu %d",
                                      metrics[m].name, cpu);
                                return -1;
                        }
-                       profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
                }
        }
        return 0;