From 4f7043fc0ce158a5f555b6c0ec7f9818a60b8ce4 Mon Sep 17 00:00:00 2001
From: "Juan A. Suarez Romero"
Date: Tue, 27 Apr 2021 18:11:18 +0200
Subject: [PATCH] v3d/simulator: implement performance counters

Add support for performance counters when using the simulator.

v2 (Iago):
 - Remove brackets from single-line conditionals
 - Rename channel to channels
 - Ensure perfmon start/stop function is implemented in all versions
 - Use an array for perfmons instead of hash table
 - Implement performance counters in CSD

v3 (Iago):
 - Rename PERFMON_CHUNKS to PERFMONS_ALLOC_SIZE.
 - Merge increasing lastid and ensuring space in a single function.

v4 (Iago):
 - Assert perfid <= perfmons_size.

v7 (Iago):
 - Do not stop perfmon on each submission

v8 (Iago):
 - Add comment about stopping the perfmon when retrieving values.

Reviewed-by: Iago Toral Quiroga
Signed-off-by: Juan A. Suarez Romero
Part-of:
---
 src/broadcom/simulator/v3d_simulator.c  | 157 ++++++++++++++++++++++++++++++++
 src/broadcom/simulator/v3dx_simulator.c |  47 +++++++++-
 src/broadcom/simulator/v3dx_simulator.h |   6 ++
 3 files changed, 209 insertions(+), 1 deletion(-)

diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c
index a6a057d..494e5bb 100644
--- a/src/broadcom/simulator/v3d_simulator.c
+++ b/src/broadcom/simulator/v3d_simulator.c
@@ -87,6 +87,9 @@ static struct v3d_simulator_state {
         /** Mapping from GEM fd to struct v3d_simulator_file * */
         struct hash_table *fd_map;
 
+        /** Last performance monitor ID handed out by perfmons_next_id(); a single counter kept in the global simulator state, not per file. */
+        uint32_t last_perfid;
+
         struct util_dynarray bin_oom;
         int refcount;
 } sim_state = {
@@ -100,6 +103,11 @@ struct v3d_simulator_file {
         /** Mapping from GEM handle to struct v3d_simulator_bo * */
         struct hash_table *bo_map;
 
+        /** Dynamic array with performance monitors, indexed by perfmon ID - 1 */
+        struct v3d_simulator_perfmon **perfmons;
+        uint32_t perfmons_size; /* number of slots allocated in perfmons */
+        uint32_t active_perfid; /* ID last passed to v3d_simulator_perfmon_switch(); ID 0 never resolves to a perfmon */
+
         struct mem_block *gmp;
         void *gmp_vaddr;
 
@@ -121,12 +129,34 @@ struct v3d_simulator_bo {
         int handle;
 };
 
+struct v3d_simulator_perfmon { /* one performance monitor created through the perfmon create ioctl */
+        uint32_t ncounters; /* number of used entries in counters[] and values[] */
+        uint8_t counters[DRM_V3D_MAX_PERF_COUNTERS]; /* counter IDs, each checked against V3D_PERFCNT_NUM at creation */
+        uint64_t values[DRM_V3D_MAX_PERF_COUNTERS]; /* totals accumulated every time the perfmon is stopped */
+};
+
 static void *
 int_to_key(int key)
 {
         return (void *)(uintptr_t)key;
 }
 
+#define PERFMONS_ALLOC_SIZE 100 /* growth step, in slots, of a file's perfmons array */
+
+static uint32_t /* returns the newly reserved perfmon ID; the create ioctl calls this with sim_state.mutex held */
+perfmons_next_id(struct v3d_simulator_file *sim_file) {
+        sim_state.last_perfid++; /* IDs come from the global state, so they are unique across files */
+        if (sim_state.last_perfid > sim_file->perfmons_size) { /* NOTE(review): a single growth step may not reach an ID that other files have already advanced - confirm */
+                sim_file->perfmons_size += PERFMONS_ALLOC_SIZE;
+                sim_file->perfmons = reralloc(sim_file, /* NOTE(review): result is not checked */
+                                              sim_file->perfmons,
+                                              struct v3d_simulator_perfmon *,
+                                              sim_file->perfmons_size);
+        } /* NOTE(review): v3d_get_simulator_perfmon() may read new slots before they are assigned - confirm reralloc zeroes them */
+
+        return sim_state.last_perfid;
+}
+
 static struct v3d_simulator_file *
 v3d_get_simulator_file_for_fd(int fd)
 {
@@ -357,6 +387,46 @@ v3d_simulator_unpin_bos(struct v3d_simulator_file *file,
         return 0;
 }
 
+static struct v3d_simulator_perfmon * /* NULL when perfid is 0 or above the last ID handed out; otherwise the content of the file's slot */
+v3d_get_simulator_perfmon(int fd, uint32_t perfid)
+{
+        if (!perfid || perfid > sim_state.last_perfid) /* last_perfid is read here without taking sim_state.mutex */
+                return NULL;
+
+        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+        mtx_lock(&sim_state.mutex);
+        assert(perfid <= file->perfmons_size);
+        struct v3d_simulator_perfmon *perfmon = file->perfmons[perfid - 1]; /* IDs are 1-based, slots 0-based */
+        mtx_unlock(&sim_state.mutex);
+
+        return perfmon;
+}
+
+static void /* stops the file's active perfmon and starts the one named by perfid; an ID with no perfmon (such as 0) only stops */
+v3d_simulator_perfmon_switch(int fd, uint32_t perfid)
+{
+        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+        struct v3d_simulator_perfmon *perfmon;
+
+        if (perfid == file->active_perfid) /* same perfmon as before: leave the counters untouched */
+                return;
+
+        perfmon = v3d_get_simulator_perfmon(fd, file->active_perfid);
+        if (perfmon) /* NOTE(review): the v3d41 variant is called whatever sim_state.ver is - confirm intended */
+                v3d41_simulator_perfmon_stop(sim_state.v3d,
+                                             perfmon->ncounters,
+                                             perfmon->values);
+
+        perfmon = v3d_get_simulator_perfmon(fd, perfid);
+        if (perfmon)
+                v3d41_simulator_perfmon_start(sim_state.v3d,
+                                              perfmon->ncounters,
+                                              perfmon->counters);
+
+        file->active_perfid = perfid; /* recorded even when no perfmon was found for perfid */
+}
+
 static int
 v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
 {
@@ -369,6 +439,9 @@ v3d_simulator_submit_cl_ioctl(int fd, struct drm_v3d_submit_cl *submit)
 
         mtx_lock(&sim_state.submit_lock);
         bin_fd = fd;
+
+        v3d_simulator_perfmon_switch(fd, submit->perfmon_id); /* done under submit_lock, before the job is run */
+
         if (sim_state.ver >= 41)
                 v3d41_simulator_submit_cl_ioctl(sim_state.v3d, submit, file->gmp->ofs);
         else
@@ -530,6 +603,8 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
         for (int i = 0; i < args->bo_handle_count; i++)
                 v3d_simulator_copy_in_handle(file, bo_handles[i]);
 
+        v3d_simulator_perfmon_switch(fd, args->perfmon_id); /* select the job's perfmon before it is run */
+
         if (sim_state.ver >= 41)
                 ret = v3d41_simulator_submit_csd_ioctl(sim_state.v3d, args,
                                                        file->gmp->ofs);
@@ -542,6 +617,79 @@ v3d_simulator_submit_csd_ioctl(int fd, struct drm_v3d_submit_csd *args)
         return ret;
 }
 
+static int /* returns 0 and fills args->id, or -EINVAL for a bad counter count or counter ID */
+v3d_simulator_perfmon_create_ioctl(int fd, struct drm_v3d_perfmon_create *args)
+{
+        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+        if (args->ncounters == 0 ||
+            args->ncounters > DRM_V3D_MAX_PERF_COUNTERS)
+                return -EINVAL;
+
+        struct v3d_simulator_perfmon *perfmon = rzalloc(file, /* NOTE(review): allocation result is not checked */
+                                                        struct v3d_simulator_perfmon);
+
+        perfmon->ncounters = args->ncounters;
+        for (int i = 0; i < args->ncounters; i++) {
+                if (args->counters[i] >= V3D_PERFCNT_NUM) {
+                        ralloc_free(perfmon); /* nothing was published yet, so only the allocation is undone */
+                        return -EINVAL;
+                } else {
+                        perfmon->counters[i] = args->counters[i];
+                }
+        }
+
+        mtx_lock(&sim_state.mutex);
+        args->id = perfmons_next_id(file); /* reserves the ID and makes room for its slot */
+        file->perfmons[args->id - 1] = perfmon;
+        mtx_unlock(&sim_state.mutex);
+
+        return 0;
+}
+
+static int /* returns 0, or -EINVAL when args->id does not resolve to a perfmon */
+v3d_simulator_perfmon_destroy_ioctl(int fd, struct drm_v3d_perfmon_destroy *args)
+{
+        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+        struct v3d_simulator_perfmon *perfmon =
+                v3d_get_simulator_perfmon(fd, args->id);
+
+        if (!perfmon)
+                return -EINVAL;
+
+        mtx_lock(&sim_state.mutex);
+        file->perfmons[args->id - 1] = NULL; /* the slot is cleared; the ID itself is never reused */
+        mtx_unlock(&sim_state.mutex);
+
+        ralloc_free(perfmon); /* NOTE(review): file->active_perfid is left unchanged if this perfmon was the active one */
+
+        return 0;
+}
+
+static int /* returns 0 after copying ncounters values to args->values_ptr, or -EINVAL for an unknown ID */
+v3d_simulator_perfmon_get_values_ioctl(int fd, struct drm_v3d_perfmon_get_values *args)
+{
+        struct v3d_simulator_file *file = v3d_get_simulator_file_for_fd(fd);
+
+        mtx_lock(&sim_state.submit_lock);
+
+        /* Stop the perfmon if it is still active, so that its current counts are added to values[] before they are copied out */
+        if (args->id == file->active_perfid)
+                v3d_simulator_perfmon_switch(fd, 0);
+
+        mtx_unlock(&sim_state.submit_lock);
+
+        struct v3d_simulator_perfmon *perfmon =
+                v3d_get_simulator_perfmon(fd, args->id);
+
+        if (!perfmon)
+                return -EINVAL;
+
+        memcpy((void *)args->values_ptr, perfmon->values, perfmon->ncounters * sizeof(uint64_t)); /* values_ptr is used directly as an address in this process */
+
+        return 0;
+}
+
 int
 v3d_simulator_ioctl(int fd, unsigned long request, void *args)
 {
@@ -575,6 +723,15 @@ v3d_simulator_ioctl(int fd, unsigned long request, void *args)
         case DRM_IOCTL_V3D_SUBMIT_CSD:
                 return v3d_simulator_submit_csd_ioctl(fd, args);
 
+        case DRM_IOCTL_V3D_PERFMON_CREATE:
+                return v3d_simulator_perfmon_create_ioctl(fd, args);
+
+        case DRM_IOCTL_V3D_PERFMON_DESTROY:
+                return v3d_simulator_perfmon_destroy_ioctl(fd, args);
+
+        case DRM_IOCTL_V3D_PERFMON_GET_VALUES:
+                return v3d_simulator_perfmon_get_values_ioctl(fd, args);
+
         case DRM_IOCTL_GEM_OPEN:
         case DRM_IOCTL_GEM_FLINK:
                 return drmIoctl(fd, request, args);
diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c
index 0fd090d..07bbbe2 100644
--- a/src/broadcom/simulator/v3dx_simulator.c
+++ b/src/broadcom/simulator/v3dx_simulator.c
@@ -267,7 +267,7 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                 args->value = 1;
                 return 0;
         case DRM_V3D_PARAM_SUPPORTS_PERFMON:
-                args->value = 0;
+                args->value = V3D_VERSION >= 41; /* matches the V3D_VERSION >= 41 guard around the perfmon start/stop bodies */
                 return 0;
         }
 
@@ -501,4 +501,49 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
         }
 }
 
+#if V3D_VERSION >= 41
+#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x)) /* register holding counter x; registers are 4 bytes apart */
+#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x)) /* x-th source register; perfmon_start packs 4 events into each */
+#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8) /* each event occupies an 8-bit slot of a source register */
+#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
+                                                 V3D_PCTR_0_SRC_N_SHIFT(x) + 6))
+#endif
+
+void /* programs the event sources, then writes the counter mask to CLR, OVERFLOW and EN; empty body when V3D_VERSION < 41 */
+v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
+                              uint32_t ncounters,
+                              uint8_t *events)
+{
+#if V3D_VERSION >= 41
+        int i, j;
+        uint32_t source;
+        uint32_t mask = BITFIELD_RANGE(0, ncounters); /* presumably one bit per counter in use - verify against the macro */
+
+        for (i = 0; i < ncounters; i+=4) { /* one source register per group of up to 4 events */
+                source = i / 4;
+                uint32_t channels = 0;
+                for (j = 0; j < 4 && (i + j) < ncounters; j++)
+                        channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
+                V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
+        }
+        V3D_WRITE(V3D_PCTR_0_CLR, mask);
+        V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
+        V3D_WRITE(V3D_PCTR_0_EN, mask);
+#endif
+}
+
+void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d, /* adds every counter to values[] and then writes 0 to EN; empty body when V3D_VERSION < 41 */
+                                  uint32_t ncounters,
+                                  uint64_t *values)
+{
+#if V3D_VERSION >= 41
+        int i;
+
+        for (i = 0; i < ncounters; i++)
+                values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i)); /* += so totals build up across start/stop cycles */
+
+        V3D_WRITE(V3D_PCTR_0_EN, 0);
+#endif
+}
+
 #endif /* USE_V3D_SIMULATOR */
diff --git a/src/broadcom/simulator/v3dx_simulator.h b/src/broadcom/simulator/v3dx_simulator.h
index 2c623d7..145ae59 100644
--- a/src/broadcom/simulator/v3dx_simulator.h
+++ b/src/broadcom/simulator/v3dx_simulator.h
@@ -44,3 +44,9 @@ int v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
 int v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                      struct drm_v3d_submit_csd *args,
                                      uint32_t gmp_offset);
+void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
+                                   uint32_t ncounters,
+                                   uint8_t *events);
+void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
+                                  uint32_t ncounters,
+                                  uint64_t *values);
-- 
2.7.4