From e3bbd1688b8892fad6297f0e84f76f9ecd2319cb Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev
Date: Tue, 16 May 2023 13:31:55 +0200
Subject: [PATCH] freedreno/replay: Add limited support for KGSL

KGSL doesn't support IOVAs being set from userspace, so as a workaround
we pre-allocate a single buffer and hope that it always starts from the
same address.

Signed-off-by: Danylo Piliaiev
Part-of:
---
 src/freedreno/decode/meson.build |  14 +-
 src/freedreno/decode/replay.c    | 321 ++++++++++++++++++++++++++++++++-------
 2 files changed, 280 insertions(+), 55 deletions(-)

diff --git a/src/freedreno/decode/meson.build b/src/freedreno/decode/meson.build
index d4f1995..39f23ac 100644
--- a/src/freedreno/decode/meson.build
+++ b/src/freedreno/decode/meson.build
@@ -70,6 +70,14 @@ if dep_libarchive.found()
     build_by_default: false,
   )
 
+  replay_flags = []
+  replay_deps = []
+  if freedreno_kmds.contains('kgsl')
+    replay_flags += '-DFD_REPLAY_KGSL'
+  else
+    replay_deps += dep_libdrm
+  endif
+
   replay = executable(
     'replay',
     [
@@ -80,11 +88,9 @@ if dep_libarchive.found()
       inc_include,
       inc_src,
     ],
-    c_args : [no_override_init_args],
+    c_args : [no_override_init_args, replay_flags],
     gnu_symbol_visibility: 'hidden',
-    dependencies: [
-      dep_libdrm,
-    ],
+    dependencies: replay_deps,
     link_with: [
       libfreedreno_cffdec,
       libfreedreno_io,
diff --git a/src/freedreno/decode/replay.c b/src/freedreno/decode/replay.c
index a6ee76f..00ad699 100644
--- a/src/freedreno/decode/replay.c
+++ b/src/freedreno/decode/replay.c
@@ -18,8 +18,13 @@
 #include
 #include
 #include
+#if !FD_REPLAY_KGSL
 #include <xf86drm.h>
 #include "drm-uapi/msm_drm.h"
+#else
+#include "../vulkan/msm_kgsl.h"
+#endif
+
 #include
 #include
 #include
@@ -43,6 +48,9 @@
  * - echo 1 > /sys/module/msm/parameters/rd_full
  *
  * Requires kernel with MSM_INFO_SET_IOVA support.
+ * In case userspace IOVAs are not supported, like on KGSL, we have to
+ * pre-allocate a single buffer and hope it is always allocated starting
+ * from the same address.
  *
  * TODO: Misrendering, would require marking framebuffer images
  * at each renderpass in order to fetch and decode them.
@@ -56,6 +64,8 @@
 
 static const char *exename = NULL;
 
+static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;
+
 static int handle_file(const char *filename, uint32_t first_submit,
                        uint32_t last_submit, uint32_t submit_to_override,
                        const char *cmdstreamgen);
@@ -166,6 +176,16 @@ struct device {
    struct util_vma_heap vma;
 
    struct u_vector cmdstreams;
+
+   bool has_set_iova;
+
+   uint32_t va_id;
+   void *va_map;
+   uint64_t va_iova;
+
+#ifdef FD_REPLAY_KGSL
+   uint32_t context_id;
+#endif
 };
 
 void buffer_mem_free(struct device *dev, struct buffer *buf);
@@ -195,43 +215,6 @@ rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
    return 0;
 }
 
-static struct device *
-device_create()
-{
-   struct device *dev = calloc(sizeof(struct device), 1);
-
-   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
-   if (dev->fd < 0) {
-      errx(1, "Cannot open MSM fd!");
-   }
-
-   uint64_t va_start, va_size;
-
-   struct drm_msm_param req = {
-      .pipe = MSM_PIPE_3D0,
-      .param = MSM_PARAM_VA_START,
-   };
-
-   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
-   va_start = req.value;
-
-   if (!ret) {
-      req.param = MSM_PARAM_VA_SIZE;
-      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
-      va_size = req.value;
-   }
-
-   if (ret) {
-      err(1, "MSM_INFO_SET_IOVA is unsupported");
-   }
-
-   rb_tree_init(&dev->buffers);
-   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
-   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
-
-   return dev;
-}
-
 static struct buffer *
 device_get_buffer(struct device *dev, uint64_t iova)
 {
@@ -261,6 +244,7 @@ device_free_unused_buffers(struct device *dev)
    }
 }
 
+#if !FD_REPLAY_KGSL
 static inline void
 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
 {
@@ -270,6 +254,76 @@ get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
    tv->tv_nsec = t.tv_nsec + ns % 1000000000;
 }
 
+static struct device *
+device_create()
+{
+   struct device *dev = calloc(sizeof(struct device), 1);
+
+   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
+   if (dev->fd < 0) {
+      errx(1, "Cannot open MSM fd!");
+   }
+
+   uint64_t va_start, va_size;
+
+   struct drm_msm_param req = {
+      .pipe = MSM_PIPE_3D0,
+      .param = MSM_PARAM_VA_START,
+   };
+
+   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
+   va_start = req.value;
+
+   if (!ret) {
+      req.param = MSM_PARAM_VA_SIZE;
+      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
+      va_size = req.value;
+
+      dev->has_set_iova = true;
+   }
+
+   if (ret) {
+      printf("MSM_INFO_SET_IOVA is not supported!\n");
+
+      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE, .flags = MSM_BO_CACHED_COHERENT};
+      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
+      dev->va_id = req_new.handle;
+
+      struct drm_msm_gem_info req_info = {
+         .handle = req_new.handle,
+         .info = MSM_INFO_GET_IOVA,
+      };
+
+      drmCommandWriteRead(dev->fd,
+                          DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
+      dev->va_iova = req_info.value;
+
+      struct drm_msm_gem_info req_offset = {
+         .handle = req_new.handle,
+         .info = MSM_INFO_GET_OFFSET,
+      };
+
+      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));
+
+      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+                         dev->fd, req_offset.value);
+      if (dev->va_map == MAP_FAILED) {
+         err(1, "mmap failure");
+      }
+
+      va_start = dev->va_iova;
+      va_size = FAKE_ADDRESS_SPACE_SIZE;
+
+      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
+   }
+
+   rb_tree_init(&dev->buffers);
+   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
+   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
+
+   return dev;
+}
+
 static void
 device_submit_cmdstreams(struct device *dev)
 {
@@ -285,7 +339,6 @@ device_submit_cmdstreams(struct device *dev)
    struct cmdstream *cmd;
    u_vector_foreach(cmd, &dev->cmdstreams) {
       struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);
-      cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
 
       uint32_t bo_idx = 0;
       rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
@@ -295,10 +348,17 @@
          bo_idx++;
       }
 
+      if (cmdstream_buf)
+         cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
+
       struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
       submit_cmd->type = MSM_SUBMIT_CMD_BUF;
       submit_cmd->submit_idx = bo_idx;
-      submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
+      if (dev->has_set_iova) {
+         submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
+      } else {
+         submit_cmd->submit_offset = cmd->iova - dev->va_iova;
+      }
       submit_cmd->size = cmd->size;
       submit_cmd->pad = 0;
       submit_cmd->nr_relocs = 0;
@@ -313,17 +373,29 @@
       bo_count++;
    }
 
+   if (!dev->has_set_iova) {
+      bo_count = 1;
+   }
+
    struct drm_msm_gem_submit_bo *bo_list =
       calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);
 
-   uint32_t bo_idx = 0;
-   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
-      struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
-      submit_bo->handle = buf->gem_handle;
-      submit_bo->flags = buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
-      submit_bo->presumed = buf->iova;
+   if (dev->has_set_iova) {
+      uint32_t bo_idx = 0;
+      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
+         struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
+         submit_bo->handle = buf->gem_handle;
+         submit_bo->flags =
+            buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
+         submit_bo->presumed = buf->iova;
 
-      buf->flags = 0;
+         buf->flags = 0;
+      }
+   } else {
+      bo_list[0].handle = dev->va_id;
+      bo_list[0].flags =
+         MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
+      bo_list[0].presumed = dev->va_iova;
    }
 
    struct drm_msm_gem_submit submit_req = {
@@ -372,6 +444,13 @@ buffer_mem_alloc(struct device *dev, struct buffer *buf)
 {
    util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
 
+   if (!dev->has_set_iova) {
+      uint64_t offset = buf->iova - dev->va_iova;
+      assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
+      buf->map = ((uint8_t*)dev->va_map) + offset;
+      return;
+   }
+
    {
       struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};
 
@@ -424,15 +503,146 @@ buffer_mem_alloc(struct device *dev, struct buffer *buf)
 void
 buffer_mem_free(struct device *dev, struct buffer *buf)
 {
-   munmap(buf->map, buf->size);
+   if (dev->has_set_iova) {
+      munmap(buf->map, buf->size);
+
+      struct drm_gem_close req = {
+         .handle = buf->gem_handle,
+      };
+      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
+   }
+
+   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
+}
+
+#else
+static int
+safe_ioctl(int fd, unsigned long request, void *arg)
+{
+   int ret;
+
+   do {
+      ret = ioctl(fd, request, arg);
+   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+   return ret;
+}
 
-   struct drm_gem_close req = {
-      .handle = buf->gem_handle,
+static struct device *
+device_create()
+{
+   struct device *dev = calloc(sizeof(struct device), 1);
+
+   static const char path[] = "/dev/kgsl-3d0";
+
+   dev->fd = open(path, O_RDWR | O_CLOEXEC);
+   if (dev->fd < 0) {
+      errx(1, "Cannot open KGSL fd!");
+   }
+
+   struct kgsl_gpumem_alloc_id req = {
+      .size = FAKE_ADDRESS_SPACE_SIZE,
+      .flags = KGSL_MEMFLAGS_IOCOHERENT,
    };
-   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
+   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
+   if (ret) {
+      err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
+   }
+
+   dev->va_id = req.id;
+   dev->va_iova = req.gpuaddr;
+   dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
+                      MAP_SHARED, dev->fd, req.id << 12);
+
+   rb_tree_init(&dev->buffers);
+   util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
+   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
+
+   struct kgsl_drawctxt_create drawctxt_req = {
+      .flags = KGSL_CONTEXT_SAVE_GMEM |
+               KGSL_CONTEXT_NO_GMEM_ALLOC |
+               KGSL_CONTEXT_PREAMBLE,
+   };
+
+   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
+   if (ret) {
+      err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
+   }
+
+   printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
+
+   dev->context_id = drawctxt_req.drawctxt_id;
+
+   return dev;
+}
+
+static void
+device_submit_cmdstreams(struct device *dev)
+{
+   device_free_unused_buffers(dev);
+   device_mark_buffers(dev);
+
+   if (!u_vector_length(&dev->cmdstreams))
+      return;
+
+   struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];
+
+   uint32_t idx = 0;
+   struct cmdstream *cmd;
+   u_vector_foreach(cmd, &dev->cmdstreams) {
+      struct kgsl_command_object *submit_cmd = &cmds[idx++];
+      submit_cmd->gpuaddr = cmd->iova;
+      submit_cmd->size = cmd->size;
+      submit_cmd->flags = KGSL_CMDLIST_IB;
+      submit_cmd->id = dev->va_id;
+   }
+
+   struct kgsl_gpu_command submit_req = {
+      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
+      .cmdlist = (uintptr_t) &cmds,
+      .cmdsize = sizeof(struct kgsl_command_object),
+      .numcmds = u_vector_length(&dev->cmdstreams),
+      .numsyncs = 0,
+      .context_id = dev->context_id,
+   };
+
+   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);
+
+   if (ret) {
+      err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
+   }
+
+   struct kgsl_device_waittimestamp_ctxtid wait = {
+      .context_id = dev->context_id,
+      .timestamp = submit_req.timestamp,
+      .timeout = 3000,
+   };
+
+   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
+
+   if (ret) {
+      err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
+   }
+
+   u_vector_finish(&dev->cmdstreams);
+   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
+}
+
+static void
+buffer_mem_alloc(struct device *dev, struct buffer *buf)
+{
+   util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
+
+   buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
+}
+
+void
+buffer_mem_free(struct device *dev, struct buffer *buf)
+{
    util_vma_heap_free(&dev->vma, buf->iova, buf->size);
 }
+#endif
 
 static void
 upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
@@ -444,7 +654,9 @@ upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
       buf = calloc(sizeof(struct buffer), 1);
       buf->iova = iova;
       buf->size = size;
-      rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
+
+      if (dev->has_set_iova)
+         rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
 
       buffer_mem_alloc(dev, buf);
    } else if (buf->size != size) {
@@ -462,14 +674,21 @@ static int
 override_cmdstream(struct device *dev, struct cmdstream *cs,
                    const char *cmdstreamgen)
 {
+#if FD_REPLAY_KGSL
+   static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
+#else
    static const char *tmpfilename = "/tmp/cmdstream_override.rd";
+#endif
+
    /* Find a free space for the new cmdstreams and resources we will use
    * when overriding existing cmdstream.
    */
   /* TODO: should the size be configurable? */
   uint64_t hole_size = 32 * 1024 * 1024;
+   dev->vma.alloc_high = true;
   uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
+   dev->vma.alloc_high = false;
   util_vma_heap_free(&dev->vma, hole_iova, hole_size);
 
   char cmd[2048];
-- 
2.7.4
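
Note (not part of the patch): the KGSL workaround above boils down to carving every replayed buffer out of one large pre-allocated BO and turning trace IOVAs into CPU pointers by offset into a single mmap. A minimal sketch of that mapping, reusing the patch's va_map/va_iova notions but with a hypothetical helper name:

/* Sketch only: translate a captured GPU iova into a CPU pointer inside the
 * single pre-allocated "fake address space" buffer. This mirrors the offset
 * arithmetic buffer_mem_alloc() does when has_set_iova is false;
 * fake_va_to_cpu() itself is a hypothetical helper, not part of the patch. */
#include <assert.h>
#include <stdint.h>

#define FAKE_ADDRESS_SPACE_SIZE (1024ull * 1024 * 1024)

static void *
fake_va_to_cpu(void *va_map, uint64_t va_iova, uint64_t iova, uint64_t size)
{
   uint64_t offset = iova - va_iova;

   /* Every replayed buffer must land inside the pre-allocated BO. */
   assert(offset < FAKE_ADDRESS_SPACE_SIZE &&
          offset + size <= FAKE_ADDRESS_SPACE_SIZE);

   return (uint8_t *)va_map + offset;
}

The scheme only works if the kernel hands back the same GPU address on every run, which is exactly the "hope it is always allocated starting from the same address" caveat in the commit message.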