#define MSM_PARAM_FAULTS 0x09 /* RO */
#define MSM_PARAM_SUSPENDS 0x0a /* RO */
#define MSM_PARAM_SYSPROF 0x0b /* WO: 1 preserves perfcntrs, 2 also disables suspend */
+#define MSM_PARAM_COMM 0x0c /* WO: override for task->comm */
+#define MSM_PARAM_CMDLINE 0x0d /* WO: override for task cmdline */
+#define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */
+#define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */
/* For backwards compat. The original support for preemption was based on
* a single ring per priority level so # of priority levels equals the #
__u32 pipe; /* in, MSM_PIPE_x */
__u32 param; /* in, MSM_PARAM_x */
__u64 value; /* out (get_param) or in (set_param) */
+ __u32 len; /* zero for non-pointer params */
+ __u32 pad; /* must be zero */
};
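The new `len` field is what distinguishes pointer params from value params: for `MSM_PARAM_COMM` and `MSM_PARAM_CMDLINE`, `value` carries a user pointer and `len` the size of the pointed-to buffer. A minimal sketch of the convention, assuming the updated uapi header above (`set_comm_override` is an illustrative helper, not part of the patch):

```c
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

#include "msm_drm.h"

/* Hypothetical helper showing the pointer-param convention: */
static int
set_comm_override(int fd, const char *comm)
{
   struct drm_msm_param req = {
         .pipe = MSM_PIPE_3D0,
         .param = MSM_PARAM_COMM,
         .value = (uintptr_t)comm,   /* pointer params pass a user address */
         .len = strlen(comm) + 1,    /* non-zero len marks a pointer param */
   };

   return ioctl(fd, DRM_IOCTL_MSM_SET_PARAM, &req);
}
```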
/*
#define MSM_INFO_GET_IOVA 0x01 /* get iova, returned by value */
#define MSM_INFO_SET_NAME 0x02 /* set the debug name (by pointer) */
#define MSM_INFO_GET_NAME 0x03 /* get debug name, returned by pointer */
+#define MSM_INFO_SET_IOVA 0x04 /* set the iova, passed by value */
struct drm_msm_gem_info {
__u32 handle; /* in */
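`MSM_INFO_SET_IOVA` is the kernel-side half of userspace iova allocation: instead of asking the kernel for the bo's iova, the caller (here, the host on behalf of the guest) supplies one. A hedged sketch of the ioctl usage, assuming `iova` was already reserved from the advertised va range (the helper is illustrative):

```c
#include <sys/ioctl.h>

#include "msm_drm.h"

/* Hypothetical helper: bind a bo at a caller-chosen GPU address: */
static int
set_bo_iova(int fd, uint32_t handle, uint64_t iova)
{
   struct drm_msm_gem_info req = {
         .handle = handle,
         .info = MSM_INFO_SET_IOVA,
         .value = iova,            /* passed by value, not by pointer */
   };

   return ioctl(fd, DRM_IOCTL_MSM_GEM_INFO, &req);
}
```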
assert(list_is_empty(&dev->deferred_submits));
- dev->funcs->destroy(dev);
-
if (dev->suballoc_bo)
fd_bo_del_locked(dev->suballoc_bo);
fd_bo_cache_cleanup(&dev->bo_cache, 0);
fd_bo_cache_cleanup(&dev->ring_cache, 0);
+
+ /* Needs to be after bo cache cleanup in case the backend has a

+ * util_vma_heap that it destroys:
+ */
+ dev->funcs->destroy(dev);
+
_mesa_hash_table_destroy(dev->handle_table, NULL);
_mesa_hash_table_destroy(dev->name_table, NULL);
/* Used in case out_fence==NULL: */
struct util_queue_fence fence;
+ /* Used by the retire_queue, if the backend uses one: */
+ int out_fence_fd;
+ struct util_queue_fence retire_fence;
+
flush_submit_list_fn flush_submit_list;
};
FD_DEFINE_CAST(fd_submit, fd_submit_sp);
MSM_CCMD_SUBMITQUEUE_QUERY,
MSM_CCMD_WAIT_FENCE,
MSM_CCMD_SET_DEBUGINFO,
+ MSM_CCMD_GEM_CLOSE,
MSM_CCMD_LAST,
};
uint64_t size;
uint32_t flags;
uint32_t blob_id;
+
+ uint64_t iova; /* non-zero for guest userspace iova allocation */
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_new_req)
uint32_t res_id;
uint32_t blob_mem; // TODO do we need this?
uint32_t blob_id; // TODO do we need this?
+
+ uint64_t iova; /* non-zero for guest userspace iova allocation */
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_info_req)
};
DEFINE_CAST(msm_ccmd_req, msm_ccmd_set_debuginfo_req)
+/*
+ * MSM_CCMD_GEM_CLOSE
+ *
+ * If guest userspace allocated iovas are used, this request can be used
+ * to clear the vma when the guest bo is deleted.
+ *
+ * No response.
+ */
+struct msm_ccmd_gem_close_req {
+ struct msm_ccmd_req hdr;
+
+ uint32_t host_handle;
+};
+DEFINE_CAST(msm_ccmd_req, msm_ccmd_gem_close_req)
+
#endif /* MSM_PROTO_H_ */
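On the host side, handling this request just needs to drop the host GEM handle, which releases the vma the guest allocated. A hedged sketch of what a handler could look like (`struct msm_context`, the handler name, and its signature are assumptions based on the `DEFINE_CAST` pattern above, not the actual virglrenderer code):

```c
#include <xf86drm.h>

#include "msm_proto.h"

/* Hypothetical host context; only the drm fd matters for this sketch: */
struct msm_context {
   int fd;
};

static int
msm_ccmd_gem_close(struct msm_context *mctx, const struct msm_ccmd_req *hdr)
{
   const struct msm_ccmd_gem_close_req *req = to_msm_ccmd_gem_close_req(hdr);
   struct drm_gem_close close_args = {
         .handle = req->host_handle,
   };

   /* Dropping the host handle releases the bo's vma once the kernel
    * side is done with it:
    */
   return drmIoctl(mctx->fd, DRM_IOCTL_GEM_CLOSE, &close_args);
}
```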
* SOFTWARE.
*/
+#include "util/libsync.h"
+
#include "virtio_priv.h"
static int
struct msm_ccmd_gem_cpu_prep_req req = {
.hdr = MSM_CCMD(GEM_CPU_PREP, sizeof(req)),
- .host_handle = to_virtio_bo(bo)->host_handle,
+ .host_handle = virtio_bo_host_handle(bo),
.op = op,
.timeout = 5000000000,
};
struct msm_ccmd_gem_set_name_req *req = (void *)buf;
req->hdr = MSM_CCMD(GEM_SET_NAME, req_len);
- req->host_handle = to_virtio_bo(bo)->host_handle;
+ req->host_handle = virtio_bo_host_handle(bo);
req->len = sz;
memcpy(req->payload, name, sz);
struct msm_ccmd_gem_upload_req *req = (void *)buf;
req->hdr = MSM_CCMD(GEM_UPLOAD, req_len);
- req->host_handle = to_virtio_bo(bo)->host_handle;
+ req->host_handle = virtio_bo_host_handle(bo);
req->pad = 0;
req->off = 0;
req->len = len;
virtio_bo_destroy(struct fd_bo *bo)
{
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
+ struct virtio_device *virtio_dev = to_virtio_device(bo->dev);
+
+ if (virtio_dev->userspace_allocates_iova && bo->iova) {
+ struct msm_ccmd_gem_close_req req = {
+ .hdr = MSM_CCMD(GEM_CLOSE, sizeof(req)),
+ .host_handle = virtio_bo_host_handle(bo),
+ };
+
+ virtio_execbuf(bo->dev, &req.hdr, false);
+
+ virtio_dev_free_iova(bo->dev, bo->iova, bo->size);
+ }
+
free(virtio_bo);
}
.destroy = virtio_bo_destroy,
};
+struct allocation_wait {
+ struct fd_bo *bo;
+ int fence_fd;
+ struct msm_ccmd_gem_new_rsp *new_rsp;
+ struct msm_ccmd_gem_info_rsp *info_rsp;
+};
+
+static void
+allocation_wait_execute(void *job, void *gdata, int thread_index)
+{
+ struct allocation_wait *wait = job;
+ struct virtio_bo *virtio_bo = to_virtio_bo(wait->bo);
+
+ sync_wait(wait->fence_fd, -1);
+ close(wait->fence_fd);
+
+ if (wait->new_rsp) {
+ virtio_bo->host_handle = wait->new_rsp->host_handle;
+ } else {
+ virtio_bo->host_handle = wait->info_rsp->host_handle;
+ wait->bo->size = wait->info_rsp->size;
+ }
+ fd_bo_del(wait->bo);
+ free(wait);
+}
+
+static void
+enqueue_allocation_wait(struct fd_bo *bo, int fence_fd,
+ struct msm_ccmd_gem_new_rsp *new_rsp,
+ struct msm_ccmd_gem_info_rsp *info_rsp)
+{
+ struct allocation_wait *wait = malloc(sizeof(*wait));
+
+ wait->bo = fd_bo_ref(bo);
+ wait->fence_fd = fence_fd;
+ wait->new_rsp = new_rsp;
+ wait->info_rsp = info_rsp;
+
+ util_queue_add_job(&bo->dev->submit_queue,
+ wait, &to_virtio_bo(bo)->fence,
+ allocation_wait_execute,
+ NULL, 0);
+}
+
static struct fd_bo *
bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle)
{
if (!virtio_bo)
return NULL;
+ util_queue_fence_init(&virtio_bo->fence);
+
bo = &virtio_bo->base;
+
+ /* Note: we need to set these because allocation_wait_execute() could
+ * run before bo_init_common():
+ */
+ bo->dev = dev;
+ p_atomic_set(&bo->refcnt, 1);
+
bo->size = size;
bo->funcs = &funcs;
bo->handle = handle;
struct fd_bo *
virtio_bo_from_handle(struct fd_device *dev, uint32_t size, uint32_t handle)
{
+ struct virtio_device *virtio_dev = to_virtio_device(dev);
struct fd_bo *bo = bo_from_handle(dev, size, handle);
struct drm_virtgpu_resource_info args = {
.bo_handle = handle,
.hdr = MSM_CCMD(GEM_INFO, sizeof(req)),
.res_id = args.res_handle,
.blob_mem = args.blob_mem,
- .blob_id = p_atomic_inc_return(&to_virtio_device(dev)->next_blob_id),
+ .blob_id = p_atomic_inc_return(&virtio_dev->next_blob_id),
};
+ if (virtio_dev->userspace_allocates_iova) {
+ req.iova = virtio_dev_alloc_iova(dev, size);
+ if (!req.iova) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ }
+
struct msm_ccmd_gem_info_rsp *rsp =
virtio_alloc_rsp(dev, &req.hdr, sizeof(*rsp));
- ret = virtio_execbuf(dev, &req.hdr, true);
- if (ret) {
- INFO_MSG("failed to get gem info: %s", strerror(errno));
- goto fail;
- }
- if (rsp->ret) {
- INFO_MSG("failed (on host) to get gem info: %s", strerror(rsp->ret));
- goto fail;
- }
-
struct virtio_bo *virtio_bo = to_virtio_bo(bo);
virtio_bo->blob_id = req.blob_id;
- virtio_bo->host_handle = rsp->host_handle;
- bo->iova = rsp->iova;
-
- /* If the imported buffer is allocated via virgl context (for example
- * minigbm/arc-cros-gralloc) then the guest gem object size is fake,
- * potentially not accounting for UBWC meta data, required pitch
- * alignment, etc. But in the import path the gallium driver checks
- * that the size matches the minimum size based on layout. So replace
- * the guest potentially-fake size with the real size from the host:
- */
- bo->size = rsp->size;
+
+ if (virtio_dev->userspace_allocates_iova) {
+ int fence_fd;
+ ret = virtio_execbuf_fenced(dev, &req.hdr, -1, &fence_fd, 0);
+ if (ret) {
+ INFO_MSG("failed to get gem info: %s", strerror(errno));
+ goto fail;
+ }
+
+ bo->iova = req.iova;
+
+ enqueue_allocation_wait(bo, fence_fd, NULL, rsp);
+ } else {
+ ret = virtio_execbuf(dev, &req.hdr, true);
+ if (ret) {
+ INFO_MSG("failed to get gem info: %s", strerror(errno));
+ goto fail;
+ }
+ if (rsp->ret) {
+ INFO_MSG("failed (on host) to get gem info: %s", strerror(rsp->ret));
+ goto fail;
+ }
+
+ virtio_bo->host_handle = rsp->host_handle;
+ bo->iova = rsp->iova;
+
+ /* If the imported buffer is allocated via virgl context (for example
+ * minigbm/arc-cros-gralloc) then the guest gem object size is fake,
+ * potentially not accounting for UBWC meta data, required pitch
+ * alignment, etc. But in the import path the gallium driver checks
+ * that the size matches the minimum size based on layout. So replace
+ * the guest potentially-fake size with the real size from the host:
+ */
+ bo->size = rsp->size;
+ }
return bo;
req.blob_id = args.blob_id;
rsp = virtio_alloc_rsp(dev, &req.hdr, sizeof(*rsp));
+
+ if (virtio_dev->userspace_allocates_iova) {
+ req.iova = virtio_dev_alloc_iova(dev, size);
+ if (!req.iova) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ }
}
simple_mtx_lock(&virtio_dev->eb_lock);
virtio_bo->blob_id = args.blob_id;
if (rsp) {
- /* RESOURCE_CREATE_BLOB is async, so we need to wait for host..
- * which is a bit unfortunate, but better to sync here than
- * add extra code to check if we need to wait each time we
- * emit a reloc.
- */
- virtio_host_sync(dev, &req.hdr);
-
- virtio_bo->host_handle = rsp->host_handle;
- bo->iova = rsp->iova;
+ if (virtio_dev->userspace_allocates_iova) {
+ int fence_fd;
+
+ /* We can't get a fence fd from RESOURCE_CREATE_BLOB, so send
+ * a NOP packet just for that purpose:
+ */
+ struct msm_ccmd_nop_req nop = {
+ .hdr = MSM_CCMD(NOP, sizeof(nop)),
+ };
+
+ ret = virtio_execbuf_fenced(dev, &nop.hdr, -1, &fence_fd, 0);
+ if (ret) {
+ INFO_MSG("failed to get gem info: %s", strerror(errno));
+ goto fail;
+ }
+
+ bo->iova = req.iova;
+
+ enqueue_allocation_wait(bo, fence_fd, rsp, NULL);
+ } else {
+ /* RESOURCE_CREATE_BLOB is async, so we need to wait for host..
+ * which is a bit unfortunate, but better to sync here than
+ * add extra code to check if we need to wait each time we
+ * emit a reloc.
+ */
+ virtio_host_sync(dev, &req.hdr);
+
+ virtio_bo->host_handle = rsp->host_handle;
+ bo->iova = rsp->iova;
+ }
}
return bo;
fail:
+ if (req.iova) {
+ assert(virtio_dev->userspace_allocates_iova);
+ virtio_dev_free_iova(dev, req.iova, size);
+ }
return NULL;
}
+
+uint32_t
+virtio_bo_host_handle(struct fd_bo *bo)
+{
+ struct virtio_bo *virtio_bo = to_virtio_bo(bo);
+ util_queue_fence_wait(&virtio_bo->fence);
+ return virtio_bo->host_handle;
+}
virtio_device_destroy(struct fd_device *dev)
{
struct virtio_device *virtio_dev = to_virtio_device(dev);
+
fd_bo_del_locked(virtio_dev->shmem_bo);
+
+ if (virtio_dev->userspace_allocates_iova) {
+ util_vma_heap_finish(&virtio_dev->address_space);
+ }
}
static const struct fd_device_funcs funcs = {
INFO_MSG("version_minor: %u", caps.version_minor);
INFO_MSG("version_patchlevel: %u", caps.version_patchlevel);
INFO_MSG("has_cached_coherent: %u", caps.u.msm.has_cached_coherent);
+ INFO_MSG("va_start: 0x%0"PRIx64, caps.u.msm.va_start);
+ INFO_MSG("va_size: 0x%0"PRIx64, caps.u.msm.va_size);
if (caps.wire_format_version != 1) {
ERROR_MSG("Unsupported protocol version: %u", caps.wire_format_version);
set_debuginfo(dev);
+ if (caps.u.msm.va_start && caps.u.msm.va_size) {
+ virtio_dev->userspace_allocates_iova = true;
+
+ util_vma_heap_init(&virtio_dev->address_space,
+ caps.u.msm.va_start,
+ caps.u.msm.va_size);
+ simple_mtx_init(&virtio_dev->address_space_lock, mtx_plain);
+ }
+
return dev;
}
{
struct virtio_pipe *virtio_pipe = to_virtio_pipe(pipe);
+ if (util_queue_is_initialized(&virtio_pipe->retire_queue))
+ util_queue_destroy(&virtio_pipe->retire_queue);
+
close_submitqueue(pipe, virtio_pipe->queue_id);
fd_pipe_sp_ringpool_fini(pipe);
free(virtio_pipe);
if (!(virtio_pipe->gpu_id || virtio_pipe->chip_id))
goto fail;
+ if (to_virtio_device(dev)->userspace_allocates_iova) {
+ util_queue_init(&virtio_pipe->retire_queue, "rq", 8, 1,
+ UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
+ }
+
INFO_MSG("Pipe Info:");
INFO_MSG(" GPU-id: %d", virtio_pipe->gpu_id);
INFO_MSG(" Chip-id: 0x%016"PRIx64, virtio_pipe->chip_id);
#include "util/u_atomic.h"
#include "util/slab.h"
#include "util/timespec.h"
+#include "util/vma.h"
#include "pipe/p_defines.h"
uint32_t next_blob_id;
uint32_t next_seqno;
+
+ bool userspace_allocates_iova;
+
+ /*
+ * Notes on address space allocation:
+ *
+ * In both the import (GEM_INFO) and new (GEM_NEW) paths we allocate
+ * the iova. The iova (vma on the kernel side) is local to the
+ * address space, the address space is 1:1 with the drm fd (and
+ * therefore with the virtio_device), and the drm fd is not shared
+ * with anything outside of the driver. Combined with handle
+ * de-duplication, this means we can safely assume that an iova has
+ * not yet been set on imported buffers.
+ *
+ * The other complication with userspace allocated iovas is that
+ * the kernel holds a reference to the bo (and the GPU may still be
+ * using its iova) until the submit retires. So a per-pipe
+ * retire_queue is used to hold an extra reference to the submit
+ * (and indirectly all the bos it references) until the out-fence is
+ * signaled.
+ */
+ struct util_vma_heap address_space;
+ simple_mtx_t address_space_lock;
};
FD_DEFINE_CAST(fd_device, virtio_device);
struct fd_device *virtio_device_new(int fd, drmVersionPtr version);
+static inline void
+virtio_dev_free_iova(struct fd_device *dev, uint64_t iova, uint32_t size)
+{
+ struct virtio_device *virtio_dev = to_virtio_device(dev);
+
+ simple_mtx_lock(&virtio_dev->address_space_lock);
+ util_vma_heap_free(&virtio_dev->address_space, iova, size);
+ simple_mtx_unlock(&virtio_dev->address_space_lock);
+}
+
+static inline uint64_t
+virtio_dev_alloc_iova(struct fd_device *dev, uint32_t size)
+{
+ struct virtio_device *virtio_dev = to_virtio_device(dev);
+ uint64_t iova;
+
+ simple_mtx_lock(&virtio_dev->address_space_lock);
+ /* All allocations are aligned to 4k GPU pages: */
+ iova = util_vma_heap_alloc(&virtio_dev->address_space, size, 0x1000);
+ simple_mtx_unlock(&virtio_dev->address_space_lock);
+
+ return iova;
+}
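For reference, the `util_vma_heap` API these helpers wrap (mesa's `util/vma.h`) behaves roughly as sketched below. Note that `util_vma_heap_alloc()` returns 0 on exhaustion, which is why the allocation paths above treat a zero iova as failure:

```c
#include "util/vma.h"

/* Illustrative only; mirrors how the helpers above use the heap: */
static uint64_t
example_alloc(uint64_t va_start, uint64_t va_size, uint32_t size)
{
   struct util_vma_heap heap;

   /* Manage the range [va_start, va_start + va_size): */
   util_vma_heap_init(&heap, va_start, va_size);

   /* Returns a 4k-aligned address, or 0 when the range is exhausted: */
   uint64_t iova = util_vma_heap_alloc(&heap, size, 0x1000);

   /* Free wants back the same size that was allocated: */
   if (iova)
      util_vma_heap_free(&heap, iova, size);

   util_vma_heap_finish(&heap);

   return iova;
}
```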
+
struct virtio_pipe {
struct fd_pipe base;
uint32_t pipe;
* ca3ffcbeb0c8 ("drm/msm/gpu: Don't allow zero fence_id")
*/
int32_t next_submit_fence;
+
+ /**
+ * When userspace_allocates_iova, we need to defer deleting bos (and
+ * therefore releasing their address) until the submits referencing
+ * them have completed. This is accomplished by enqueueing a job
+ * that holds a reference to the submit and waits on the submit's
+ * out-fence before dropping that reference. The submit holds a
+ * reference to the associated ring buffers, which in turn hold a
+ * ref to the associated bos.
+ */
+ struct util_queue retire_queue;
};
FD_DEFINE_CAST(fd_pipe, virtio_pipe);
struct fd_bo base;
uint64_t offset;
+ struct util_queue_fence fence;
+
+ /*
+ * Note: all access to host_handle must wait on fence, *other* than
+ * access from the submit_queue thread (because async bo allocations
+ * are retired on the submit_queue, guaranteeing that the fence is
+ * signaled before host_handle is accessed). All other access must
+ * use virtio_bo_host_handle().
+ */
uint32_t host_handle;
uint32_t blob_id;
};
struct fd_bo *virtio_bo_from_handle(struct fd_device *dev, uint32_t size,
uint32_t handle);
+uint32_t virtio_bo_host_handle(struct fd_bo *bo);
+
/*
* Internal helpers:
*/
#include <inttypes.h>
#include <pthread.h>
+#include "util/libsync.h"
#include "util/os_file.h"
#include "drm/freedreno_ringbuffer_sp.h"
#include "virtio_priv.h"
+static void
+retire_execute(void *job, void *gdata, int thread_index)
+{
+ struct fd_submit_sp *fd_submit = job;
+
+ sync_wait(fd_submit->out_fence_fd, -1);
+ close(fd_submit->out_fence_fd);
+}
+
+static void
+retire_cleanup(void *job, void *gdata, int thread_index)
+{
+ struct fd_submit_sp *fd_submit = job;
+ fd_submit_del(&fd_submit->base);
+}
+
static int
flush_submit_list(struct list_head *submit_list)
{
struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list));
struct virtio_pipe *virtio_pipe = to_virtio_pipe(fd_submit->base.pipe);
struct fd_device *dev = virtio_pipe->base.dev;
+ struct virtio_device *virtio_dev = to_virtio_device(dev);
unsigned nr_cmds = 0;
}
for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
+ struct virtio_bo *virtio_bo = to_virtio_bo(fd_submit->bos[i]);
+
+ assert(util_queue_fence_is_signalled(&virtio_bo->fence));
+
submit_bos[i].flags = fd_submit->bos[i]->reloc_flags;
- submit_bos[i].handle = to_virtio_bo(fd_submit->bos[i])->host_handle;
+ submit_bos[i].handle = virtio_bo->host_handle;
submit_bos[i].presumed = 0;
}
*/
out_fence->use_fence_fd = true;
out_fence_fd = &out_fence->fence_fd;
+ } else if (virtio_dev->userspace_allocates_iova) {
+ /* we are using retire_queue, so we need an out-fence for each
+ * submit.. we can just re-use fd_submit->out_fence_fd for temporary
+ * storage.
+ */
+ out_fence_fd = &fd_submit->out_fence_fd;
}
if (fd_submit->in_fence_fd != -1) {
if (fd_submit->in_fence_fd != -1)
close(fd_submit->in_fence_fd);
+ if (virtio_dev->userspace_allocates_iova) {
+ if (out_fence_fd != &fd_submit->out_fence_fd)
+ fd_submit->out_fence_fd = os_dupfd_cloexec(*out_fence_fd);
+ fd_submit_ref(&fd_submit->base);
+
+ util_queue_fence_init(&fd_submit->retire_fence);
+
+ util_queue_add_job(&virtio_pipe->retire_queue,
+ fd_submit, &fd_submit->retire_fence,
+ retire_execute,
+ retire_cleanup,
+ 0);
+ }
+
return 0;
}
union {
struct {
uint32_t has_cached_coherent;
+ uint32_t priorities;
+ uint64_t va_start;
+ uint64_t va_size;
} msm; /* context_type == VIRTGPU_DRM_CONTEXT_MSM */
} u;
};
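The new capset fields tie back to the `MSM_PARAM_VA_START`/`MSM_PARAM_VA_SIZE` uapi additions at the top: the host can query them once and advertise the range to the guest, which then carves its iovas out of it. A hedged sketch of that query (the helper name is illustrative; the actual host code may differ):

```c
#include <sys/ioctl.h>

#include "msm_drm.h"

/* Hypothetical helper: read a 64-bit MSM_PARAM_x from the host kernel: */
static uint64_t
get_param_u64(int fd, uint32_t param)
{
   struct drm_msm_param req = {
         .pipe = MSM_PIPE_3D0,
         .param = param,
   };

   if (ioctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req))
      return 0;

   return req.value;
}

/* ... then, when filling out the capset: */
/* caps.u.msm.va_start = get_param_u64(fd, MSM_PARAM_VA_START); */
/* caps.u.msm.va_size  = get_param_u64(fd, MSM_PARAM_VA_SIZE);  */
```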