X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=intel%2Fintel_bufmgr_gem.c;h=007a6d86ca7a1af22639d73c67fde96b2b92095a;hb=6f15ca80815ae34d412c9fbdf526d69c45561a0f;hp=72c8731f722ab61bddc805bcfcc15f283630f630;hpb=0184bb1c6d946bcaf198f7680b3405adca676790;p=platform%2Fupstream%2Flibdrm.git diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 72c8731..007a6d8 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2012 Intel Corporation * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. * @@ -51,16 +51,31 @@ #include #include #include +#include #include "errno.h" +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif #include "libdrm_lists.h" #include "intel_bufmgr.h" #include "intel_bufmgr_priv.h" #include "intel_chipset.h" +#include "intel_aub.h" #include "string.h" #include "i915_drm.h" +#ifdef HAVE_VALGRIND +#include +#include +#define VG(x) x +#else +#define VG(x) +#endif + +#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s))) + #define DBG(...) do { \ if (bufmgr_gem->bufmgr.debug) \ fprintf(stderr, __VA_ARGS__); \ @@ -95,6 +110,10 @@ typedef struct _drm_intel_bufmgr_gem { int num_buckets; time_t time; + drmMMListHead named; + drmMMListHead vma_cache; + int vma_count, vma_open, vma_max; + uint64_t gtt_size; int available_fences; int pci_device; @@ -102,8 +121,16 @@ typedef struct _drm_intel_bufmgr_gem { unsigned int has_bsd : 1; unsigned int has_blt : 1; unsigned int has_relaxed_fencing : 1; + unsigned int has_llc : 1; + unsigned int has_wait_timeout : 1; unsigned int bo_reuse : 1; - char fenced_relocs; + unsigned int no_exec : 1; + unsigned int has_vebox : 1; + bool fenced_relocs; + + char *aub_filename; + FILE *aub_file; + uint32_t aub_offset; } drm_intel_bufmgr_gem; #define DRM_INTEL_RELOC_FENCE (1<<0) @@ -122,8 +149,11 @@ struct _drm_intel_bo_gem { /** * Kenel-assigned global name for this object + * + * List contains both flink named and prime fd'd objects */ unsigned int global_name; + drmMMListHead name_list; /** * Index of the buffer within the validation list while preparing a @@ -152,6 +182,8 @@ struct _drm_intel_bo_gem { void *mem_virtual; /** GTT virtual address for the buffer, saved across map/unmap cycles */ void *gtt_virtual; + int map_count; + drmMMListHead vma_list; /** BO cache list */ drmMMListHead head; @@ -160,24 +192,33 @@ struct _drm_intel_bo_gem { * Boolean of whether this BO and its children have been included in * the current drm_intel_bufmgr_check_aperture_space() total. */ - char included_in_check_aperture; + bool included_in_check_aperture; /** * Boolean of whether this buffer has been used as a relocation * target and had its size accounted for, and thus can't have any * further relocations added to it. */ - char used_as_reloc_target; + bool used_as_reloc_target; /** * Boolean of whether we have encountered an error whilst building the relocation tree. */ - char has_error; + bool has_error; /** * Boolean of whether this buffer can be re-used */ - char reusable; + bool reusable; + + /** + * Boolean of whether the GPU is definitely not accessing the buffer. + * + * This is only valid when reusable, since non-reusable + * buffers are those that have been shared wth other + * processes, so we don't know their state. + */ + bool idle; /** * Size in bytes of this buffer and its relocation descendents. 
@@ -192,6 +233,14 @@ struct _drm_intel_bo_gem { * relocations. */ int reloc_tree_fences; + + /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ + bool mapped_cpu_write; + + uint32_t aub_offset; + + drm_intel_aub_annotation *aub_annotations; + unsigned aub_annotation_count; }; static unsigned int @@ -272,7 +321,9 @@ drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, if (*tiling_mode == I915_TILING_NONE) return ALIGN(pitch, 64); - if (*tiling_mode == I915_TILING_X) + if (*tiling_mode == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && *tiling_mode == I915_TILING_Y)) tile_width = 512; else tile_width = 128; @@ -340,7 +391,7 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) (unsigned long long)bo_gem->relocs[j].offset, target_gem->gem_handle, target_gem->name, - target_bo->offset, + target_bo->offset64, bo_gem->relocs[j].delta); } } @@ -503,7 +554,7 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo) bo_gem->reloc_target_info = malloc(max_relocs * sizeof(drm_intel_reloc_target)); if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { - bo_gem->has_error = 1; + bo_gem->has_error = true; free (bo_gem->relocs); bo_gem->relocs = NULL; @@ -525,11 +576,19 @@ drm_intel_gem_bo_busy(drm_intel_bo *bo) struct drm_i915_gem_busy busy; int ret; - memset(&busy, 0, sizeof(busy)); + if (bo_gem->reusable && bo_gem->idle) + return false; + + VG_CLEAR(busy); busy.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - + if (ret == 0) { + bo_gem->idle = !busy.busy; + return busy.busy; + } else { + return false; + } return (ret == 0 && busy.busy); } @@ -539,6 +598,7 @@ drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, { struct drm_i915_gem_madvise madv; + VG_CLEAR(madv); madv.handle = bo_gem->gem_handle; madv.madv = state; madv.retained = 1; @@ -588,12 +648,12 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, unsigned int page_size = getpagesize(); int ret; struct drm_intel_gem_bo_bucket *bucket; - int alloc_from_cache; + bool alloc_from_cache; unsigned long bo_size; - int for_render = 0; + bool for_render = false; if (flags & BO_ALLOC_FOR_RENDER) - for_render = 1; + for_render = true; /* Round the allocated size up to a power of two number of pages. 
*/ bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); @@ -612,7 +672,7 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, pthread_mutex_lock(&bufmgr_gem->lock); /* Get a buffer out of the cache if available */ retry: - alloc_from_cache = 0; + alloc_from_cache = false; if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { if (for_render) { /* Allocate new render-target BOs from the tail (MRU) @@ -622,7 +682,7 @@ retry: bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head); DRMLISTDEL(&bo_gem->head); - alloc_from_cache = 1; + alloc_from_cache = true; } else { /* For non-render-target BOs (where we're probably * going to map it first thing in order to fill it @@ -634,7 +694,7 @@ retry: bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head); if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { - alloc_from_cache = 1; + alloc_from_cache = true; DRMLISTDEL(&bo_gem->head); } } @@ -666,7 +726,8 @@ retry: return NULL; bo_gem->bo.size = bo_size; - memset(&create, 0, sizeof(create)); + + VG_CLEAR(create); create.size = bo_size; ret = drmIoctl(bufmgr_gem->fd, @@ -690,15 +751,20 @@ retry: drm_intel_gem_bo_free(&bo_gem->bo); return NULL; } + + DRMINITLISTHEAD(&bo_gem->name_list); + DRMINITLISTHEAD(&bo_gem->vma_list); } bo_gem->name = name; atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->reloc_tree_fences = 0; - bo_gem->used_as_reloc_target = 0; - bo_gem->has_error = 0; - bo_gem->reusable = 1; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = true; + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); @@ -739,7 +805,7 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, uint32_t tiling; do { - unsigned long aligned_y; + unsigned long aligned_y, height_alignment; tiling = *tiling_mode; @@ -755,12 +821,17 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, * too so we try to be careful. */ aligned_y = y; - if (tiling == I915_TILING_NONE) - aligned_y = ALIGN(y, 2); - else if (tiling == I915_TILING_X) - aligned_y = ALIGN(y, 8); + height_alignment = 2; + + if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) + height_alignment = 16; + else if (tiling == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && tiling == I915_TILING_Y)) + height_alignment = 8; else if (tiling == I915_TILING_Y) - aligned_y = ALIGN(y, 32); + height_alignment = 32; + aligned_y = ALIGN(y, height_alignment); stride = x * cpp; stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); @@ -792,12 +863,25 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, int ret; struct drm_gem_open open_arg; struct drm_i915_gem_get_tiling get_tiling; + drmMMListHead *list; - bo_gem = calloc(1, sizeof(*bo_gem)); - if (!bo_gem) - return NULL; + /* At the moment most applications only have a few named bo. + * For instance, in a DRI client only the render buffers passed + * between X and the client are named. And since X returns the + * alternating names for the front/back buffer a linear search + * provides a sufficiently fast match. 
+ */ + for (list = bufmgr_gem->named.next; + list != &bufmgr_gem->named; + list = list->next) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); + if (bo_gem->global_name == handle) { + drm_intel_gem_bo_reference(&bo_gem->bo); + return &bo_gem->bo; + } + } - memset(&open_arg, 0, sizeof(open_arg)); + VG_CLEAR(open_arg); open_arg.name = handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, @@ -805,21 +889,40 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, if (ret != 0) { DBG("Couldn't reference %s handle 0x%08x: %s\n", name, handle, strerror(errno)); - free(bo_gem); return NULL; } + /* Now see if someone has used a prime handle to get this + * object from the kernel before by looking through the list + * again for a matching gem_handle + */ + for (list = bufmgr_gem->named.next; + list != &bufmgr_gem->named; + list = list->next) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); + if (bo_gem->gem_handle == open_arg.handle) { + drm_intel_gem_bo_reference(&bo_gem->bo); + return &bo_gem->bo; + } + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + bo_gem->bo.size = open_arg.size; bo_gem->bo.offset = 0; + bo_gem->bo.offset64 = 0; bo_gem->bo.virtual = NULL; bo_gem->bo.bufmgr = bufmgr; bo_gem->name = name; atomic_set(&bo_gem->refcount, 1); bo_gem->validate_index = -1; bo_gem->gem_handle = open_arg.handle; + bo_gem->bo.handle = open_arg.handle; bo_gem->global_name = handle; - bo_gem->reusable = 0; + bo_gem->reusable = false; - memset(&get_tiling, 0, sizeof(get_tiling)); + VG_CLEAR(get_tiling); get_tiling.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, @@ -833,6 +936,8 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, /* XXX stride is unknown */ drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + DRMINITLISTHEAD(&bo_gem->vma_list); + DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); return &bo_gem->bo; @@ -846,22 +951,43 @@ drm_intel_gem_bo_free(drm_intel_bo *bo) struct drm_gem_close close; int ret; - if (bo_gem->mem_virtual) + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); munmap(bo_gem->mem_virtual, bo_gem->bo.size); - if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } /* Close this object */ - memset(&close, 0, sizeof(close)); + VG_CLEAR(close); close.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); if (ret != 0) { DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", bo_gem->gem_handle, bo_gem->name, strerror(errno)); } + free(bo_gem->aub_annotations); free(bo); } +static void +drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) +{ +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + + if (bo_gem->gtt_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); +#endif +} + /** Frees all cached buffers significantly older than @time. 
*/ static void drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) @@ -892,6 +1018,67 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) bufmgr_gem->time = time; } +static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int limit; + + DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, + bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); + + if (bufmgr_gem->vma_max < 0) + return; + + /* We may need to evict a few entries in order to create new mmaps */ + limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; + if (limit < 0) + limit = 0; + + while (bufmgr_gem->vma_count > limit) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bufmgr_gem->vma_cache.next, + vma_list); + assert(bo_gem->map_count == 0); + DRMLISTDELINIT(&bo_gem->vma_list); + + if (bo_gem->mem_virtual) { + munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bo_gem->mem_virtual = NULL; + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { + munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bo_gem->gtt_virtual = NULL; + bufmgr_gem->vma_count--; + } + } +} + +static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open--; + DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count++; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count++; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open++; + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count--; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count--; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + static void drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) { @@ -909,7 +1096,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) } } bo_gem->reloc_count = 0; - bo_gem->used_as_reloc_target = 0; + bo_gem->used_as_reloc_target = false; DBG("bo_unreference final: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); @@ -924,6 +1111,16 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) bo_gem->relocs = NULL; } + /* Clear any left-over mappings */ + if (bo_gem->map_count) { + DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); + bo_gem->map_count = 0; + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + } + + DRMLISTDEL(&bo_gem->name_list); + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); /* Put the buffer into our internal cache for reuse if we can. */ if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && @@ -978,15 +1175,16 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) pthread_mutex_lock(&bufmgr_gem->lock); - /* Allow recursive mapping. Mesa may recursively map buffers with - * nested display loops. 
- */ + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + if (!bo_gem->mem_virtual) { struct drm_i915_gem_mmap mmap_arg; - DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; mmap_arg.offset = 0; mmap_arg.size = bo->size; @@ -998,15 +1196,19 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", __FILE__, __LINE__, bo_gem->gem_handle, bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; } DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->mem_virtual); bo->virtual = bo_gem->mem_virtual; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_CPU; if (write_enable) @@ -1022,28 +1224,34 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) strerror(errno)); } + if (write_enable) + bo_gem->mapped_cpu_write = true; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; } -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +static int +map_gtt(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; int ret; - pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); /* Get a mapping of the buffer if we haven't before. */ if (bo_gem->gtt_virtual == NULL) { struct drm_i915_gem_mmap_gtt mmap_arg; - DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, - bo_gem->name); + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); - memset(&mmap_arg, 0, sizeof(mmap_arg)); + VG_CLEAR(mmap_arg); mmap_arg.handle = bo_gem->gem_handle; /* Get the fake offset back... 
*/ @@ -1056,7 +1264,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) __FILE__, __LINE__, bo_gem->gem_handle, bo_gem->name, strerror(errno)); - pthread_mutex_unlock(&bufmgr_gem->lock); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); return ret; } @@ -1071,7 +1280,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) __FILE__, __LINE__, bo_gem->gem_handle, bo_gem->name, strerror(errno)); - pthread_mutex_unlock(&bufmgr_gem->lock); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); return ret; } } @@ -1081,7 +1291,34 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->gtt_virtual); - /* Now move it to the GTT domain so that the CPU caches are flushed */ + return 0; +} + +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = I915_GEM_DOMAIN_GTT; @@ -1094,21 +1331,53 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); } + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); pthread_mutex_unlock(&bufmgr_gem->lock); return 0; } -int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; - int ret = 0; +#ifdef HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; +#endif + int ret; - if (bo == NULL) - return 0; + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. 
+ */ + if (!bufmgr_gem->has_llc) + return drm_intel_gem_bo_map_gtt(bo); pthread_mutex_lock(&bufmgr_gem->lock); - bo->virtual = NULL; + + ret = map_gtt(bo); + if (ret == 0) { + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + } + pthread_mutex_unlock(&bufmgr_gem->lock); return ret; @@ -1118,29 +1387,59 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_sw_finish sw_finish; - int ret; + int ret = 0; if (bo == NULL) return 0; pthread_mutex_lock(&bufmgr_gem->lock); - /* Cause a flush to happen if the buffer's pinned for scanout, so the - * results show up in a timely manner. - */ - sw_finish.handle = bo_gem->gem_handle; - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_SW_FINISH, - &sw_finish); - ret = ret == -1 ? -errno : 0; + if (bo_gem->map_count <= 0) { + DBG("attempted to unmap an unmapped bo\n"); + pthread_mutex_unlock(&bufmgr_gem->lock); + /* Preserve the old behaviour of just treating this as a + * no-op rather than reporting the error. + */ + return 0; + } + + if (bo_gem->mapped_cpu_write) { + struct drm_i915_gem_sw_finish sw_finish; + + /* Cause a flush to happen if the buffer's pinned for + * scanout, so the results show up in a timely manner. + * Unlike GTT set domains, this only does work if the + * buffer should be scanout-related. + */ + VG_CLEAR(sw_finish); + sw_finish.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SW_FINISH, + &sw_finish); + ret = ret == -1 ? -errno : 0; + + bo_gem->mapped_cpu_write = false; + } - bo->virtual = NULL; + /* We need to unmap after every innovation as we cannot track + * an open vma for every bo as that will exhaasut the system + * limits and cause later failures. + */ + if (--bo_gem->map_count == 0) { + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + bo->virtual = NULL; + } pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } +int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) +{ + return drm_intel_gem_bo_unmap(bo); +} + static int drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, unsigned long size, const void *data) @@ -1150,7 +1449,7 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pwrite pwrite; int ret; - memset(&pwrite, 0, sizeof(pwrite)); + VG_CLEAR(pwrite); pwrite.handle = bo_gem->gem_handle; pwrite.offset = offset; pwrite.size = size; @@ -1175,6 +1474,7 @@ drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; int ret; + VG_CLEAR(get_pipe_from_crtc_id); get_pipe_from_crtc_id.crtc_id = crtc_id; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, @@ -1201,7 +1501,7 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, struct drm_i915_gem_pread pread; int ret; - memset(&pread, 0, sizeof(pread)); + VG_CLEAR(pread); pread.handle = bo_gem->gem_handle; pread.offset = offset; pread.size = size; @@ -1227,6 +1527,58 @@ drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) } /** + * Waits on a BO for the given amount of time. + * + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. 
the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. + */ +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_wait wait; + int ret; + + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + wait.flags = 0; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** * Sets the object to the GTT read and possibly write domain, used by the X * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). * @@ -1241,6 +1593,7 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) struct drm_i915_gem_set_domain set_domain; int ret; + VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; set_domain.write_domain = write_enable ? 
I915_GEM_DOMAIN_GTT : 0; @@ -1264,6 +1617,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) free(bufmgr_gem->exec2_objects); free(bufmgr_gem->exec_objects); free(bufmgr_gem->exec_bos); + free(bufmgr_gem->aub_filename); pthread_mutex_destroy(&bufmgr_gem->lock); @@ -1298,28 +1652,28 @@ static int do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain, - int need_fence) + bool need_fence) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; - int fenced_command; + bool fenced_command; if (bo_gem->has_error) return -ENOMEM; if (target_bo_gem->has_error) { - bo_gem->has_error = 1; + bo_gem->has_error = true; return -ENOMEM; } /* We never use HW fences for rendering on 965+ */ if (bufmgr_gem->gen >= 4) - need_fence = 0; + need_fence = false; fenced_command = need_fence; if (target_bo_gem->tiling_mode == I915_TILING_NONE) - need_fence = 0; + need_fence = false; /* Create a new relocation list if needed */ if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) @@ -1337,7 +1691,7 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, */ assert(!bo_gem->used_as_reloc_target); if (target_bo_gem != bo_gem) { - target_bo_gem->used_as_reloc_target = 1; + target_bo_gem->used_as_reloc_target = true; bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; } /* An object needing a fence is a tiled buffer, so it won't have @@ -1353,7 +1707,7 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, target_bo_gem->gem_handle; bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; - bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; if (target_bo != bo) @@ -1388,7 +1742,50 @@ drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, uint32_t read_domains, uint32_t write_domain) { return do_bo_emit_reloc(bo, offset, target_bo, target_offset, - read_domains, write_domain, 1); + read_domains, write_domain, true); +} + +int +drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reloc_count; +} + +/** + * Removes existing relocation entries in the BO after "start". + * + * This allows a user to avoid a two-step process for state setup with + * counting up all the buffer objects and doing a + * drm_intel_bufmgr_check_aperture_space() before emitting any of the + * relocations for the state setup. Instead, save the state of the + * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the + * state, and then check if it still fits in the aperture. + * + * Any further drm_intel_bufmgr_check_aperture_space() queries + * involving this buffer in the tree are undefined after this call. 
+ */ +void +drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + struct timespec time; + + clock_gettime(CLOCK_MONOTONIC, &time); + + assert(bo_gem->reloc_count >= start); + /* Unreference the cleared target buffers */ + for (i = start; i < bo_gem->reloc_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, + time.tv_sec); + } + } + bo_gem->reloc_count = start; } /** @@ -1411,6 +1808,8 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc(target_bo); @@ -1435,6 +1834,8 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) if (target_bo == bo) continue; + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + /* Continue walking the tree depth-first. */ drm_intel_gem_bo_process_reloc2(target_bo); @@ -1457,11 +1858,12 @@ drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; /* Update the buffer offset */ - if (bufmgr_gem->exec_objects[i].offset != bo->offset) { + if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset, + bo_gem->gem_handle, bo_gem->name, bo->offset64, (unsigned long long)bufmgr_gem->exec_objects[i]. offset); + bo->offset64 = bufmgr_gem->exec_objects[i].offset; bo->offset = bufmgr_gem->exec_objects[i].offset; } } @@ -1477,92 +1879,390 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; /* Update the buffer offset */ - if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { + if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", - bo_gem->gem_handle, bo_gem->name, bo->offset, + bo_gem->gem_handle, bo_gem->name, bo->offset64, (unsigned long long)bufmgr_gem->exec2_objects[i].offset); + bo->offset64 = bufmgr_gem->exec2_objects[i].offset; bo->offset = bufmgr_gem->exec2_objects[i].offset; } } } -static int -drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, - drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +static void +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) +{ + fwrite(&data, 1, 4, bufmgr_gem->aub_file); +} + +static void +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +{ + fwrite(data, 1, size, bufmgr_gem->aub_file); +} + +static void +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_execbuffer execbuf; - int ret, i; + uint32_t *data; + unsigned int i; - if (bo_gem->has_error) - return -ENOMEM; + data = malloc(bo->size); + drm_intel_bo_get_subdata(bo, offset, size, data); - pthread_mutex_lock(&bufmgr_gem->lock); - /* Update indices and set up the validate list. */ - drm_intel_gem_bo_process_reloc(bo); + /* Easy mode: write out bo with no relocations */ + if (!bo_gem->reloc_count) { + aub_out_data(bufmgr_gem, data, size); + free(data); + return; + } - /* Add the batch buffer to the validation list. There are no - * relocations pointing to it. 
- */ - drm_intel_add_validate_buffer(bo); + /* Otherwise, handle the relocations while writing. */ + for (i = 0; i < size / 4; i++) { + int r; + for (r = 0; r < bo_gem->reloc_count; r++) { + struct drm_i915_gem_relocation_entry *reloc; + drm_intel_reloc_target *info; - execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; - execbuf.buffer_count = bufmgr_gem->exec_count; - execbuf.batch_start_offset = 0; - execbuf.batch_len = used; - execbuf.cliprects_ptr = (uintptr_t) cliprects; - execbuf.num_cliprects = num_cliprects; - execbuf.DR1 = 0; - execbuf.DR4 = DR4; + reloc = &bo_gem->relocs[r]; + info = &bo_gem->reloc_target_info[r]; - ret = drmIoctl(bufmgr_gem->fd, - DRM_IOCTL_I915_GEM_EXECBUFFER, - &execbuf); - if (ret != 0) { - ret = -errno; - if (errno == ENOSPC) { - DBG("Execbuffer fails to pin. " - "Estimate: %u. Actual: %u. Available: %u\n", - drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, - bufmgr_gem-> - exec_count), - (unsigned int)bufmgr_gem->gtt_size); - } - } - drm_intel_update_buffer_offsets(bufmgr_gem); + if (reloc->offset == offset + i * 4) { + drm_intel_bo_gem *target_gem; + uint32_t val; - if (bufmgr_gem->bufmgr.debug) - drm_intel_gem_dump_validation_list(bufmgr_gem); + target_gem = (drm_intel_bo_gem *)info->bo; - for (i = 0; i < bufmgr_gem->exec_count; i++) { - drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + val = reloc->delta; + val += target_gem->aub_offset; - /* Disconnect the buffer from the validate list */ - bo_gem->validate_index = -1; - bufmgr_gem->exec_bos[i] = NULL; + aub_out(bufmgr_gem, val); + data[i] = val; + break; + } + } + if (r == bo_gem->reloc_count) { + /* no relocation, just the data */ + aub_out(bufmgr_gem, data[i]); + } } - bufmgr_gem->exec_count = 0; - pthread_mutex_unlock(&bufmgr_gem->lock); - return ret; + free(data); } -static int -drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, - drm_clip_rect_t *cliprects, int num_cliprects, int DR4, - unsigned int flags) +static void +aub_bo_get_address(drm_intel_bo *bo) { - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; - struct drm_i915_gem_execbuffer2 execbuf; - int ret, i; + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - switch (flags & 0x7) { - default: + /* Give the object a graphics address in the AUB file. We + * don't just use the GEM object address because we do AUB + * dumping before execution -- we want to successfully log + * when the hardware might hang, and we might even want to aub + * capture for a driver trying to execute on a different + * generation of hardware by disabling the actual kernel exec + * call. + */ + bo_gem->aub_offset = bufmgr_gem->aub_offset; + bufmgr_gem->aub_offset += bo->size; + /* XXX: Handle aperture overflow. */ + assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); +} + +static void +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, subtype); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, size); + if (bufmgr_gem->gen >= 8) + aub_out(bufmgr_gem, 0); + aub_write_bo_data(bo, offset, size); +} + +/** + * Break up large objects into multiple writes. Otherwise a 128kb VBO + * would overflow the 16 bits of size field in the packet header and + * everything goes badly after that. + */ +static void +aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + uint32_t block_size; + uint32_t sub_offset; + + for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { + block_size = size - sub_offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_write_trace_block(bo, type, subtype, offset + sub_offset, + block_size); + } +} + +static void +aub_write_bo(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t offset = 0; + unsigned i; + + aub_bo_get_address(bo); + + /* Write out each annotated section separately. */ + for (i = 0; i < bo_gem->aub_annotation_count; ++i) { + drm_intel_aub_annotation *annotation = + &bo_gem->aub_annotations[i]; + uint32_t ending_offset = annotation->ending_offset; + if (ending_offset > bo->size) + ending_offset = bo->size; + if (ending_offset > offset) { + aub_write_large_trace_block(bo, annotation->type, + annotation->subtype, + offset, + ending_offset - offset); + offset = ending_offset; + } + } + + /* Write out any remaining unannotated data */ + if (offset < bo->size) { + aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, + offset, bo->size - offset); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, + uint32_t batch_buffer, int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + else if (ring_flag == I915_EXEC_BLT) + ring = AUB_TRACE_TYPE_RING_PRB2; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + if (bufmgr_gem->gen >= 8) { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); + ringbuffer[ring_count++] = batch_buffer; + ringbuffer[ring_count++] = 0; + } else { + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_buffer; + } + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(bufmgr_gem, 0); /* general/surface subtype */ + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); + aub_out(bufmgr_gem, ring_count * 4); + if (bufmgr_gem->gen >= 8) + aub_out(bufmgr_gem, 0); + + /* FIXME: Need some flush operations here? 
*/ + aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); + + /* Update offset pointer */ + bufmgr_gem->aub_offset += 4096; +} + +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + uint32_t cpp; + + switch (format) { + case AUB_DUMP_BMP_FORMAT_8BIT: + cpp = 1; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_4444: + cpp = 2; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_0888: + case AUB_DUMP_BMP_FORMAT_ARGB_8888: + cpp = 4; + break; + default: + printf("Unknown AUB dump format %d\n", format); + return; + } + + if (!bufmgr_gem->aub_file) + return; + + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); + aub_out(bufmgr_gem, (y1 << 16) | x1); + aub_out(bufmgr_gem, + (format << 24) | + (cpp << 19) | + pitch / 4); + aub_out(bufmgr_gem, (height << 16) | width); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, + ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | + ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); +} + +static void +aub_exec(drm_intel_bo *bo, int ring_flag, int used) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + bool batch_buffer_needs_annotations; + + if (!bufmgr_gem->aub_file) + return; + + /* If batch buffer is not annotated, annotate it the best we + * can. + */ + batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; + if (batch_buffer_needs_annotations) { + drm_intel_aub_annotation annotations[2] = { + { AUB_TRACE_TYPE_BATCH, 0, used }, + { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } + }; + drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); + } + + /* Write out all buffers to AUB memory */ + for (i = 0; i < bufmgr_gem->exec_count; i++) { + aub_write_bo(bufmgr_gem->exec_bos[i]); + } + + /* Remove any annotations we added */ + if (batch_buffer_needs_annotations) + drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); + + fflush(bufmgr_gem->aub_file); + + /* + * One frame has been dumped. So reset the aub_offset for the next frame. + * + * FIXME: Can we do this? + */ + bufmgr_gem->aub_offset = 0x10000; +} + +static int +drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_execbuffer execbuf; + int ret, i; + + if (bo_gem->has_error) + return -ENOMEM; + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc(bo); + + /* Add the batch buffer to the validation list. There are no + * relocations pointing to it. + */ + drm_intel_add_validate_buffer(bo); + + VG_CLEAR(execbuf); + execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t) cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER, + &execbuf); + if (ret != 0) { + ret = -errno; + if (errno == ENOSPC) { + DBG("Execbuffer fails to pin. 
" + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem-> + exec_count), + (unsigned int)bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets(bufmgr_gem); + + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->idle = false; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + struct drm_i915_gem_execbuffer2 execbuf; + int ret = 0; + int i; + + switch (flags & 0x7) { + default: return -EINVAL; case I915_EXEC_BLT: if (!bufmgr_gem->has_blt) @@ -1572,6 +2272,10 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, if (!bufmgr_gem->has_bsd) return -EINVAL; break; + case I915_EXEC_VEBOX: + if (!bufmgr_gem->has_vebox) + return -EINVAL; + break; case I915_EXEC_RENDER: case I915_EXEC_DEFAULT: break; @@ -1586,6 +2290,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, */ drm_intel_add_validate_buffer2(bo, 0); + VG_CLEAR(execbuf); execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; execbuf.buffer_count = bufmgr_gem->exec_count; execbuf.batch_start_offset = 0; @@ -1595,9 +2300,17 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, execbuf.DR1 = 0; execbuf.DR4 = DR4; execbuf.flags = flags; - execbuf.rsvd1 = 0; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); execbuf.rsvd2 = 0; + aub_exec(bo, flags, used); + + if (bufmgr_gem->no_exec) + goto skip_execution; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); @@ -1615,6 +2328,7 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, } drm_intel_update_buffer_offsets2(bufmgr_gem); +skip_execution: if (bufmgr_gem->bufmgr.debug) drm_intel_gem_dump_validation_list(bufmgr_gem); @@ -1622,6 +2336,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + bo_gem->idle = false; + /* Disconnect the buffer from the validate list */ bo_gem->validate_index = -1; bufmgr_gem->exec_bos[i] = NULL; @@ -1637,9 +2353,24 @@ drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, drm_clip_rect_t *cliprects, int num_cliprects, int DR4) { - return drm_intel_gem_bo_mrb_exec2(bo, used, - cliprects, num_cliprects, DR4, - I915_EXEC_RENDER); + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + flags); +} + +int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, flags); } static int @@ -1650,7 +2381,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) struct 
drm_i915_gem_pin pin; int ret; - memset(&pin, 0, sizeof(pin)); + VG_CLEAR(pin); pin.handle = bo_gem->gem_handle; pin.alignment = alignment; @@ -1660,6 +2391,7 @@ drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) if (ret != 0) return -errno; + bo->offset64 = pin.offset; bo->offset = pin.offset; return 0; } @@ -1672,7 +2404,7 @@ drm_intel_gem_bo_unpin(drm_intel_bo *bo) struct drm_i915_gem_unpin unpin; int ret; - memset(&unpin, 0, sizeof(unpin)); + VG_CLEAR(unpin); unpin.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); @@ -1753,23 +2485,127 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, return 0; } +drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + struct drm_i915_gem_get_tiling get_tiling; + drmMMListHead *list; + + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + + /* + * See if the kernel has already returned this buffer to us. Just as + * for named buffers, we must not create two bo's pointing at the same + * kernel object + */ + for (list = bufmgr_gem->named.next; + list != &bufmgr_gem->named; + list = list->next) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); + if (bo_gem->gem_handle == handle) { + drm_intel_gem_bo_reference(&bo_gem->bo); + return &bo_gem->bo; + } + } + + if (ret) { + fprintf(stderr,"ret is %d %d\n", ret, errno); + return NULL; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + /* Determine size of bo. The fd-to-handle ioctl really should + * return the size, but it doesn't. If we have kernel 3.12 or + * later, we can lseek on the prime fd to get the size. Older + * kernels will just fail, in which case we fall back to the + * provided (estimated or guess size). 
*/ + ret = lseek(prime_fd, 0, SEEK_END); + if (ret != -1) + bo_gem->bo.size = ret; + else + bo_gem->bo.size = size; + + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + + atomic_set(&bo_gem->refcount, 1); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + DRMINITLISTHEAD(&bo_gem->vma_list); + DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); + + VG_CLEAR(get_tiling); + get_tiling.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling); + if (ret != 0) { + drm_intel_gem_bo_unreference(&bo_gem->bo); + return NULL; + } + bo_gem->tiling_mode = get_tiling.tiling_mode; + bo_gem->swizzle_mode = get_tiling.swizzle_mode; + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); + + return &bo_gem->bo; +} + +int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (DRMLISTEMPTY(&bo_gem->name_list)) + DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + static int drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_gem_flink flink; int ret; if (!bo_gem->global_name) { - memset(&flink, 0, sizeof(flink)); + struct drm_gem_flink flink; + + VG_CLEAR(flink); flink.handle = bo_gem->gem_handle; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); if (ret != 0) return -errno; + bo_gem->global_name = flink.name; - bo_gem->reusable = 0; + bo_gem->reusable = false; + + if (DRMLISTEMPTY(&bo_gem->name_list)) + DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); } *name = bo_gem->global_name; @@ -1788,7 +2624,7 @@ drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; - bufmgr_gem->bo_reuse = 1; + bufmgr_gem->bo_reuse = true; } /** @@ -1804,7 +2640,7 @@ drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) - bufmgr_gem->fenced_relocs = 1; + bufmgr_gem->fenced_relocs = true; } /** @@ -1822,7 +2658,7 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) return 0; total += bo->size; - bo_gem->included_in_check_aperture = 1; + bo_gem->included_in_check_aperture = true; for (i = 0; i < bo_gem->reloc_count; i++) total += @@ -1870,7 +2706,7 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) if (bo == NULL || !bo_gem->included_in_check_aperture) return; - bo_gem->included_in_check_aperture = 0; + bo_gem->included_in_check_aperture = false; for (i = 0; i < bo_gem->reloc_count; i++) drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> @@ -1987,7 +2823,7 @@ drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) { drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - bo_gem->reusable = 0; + bo_gem->reusable = false; return 0; } @@ -2070,6 +2906,281 @@ init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) } } +void +drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr 
*bufmgr, int limit) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + bufmgr_gem->vma_max = limit; + + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +/** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. + */ +static int +get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) +{ + char *devid_override; + int devid; + int ret; + drm_i915_getparam_t gp; + + if (geteuid() == getuid()) { + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + bufmgr_gem->no_exec = true; + return strtod(devid_override, NULL); + } + } + + VG_CLEAR(devid); + VG_CLEAR(gp); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); + } + return devid; +} + +int +drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + return bufmgr_gem->pci_device; +} + +/** + * Sets the AUB filename. + * + * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() + * for it to have any effect. + */ +void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + free(bufmgr_gem->aub_filename); + if (filename) + bufmgr_gem->aub_filename = strdup(filename); +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + const char *filename; + + if (!enable) { + if (bufmgr_gem->aub_file) { + fclose(bufmgr_gem->aub_file); + bufmgr_gem->aub_file = NULL; + } + return; + } + + if (geteuid() != getuid()) + return; + + if (bufmgr_gem->aub_filename) + filename = bufmgr_gem->aub_filename; + else + filename = "intel.aub"; + bufmgr_gem->aub_file = fopen(filename, "w+"); + if (!bufmgr_gem->aub_file) + return; + + /* Start allocating objects from just after the GTT. */ + bufmgr_gem->aub_offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); + aub_out(bufmgr_gem, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(bufmgr_gem, 0); /* app name */ + } + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* comment len */ + + /* Set up the GTT. The max we can handle is 256M */ + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, 0); /* subtype */ + aub_out(bufmgr_gem, 0); /* offset */ + aub_out(bufmgr_gem, gtt_size); /* size */ + if (bufmgr_gem->gen >= 8) + aub_out(bufmgr_gem, 0); + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(bufmgr_gem, entry); + } +} + +drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + VG_CLEAR(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + free(context); + return NULL; + } + + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + VG_CLEAR(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +int +drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_reset_stats stats; + int ret; + + if (ctx == NULL) + return -EINVAL; + + memset(&stats, 0, sizeof(stats)); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + stats.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_RESET_STATS, + &stats); + if (ret == 0) { + if (reset_count != NULL) + *reset_count = stats.reset_count; + + if (active != NULL) + *active = stats.batch_active; + + if (pending != NULL) + *pending = stats.batch_pending; + } + + return ret; +} + +int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + VG_CLEAR(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); + + *result = reg_read.val; + return ret; +} + + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. 
+ */ +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + unsigned size = sizeof(*annotations) * count; + drm_intel_aub_annotation *new_annotations = + count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; + if (new_annotations == NULL) { + free(bo_gem->aub_annotations); + bo_gem->aub_annotations = NULL; + bo_gem->aub_annotation_count = 0; + return; + } + memcpy(new_annotations, annotations, size); + bo_gem->aub_annotations = new_annotations; + bo_gem->aub_annotation_count = count; +} + /** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. @@ -2082,8 +3193,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) drm_intel_bufmgr_gem *bufmgr_gem; struct drm_i915_gem_get_aperture aperture; drm_i915_getparam_t gp; - int ret; - int exec2 = 0; + int ret, tmp; + bool exec2 = false; bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); if (bufmgr_gem == NULL) @@ -2112,27 +3223,43 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) (int)bufmgr_gem->gtt_size / 1024); } - gp.param = I915_PARAM_CHIPSET_ID; - gp.value = &bufmgr_gem->pci_device; - ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret) { - fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); - fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); - } + bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); - if (IS_GEN2(bufmgr_gem)) + if (IS_GEN2(bufmgr_gem->pci_device)) bufmgr_gem->gen = 2; - else if (IS_GEN3(bufmgr_gem)) + else if (IS_GEN3(bufmgr_gem->pci_device)) bufmgr_gem->gen = 3; - else if (IS_GEN4(bufmgr_gem)) + else if (IS_GEN4(bufmgr_gem->pci_device)) bufmgr_gem->gen = 4; - else + else if (IS_GEN5(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 5; + else if (IS_GEN6(bufmgr_gem->pci_device)) bufmgr_gem->gen = 6; + else if (IS_GEN7(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 7; + else if (IS_GEN8(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 8; + else { + free(bufmgr_gem); + return NULL; + } + + if (IS_GEN3(bufmgr_gem->pci_device) && + bufmgr_gem->gtt_size > 256*1024*1024) { + /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't + * be used for tiled blits. To simplify the accounting, just + * substract the unmappable part (fixed to 256MB on all known + * gen3 devices) if the kernel advertises it. 
*/ + bufmgr_gem->gtt_size -= 256*1024*1024; + } + + VG_CLEAR(gp); + gp.value = &tmp; gp.param = I915_PARAM_HAS_EXECBUF2; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); if (!ret) - exec2 = 1; + exec2 = true; gp.param = I915_PARAM_HAS_BSD; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); @@ -2146,6 +3273,25 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); bufmgr_gem->has_relaxed_fencing = ret == 0; + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + + gp.param = I915_PARAM_HAS_LLC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret != 0) { + /* Kernel does not supports HAS_LLC query, fallback to GPU + * generation detection and assume that we have LLC on GEN6/7 + */ + bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | + IS_GEN7(bufmgr_gem->pci_device)); + } else + bufmgr_gem->has_llc = *gp.value; + + gp.param = I915_PARAM_HAS_VEBOX; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); + if (bufmgr_gem->gen < 4) { gp.param = I915_PARAM_NUM_FENCES_AVAIL; gp.value = &bufmgr_gem->available_fences; @@ -2216,7 +3362,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) drm_intel_gem_get_pipe_from_crtc_id; bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; + DRMINITLISTHEAD(&bufmgr_gem->named); init_cache_buckets(bufmgr_gem); + DRMINITLISTHEAD(&bufmgr_gem->vma_cache); + bufmgr_gem->vma_max = -1; /* unlimited by default */ + return &bufmgr_gem->bufmgr; }
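
The hunks above add a number of new public entry points to libdrm_intel. The short sketches that follow show typical call patterns for them; all helper names, sizes and timeouts are illustrative assumptions, not part of the patch.

drm_intel_gem_bo_wait() gives callers a bounded wait on a buffer, returning -ETIME when the last batch referencing it has not completed in time (and falling back to an unbounded wait on kernels without I915_PARAM_HAS_WAIT_TIMEOUT). A minimal sketch:

/* Sketch: wait up to one second for the GPU to finish with a buffer.
 * Assumes an already-allocated bo and the libdrm_intel headers
 * (pkg-config libdrm_intel). */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <intel_bufmgr.h>

static int wait_for_bo(drm_intel_bo *bo)
{
	/* Timeout is in nanoseconds; a negative value waits forever. */
	int ret = drm_intel_gem_bo_wait(bo, 1000000000ll);

	if (ret == -ETIME)
		fprintf(stderr, "bo still busy after 1s\n");
	return ret;
}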
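
drm_intel_gem_bo_map_unsynchronized() maps through the GTT without stalling on the GPU when the device has LLC, and quietly degrades to the ordinary synchronized GTT map otherwise, as the comment in the hunk explains. A sketch of a streaming write into a region the GPU is known not to be reading (offset and size are example values):

#include <string.h>
#include <intel_bufmgr.h>

static int upload_nonblocking(drm_intel_bo *bo, const void *data,
			      unsigned long offset, unsigned long size)
{
	int ret = drm_intel_gem_bo_map_unsynchronized(bo);
	if (ret)
		return ret;

	/* bo->virtual points at the GTT mapping on success. */
	memcpy((char *)bo->virtual + offset, data, size);
	return drm_intel_bo_unmap(bo);
}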
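
The PRIME helpers convert between GEM buffers and dma-buf file descriptors so buffers can be shared with other processes or drivers. Export marks the buffer non-reusable; on import, the size argument is only a fallback for kernels whose prime fd cannot be lseek()ed to discover the real size. Sketch:

#include <intel_bufmgr.h>

/* Export: obtain a dma-buf fd to hand to another process or API. */
static int share_bo(drm_intel_bo *bo, int *prime_fd_out)
{
	return drm_intel_bo_gem_export_to_prime(bo, prime_fd_out);
}

/* Import: wrap a received dma-buf fd.  size_estimate is used only
 * when the kernel cannot report the real object size. */
static drm_intel_bo *adopt_bo(drm_intel_bufmgr *bufmgr,
			      int prime_fd, int size_estimate)
{
	return drm_intel_bo_gem_create_from_prime(bufmgr, prime_fd,
						  size_estimate);
}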
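
Hardware contexts get create/destroy calls and an execbuffer variant that carries the context id, so per-context GPU state survives between batches. A sketch of submitting a prepared batch inside a private context (I915_EXEC_RENDER comes from i915_drm.h; the batch and its used length are assumed to be built elsewhere):

#include <stddef.h>
#include <intel_bufmgr.h>
#include <i915_drm.h>

static int exec_in_context(drm_intel_bufmgr *bufmgr,
			   drm_intel_bo *batch, int used)
{
	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
	int ret;

	if (ctx == NULL)
		return -1;	/* kernel without context support */

	ret = drm_intel_gem_bo_context_exec(batch, ctx, used,
					    I915_EXEC_RENDER);

	drm_intel_gem_context_destroy(ctx);
	return ret;
}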
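
drm_intel_gem_bo_get_reloc_count() and drm_intel_gem_bo_clear_relocs() support the speculative state-emission pattern described in the doc comment: record the relocation count, emit state, and roll back if the aperture check then fails. A sketch, where emit_state() stands in for a caller-supplied helper:

#include <stdbool.h>
#include <intel_bufmgr.h>

/* Hypothetical helper that appends state and relocations to batch. */
extern void emit_state(drm_intel_bo *batch);

static bool try_emit_state(drm_intel_bo *batch)
{
	int saved = drm_intel_gem_bo_get_reloc_count(batch);

	emit_state(batch);

	if (drm_intel_bufmgr_check_aperture_space(&batch, 1) != 0) {
		/* Doesn't fit: drop the relocations just added. */
		drm_intel_gem_bo_clear_relocs(batch, saved);
		return false;
	}
	return true;
}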
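
AUB dumping writes every buffer and submitted batch into a trace file that the simulator can replay; per the comment above, set_aub_filename() must be called before enabling the dump for it to take effect. A sketch of toggling capture (the file name is an example):

#include <intel_bufmgr.h>

static void enable_aub_capture(drm_intel_bufmgr *bufmgr)
{
	/* Has to happen before enabling, or the default "intel.aub"
	 * name is used. */
	drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "frame.aub");
	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
}

static void disable_aub_capture(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);
}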
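
Two smaller additions: the VMA cache introduced alongside map_count tracking can be capped when a process risks exhausting its mmap slots, and per-context reset statistics can be polled. Both values below are examples:

#include <stdint.h>
#include <intel_bufmgr.h>

static void limit_vma_cache(drm_intel_bufmgr *bufmgr)
{
	/* Keep at most ~128 cached CPU/GTT mappings alive; -1 (the
	 * default) means unlimited. */
	drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 128);
}

static int query_resets(drm_intel_context *ctx, uint32_t *count)
{
	uint32_t active, pending;

	return drm_intel_get_reset_stats(ctx, count, &active, &pending);
}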