From 53d6bb9fc633a4d0ad99c25ac4a9ca09f12d87bf Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Tue, 3 Mar 2020 21:31:51 -0800 Subject: [PATCH] panfrost: split index cache into shared part Split it into shared part since we're going to re-use it in lima. Reviewed-by: Alyssa Rosenzweig Reviewed-by: Boris Brezillon Signed-off-by: Vasily Khoruzhick Part-of: --- src/gallium/drivers/panfrost/pan_context.c | 38 ++------- src/gallium/drivers/panfrost/pan_resource.c | 37 +-------- src/gallium/drivers/panfrost/pan_resource.h | 24 +----- src/panfrost/Makefile.sources | 4 +- src/panfrost/shared/meson.build | 3 + src/panfrost/shared/pan_minmax_cache.c | 123 ++++++++++++++++++++++++++++ src/panfrost/shared/pan_minmax_cache.h | 52 ++++++++++++ 7 files changed, 190 insertions(+), 91 deletions(-) create mode 100644 src/panfrost/shared/pan_minmax_cache.c create mode 100644 src/panfrost/shared/pan_minmax_cache.h diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c index efc7c37..630f675 100644 --- a/src/gallium/drivers/panfrost/pan_context.c +++ b/src/gallium/drivers/panfrost/pan_context.c @@ -29,6 +29,7 @@ #include "pan_bo.h" #include "pan_context.h" +#include "pan_minmax_cache.h" #include "panfrost-quirks.h" #include "util/macros.h" @@ -1278,8 +1279,6 @@ panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pip needs_indices = false; } - uint64_t ht_key = 0; - if (!info->has_user_indices) { /* Only resources can be directly mapped */ panfrost_batch_add_bo(batch, rsrc->bo, @@ -1289,22 +1288,8 @@ panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pip out = rsrc->bo->gpu + offset; /* Check the cache */ - if (rsrc->index_cache) { - ht_key = (((uint64_t) info->count) << 32) | info->start; - - struct panfrost_minmax_cache *cache = rsrc->index_cache; - - for (unsigned i = 0; i < cache->size; ++i) { - if (cache->keys[i] == ht_key) { - uint64_t hit = cache->values[i]; - - *min_index = hit 
& 0xffffffff; - *max_index = hit >> 32; - needs_indices = false; - break; - } - } - } + needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count, + min_index, max_index); } else { /* Otherwise, we need to upload to transient memory */ const uint8_t *ibuf8 = (const uint8_t *) info->index.user; @@ -1315,20 +1300,9 @@ panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pip /* Fallback */ u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index); - if (!info->has_user_indices && rsrc->index_cache) { - struct panfrost_minmax_cache *cache = rsrc->index_cache; - uint64_t value = (*min_index) | (((uint64_t) *max_index) << 32); - unsigned index = 0; - - if (cache->size == PANFROST_MINMAX_SIZE) { - index = cache->index++; - cache->index = cache->index % PANFROST_MINMAX_SIZE; - } else { - index = cache->size++; - } - - cache->keys[index] = ht_key; - cache->values[index] = value; + if (!info->has_user_indices) { + panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count, + *min_index, *max_index); } } diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c index b9d3cf3..ac3e288 100644 --- a/src/gallium/drivers/panfrost/pan_resource.c +++ b/src/gallium/drivers/panfrost/pan_resource.c @@ -536,39 +536,6 @@ panfrost_resource_destroy(struct pipe_screen *screen, ralloc_free(rsrc); } -/* If we've been caching min/max indices and we update the index - * buffer, that may invalidate the min/max. Check what's been cached vs - * what we've written, and throw out invalid entries. 
*/ - -static void -panfrost_invalidate_index_cache(struct panfrost_resource *rsrc, struct pipe_transfer *transfer) -{ - struct panfrost_minmax_cache *cache = rsrc->index_cache; - - /* Ensure there is a cache to invalidate and a write */ - if (!rsrc->index_cache) return; - if (!(transfer->usage & PIPE_TRANSFER_WRITE)) return; - - unsigned valid_count = 0; - - for (unsigned i = 0; i < cache->size; ++i) { - uint64_t key = cache->keys[i]; - - uint32_t start = key & 0xffffffff; - uint32_t count = key >> 32; - - /* 1D range intersection */ - bool invalid = MAX2(transfer->box.x, start) < MIN2(transfer->box.x + transfer->box.width, start + count); - if (!invalid) { - cache->keys[valid_count] = key; - cache->values[valid_count] = cache->values[i]; - valid_count++; - } - } - - cache->size = valid_count; - cache->index = 0; -} static void * panfrost_transfer_map(struct pipe_context *pctx, @@ -691,7 +658,7 @@ panfrost_transfer_map(struct pipe_context *pctx, if ((usage & PIPE_TRANSFER_WRITE) && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) { rsrc->slices[level].initialized = true; - panfrost_invalidate_index_cache(rsrc, &transfer->base); + panfrost_minmax_cache_invalidate(rsrc->index_cache, &transfer->base); } return bo->cpu @@ -741,7 +708,7 @@ panfrost_transfer_unmap(struct pipe_context *pctx, transfer->box.x, transfer->box.x + transfer->box.width); - panfrost_invalidate_index_cache(prsrc, transfer); + panfrost_minmax_cache_invalidate(prsrc->index_cache, transfer); /* Derefence the resource */ pipe_resource_reference(&transfer->resource, NULL); diff --git a/src/gallium/drivers/panfrost/pan_resource.h b/src/gallium/drivers/panfrost/pan_resource.h index 2728c7f..9f8c289 100644 --- a/src/gallium/drivers/panfrost/pan_resource.h +++ b/src/gallium/drivers/panfrost/pan_resource.h @@ -29,33 +29,11 @@ #include #include "pan_screen.h" #include "pan_allocate.h" +#include "pan_minmax_cache.h" #include "pan_texture.h" #include "drm-uapi/drm.h" #include "util/u_range.h" -/* Index buffer min/max 
cache. We need to caclculate the min/max for arbitrary - * slices (start, start + count) of the index buffer at drawtime. As this can - * be quite expensive, we cache. Conceptually, we just use a hash table mapping - * the key (start, count) to the value (min, max). In practice, mesa's hash - * table implementation is higher overhead than we would like and makes - * handling memory usage a little complicated. So we use this data structure - * instead. Searching is O(n) to the size, but the size is capped at the - * PANFROST_MINMAX_SIZE constant (so this is a tradeoff between cache hit/miss - * ratio and cache search speed). Note that keys are adjacent so we get cache - * line alignment benefits. Insertion is O(1) and in-order until the cache - * fills up, after that it evicts the oldest cached value in a ring facilitated - * by index. - */ - -#define PANFROST_MINMAX_SIZE 64 - -struct panfrost_minmax_cache { - uint64_t keys[PANFROST_MINMAX_SIZE]; - uint64_t values[PANFROST_MINMAX_SIZE]; - unsigned size; - unsigned index; -}; - struct panfrost_resource { struct pipe_resource base; struct { diff --git a/src/panfrost/Makefile.sources b/src/panfrost/Makefile.sources index 4b61906..4f1cdc6 100644 --- a/src/panfrost/Makefile.sources +++ b/src/panfrost/Makefile.sources @@ -56,8 +56,10 @@ midgard_FILES := \ midgard/lcra.c shared_FILES := \ + shared/pan_minmax_cache.c \ shared/pan_tiling.c \ - shared/pan_tiling.h + shared/pan_minmax_cache.h \ + shared/pan_tiling.h \ pandecode_FILES := \ pandecode/common.c \ diff --git a/src/panfrost/shared/meson.build b/src/panfrost/shared/meson.build index 9aae8e8..7f5f398 100644 --- a/src/panfrost/shared/meson.build +++ b/src/panfrost/shared/meson.build @@ -20,7 +20,10 @@ # SOFTWARE. 
libpanfrost_shared_files = files( + 'pan_minmax_cache.c', 'pan_tiling.c', + + 'pan_minmax_cache.h', 'pan_tiling.h', ) diff --git a/src/panfrost/shared/pan_minmax_cache.c b/src/panfrost/shared/pan_minmax_cache.c new file mode 100644 index 0000000..17018b2 --- /dev/null +++ b/src/panfrost/shared/pan_minmax_cache.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +/* Index buffer min/max cache. We need to calculate the min/max for arbitrary + * slices (start, start + count) of the index buffer at drawtime. As this can + * be quite expensive, we cache. Conceptually, we just use a hash table mapping + * the key (start, count) to the value (min, max). In practice, mesa's hash + * table implementation is higher overhead than we would like and makes + * handling memory usage a little complicated. 
So we use this data structure + * instead. Searching is O(n) to the size, but the size is capped at the + * PANFROST_MINMAX_SIZE constant (so this is a tradeoff between cache hit/miss + * ratio and cache search speed). Note that keys are adjacent so we get cache + * line alignment benefits. Insertion is O(1) and in-order until the cache + * fills up, after that it evicts the oldest cached value in a ring facilitated + * by index. + */ + +#include "pan_minmax_cache.h" +/* Look up the cached min/max for the index range (start, count). On a hit, stores the result through min_index and max_index and returns true. On a miss, or when cache is NULL, returns false and leaves both outputs untouched. */ +bool +panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, + unsigned *min_index, unsigned *max_index) +{ + uint64_t ht_key = (((uint64_t)count) << 32) | start; /* key: count in the high 32 bits, start in the low 32 bits */ + bool found = false; + + if (!cache) + return false; + + for (unsigned i = 0; i < cache->size; ++i) { /* linear scan over the populated entries only */ + if (cache->keys[i] == ht_key) { + uint64_t hit = cache->values[i]; + + *min_index = hit & 0xffffffff; /* minimum is stored in the low 32 bits of the value */ + *max_index = hit >> 32; /* maximum is stored in the high 32 bits */ + found = true; + break; + } + } + + return found; +} +/* Record min_index and max_index for the index range (start, count). Does nothing when cache is NULL. No search for an existing entry with the same key is done here. */ +void +panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, + unsigned min_index, unsigned max_index) +{ + uint64_t ht_key = (((uint64_t)count) << 32) | start; + uint64_t value = min_index | (((uint64_t)max_index) << 32); /* packed the way the lookup unpacks it: min low, max high */ + unsigned index = 0; + + if (!cache) + return; + + if (cache->size == PANFROST_MINMAX_SIZE) { /* full: overwrite the slot at cache->index, then advance it with wrap-around */ + index = cache->index++; + cache->index = cache->index % PANFROST_MINMAX_SIZE; + } else { /* not full yet: append in the next unused slot */ + index = cache->size++; + } + + cache->keys[index] = ht_key; + cache->values[index] = value; + +} + +/* If we've been caching min/max indices and we update the index + * buffer, that may invalidate the min/max. Check what's been cached vs + * what we've written, and throw out invalid entries.
*/ + +void +panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, struct pipe_transfer *transfer) +{ + /* Ensure there is a cache to invalidate and a write */ + if (!cache) + return; + + if (!(transfer->usage & PIPE_TRANSFER_WRITE)) + return; + + unsigned valid_count = 0; /* entries kept so far; also the next position to compact into */ + + for (unsigned i = 0; i < cache->size; ++i) { + uint64_t key = cache->keys[i]; + + uint32_t start = key & 0xffffffff; /* unpack the (start, count) range stored in the key */ + uint32_t count = key >> 32; + + /* 1D range intersection: an entry is stale when its range (start, start + count) overlaps the written range (box.x, box.x + box.width). NOTE(review): start and count are whatever the caller passed when adding the entry, while box.x and box.width come from the transfer; no unit conversion happens here, so confirm both ranges use the same units. NOTE(review): start + count is computed on uint32_t operands and presumably wraps for very large values; verify that cannot happen. */ + bool invalid = MAX2(transfer->box.x, start) < MIN2(transfer->box.x + transfer->box.width, start + count); + if (!invalid) { /* still valid: compact it towards the front of both arrays */ + cache->keys[valid_count] = key; + cache->values[valid_count] = cache->values[i]; + valid_count++; + } + } + + cache->size = valid_count; /* everything past the compacted entries is dropped */ + cache->index = 0; /* restart the overwrite position at the first slot */ +} diff --git a/src/panfrost/shared/pan_minmax_cache.h b/src/panfrost/shared/pan_minmax_cache.h new file mode 100644 index 0000000..fe26437 --- /dev/null +++ b/src/panfrost/shared/pan_minmax_cache.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#ifndef H_PAN_MINMAX_CACHE +#define H_PAN_MINMAX_CACHE + +#include "util/u_transfer.h" + +#define PANFROST_MINMAX_SIZE 64 /* capacity of the cache, in entries */ + +struct panfrost_minmax_cache { + uint64_t keys[PANFROST_MINMAX_SIZE]; /* packed lookup keys: (count << 32) | start */ + uint64_t values[PANFROST_MINMAX_SIZE]; /* packed results: (max_index << 32) | min_index */ + unsigned size; /* number of populated entries in keys and values */ + unsigned index; /* slot overwritten by the next insertion once the cache is full */ +}; +/* Returns true and fills min_index and max_index when (start, count) is cached; returns false on a miss or when cache is NULL. */ +bool +panfrost_minmax_cache_get(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, + unsigned *min_index, unsigned *max_index); +/* Stores min_index and max_index for (start, count), overwriting an older entry when the cache is full; no-op when cache is NULL. */ +void +panfrost_minmax_cache_add(struct panfrost_minmax_cache *cache, unsigned start, unsigned count, + unsigned min_index, unsigned max_index); +/* Drops cached entries whose (start, count) range overlaps the box of a transfer that has PIPE_TRANSFER_WRITE set; no-op when cache is NULL. */ +void +panfrost_minmax_cache_invalidate(struct panfrost_minmax_cache *cache, struct pipe_transfer *transfer); + +#endif -- 2.7.4