From 733d32f3765be84a7e908df7e99a278cadcee853 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 20 Jun 2013 10:00:18 -0700 Subject: [PATCH] i915: Fork the shared code from i965. Of this 15000 lines of code in intel/, we've identified 4000 lines that are trivially unnecessary for i915, and another 1000 that are pointless for i965, and expect to find more as time goes on. Split the i915 driver off, so that we can continue active development on i965 without worrying about breaking i915. Acked-by: Kenneth Graunke Acked-by: Chad Versace Acked-by: Adam Jackson (and I hear second hand that idr is OK with it, too) --- src/mesa/drivers/dri/i915/Makefile.am | 1 - src/mesa/drivers/dri/i915/intel_batchbuffer.c | 561 +++++- src/mesa/drivers/dri/i915/intel_batchbuffer.h | 173 ++ src/mesa/drivers/dri/i915/intel_blit.c | 771 ++++++- src/mesa/drivers/dri/i915/intel_blit.h | 83 + src/mesa/drivers/dri/i915/intel_buffer_objects.c | 854 +++++++- src/mesa/drivers/dri/i915/intel_buffer_objects.h | 92 + src/mesa/drivers/dri/i915/intel_buffers.c | 119 +- src/mesa/drivers/dri/i915/intel_buffers.h | 56 + src/mesa/drivers/dri/i915/intel_chipset.h | 314 +++ src/mesa/drivers/dri/i915/intel_clear.h | 38 + src/mesa/drivers/dri/i915/intel_context.c | 1011 +++++++++- src/mesa/drivers/dri/i915/intel_context.h | 643 ++++++ src/mesa/drivers/dri/i915/intel_extensions.c | 189 +- src/mesa/drivers/dri/i915/intel_extensions.h | 42 + src/mesa/drivers/dri/i915/intel_fbo.c | 950 ++++++++- src/mesa/drivers/dri/i915/intel_fbo.h | 212 ++ src/mesa/drivers/dri/i915/intel_mipmap_tree.c | 2350 +++++++++++++++++++++- src/mesa/drivers/dri/i915/intel_mipmap_tree.h | 788 ++++++++ src/mesa/drivers/dri/i915/intel_pixel.c | 136 +- src/mesa/drivers/dri/i915/intel_pixel.h | 63 + src/mesa/drivers/dri/i915/intel_pixel_bitmap.c | 364 +++- src/mesa/drivers/dri/i915/intel_pixel_copy.c | 211 +- src/mesa/drivers/dri/i915/intel_pixel_draw.c | 59 +- src/mesa/drivers/dri/i915/intel_pixel_read.c | 203 +- src/mesa/drivers/dri/i915/intel_reg.h | 300 +++ src/mesa/drivers/dri/i915/intel_regions.c | 354 +++- src/mesa/drivers/dri/i915/intel_regions.h | 161 ++ src/mesa/drivers/dri/i915/intel_resolve_map.c | 112 +- src/mesa/drivers/dri/i915/intel_resolve_map.h | 104 + src/mesa/drivers/dri/i915/intel_screen.c | 1410 ++++++++++++- src/mesa/drivers/dri/i915/intel_screen.h | 89 + src/mesa/drivers/dri/i915/intel_state.c | 196 +- src/mesa/drivers/dri/i915/intel_syncobj.c | 125 +- src/mesa/drivers/dri/i915/intel_tex.c | 190 +- src/mesa/drivers/dri/i915/intel_tex.h | 82 + src/mesa/drivers/dri/i915/intel_tex_copy.c | 133 +- src/mesa/drivers/dri/i915/intel_tex_image.c | 400 +++- src/mesa/drivers/dri/i915/intel_tex_layout.c | 215 +- src/mesa/drivers/dri/i915/intel_tex_layout.h | 40 + src/mesa/drivers/dri/i915/intel_tex_obj.h | 84 + src/mesa/drivers/dri/i915/intel_tex_subimage.c | 336 +++- src/mesa/drivers/dri/i915/intel_tex_validate.c | 142 +- 43 files changed, 14731 insertions(+), 25 deletions(-) mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_batchbuffer.c create mode 100644 src/mesa/drivers/dri/i915/intel_batchbuffer.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_blit.c create mode 100644 src/mesa/drivers/dri/i915/intel_blit.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_buffer_objects.c create mode 100644 src/mesa/drivers/dri/i915/intel_buffer_objects.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_buffers.c create mode 100644 src/mesa/drivers/dri/i915/intel_buffers.h create mode 100644 src/mesa/drivers/dri/i915/intel_chipset.h create mode 100644 src/mesa/drivers/dri/i915/intel_clear.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_context.c create mode 100644 src/mesa/drivers/dri/i915/intel_context.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_extensions.c create mode 100644 src/mesa/drivers/dri/i915/intel_extensions.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_fbo.c create mode 100644 src/mesa/drivers/dri/i915/intel_fbo.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_mipmap_tree.c create mode 100644 src/mesa/drivers/dri/i915/intel_mipmap_tree.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_pixel.c create mode 100644 src/mesa/drivers/dri/i915/intel_pixel.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_pixel_bitmap.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_pixel_copy.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_pixel_draw.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_pixel_read.c create mode 100644 src/mesa/drivers/dri/i915/intel_reg.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_regions.c create mode 100644 src/mesa/drivers/dri/i915/intel_regions.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_resolve_map.c create mode 100644 src/mesa/drivers/dri/i915/intel_resolve_map.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_screen.c create mode 100644 src/mesa/drivers/dri/i915/intel_screen.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_state.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_syncobj.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex.c create mode 100644 src/mesa/drivers/dri/i915/intel_tex.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex_copy.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex_image.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex_layout.c create mode 100644 src/mesa/drivers/dri/i915/intel_tex_layout.h create mode 100644 src/mesa/drivers/dri/i915/intel_tex_obj.h mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex_subimage.c mode change 120000 => 100644 src/mesa/drivers/dri/i915/intel_tex_validate.c diff --git a/src/mesa/drivers/dri/i915/Makefile.am b/src/mesa/drivers/dri/i915/Makefile.am index 9501719..a4055e1 100644 --- a/src/mesa/drivers/dri/i915/Makefile.am +++ b/src/mesa/drivers/dri/i915/Makefile.am @@ -30,7 +30,6 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ - -I$(top_srcdir)/src/mesa/drivers/dri/intel \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ -I$(top_builddir)/src/mesa/drivers/dri/common \ $(DEFINES) \ diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c deleted file mode 120000 index d38cdf3..0000000 --- a/src/mesa/drivers/dri/i915/intel_batchbuffer.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_batchbuffer.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c new file mode 100644 index 0000000..8c6524e --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c @@ -0,0 +1,560 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" +#include "intel_reg.h" +#include "intel_bufmgr.h" +#include "intel_buffers.h" + +static void +intel_batchbuffer_reset(struct intel_context *intel); + +struct cached_batch_item { + struct cached_batch_item *next; + uint16_t header; + uint16_t size; +}; + +static void clear_cache( struct intel_context *intel ) +{ + struct cached_batch_item *item = intel->batch.cached_items; + + while (item) { + struct cached_batch_item *next = item->next; + free(item); + item = next; + } + + intel->batch.cached_items = NULL; +} + +void +intel_batchbuffer_init(struct intel_context *intel) +{ + intel_batchbuffer_reset(intel); + + if (intel->gen >= 6) { + /* We can't just use brw_state_batch to get a chunk of space for + * the gen6 workaround because it involves actually writing to + * the buffer, and the kernel doesn't let us write to the batch. + */ + intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr, + "pipe_control workaround", + 4096, 4096); + } + + if (!intel->has_llc) { + intel->batch.cpu_map = malloc(intel->maxBatchSize); + intel->batch.map = intel->batch.cpu_map; + } +} + +static void +intel_batchbuffer_reset(struct intel_context *intel) +{ + if (intel->batch.last_bo != NULL) { + drm_intel_bo_unreference(intel->batch.last_bo); + intel->batch.last_bo = NULL; + } + intel->batch.last_bo = intel->batch.bo; + + clear_cache(intel); + + intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", + intel->maxBatchSize, 4096); + if (intel->has_llc) { + drm_intel_bo_map(intel->batch.bo, true); + intel->batch.map = intel->batch.bo->virtual; + } + + intel->batch.reserved_space = BATCH_RESERVED; + intel->batch.state_batch_offset = intel->batch.bo->size; + intel->batch.used = 0; + intel->batch.needs_sol_reset = false; +} + +void +intel_batchbuffer_save_state(struct intel_context *intel) +{ + intel->batch.saved.used = intel->batch.used; + intel->batch.saved.reloc_count = + drm_intel_gem_bo_get_reloc_count(intel->batch.bo); +} + +void +intel_batchbuffer_reset_to_saved(struct intel_context *intel) +{ + drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count); + + intel->batch.used = intel->batch.saved.used; + + /* Cached batch state is dead, since we just cleared some unknown part of the + * batchbuffer. Assume that the caller resets any other state necessary. + */ + clear_cache(intel); +} + +void +intel_batchbuffer_free(struct intel_context *intel) +{ + free(intel->batch.cpu_map); + drm_intel_bo_unreference(intel->batch.last_bo); + drm_intel_bo_unreference(intel->batch.bo); + drm_intel_bo_unreference(intel->batch.workaround_bo); + clear_cache(intel); +} + +static void +do_batch_dump(struct intel_context *intel) +{ + struct drm_intel_decode *decode; + struct intel_batchbuffer *batch = &intel->batch; + int ret; + + decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID); + if (!decode) + return; + + ret = drm_intel_bo_map(batch->bo, false); + if (ret == 0) { + drm_intel_decode_set_batch_pointer(decode, + batch->bo->virtual, + batch->bo->offset, + batch->used); + } else { + fprintf(stderr, + "WARNING: failed to map batchbuffer (%s), " + "dumping uploaded data instead.\n", strerror(ret)); + + drm_intel_decode_set_batch_pointer(decode, + batch->map, + batch->bo->offset, + batch->used); + } + + drm_intel_decode(decode); + + drm_intel_decode_context_free(decode); + + if (ret == 0) { + drm_intel_bo_unmap(batch->bo); + + if (intel->vtbl.debug_batch != NULL) + intel->vtbl.debug_batch(intel); + } +} + +/* TODO: Push this whole function into bufmgr. + */ +static int +do_flush_locked(struct intel_context *intel) +{ + struct intel_batchbuffer *batch = &intel->batch; + int ret = 0; + + if (intel->has_llc) { + drm_intel_bo_unmap(batch->bo); + } else { + ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); + if (ret == 0 && batch->state_batch_offset != batch->bo->size) { + ret = drm_intel_bo_subdata(batch->bo, + batch->state_batch_offset, + batch->bo->size - batch->state_batch_offset, + (char *)batch->map + batch->state_batch_offset); + } + } + + if (!intel->intelScreen->no_hw) { + int flags; + + if (intel->gen < 6 || !batch->is_blit) { + flags = I915_EXEC_RENDER; + } else { + flags = I915_EXEC_BLT; + } + + if (batch->needs_sol_reset) + flags |= I915_EXEC_GEN7_SOL_RESET; + + if (ret == 0) { + if (unlikely(INTEL_DEBUG & DEBUG_AUB) && intel->vtbl.annotate_aub) + intel->vtbl.annotate_aub(intel); + if (intel->hw_ctx == NULL || batch->is_blit) { + ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0, + flags); + } else { + ret = drm_intel_gem_bo_context_exec(batch->bo, intel->hw_ctx, + 4 * batch->used, flags); + } + } + } + + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + do_batch_dump(intel); + + if (ret != 0) { + fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret)); + exit(1); + } + intel->vtbl.new_batch(intel); + + return ret; +} + +int +_intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line) +{ + int ret; + + if (intel->batch.used == 0) + return 0; + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch.bo; + drm_intel_bo_reference(intel->first_post_swapbuffers_batch); + } + + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, + 4*intel->batch.used); + + intel->batch.reserved_space = 0; + + if (intel->vtbl.finish_batch) + intel->vtbl.finish_batch(intel); + + /* Mark the end of the buffer. */ + intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END); + if (intel->batch.used & 1) { + /* Round batchbuffer usage to 2 DWORDs. */ + intel_batchbuffer_emit_dword(intel, MI_NOOP); + } + + intel_upload_finish(intel); + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!intel->no_batch_wrap); + + ret = do_flush_locked(intel); + + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { + fprintf(stderr, "waiting for idle\n"); + drm_intel_bo_wait_rendering(intel->batch.bo); + } + + /* Reset the buffer: + */ + intel_batchbuffer_reset(intel); + + return ret; +} + + +/* This is the only way buffers get added to the validate list. + */ +bool +intel_batchbuffer_emit_reloc(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used, + buffer, delta, + read_domains, write_domain); + assert(ret == 0); + (void)ret; + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); + + return true; +} + +bool +intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t delta) +{ + int ret; + + ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used, + buffer, delta, + read_domains, write_domain); + assert(ret == 0); + (void)ret; + + /* + * Using the old buffer offset, write in what the right data would + * be, in case the buffer doesn't move and we can short-circuit the + * relocation processing in the kernel + */ + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); + + return true; +} + +void +intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit) +{ + assert((bytes & 3) == 0); + intel_batchbuffer_require_space(intel, bytes, is_blit); + __memcpy(intel->batch.map + intel->batch.used, data, bytes); + intel->batch.used += bytes >> 2; +} + +void +intel_batchbuffer_cached_advance(struct intel_context *intel) +{ + struct cached_batch_item **prev = &intel->batch.cached_items, *item; + uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t); + uint32_t *start = intel->batch.map + intel->batch.emit; + uint16_t op = *start >> 16; + + while (*prev) { + uint32_t *old; + + item = *prev; + old = intel->batch.map + item->header; + if (op == *old >> 16) { + if (item->size == sz && memcmp(old, start, sz) == 0) { + if (prev != &intel->batch.cached_items) { + *prev = item->next; + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + } + intel->batch.used = intel->batch.emit; + return; + } + + goto emit; + } + prev = &item->next; + } + + item = malloc(sizeof(struct cached_batch_item)); + if (item == NULL) + return; + + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + +emit: + item->size = sz; + item->header = intel->batch.emit; +} + +/** + * Restriction [DevSNB, DevIVB]: + * + * Prior to changing Depth/Stencil Buffer state (i.e. any combination of + * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER, + * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall + * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth + * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by + * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set), + * unless SW can otherwise guarantee that the pipeline from WM onwards is + * already flushed (e.g., via a preceding MI_FLUSH). + */ +void +intel_emit_depth_stall_flushes(struct intel_context *intel) +{ + assert(intel->gen >= 6 && intel->gen <= 7); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH() + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); +} + +/** + * From the BSpec, volume 2a.03: VS Stage Input / State: + * "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth + * stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS, + * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS, + * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs + * to be sent before any combination of VS associated 3DSTATE." + */ +void +gen7_emit_vs_workaround_flush(struct intel_context *intel) +{ + assert(intel->gen == 7); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); +} + +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6. From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + * "1 of the following must also be set: + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it. Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either. Notify enable is IRQs, which aren't + * really our business. That leaves only stall at scoreboard. + */ +void +intel_emit_post_sync_nonzero_flush(struct intel_context *intel) +{ + if (!intel->batch.need_workaround_flush) + return; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_RELOC(intel->batch.workaround_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + intel->batch.need_workaround_flush = false; +} + +/* Emit a pipelined flush to either flush render and texture cache for + * reading from a FBO-drawn texture, or flush so that frontbuffer + * render appears on the screen in DRI1. + * + * This is also used for the always_flush_cache driconf debug option. + */ +void +intel_batchbuffer_emit_mi_flush(struct intel_context *intel) +{ + if (intel->gen >= 6) { + if (intel->batch.is_blit) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + if (intel->gen == 6) { + /* Hardware workaround: SNB B-Spec says: + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache + * Flush Enable =1, a PIPE_CONTROL with any non-zero + * post-sync-op is required. + */ + intel_emit_post_sync_nonzero_flush(intel); + } + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } + } else if (intel->gen >= 4) { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(1); + OUT_BATCH(MI_FLUSH); + ADVANCE_BATCH(); + } +} diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.h b/src/mesa/drivers/dri/i915/intel_batchbuffer.h new file mode 100644 index 0000000..1a6d1aa --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.h @@ -0,0 +1,173 @@ +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "main/mtypes.h" + +#include "intel_context.h" +#include "intel_bufmgr.h" +#include "intel_reg.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Number of bytes to reserve for commands necessary to complete a batch. + * + * This includes: + * - MI_BATCHBUFFER_END (4 bytes) + * - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes) + * - Any state emitted by vtbl->finish_batch(): + * - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes) + */ +#define BATCH_RESERVED 24 + +struct intel_batchbuffer; + +void intel_batchbuffer_init(struct intel_context *intel); +void intel_batchbuffer_free(struct intel_context *intel); +void intel_batchbuffer_save_state(struct intel_context *intel); +void intel_batchbuffer_reset_to_saved(struct intel_context *intel); + +int _intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line); + +#define intel_batchbuffer_flush(intel) \ + _intel_batchbuffer_flush(intel, __FILE__, __LINE__) + + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +void intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit); + +bool intel_batchbuffer_emit_reloc(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); +bool intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, + drm_intel_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); +void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); +void intel_emit_post_sync_nonzero_flush(struct intel_context *intel); +void intel_emit_depth_stall_flushes(struct intel_context *intel); +void gen7_emit_vs_workaround_flush(struct intel_context *intel); + +static INLINE uint32_t float_as_int(float f) +{ + union { + float f; + uint32_t d; + } fi; + + fi.f = f; + return fi.d; +} + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... + */ +static INLINE unsigned +intel_batchbuffer_space(struct intel_context *intel) +{ + return (intel->batch.state_batch_offset - intel->batch.reserved_space) + - intel->batch.used*4; +} + + +static INLINE void +intel_batchbuffer_emit_dword(struct intel_context *intel, GLuint dword) +{ +#ifdef DEBUG + assert(intel_batchbuffer_space(intel) >= 4); +#endif + intel->batch.map[intel->batch.used++] = dword; +} + +static INLINE void +intel_batchbuffer_emit_float(struct intel_context *intel, float f) +{ + intel_batchbuffer_emit_dword(intel, float_as_int(f)); +} + +static INLINE void +intel_batchbuffer_require_space(struct intel_context *intel, + GLuint sz, int is_blit) +{ + + if (intel->gen >= 6 && + intel->batch.is_blit != is_blit && intel->batch.used) { + intel_batchbuffer_flush(intel); + } + + intel->batch.is_blit = is_blit; + +#ifdef DEBUG + assert(sz < intel->maxBatchSize - BATCH_RESERVED); +#endif + if (intel_batchbuffer_space(intel) < sz) + intel_batchbuffer_flush(intel); +} + +static INLINE void +intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit) +{ + intel_batchbuffer_require_space(intel, n * 4, is_blit); + + intel->batch.emit = intel->batch.used; +#ifdef DEBUG + intel->batch.total = n; +#endif +} + +static INLINE void +intel_batchbuffer_advance(struct intel_context *intel) +{ +#ifdef DEBUG + struct intel_batchbuffer *batch = &intel->batch; + unsigned int _n = batch->used - batch->emit; + assert(batch->total != 0); + if (_n != batch->total) { + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", + _n, batch->total); + abort(); + } + batch->total = 0; +#endif +} + +void intel_batchbuffer_cached_advance(struct intel_context *intel); + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel, n, true) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel, d) +#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel,f) +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + intel_batchbuffer_emit_reloc(intel, buf, \ + read_domains, write_domain, delta); \ +} while (0) +#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ + intel_batchbuffer_emit_reloc_fenced(intel, buf, \ + read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel); +#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_blit.c b/src/mesa/drivers/dri/i915/intel_blit.c deleted file mode 120000 index dd6c8d1..0000000 --- a/src/mesa/drivers/dri/i915/intel_blit.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_blit.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_blit.c b/src/mesa/drivers/dri/i915/intel_blit.c new file mode 100644 index 0000000..da56f55 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_blit.c @@ -0,0 +1,770 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/mtypes.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/fbobject.h" + +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_reg.h" +#include "intel_regions.h" +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +static void +intel_miptree_set_alpha_to_one(struct intel_context *intel, + struct intel_mipmap_tree *mt, + int x, int y, int width, int height); + +static GLuint translate_raster_op(GLenum logicop) +{ + switch(logicop) { + case GL_CLEAR: return 0x00; + case GL_AND: return 0x88; + case GL_AND_REVERSE: return 0x44; + case GL_COPY: return 0xCC; + case GL_AND_INVERTED: return 0x22; + case GL_NOOP: return 0xAA; + case GL_XOR: return 0x66; + case GL_OR: return 0xEE; + case GL_NOR: return 0x11; + case GL_EQUIV: return 0x99; + case GL_INVERT: return 0x55; + case GL_OR_REVERSE: return 0xDD; + case GL_COPY_INVERTED: return 0x33; + case GL_OR_INVERTED: return 0xBB; + case GL_NAND: return 0x77; + case GL_SET: return 0xFF; + default: return 0; + } +} + +static uint32_t +br13_for_cpp(int cpp) +{ + switch (cpp) { + case 4: + return BR13_8888; + break; + case 2: + return BR13_565; + break; + case 1: + return BR13_8; + break; + default: + assert(0); + return 0; + } +} + +/** + * Emits the packet for switching the blitter from X to Y tiled or back. + * + * This has to be called in a single BEGIN_BATCH_BLT_TILED() / + * ADVANCE_BATCH_TILED(). This is because BCS_SWCTRL is saved and restored as + * part of the power context, not a render context, and if the batchbuffer was + * to get flushed between setting and blitting, or blitting and restoring, our + * tiling state would leak into other unsuspecting applications (like the X + * server). + */ +static void +set_blitter_tiling(struct intel_context *intel, + bool dst_y_tiled, bool src_y_tiled) +{ + assert(intel->gen >= 6); + + /* Idle the blitter before we update how tiling is interpreted. */ + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2)); + OUT_BATCH(BCS_SWCTRL); + OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | + (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | + (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); +} + +#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \ + BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \ + if (dst_y_tiled || src_y_tiled) \ + set_blitter_tiling(intel, dst_y_tiled, src_y_tiled); \ + } while (0) + +#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \ + if (dst_y_tiled || src_y_tiled) \ + set_blitter_tiling(intel, false, false); \ + ADVANCE_BATCH(); \ + } while (0) + +/** + * Implements a rectangular block transfer (blit) of pixels between two + * miptrees. + * + * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous, + * but limited, pitches and sizes allowed. + * + * The src/dst coordinates are relative to the given level/slice of the + * miptree. + * + * If @src_flip or @dst_flip is set, then the rectangle within that miptree + * will be inverted (including scanline order) when copying. This is common + * in GL when copying between window system and user-created + * renderbuffers/textures. + */ +bool +intel_miptree_blit(struct intel_context *intel, + struct intel_mipmap_tree *src_mt, + int src_level, int src_slice, + uint32_t src_x, uint32_t src_y, bool src_flip, + struct intel_mipmap_tree *dst_mt, + int dst_level, int dst_slice, + uint32_t dst_x, uint32_t dst_y, bool dst_flip, + uint32_t width, uint32_t height, + GLenum logicop) +{ + /* No sRGB decode or encode is done by the hardware blitter, which is + * consistent with what we want in the callers (glCopyTexSubImage(), + * glBlitFramebuffer(), texture validation, etc.). + */ + gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format); + gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format); + + /* The blitter doesn't support doing any format conversions. We do also + * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into + * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A + * channel to 1.0 at the end. + */ + if (src_format != dst_format && + ((src_format != MESA_FORMAT_ARGB8888 && + src_format != MESA_FORMAT_XRGB8888) || + (dst_format != MESA_FORMAT_ARGB8888 && + dst_format != MESA_FORMAT_XRGB8888))) { + perf_debug("%s: Can't use hardware blitter from %s to %s, " + "falling back.\n", __FUNCTION__, + _mesa_get_format_name(src_format), + _mesa_get_format_name(dst_format)); + return false; + } + + /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics + * Data Size Limitations): + * + * The BLT engine is capable of transferring very large quantities of + * graphics data. Any graphics data read from and written to the + * destination is permitted to represent a number of pixels that + * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line + * at the destination. The maximum number of pixels that may be + * represented per scan line’s worth of graphics data depends on the + * color depth. + * + * Furthermore, intelEmitCopyBlit (which is called below) uses a signed + * 16-bit integer to represent buffer pitch, so it can only handle buffer + * pitches < 32k. + * + * As a result of these two limitations, we can only use the blitter to do + * this copy when the region's pitch is less than 32k. + */ + if (src_mt->region->pitch > 32768 || + dst_mt->region->pitch > 32768) { + perf_debug("Falling back due to >32k pitch\n"); + return false; + } + + /* The blitter has no idea about HiZ or fast color clears, so we need to + * resolve the miptrees before we do anything. + */ + intel_miptree_slice_resolve_depth(intel, src_mt, src_level, src_slice); + intel_miptree_slice_resolve_depth(intel, dst_mt, dst_level, dst_slice); + intel_miptree_resolve_color(intel, src_mt); + intel_miptree_resolve_color(intel, dst_mt); + + if (src_flip) + src_y = src_mt->level[src_level].height - src_y - height; + + if (dst_flip) + dst_y = dst_mt->level[dst_level].height - dst_y - height; + + int src_pitch = src_mt->region->pitch; + if (src_flip != dst_flip) + src_pitch = -src_pitch; + + uint32_t src_image_x, src_image_y; + intel_miptree_get_image_offset(src_mt, src_level, src_slice, + &src_image_x, &src_image_y); + src_x += src_image_x; + src_y += src_image_y; + + uint32_t dst_image_x, dst_image_y; + intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice, + &dst_image_x, &dst_image_y); + dst_x += dst_image_x; + dst_y += dst_image_y; + + if (!intelEmitCopyBlit(intel, + src_mt->cpp, + src_pitch, + src_mt->region->bo, src_mt->offset, + src_mt->region->tiling, + dst_mt->region->pitch, + dst_mt->region->bo, dst_mt->offset, + dst_mt->region->tiling, + src_x, src_y, + dst_x, dst_y, + width, height, + logicop)) { + return false; + } + + if (src_mt->format == MESA_FORMAT_XRGB8888 && + dst_mt->format == MESA_FORMAT_ARGB8888) { + intel_miptree_set_alpha_to_one(intel, dst_mt, + dst_x, dst_y, + width, height); + } + + return true; +} + +/* Copy BitBlt + */ +bool +intelEmitCopyBlit(struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + drm_intel_bo *src_buffer, + GLuint src_offset, + uint32_t src_tiling, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort src_x, GLshort src_y, + GLshort dst_x, GLshort dst_y, + GLshort w, GLshort h, + GLenum logic_op) +{ + GLuint CMD, BR13, pass = 0; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + drm_intel_bo *aper_array[3]; + bool dst_y_tiled = dst_tiling == I915_TILING_Y; + bool src_y_tiled = src_tiling == I915_TILING_Y; + BATCH_LOCALS; + + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return false; + } + if (src_tiling != I915_TILING_NONE) { + if (src_offset & 4095) + return false; + } + if ((dst_y_tiled || src_y_tiled) && intel->gen < 6) + return false; + + /* do space check before going any further */ + do { + aper_array[0] = intel->batch.bo; + aper_array[1] = dst_buffer; + aper_array[2] = src_buffer; + + if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { + intel_batchbuffer_flush(intel); + pass++; + } else + break; + } while (pass < 2); + + if (pass >= 2) + return false; + + intel_batchbuffer_require_space(intel, 8 * 4, true); + DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop + * the low bits. + */ + if (src_pitch % 4 != 0 || dst_pitch % 4 != 0) + return false; + + /* For big formats (such as floating point), do the copy using 16 or 32bpp + * and multiply the coordinates. + */ + if (cpp > 4) { + if (cpp % 4 == 2) { + dst_x *= cpp / 2; + dst_x2 *= cpp / 2; + src_x *= cpp / 2; + cpp = 2; + } else { + assert(cpp % 4 == 0); + dst_x *= cpp / 4; + dst_x2 *= cpp / 4; + src_x *= cpp / 4; + cpp = 4; + } + } + + BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; + + switch (cpp) { + case 1: + case 2: + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return false; + } + +#ifndef I915 + if (dst_tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + if (src_tiling != I915_TILING_NONE) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } +#endif + + if (dst_y2 <= dst_y || dst_x2 <= dst_x) { + return true; + } + + assert(dst_x < dst_x2); + assert(dst_y < dst_y2); + + BEGIN_BATCH_BLT_TILED(8, dst_y_tiled, src_y_tiled); + + OUT_BATCH(CMD | (8 - 2)); + OUT_BATCH(BR13 | (uint16_t)dst_pitch); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH((uint16_t)src_pitch); + OUT_RELOC_FENCED(src_buffer, + I915_GEM_DOMAIN_RENDER, 0, + src_offset); + + ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled); + + intel_batchbuffer_emit_mi_flush(intel); + + return true; +} + + +/** + * Use blitting to clear the renderbuffers named by 'flags'. + * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field + * since that might include software renderbuffers or renderbuffers + * which we're clearing with triangles. + * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear + */ +GLbitfield +intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + GLuint clear_depth_value, clear_depth_mask; + GLint cx, cy, cw, ch; + GLbitfield fail_mask = 0; + BATCH_LOCALS; + + /* Note: we don't use this function on Gen7+ hardware, so we can safely + * ignore fast color clear issues. + */ + assert(intel->gen < 7); + + /* + * Compute values for clearing the buffers. + */ + clear_depth_value = 0; + clear_depth_mask = 0; + if (mask & BUFFER_BIT_DEPTH) { + clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear); + clear_depth_mask = XY_BLT_WRITE_RGB; + } + if (mask & BUFFER_BIT_STENCIL) { + clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24; + clear_depth_mask |= XY_BLT_WRITE_ALPHA; + } + + cx = fb->_Xmin; + if (_mesa_is_winsys_fbo(fb)) + cy = ctx->DrawBuffer->Height - fb->_Ymax; + else + cy = fb->_Ymin; + cw = fb->_Xmax - fb->_Xmin; + ch = fb->_Ymax - fb->_Ymin; + + if (cw == 0 || ch == 0) + return 0; + + /* Loop over all renderbuffers */ + mask &= (1 << BUFFER_COUNT) - 1; + while (mask) { + GLuint buf = ffs(mask) - 1; + bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL; + struct intel_renderbuffer *irb; + int x1, y1, x2, y2; + uint32_t clear_val; + uint32_t BR13, CMD; + struct intel_region *region; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + + mask &= ~(1 << buf); + + irb = intel_get_renderbuffer(fb, buf); + if (irb && irb->mt) { + region = irb->mt->region; + assert(region); + assert(region->bo); + } else { + fail_mask |= 1 << buf; + continue; + } + + /* OK, clear this renderbuffer */ + x1 = cx + irb->draw_x; + y1 = cy + irb->draw_y; + x2 = cx + cw + irb->draw_x; + y2 = cy + ch + irb->draw_y; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + region->bo, pitch, + x1, y1, x2 - x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + + /* Setup the blit command */ + if (cpp == 4) { + if (is_depth_stencil) { + CMD |= clear_depth_mask; + } else { + /* clearing RGBA */ + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + } + + assert(region->tiling != I915_TILING_Y); + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= pitch; + + if (is_depth_stencil) { + clear_val = clear_depth_value; + } else { + uint8_t clear[4]; + GLfloat *color = ctx->Color.ClearColor.f; + + _mesa_unclamped_float_rgba_to_ubyte(clear, color); + + switch (intel_rb_format(irb)) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + clear_val = PACK_COLOR_8888(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_RGB565: + clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB4444: + clear_val = PACK_COLOR_4444(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_ARGB1555: + clear_val = PACK_COLOR_1555(clear[3], clear[0], + clear[1], clear[2]); + break; + case MESA_FORMAT_A8: + clear_val = PACK_COLOR_8888(clear[3], clear[3], + clear[3], clear[3]); + break; + default: + fail_mask |= 1 << buf; + continue; + } + } + + BR13 |= br13_for_cpp(cpp); + + assert(x1 < x2); + assert(y1 < y2); + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->bo; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + BEGIN_BATCH_BLT(6); + OUT_BATCH(CMD | (6 - 2)); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC_FENCED(region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(clear_val); + ADVANCE_BATCH(); + + if (intel->always_flush_cache) + intel_batchbuffer_emit_mi_flush(intel); + + if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) + mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + } + + return fail_mask; +} + +bool +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLenum logic_op) +{ + int dwords = ALIGN(src_size, 8) / 4; + uint32_t opcode, br13, blit_cmd; + + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return false; + if (dst_tiling == I915_TILING_Y) + return false; + } + + assert( logic_op - GL_CLEAR >= 0 ); + assert( logic_op - GL_CLEAR < 0x10 ); + assert(dst_pitch > 0); + + if (w < 0 || h < 0) + return true; + + DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); + + intel_batchbuffer_require_space(intel, + (8 * 4) + + (3 * 4) + + dwords * 4, true); + + opcode = XY_SETUP_BLT_CMD; + if (cpp == 4) + opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; +#ifndef I915 + if (dst_tiling != I915_TILING_NONE) { + opcode |= XY_DST_TILED; + dst_pitch /= 4; + } +#endif + + br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); + br13 |= br13_for_cpp(cpp); + + blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ + if (dst_tiling != I915_TILING_NONE) + blit_cmd |= XY_DST_TILED; + + BEGIN_BATCH_BLT(8 + 3); + OUT_BATCH(opcode | (8 - 2)); + OUT_BATCH(br13); + OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ + OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ + OUT_RELOC_FENCED(dst_buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + dst_offset); + OUT_BATCH(0); /* bg */ + OUT_BATCH(fg_color); /* fg */ + OUT_BATCH(0); /* pattern base addr */ + + OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + ADVANCE_BATCH(); + + intel_batchbuffer_data(intel, src_bits, dwords * 4, true); + + intel_batchbuffer_emit_mi_flush(intel); + + return true; +} + +/* We don't have a memmove-type blit like some other hardware, so we'll do a + * rectangular blit covering a large space, then emit 1-scanline blit at the + * end to cover the last if we need. + */ +void +intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size) +{ + struct gl_context *ctx = &intel->ctx; + GLuint pitch, height; + bool ok; + + /* The pitch given to the GPU must be DWORD aligned, and + * we want width to match pitch. Max width is (1 << 15 - 1), + * rounding that down to the nearest DWORD is 1 << 15 - 4 + */ + pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4); + height = (pitch == 0) ? 1 : size / pitch; + ok = intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + pitch, height, /* w, h */ + GL_COPY); + if (!ok) + _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height); + + src_offset += pitch * height; + dst_offset += pitch * height; + size -= pitch * height; + assert (size < (1 << 15)); + pitch = ALIGN(size, 4); + if (size != 0) { + ok = intelEmitCopyBlit(intel, 1, + pitch, src_bo, src_offset, I915_TILING_NONE, + pitch, dst_bo, dst_offset, I915_TILING_NONE, + 0, 0, /* src x/y */ + 0, 0, /* dst x/y */ + size, 1, /* w, h */ + GL_COPY); + if (!ok) + _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1); + } +} + +/** + * Used to initialize the alpha value of an ARGB8888 miptree after copying + * into it from an XRGB8888 source. + * + * This is very common with glCopyTexImage2D(). Note that the coordinates are + * relative to the start of the miptree, not relative to a slice within the + * miptree. + */ +static void +intel_miptree_set_alpha_to_one(struct intel_context *intel, + struct intel_mipmap_tree *mt, + int x, int y, int width, int height) +{ + struct intel_region *region = mt->region; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + BATCH_LOCALS; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, region->bo, pitch, x, y, width, height); + + BR13 = br13_for_cpp(cpp) | 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + CMD |= XY_BLT_WRITE_ALPHA; + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= pitch; + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->bo; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + bool dst_y_tiled = region->tiling == I915_TILING_Y; + + BEGIN_BATCH_BLT_TILED(6, dst_y_tiled, false); + OUT_BATCH(CMD | (6 - 2)); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + height) << 16) | (x + width)); + OUT_RELOC_FENCED(region->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH_TILED(dst_y_tiled, false); + + intel_batchbuffer_emit_mi_flush(intel); +} diff --git a/src/mesa/drivers/dri/i915/intel_blit.h b/src/mesa/drivers/dri/i915/intel_blit.h new file mode 100644 index 0000000..0decc80 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_blit.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BLIT_H +#define INTEL_BLIT_H + +#include "intel_context.h" + +extern void intelCopyBuffer(const __DRIdrawable * dpriv, + const drm_clip_rect_t * rect); + +extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); + +bool +intelEmitCopyBlit(struct intel_context *intel, + GLuint cpp, + GLshort src_pitch, + drm_intel_bo *src_buffer, + GLuint src_offset, + uint32_t src_tiling, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort srcx, GLshort srcy, + GLshort dstx, GLshort dsty, + GLshort w, GLshort h, + GLenum logicop ); + +bool intel_miptree_blit(struct intel_context *intel, + struct intel_mipmap_tree *src_mt, + int src_level, int src_slice, + uint32_t src_x, uint32_t src_y, bool src_flip, + struct intel_mipmap_tree *dst_mt, + int dst_level, int dst_slice, + uint32_t dst_x, uint32_t dst_y, bool dst_flip, + uint32_t width, uint32_t height, + GLenum logicop); + +bool +intelEmitImmediateColorExpandBlit(struct intel_context *intel, + GLuint cpp, + GLubyte *src_bits, GLuint src_size, + GLuint fg_color, + GLshort dst_pitch, + drm_intel_bo *dst_buffer, + GLuint dst_offset, + uint32_t dst_tiling, + GLshort x, GLshort y, + GLshort w, GLshort h, + GLenum logic_op); +void intel_emit_linear_blit(struct intel_context *intel, + drm_intel_bo *dst_bo, + unsigned int dst_offset, + drm_intel_bo *src_bo, + unsigned int src_offset, + unsigned int size); + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c deleted file mode 120000 index e06dd3c..0000000 --- a/src/mesa/drivers/dri/i915/intel_buffer_objects.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_buffer_objects.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c new file mode 100644 index 0000000..f568864 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c @@ -0,0 +1,853 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" + +#include "intel_blit.h" +#include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" +#include "intel_context.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +static GLboolean +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj); + +/** Allocates a new drm_intel_bo to store the data for the buffer object. */ +static void +intel_bufferobj_alloc_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj) +{ + intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", + intel_obj->Base.Size, 64); + +#ifndef I915 + /* the buffer might be bound as a uniform buffer, need to update it + */ + { + struct brw_context *brw = brw_context(&intel->ctx); + brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER; + } +#endif +} + +static void +release_buffer(struct intel_buffer_object *intel_obj) +{ + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + intel_obj->offset = 0; + intel_obj->source = 0; +} + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target) +{ + struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object); + + _mesa_initialize_buffer_object(ctx, &obj->Base, name, target); + + obj->buffer = NULL; + + return &obj->Base; +} + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). + */ +static void +intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, obj); + + free(intel_obj->sys_buffer); + + drm_intel_bo_unreference(intel_obj->buffer); + free(intel_obj); +} + + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via ctx->Driver.BufferData(). + * \return true for success, false if out of memory + */ +static GLboolean +intel_bufferobj_data(struct gl_context * ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + /* Part of the ABI, but this function doesn't use it. + */ +#ifndef I915 + (void) target; +#endif + + intel_obj->Base.Size = size; + intel_obj->Base.Usage = usage; + + assert(!obj->Pointer); /* Mesa should have unmapped it */ + + if (intel_obj->buffer != NULL) + release_buffer(intel_obj); + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + + if (size != 0) { +#ifdef I915 + /* On pre-965, stick VBOs in system memory, as we're always doing + * swtnl with their contents anyway. + */ + if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + intel_obj->sys_buffer = malloc(size); + if (intel_obj->sys_buffer != NULL) { + if (data != NULL) + memcpy(intel_obj->sys_buffer, data, size); + return true; + } + } +#endif + intel_bufferobj_alloc_buffer(intel, intel_obj); + if (!intel_obj->buffer) + return false; + + if (data != NULL) + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } + + return true; +} + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +intel_bufferobj_subdata(struct gl_context * ctx, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + bool busy; + + if (size == 0) + return; + + assert(intel_obj); + + /* If we have a single copy in system memory, update that */ + if (intel_obj->sys_buffer) { + if (intel_obj->source) + release_buffer(intel_obj); + + if (intel_obj->buffer == NULL) { + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + return; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + + /* Otherwise we need to update the copy in video memory. */ + busy = + drm_intel_bo_busy(intel_obj->buffer) || + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer); + + if (busy) { + if (size == intel_obj->Base.Size) { + /* Replace the current busy bo with fresh data. */ + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } else { + perf_debug("Using a blit copy to avoid stalling on %ldb " + "glBufferSubData() to a busy buffer object.\n", + (long)size); + drm_intel_bo *temp_bo = + drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + + drm_intel_bo_subdata(temp_bo, 0, size, data); + + intel_emit_linear_blit(intel, + intel_obj->buffer, offset, + temp_bo, 0, + size); + + drm_intel_bo_unreference(temp_bo); + } + } else { + drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); + } +} + + +/** + * Called via glGetBufferSubDataARB(). + */ +static void +intel_bufferobj_get_subdata(struct gl_context * ctx, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + struct intel_context *intel = intel_context(ctx); + + assert(intel_obj); + if (intel_obj->sys_buffer) + memcpy(data, (char *)intel_obj->sys_buffer + offset, size); + else { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } + drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data); + } +} + + + +/** + * Called via glMapBufferRange and glMapBuffer + * + * The goal of this extension is to allow apps to accumulate their rendering + * at the same time as they accumulate their buffer object. Without it, + * you'd end up blocking on execution of rendering every time you mapped + * the buffer to put new data in. + * + * We support it in 3 ways: If unsynchronized, then don't bother + * flushing the batchbuffer before mapping the buffer, which can save blocking + * in many cases. If we would still block, and they allow the whole buffer + * to be invalidated, then just allocate a new buffer to replace the old one. + * If not, and we'd block, and they allow the subrange of the buffer to be + * invalidated, then we can make a new little BO, let them write into that, + * and blit it into the real BO at unmap time. + */ +static void * +intel_bufferobj_map_range(struct gl_context * ctx, + GLintptr offset, GLsizeiptr length, + GLbitfield access, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + + /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also + * internally uses our functions directly. + */ + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + + if (intel_obj->sys_buffer) { + const bool read_only = + (access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT; + + if (!read_only && intel_obj->source) + release_buffer(intel_obj); + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } + + if (intel_obj->buffer == NULL) { + obj->Pointer = NULL; + return NULL; + } + + /* If the access is synchronized (like a normal buffer mapping), then get + * things flushed out so the later mapping syncs appropriately through GEM. + * If the user doesn't care about existing buffer contents and mapping would + * cause us to block, then throw out the old buffer. + * + * If they set INVALIDATE_BUFFER, we can pitch the current contents to + * achieve the required synchronization. + */ + if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) { + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + if (access & GL_MAP_INVALIDATE_BUFFER_BIT) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + } else { + perf_debug("Stalling on the GPU for mapping a busy buffer " + "object\n"); + intel_flush(ctx); + } + } else if (drm_intel_bo_busy(intel_obj->buffer) && + (access & GL_MAP_INVALIDATE_BUFFER_BIT)) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + } + } + + /* If the user is mapping a range of an active buffer object but + * doesn't require the current contents of that range, make a new + * BO, and we'll copy what they put in there out at unmap or + * FlushRange time. + */ + if ((access & GL_MAP_INVALIDATE_RANGE_BIT) && + drm_intel_bo_busy(intel_obj->buffer)) { + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) { + intel_obj->range_map_buffer = malloc(length); + obj->Pointer = intel_obj->range_map_buffer; + } else { + intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr, + "range map", + length, 64); + if (!(access & GL_MAP_READ_BIT)) { + drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo); + } else { + drm_intel_bo_map(intel_obj->range_map_bo, + (access & GL_MAP_WRITE_BIT) != 0); + } + obj->Pointer = intel_obj->range_map_bo->virtual; + } + return obj->Pointer; + } + + if (access & GL_MAP_UNSYNCHRONIZED_BIT) + drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer); + else if (!(access & GL_MAP_READ_BIT)) { + drm_intel_gem_bo_map_gtt(intel_obj->buffer); + } else { + drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0); + } + + obj->Pointer = intel_obj->buffer->virtual + offset; + return obj->Pointer; +} + +/* Ideally we'd use a BO to avoid taking up cache space for the temporary + * data, but FlushMappedBufferRange may be followed by further writes to + * the pointer, so we would have to re-map after emitting our blit, which + * would defeat the point. + */ +static void +intel_bufferobj_flush_mapped_range(struct gl_context *ctx, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + drm_intel_bo *temp_bo; + + /* Unless we're in the range map using a temporary system buffer, + * there's no work to do. + */ + if (intel_obj->range_map_buffer == NULL) + return; + + if (length == 0) + return; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64); + + drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset + offset, + temp_bo, 0, + length); + + drm_intel_bo_unreference(temp_bo); +} + + +/** + * Called via glUnmapBuffer(). + */ +static GLboolean +intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + + assert(intel_obj); + assert(obj->Pointer); + if (intel_obj->sys_buffer != NULL) { + /* always keep the mapping around. */ + } else if (intel_obj->range_map_buffer != NULL) { + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel); + free(intel_obj->range_map_buffer); + intel_obj->range_map_buffer = NULL; + } else if (intel_obj->range_map_bo != NULL) { + drm_intel_bo_unmap(intel_obj->range_map_bo); + + intel_emit_linear_blit(intel, + intel_obj->buffer, obj->Offset, + intel_obj->range_map_bo, 0, + obj->Length); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel); + + drm_intel_bo_unreference(intel_obj->range_map_bo); + intel_obj->range_map_bo = NULL; + } else if (intel_obj->buffer != NULL) { + drm_intel_bo_unmap(intel_obj->buffer); + } + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + + return true; +} + +drm_intel_bo * +intel_bufferobj_buffer(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint flag) +{ + if (intel_obj->source) + release_buffer(intel_obj); + + if (intel_obj->buffer == NULL) { + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, + 0, intel_obj->Base.Size, + intel_obj->sys_buffer); + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + intel_obj->offset = 0; + } + + return intel_obj->buffer; +} + +#define INTEL_UPLOAD_SIZE (64*1024) + +void +intel_upload_finish(struct intel_context *intel) +{ + if (!intel->upload.bo) + return; + + if (intel->upload.buffer_len) { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + drm_intel_bo_unreference(intel->upload.bo); + intel->upload.bo = NULL; +} + +static void wrap_buffers(struct intel_context *intel, GLuint size) +{ + intel_upload_finish(intel); + + if (size < INTEL_UPLOAD_SIZE) + size = INTEL_UPLOAD_SIZE; + + intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); + intel->upload.offset = 0; +} + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base, delta; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size < sizeof(intel->upload.buffer)) + { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size); + intel->upload.buffer_len += size; + } + else + { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + } + + intel->upload.offset = base + size; +} + +void *intel_upload_map(struct intel_context *intel, GLuint size, GLuint align) +{ + GLuint base, delta; + char *ptr; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size <= sizeof(intel->upload.buffer)) { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + ptr = intel->upload.buffer + intel->upload.buffer_len; + intel->upload.buffer_len += size; + } else + ptr = malloc(size); + + return ptr; +} + +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base; + + base = (intel->upload.offset + align - 1) / align * align; + if (size > sizeof(intel->upload.buffer)) { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + free((void*)ptr); + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + intel->upload.offset = base + size; +} + +drm_intel_bo * +intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint align, GLuint *offset) +{ + if (intel_obj->buffer == NULL) { + intel_upload_data(intel, + intel_obj->sys_buffer, intel_obj->Base.Size, align, + &intel_obj->buffer, &intel_obj->offset); + intel_obj->source = 1; + } + + *offset = intel_obj->offset; + return intel_obj->buffer; +} + +static void +intel_bufferobj_copy_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr read_offset, GLintptr write_offset, + GLsizeiptr size) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *intel_src = intel_buffer_object(src); + struct intel_buffer_object *intel_dst = intel_buffer_object(dst); + drm_intel_bo *src_bo, *dst_bo; + GLuint src_offset; + + if (size == 0) + return; + + /* If we're in system memory, just map and memcpy. */ + if (intel_src->sys_buffer || intel_dst->sys_buffer) { + /* The same buffer may be used, but note that regions copied may + * not overlap. + */ + if (src == dst) { + char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_READ_BIT | + GL_MAP_WRITE_BIT, + dst); + memmove(ptr + write_offset, ptr + read_offset, size); + intel_bufferobj_unmap(ctx, dst); + } else { + const char *src_ptr; + char *dst_ptr; + + src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size, + GL_MAP_READ_BIT, src); + dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size, + GL_MAP_WRITE_BIT, dst); + + memcpy(dst_ptr + write_offset, src_ptr + read_offset, size); + + intel_bufferobj_unmap(ctx, src); + intel_bufferobj_unmap(ctx, dst); + } + return; + } + + /* Otherwise, we have real BOs, so blit them. */ + + dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); + src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset); + + intel_emit_linear_blit(intel, + dst_bo, write_offset, + src_bo, read_offset + src_offset, size); + + /* Since we've emitted some blits to buffers that will (likely) be used + * in rendering operations in other cache domains in this batch, emit a + * flush. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer. + */ + intel_batchbuffer_emit_mi_flush(intel); +} + +static GLenum +intel_buffer_purgeable(drm_intel_bo *buffer) +{ + int retained = 0; + + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED); + + return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE; +} + +static GLenum +intel_buffer_object_purgeable(struct gl_context * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + struct intel_buffer_object *intel_obj = intel_buffer_object (obj); + + if (intel_obj->buffer != NULL) + return intel_buffer_purgeable(intel_obj->buffer); + + if (option == GL_RELEASED_APPLE) { + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + + return GL_RELEASED_APPLE; + } else { + /* XXX Create the buffer and madvise(MADV_DONTNEED)? */ + struct intel_context *intel = intel_context(ctx); + drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_obj, INTEL_READ); + + return intel_buffer_purgeable(bo); + } +} + +static GLenum +intel_texture_object_purgeable(struct gl_context * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + (void) ctx; + (void) option; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable(intel->mt->region->bo); +} + +static GLenum +intel_render_object_purgeable(struct gl_context * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + (void) ctx; + (void) option; + + intel = intel_renderbuffer(obj); + if (intel->mt == NULL) + return GL_RELEASED_APPLE; + + return intel_buffer_purgeable(intel->mt->region->bo); +} + +static GLenum +intel_buffer_unpurgeable(drm_intel_bo *buffer) +{ + int retained; + + retained = 0; + if (buffer != NULL) + retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED); + + return retained ? GL_RETAINED_APPLE : GL_UNDEFINED_APPLE; +} + +static GLenum +intel_buffer_object_unpurgeable(struct gl_context * ctx, + struct gl_buffer_object *obj, + GLenum option) +{ + (void) ctx; + (void) option; + + return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer); +} + +static GLenum +intel_texture_object_unpurgeable(struct gl_context * ctx, + struct gl_texture_object *obj, + GLenum option) +{ + struct intel_texture_object *intel; + + (void) ctx; + (void) option; + + intel = intel_texture_object(obj); + if (intel->mt == NULL || intel->mt->region == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable(intel->mt->region->bo); +} + +static GLenum +intel_render_object_unpurgeable(struct gl_context * ctx, + struct gl_renderbuffer *obj, + GLenum option) +{ + struct intel_renderbuffer *intel; + + (void) ctx; + (void) option; + + intel = intel_renderbuffer(obj); + if (intel->mt == NULL) + return GL_UNDEFINED_APPLE; + + return intel_buffer_unpurgeable(intel->mt->region->bo); +} + +void +intelInitBufferObjectFuncs(struct dd_function_table *functions) +{ + functions->NewBufferObject = intel_bufferobj_alloc; + functions->DeleteBuffer = intel_bufferobj_free; + functions->BufferData = intel_bufferobj_data; + functions->BufferSubData = intel_bufferobj_subdata; + functions->GetBufferSubData = intel_bufferobj_get_subdata; + functions->MapBufferRange = intel_bufferobj_map_range; + functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range; + functions->UnmapBuffer = intel_bufferobj_unmap; + functions->CopyBufferSubData = intel_bufferobj_copy_subdata; + + functions->BufferObjectPurgeable = intel_buffer_object_purgeable; + functions->TextureObjectPurgeable = intel_texture_object_purgeable; + functions->RenderObjectPurgeable = intel_render_object_purgeable; + + functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable; + functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable; + functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable; +} diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.h b/src/mesa/drivers/dri/i915/intel_buffer_objects.h new file mode 100644 index 0000000..92a4121 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.h @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BUFFEROBJ_H +#define INTEL_BUFFEROBJ_H + +#include "main/mtypes.h" + +struct intel_context; +struct gl_buffer_object; + + +/** + * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object. + */ +struct intel_buffer_object +{ + struct gl_buffer_object Base; + drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */ + GLuint offset; /* any offset into that buffer */ + + /** System memory buffer data, if not using a BO to store the data. */ + void *sys_buffer; + + drm_intel_bo *range_map_bo; + void *range_map_buffer; + unsigned int range_map_offset; + GLsizei range_map_size; + + bool source; +}; + + +/* Get the bm buffer associated with a GL bufferobject: + */ +drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint flag); +drm_intel_bo *intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint align, + GLuint *offset); + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void *intel_upload_map(struct intel_context *intel, + GLuint size, GLuint align); +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void intel_upload_finish(struct intel_context *intel); + +/* Hook the bufferobject implementation into mesa: + */ +void intelInitBufferObjectFuncs(struct dd_function_table *functions); + +static inline struct intel_buffer_object * +intel_buffer_object(struct gl_buffer_object *obj) +{ + return (struct intel_buffer_object *) obj; +} + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_buffers.c b/src/mesa/drivers/dri/i915/intel_buffers.c deleted file mode 120000 index c86daa4..0000000 --- a/src/mesa/drivers/dri/i915/intel_buffers.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_buffers.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_buffers.c b/src/mesa/drivers/dri/i915/intel_buffers.c new file mode 100644 index 0000000..fdad480 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_buffers.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_context.h" +#include "intel_buffers.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" + +/** + * Return pointer to current color reading region, or NULL. + */ +struct intel_region * +intel_readbuf_region(struct intel_context *intel) +{ + struct intel_renderbuffer *irb + = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); + if (irb && irb->mt) + return irb->mt->region; + else + return NULL; +} + +/** + * Check if we're about to draw into the front color buffer. + * If so, set the intel->front_buffer_dirty field to true. + */ +void +intel_check_front_buffer_rendering(struct intel_context *intel) +{ + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; + if (_mesa_is_winsys_fbo(fb)) { + /* drawing to window system buffer */ + if (fb->_NumColorDrawBuffers > 0) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + intel->front_buffer_dirty = true; + } + } + } +} + +static void +intelDrawBuffer(struct gl_context * ctx, GLenum mode) +{ + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + struct intel_context *const intel = intel_context(ctx); + const bool was_front_buffer_rendering = + intel->is_front_buffer_rendering; + + intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK); + + /* If we weren't front-buffer rendering before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start rendering again. + */ + if (!was_front_buffer_rendering && intel->is_front_buffer_rendering) + dri2InvalidateDrawable(intel->driContext->driDrawablePriv); + } + + intel_draw_buffer(ctx); +} + + +static void +intelReadBuffer(struct gl_context * ctx, GLenum mode) +{ + if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + struct intel_context *const intel = intel_context(ctx); + const bool was_front_buffer_reading = + intel->is_front_buffer_reading; + + intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer reading before but we are now, + * invalidate our DRI drawable so we'll ask for new buffers + * (including the fake front) before we start reading again. + */ + if (!was_front_buffer_reading && intel->is_front_buffer_reading) + dri2InvalidateDrawable(intel->driContext->driReadablePriv); + } +} + + +void +intelInitBufferFuncs(struct dd_function_table *functions) +{ + functions->DrawBuffer = intelDrawBuffer; + functions->ReadBuffer = intelReadBuffer; +} diff --git a/src/mesa/drivers/dri/i915/intel_buffers.h b/src/mesa/drivers/dri/i915/intel_buffers.h new file mode 100644 index 0000000..4e3d130 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_buffers.h @@ -0,0 +1,56 @@ + +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_BUFFERS_H +#define INTEL_BUFFERS_H + +#include "dri_util.h" +#include "drm.h" +#include "intel_context.h" + +struct intel_context; +struct intel_framebuffer; + +extern struct intel_region *intel_readbuf_region(struct intel_context *intel); + +extern void intel_check_front_buffer_rendering(struct intel_context *intel); + +static inline void +intel_draw_buffer(struct gl_context * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel->vtbl.update_draw_buffer(intel); +} + +extern void intelInitBufferFuncs(struct dd_function_table *functions); +#ifdef I915 +void intelCalcViewport(struct gl_context * ctx); +#endif + +#endif /* INTEL_BUFFERS_H */ diff --git a/src/mesa/drivers/dri/i915/intel_chipset.h b/src/mesa/drivers/dri/i915/intel_chipset.h new file mode 100644 index 0000000..1e98cf4 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_chipset.h @@ -0,0 +1,314 @@ + /* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 +#define PCI_CHIP_B43_G1 0x2E92 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */ + +#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */ +#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */ +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a + +#define PCI_CHIP_BAYTRAIL_M_1 0x0F31 +#define PCI_CHIP_BAYTRAIL_M_2 0x0F32 +#define PCI_CHIP_BAYTRAIL_M_3 0x0F33 +#define PCI_CHIP_BAYTRAIL_M_4 0x0157 +#define PCI_CHIP_BAYTRAIL_D 0x0155 + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT3 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT3 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT3 0x042A +#define PCI_CHIP_HASWELL_B_GT1 0x040B /* Reserved */ +#define PCI_CHIP_HASWELL_B_GT2 0x041B +#define PCI_CHIP_HASWELL_B_GT3 0x042B +#define PCI_CHIP_HASWELL_E_GT1 0x040E /* Reserved */ +#define PCI_CHIP_HASWELL_E_GT2 0x041E +#define PCI_CHIP_HASWELL_E_GT3 0x042E +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A +#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0C0B /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0C1B +#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0C2B +#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0C0E /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0C1E +#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0C2E +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A +#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B +#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B +#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E +#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E +#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ +#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 +#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 +#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A +#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A +#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B +#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B +#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E +#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E + +#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_GM45_GM || \ + IS_IGD(devid) || \ + devid == PCI_CHIP_ILM_G) + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G || \ + devid == PCI_CHIP_B43_G1) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_915(devid) (devid == PCI_CHIP_I915_G || \ + devid == PCI_CHIP_E7221_G || \ + devid == PCI_CHIP_I915_GM) + +#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_G33_G || \ + devid == PCI_CHIP_Q33_G || \ + devid == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_GEN4(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +/* Compat macro for intel_decode.c */ +#define IS_IRONLAKE(devid) IS_GEN5(devid) + +#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS) + +#define IS_GEN6(devid) (IS_SNB_GT1(devid) || IS_SNB_GT2(devid)) + +#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \ + devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_IVYBRIDGE_S_GT1) + +#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \ + devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \ + devid == PCI_CHIP_IVYBRIDGE_S_GT2) + +#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid)) + +#define IS_BAYTRAIL(devid) (devid == PCI_CHIP_BAYTRAIL_M_1 || \ + devid == PCI_CHIP_BAYTRAIL_M_2 || \ + devid == PCI_CHIP_BAYTRAIL_M_3 || \ + devid == PCI_CHIP_BAYTRAIL_M_4 || \ + devid == PCI_CHIP_BAYTRAIL_D) + +#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ + IS_BAYTRAIL(devid) || \ + IS_HASWELL(devid)) + +#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \ + devid == PCI_CHIP_HASWELL_M_GT1 || \ + devid == PCI_CHIP_HASWELL_S_GT1 || \ + devid == PCI_CHIP_HASWELL_B_GT1 || \ + devid == PCI_CHIP_HASWELL_E_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \ + devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \ + devid == PCI_CHIP_HASWELL_ULT_E_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \ + devid == PCI_CHIP_HASWELL_CRW_E_GT1) +#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \ + devid == PCI_CHIP_HASWELL_M_GT2 || \ + devid == PCI_CHIP_HASWELL_S_GT2 || \ + devid == PCI_CHIP_HASWELL_B_GT2 || \ + devid == PCI_CHIP_HASWELL_E_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \ + devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \ + devid == PCI_CHIP_HASWELL_ULT_E_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_B_GT2 || \ + devid == PCI_CHIP_HASWELL_CRW_E_GT2) +#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \ + devid == PCI_CHIP_HASWELL_M_GT3 || \ + devid == PCI_CHIP_HASWELL_S_GT3 || \ + devid == PCI_CHIP_HASWELL_B_GT3 || \ + devid == PCI_CHIP_HASWELL_E_GT3 || \ + devid == PCI_CHIP_HASWELL_SDV_GT3 || \ + devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \ + devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \ + devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \ + devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \ + devid == PCI_CHIP_HASWELL_ULT_GT3 || \ + devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \ + devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \ + devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \ + devid == PCI_CHIP_HASWELL_ULT_E_GT3 || \ + devid == PCI_CHIP_HASWELL_CRW_GT3 || \ + devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \ + devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \ + devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \ + devid == PCI_CHIP_HASWELL_CRW_E_GT3) + +#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ + IS_HSW_GT2(devid) || \ + IS_HSW_GT3(devid)) + +#define IS_965(devid) (IS_GEN4(devid) || \ + IS_G4X(devid) || \ + IS_GEN5(devid) || \ + IS_GEN6(devid) || \ + IS_GEN7(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) + +#define IS_GEN3(devid) (IS_915(devid) || \ + IS_945(devid)) + +#define IS_GEN2(devid) (devid == PCI_CHIP_I830_M || \ + devid == PCI_CHIP_845_G || \ + devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I865_G) diff --git a/src/mesa/drivers/dri/i915/intel_clear.h b/src/mesa/drivers/dri/i915/intel_clear.h new file mode 100644 index 0000000..7fd6b31 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_clear.h @@ -0,0 +1,38 @@ + +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_CLEAR_H +#define INTEL_CLEAR_H + +struct dd_function_table; + +extern void +intelInitClearFuncs(struct dd_function_table *functions); + + +#endif /* INTEL_CLEAR_H */ diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c deleted file mode 120000 index 27a1cbb..0000000 --- a/src/mesa/drivers/dri/i915/intel_context.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_context.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c new file mode 100644 index 0000000..23d8281 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_context.c @@ -0,0 +1,1010 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/extensions.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/imports.h" +#include "main/points.h" +#include "main/renderbuffer.h" + +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" +#include "drivers/common/driverfuncs.h" +#include "drivers/common/meta.h" + +#include "intel_chipset.h" +#include "intel_buffers.h" +#include "intel_tex.h" +#include "intel_batchbuffer.h" +#include "intel_clear.h" +#include "intel_extensions.h" +#include "intel_pixel.h" +#include "intel_regions.h" +#include "intel_buffer_objects.h" +#include "intel_fbo.h" +#include "intel_bufmgr.h" +#include "intel_screen.h" +#include "intel_mipmap_tree.h" + +#include "utils.h" +#include "../glsl/ralloc.h" + +#ifndef INTEL_DEBUG +int INTEL_DEBUG = (0); +#endif + + +static const GLubyte * +intelGetString(struct gl_context * ctx, GLenum name) +{ + const struct intel_context *const intel = intel_context(ctx); + const char *chipset; + static char buffer[128]; + + switch (name) { + case GL_VENDOR: + return (GLubyte *) "Intel Open Source Technology Center"; + break; + + case GL_RENDERER: + switch (intel->intelScreen->deviceID) { +#undef CHIPSET +#define CHIPSET(id, symbol, str) case id: chipset = str; break; +#include "pci_ids/i915_pci_ids.h" +#include "pci_ids/i965_pci_ids.h" + default: + chipset = "Unknown Intel Chipset"; + break; + } + + (void) driGetRendererString(buffer, chipset, 0); + return (GLubyte *) buffer; + + default: + return NULL; + } +} + +void +intel_resolve_for_dri2_flush(struct intel_context *intel, + __DRIdrawable *drawable) +{ + if (intel->gen < 6) { + /* MSAA and fast color clear are not supported, so don't waste time + * checking whether a resolve is needed. + */ + return; + } + + struct gl_framebuffer *fb = drawable->driverPrivate; + struct intel_renderbuffer *rb; + + /* Usually, only the back buffer will need to be downsampled. However, + * the front buffer will also need it if the user has rendered into it. + */ + static const gl_buffer_index buffers[2] = { + BUFFER_BACK_LEFT, + BUFFER_FRONT_LEFT, + }; + + for (int i = 0; i < 2; ++i) { + rb = intel_get_renderbuffer(fb, buffers[i]); + if (rb == NULL || rb->mt == NULL) + continue; + if (rb->mt->num_samples <= 1) + intel_miptree_resolve_color(intel, rb->mt); + else + intel_miptree_downsample(intel, rb->mt); + } +} + +static void +intel_flush_front(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + __DRIcontext *driContext = intel->driContext; + __DRIdrawable *driDrawable = driContext->driDrawablePriv; + __DRIscreen *const screen = intel->intelScreen->driScrnPriv; + + if (intel->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) { + if (screen->dri2.loader->flushFrontBuffer != NULL && + driDrawable && + driDrawable->loaderPrivate) { + + /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT. + * + * This potentially resolves both front and back buffer. It + * is unnecessary to resolve the back, but harms nothing except + * performance. And no one cares about front-buffer render + * performance. + */ + intel_resolve_for_dri2_flush(intel, driDrawable); + + screen->dri2.loader->flushFrontBuffer(driDrawable, + driDrawable->loaderPrivate); + + /* We set the dirty bit in intel_prepare_render() if we're + * front buffer rendering once we get there. + */ + intel->front_buffer_dirty = false; + } + } +} + +static unsigned +intel_bits_per_pixel(const struct intel_renderbuffer *rb) +{ + return _mesa_get_format_bytes(intel_rb_format(rb)) * 8; +} + +static void +intel_query_dri2_buffers(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer **buffers, + int *count); + +static void +intel_process_dri2_buffer(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer *buffer, + struct intel_renderbuffer *rb, + const char *buffer_name); + +void +intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) +{ + struct gl_framebuffer *fb = drawable->driverPrivate; + struct intel_renderbuffer *rb; + struct intel_context *intel = context->driverPrivate; + __DRIbuffer *buffers = NULL; + int i, count; + const char *region_name; + + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. */ + drawable->lastStamp = drawable->dri2.stamp; + + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) + fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); + + intel_query_dri2_buffers(intel, drawable, &buffers, &count); + + if (buffers == NULL) + return; + + for (i = 0; i < count; i++) { + switch (buffers[i].attachment) { + case __DRI_BUFFER_FRONT_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + region_name = "dri2 front buffer"; + break; + + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + region_name = "dri2 fake front buffer"; + break; + + case __DRI_BUFFER_BACK_LEFT: + rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + region_name = "dri2 back buffer"; + break; + + case __DRI_BUFFER_DEPTH: + case __DRI_BUFFER_HIZ: + case __DRI_BUFFER_DEPTH_STENCIL: + case __DRI_BUFFER_STENCIL: + case __DRI_BUFFER_ACCUM: + default: + fprintf(stderr, + "unhandled buffer attach event, attachment type %d\n", + buffers[i].attachment); + return; + } + + intel_process_dri2_buffer(intel, drawable, &buffers[i], rb, region_name); + } + + driUpdateFramebufferSize(&intel->ctx, drawable); +} + +/** + * intel_prepare_render should be called anywhere that curent read/drawbuffer + * state is required. + */ +void +intel_prepare_render(struct intel_context *intel) +{ + __DRIcontext *driContext = intel->driContext; + __DRIdrawable *drawable; + + drawable = driContext->driDrawablePriv; + if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + intel_draw_buffer(&intel->ctx); + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = driContext->driReadablePriv; + if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + intel_update_renderbuffers(driContext, drawable); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } + + /* If we're currently rendering to the front buffer, the rendering + * that will happen next will probably dirty the front buffer. So + * mark it as dirty here. + */ + if (intel->is_front_buffer_rendering) + intel->front_buffer_dirty = true; + + /* Wait for the swapbuffers before the one we just emitted, so we + * don't get too many swaps outstanding for apps that are GPU-heavy + * but not CPU-heavy. + * + * We're using intelDRI2Flush (called from the loader before + * swapbuffer) and glFlush (for front buffer rendering) as the + * indicator that a frame is done and then throttle when we get + * here as we prepare to render the next frame. At this point for + * round trips for swap/copy and getting new buffers are done and + * we'll spend less time waiting on the GPU. + * + * Unfortunately, we don't have a handle to the batch containing + * the swap, and getting our hands on that doesn't seem worth it, + * so we just us the first batch we emitted after the last swap. + */ + if (intel->need_throttle && intel->first_post_swapbuffers_batch) { + if (!intel->disable_throttling) + drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + intel->need_throttle = false; + } +} + +static void +intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + struct intel_context *intel = intel_context(ctx); + __DRIcontext *driContext = intel->driContext; + + if (intel->saved_viewport) + intel->saved_viewport(ctx, x, y, w, h); + + if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { + dri2InvalidateDrawable(driContext->driDrawablePriv); + dri2InvalidateDrawable(driContext->driReadablePriv); + } +} + +static const struct dri_debug_control debug_control[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "mip", DEBUG_MIPTREE}, + { "fall", DEBUG_PERF}, + { "perf", DEBUG_PERF}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "buf", DEBUG_BUFMGR}, + { "reg", DEBUG_REGION}, + { "fbo", DEBUG_FBO}, + { "fs", DEBUG_WM }, + { "gs", DEBUG_GS}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dri", DEBUG_DRI }, + { "sf", DEBUG_SF }, + { "stats", DEBUG_STATS }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { "clip", DEBUG_CLIP }, + { "aub", DEBUG_AUB }, + { "shader_time", DEBUG_SHADER_TIME }, + { "no16", DEBUG_NO16 }, + { "blorp", DEBUG_BLORP }, + { NULL, 0 } +}; + + +static void +intelInvalidateState(struct gl_context * ctx, GLuint new_state) +{ + struct intel_context *intel = intel_context(ctx); + + if (ctx->swrast_context) + _swrast_InvalidateState(ctx, new_state); + _vbo_InvalidateState(ctx, new_state); + + intel->NewGLState |= new_state; + + if (intel->vtbl.invalidate_state) + intel->vtbl.invalidate_state( intel, new_state ); +} + +void +intel_flush_rendering_to_batch(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + + if (intel->Fallback) + _swrast_flush(ctx); + + if (intel->gen < 4) + INTEL_FIREVERTICES(intel); +} + +void +_intel_flush(struct gl_context *ctx, const char *file, int line) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush_rendering_to_batch(ctx); + + if (intel->batch.used) + _intel_batchbuffer_flush(intel, file, line); +} + +static void +intel_glFlush(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush(ctx); + intel_flush_front(ctx); + if (intel->is_front_buffer_rendering) + intel->need_throttle = true; +} + +void +intelFinish(struct gl_context * ctx) +{ + struct intel_context *intel = intel_context(ctx); + + intel_flush(ctx); + intel_flush_front(ctx); + + if (intel->batch.last_bo) + drm_intel_bo_wait_rendering(intel->batch.last_bo); +} + +void +intelInitDriverFunctions(struct dd_function_table *functions) +{ + _mesa_init_driver_functions(functions); + + functions->Flush = intel_glFlush; + functions->Finish = intelFinish; + functions->GetString = intelGetString; + functions->UpdateState = intelInvalidateState; + + intelInitTextureFuncs(functions); + intelInitTextureImageFuncs(functions); + intelInitTextureSubImageFuncs(functions); + intelInitTextureCopyImageFuncs(functions); + intelInitClearFuncs(functions); + intelInitBufferFuncs(functions); + intelInitPixelFuncs(functions); + intelInitBufferObjectFuncs(functions); + intel_init_syncobj_functions(functions); +} + +static bool +validate_context_version(struct intel_screen *screen, + int mesa_api, + unsigned major_version, + unsigned minor_version, + unsigned *dri_ctx_error) +{ + unsigned req_version = 10 * major_version + minor_version; + unsigned max_version = 0; + + switch (mesa_api) { + case API_OPENGL_COMPAT: + max_version = screen->max_gl_compat_version; + break; + case API_OPENGL_CORE: + max_version = screen->max_gl_core_version; + break; + case API_OPENGLES: + max_version = screen->max_gl_es1_version; + break; + case API_OPENGLES2: + max_version = screen->max_gl_es2_version; + break; + default: + max_version = 0; + break; + } + + if (max_version == 0) { + *dri_ctx_error = __DRI_CTX_ERROR_BAD_API; + return false; + } else if (req_version > max_version) { + *dri_ctx_error = __DRI_CTX_ERROR_BAD_VERSION; + return false; + } + + return true; +} + +bool +intelInitContext(struct intel_context *intel, + int api, + unsigned major_version, + unsigned minor_version, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions, + unsigned *dri_ctx_error) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate; + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->driverPrivate; + int bo_reuse_mode; + struct gl_config visual; + + /* we can't do anything without a connection to the device */ + if (intelScreen->bufmgr == NULL) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + return false; + } + + if (!validate_context_version(intelScreen, + api, major_version, minor_version, + dri_ctx_error)) + return false; + + /* Can't rely on invalidate events, fall back to glViewport hack */ + if (!driContextPriv->driScreenPriv->dri2.useInvalidate) { + intel->saved_viewport = functions->Viewport; + functions->Viewport = intel_viewport; + } + + if (mesaVis == NULL) { + memset(&visual, 0, sizeof visual); + mesaVis = &visual; + } + + intel->intelScreen = intelScreen; + + if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx, + functions)) { + *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY; + printf("%s: failed to init mesa context\n", __FUNCTION__); + return false; + } + + driContextPriv->driverPrivate = intel; + intel->driContext = driContextPriv; + intel->driFd = sPriv->fd; + + intel->gen = intelScreen->gen; + + const int devID = intelScreen->deviceID; + if (IS_SNB_GT1(devID) || IS_IVB_GT1(devID) || IS_HSW_GT1(devID)) + intel->gt = 1; + else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID)) + intel->gt = 2; + else if (IS_HSW_GT3(devID)) + intel->gt = 3; + else + intel->gt = 0; + + if (IS_HASWELL(devID)) { + intel->is_haswell = true; + } else if (IS_BAYTRAIL(devID)) { + intel->is_baytrail = true; + intel->gt = 1; + } else if (IS_G4X(devID)) { + intel->is_g4x = true; + } else if (IS_945(devID)) { + intel->is_945 = true; + } + + if (intel->gen >= 5) { + intel->needs_ff_sync = true; + } + + intel->has_separate_stencil = intel->intelScreen->hw_has_separate_stencil; + intel->must_use_separate_stencil = intel->intelScreen->hw_must_use_separate_stencil; + intel->has_hiz = intel->gen >= 6; + intel->has_llc = intel->intelScreen->hw_has_llc; + intel->has_swizzling = intel->intelScreen->hw_has_swizzling; + + memset(&ctx->TextureFormatSupported, + 0, sizeof(ctx->TextureFormatSupported)); + + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915"); + if (intel->gen < 4) + intel->maxBatchSize = 4096; + else + intel->maxBatchSize = BATCH_SZ; + + /* Estimate the size of the mappable aperture into the GTT. There's an + * ioctl to get the whole GTT size, but not one to get the mappable subset. + * It turns out it's basically always 256MB, though some ancient hardware + * was smaller. + */ + uint32_t gtt_size = 256 * 1024 * 1024; + if (intel->gen == 2) + gtt_size = 128 * 1024 * 1024; + + /* We don't want to map two objects such that a memcpy between them would + * just fault one mapping in and then the other over and over forever. So + * we would need to divide the GTT size by 2. Additionally, some GTT is + * taken up by things like the framebuffer and the ringbuffer and such, so + * be more conservative. + */ + intel->max_gtt_map_object_size = gtt_size / 4; + + intel->bufmgr = intelScreen->bufmgr; + + bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse"); + switch (bo_reuse_mode) { + case DRI_CONF_BO_REUSE_DISABLED: + break; + case DRI_CONF_BO_REUSE_ALL: + intel_bufmgr_gem_enable_reuse(intel->bufmgr); + break; + } + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = 5.0; + ctx->Const.MaxLineWidthAA = 5.0; + ctx->Const.LineWidthGranularity = 0.5; + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = 255.0; + ctx->Const.MaxPointSizeAA = 3.0; + ctx->Const.PointSizeGranularity = 1.0; + + if (intel->gen >= 6) + ctx->Const.MaxClipPlanes = 8; + + ctx->Const.StripTextureBorder = GL_TRUE; + + /* reinitialize the context point state. + * It depend on constants in __struct gl_contextRec::Const + */ + _mesa_init_point(ctx); + + if (intel->gen >= 4) { + ctx->Const.MaxRenderbufferSize = 8192; + } else { + ctx->Const.MaxRenderbufferSize = 2048; + } + + /* Initialize the software rasterizer and helper modules. + * + * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for + * software fallbacks (which we have to support on legacy GL to do weird + * glDrawPixels(), glBitmap(), and other functions). + */ + if (intel->gen <= 3 || api != API_OPENGL_CORE) { + _swrast_CreateContext(ctx); + } + + _vbo_CreateContext(ctx); + if (ctx->swrast_context) { + _tnl_CreateContext(ctx); + _swsetup_CreateContext(ctx); + + /* Configure swrast to match hardware characteristics: */ + _swrast_allow_pixel_fog(ctx, false); + _swrast_allow_vertex_fog(ctx, true); + } + + _mesa_meta_init(ctx); + + intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24; + intel->hw_stipple = 1; + + intel->RenderIndex = ~0; + + intelInitExtensions(ctx); + + INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); + if (INTEL_DEBUG & DEBUG_BUFMGR) + dri_bufmgr_set_debug(intel->bufmgr, true); + if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && intel->gen < 7) { + fprintf(stderr, + "shader_time debugging requires gen7 (Ivybridge) or better.\n"); + INTEL_DEBUG &= ~DEBUG_SHADER_TIME; + } + if (INTEL_DEBUG & DEBUG_PERF) + intel->perf_debug = true; + + if (INTEL_DEBUG & DEBUG_AUB) + drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true); + + intel_batchbuffer_init(intel); + + intel_fbo_init(intel); + + intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); + + if (!driQueryOptionb(&intel->optionCache, "hiz")) { + intel->has_hiz = false; + /* On gen6, you can only do separate stencil with HIZ. */ + if (intel->gen == 6) + intel->has_separate_stencil = false; + } + + intel->prim.primitive = ~0; + + /* Force all software fallbacks */ +#ifdef I915 + if (driQueryOptionb(&intel->optionCache, "no_rast")) { + fprintf(stderr, "disabling 3D rasterization\n"); + intel->no_rast = 1; + } +#endif + + if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) { + fprintf(stderr, "flushing batchbuffer before/after each draw call\n"); + intel->always_flush_batch = 1; + } + + if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) { + fprintf(stderr, "flushing GPU caches before/after each draw call\n"); + intel->always_flush_cache = 1; + } + + if (driQueryOptionb(&intel->optionCache, "disable_throttling")) { + fprintf(stderr, "disabling flush throttling\n"); + intel->disable_throttling = 1; + } + + return true; +} + +void +intelDestroyContext(__DRIcontext * driContextPriv) +{ + struct intel_context *intel = + (struct intel_context *) driContextPriv->driverPrivate; + struct gl_context *ctx = &intel->ctx; + + assert(intel); /* should never be null */ + if (intel) { + INTEL_FIREVERTICES(intel); + + /* Dump a final BMP in case the application doesn't call SwapBuffers */ + if (INTEL_DEBUG & DEBUG_AUB) { + intel_batchbuffer_flush(intel); + aub_dump_bmp(&intel->ctx); + } + + _mesa_meta_free(&intel->ctx); + + intel->vtbl.destroy(intel); + + if (ctx->swrast_context) { + _swsetup_DestroyContext(&intel->ctx); + _tnl_DestroyContext(&intel->ctx); + } + _vbo_DestroyContext(&intel->ctx); + + if (ctx->swrast_context) + _swrast_DestroyContext(&intel->ctx); + intel->Fallback = 0x0; /* don't call _swrast_Flush later */ + + intel_batchbuffer_free(intel); + + free(intel->prim.vb); + intel->prim.vb = NULL; + drm_intel_bo_unreference(intel->prim.vb_bo); + intel->prim.vb_bo = NULL; + drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); + intel->first_post_swapbuffers_batch = NULL; + + driDestroyOptionCache(&intel->optionCache); + + /* free the Mesa context */ + _mesa_free_context_data(&intel->ctx); + + _math_matrix_dtr(&intel->ViewportMatrix); + + ralloc_free(intel); + driContextPriv->driverPrivate = NULL; + } +} + +GLboolean +intelUnbindContext(__DRIcontext * driContextPriv) +{ + /* Unset current context and dispath table */ + _mesa_make_current(NULL, NULL, NULL); + + return true; +} + +/** + * Fixes up the context for GLES23 with our default-to-sRGB-capable behavior + * on window system framebuffers. + * + * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if + * your renderbuffer can do sRGB encode, and you can flip a switch that does + * sRGB encode if the renderbuffer can handle it. You can ask specifically + * for a visual where you're guaranteed to be capable, but it turns out that + * everyone just makes all their ARGB8888 visuals capable and doesn't offer + * incapable ones, becuase there's no difference between the two in resources + * used. Applications thus get built that accidentally rely on the default + * visual choice being sRGB, so we make ours sRGB capable. Everything sounds + * great... + * + * But for GLES2/3, they decided that it was silly to not turn on sRGB encode + * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent. + * So they removed the enable knob and made it "if the renderbuffer is sRGB + * capable, do sRGB encode". Then, for your window system renderbuffers, you + * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals + * and get no sRGB encode (assuming that both kinds of visual are available). + * Thus our choice to support sRGB by default on our visuals for desktop would + * result in broken rendering of GLES apps that aren't expecting sRGB encode. + * + * Unfortunately, renderbuffer setup happens before a context is created. So + * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3 + * context (without an sRGB visual, though we don't have sRGB visuals exposed + * yet), we go turn that back off before anyone finds out. + */ +static void +intel_gles3_srgb_workaround(struct intel_context *intel, + struct gl_framebuffer *fb) +{ + struct gl_context *ctx = &intel->ctx; + + if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable) + return; + + /* Some day when we support the sRGB capable bit on visuals available for + * GLES, we'll need to respect that and not disable things here. + */ + fb->Visual.sRGBCapable = false; + for (int i = 0; i < BUFFER_COUNT; i++) { + if (fb->Attachment[i].Renderbuffer && + fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) { + fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888; + } + } +} + +GLboolean +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv) +{ + struct intel_context *intel; + GET_CURRENT_CONTEXT(curCtx); + + if (driContextPriv) + intel = (struct intel_context *) driContextPriv->driverPrivate; + else + intel = NULL; + + /* According to the glXMakeCurrent() man page: "Pending commands to + * the previous context, if any, are flushed before it is released." + * But only flush if we're actually changing contexts. + */ + if (intel_context(curCtx) && intel_context(curCtx) != intel) { + _mesa_flush(curCtx); + } + + if (driContextPriv) { + struct gl_context *ctx = &intel->ctx; + struct gl_framebuffer *fb, *readFb; + + if (driDrawPriv == NULL && driReadPriv == NULL) { + fb = _mesa_get_incomplete_framebuffer(); + readFb = _mesa_get_incomplete_framebuffer(); + } else { + fb = driDrawPriv->driverPrivate; + readFb = driReadPriv->driverPrivate; + driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; + driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; + } + + intel_prepare_render(intel); + _mesa_make_current(ctx, fb, readFb); + + intel_gles3_srgb_workaround(intel, ctx->WinSysDrawBuffer); + intel_gles3_srgb_workaround(intel, ctx->WinSysReadBuffer); + + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer + * is NULL at that point. We can't call _mesa_makecurrent() + * first, since we need the buffer size for the initial + * viewport. So just call intel_draw_buffer() again here. */ + intel_draw_buffer(ctx); + } + else { + _mesa_make_current(NULL, NULL, NULL); + } + + return true; +} + +/** + * \brief Query DRI2 to obtain a DRIdrawable's buffers. + * + * To determine which DRI buffers to request, examine the renderbuffers + * attached to the drawable's framebuffer. Then request the buffers with + * DRI2GetBuffers() or DRI2GetBuffersWithFormat(). + * + * This is called from intel_update_renderbuffers(). + * + * \param drawable Drawable whose buffers are queried. + * \param buffers [out] List of buffers returned by DRI2 query. + * \param buffer_count [out] Number of buffers returned. + * + * \see intel_update_renderbuffers() + * \see DRI2GetBuffers() + * \see DRI2GetBuffersWithFormat() + */ +static void +intel_query_dri2_buffers(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer **buffers, + int *buffer_count) +{ + __DRIscreen *screen = intel->intelScreen->driScrnPriv; + struct gl_framebuffer *fb = drawable->driverPrivate; + int i = 0; + unsigned attachments[8]; + + struct intel_renderbuffer *front_rb; + struct intel_renderbuffer *back_rb; + + front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); + + memset(attachments, 0, sizeof(attachments)); + if ((intel->is_front_buffer_rendering || + intel->is_front_buffer_reading || + !back_rb) && front_rb) { + /* If a fake front buffer is in use, then querying for + * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from + * the real front buffer to the fake front buffer. So before doing the + * query, we need to make sure all the pending drawing has landed in the + * real front buffer. + */ + intel_flush(&intel->ctx); + intel_flush_front(&intel->ctx); + + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = intel_bits_per_pixel(front_rb); + } else if (front_rb && intel->front_buffer_dirty) { + /* We have pending front buffer rendering, but we aren't querying for a + * front buffer. If the front buffer we have is a fake front buffer, + * the X server is going to throw it away when it processes the query. + * So before doing the query, make sure all the pending drawing has + * landed in the real front buffer. + */ + intel_flush(&intel->ctx); + intel_flush_front(&intel->ctx); + } + + if (back_rb) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = intel_bits_per_pixel(back_rb); + } + + assert(i <= ARRAY_SIZE(attachments)); + + *buffers = screen->dri2.loader->getBuffersWithFormat(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + buffer_count, + drawable->loaderPrivate); +} + +/** + * \brief Assign a DRI buffer's DRM region to a renderbuffer. + * + * This is called from intel_update_renderbuffers(). + * + * \par Note: + * DRI buffers whose attachment point is DRI2BufferStencil or + * DRI2BufferDepthStencil are handled as special cases. + * + * \param buffer_name is a human readable name, such as "dri2 front buffer", + * that is passed to intel_region_alloc_for_handle(). + * + * \see intel_update_renderbuffers() + * \see intel_region_alloc_for_handle() + */ +static void +intel_process_dri2_buffer(struct intel_context *intel, + __DRIdrawable *drawable, + __DRIbuffer *buffer, + struct intel_renderbuffer *rb, + const char *buffer_name) +{ + struct intel_region *region = NULL; + + if (!rb) + return; + + unsigned num_samples = rb->Base.Base.NumSamples; + + /* We try to avoid closing and reopening the same BO name, because the first + * use of a mapping of the buffer involves a bunch of page faulting which is + * moderately expensive. + */ + if (num_samples == 0) { + if (rb->mt && + rb->mt->region && + rb->mt->region->name == buffer->name) + return; + } else { + if (rb->mt && + rb->mt->singlesample_mt && + rb->mt->singlesample_mt->region && + rb->mt->singlesample_mt->region->name == buffer->name) + return; + } + + if (unlikely(INTEL_DEBUG & DEBUG_DRI)) { + fprintf(stderr, + "attaching buffer %d, at %d, cpp %d, pitch %d\n", + buffer->name, buffer->attachment, + buffer->cpp, buffer->pitch); + } + + intel_miptree_release(&rb->mt); + region = intel_region_alloc_for_handle(intel->intelScreen, + buffer->cpp, + drawable->w, + drawable->h, + buffer->pitch, + buffer->name, + buffer_name); + if (!region) + return; + + rb->mt = intel_miptree_create_for_dri2_buffer(intel, + buffer->attachment, + intel_rb_format(rb), + num_samples, + region); + intel_region_release(®ion); +} diff --git a/src/mesa/drivers/dri/i915/intel_context.h b/src/mesa/drivers/dri/i915/intel_context.h new file mode 100644 index 0000000..eac65ba --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_context.h @@ -0,0 +1,643 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELCONTEXT_INC +#define INTELCONTEXT_INC + + +#include +#include +#include "main/mtypes.h" +#include "main/mm.h" + +#ifdef __cplusplus +extern "C" { + /* Evil hack for using libdrm in a c++ compiler. */ + #define virtual virt +#endif + +#include "drm.h" +#include "intel_bufmgr.h" + +#include "intel_screen.h" +#include "intel_tex_obj.h" +#include "i915_drm.h" + +#ifdef __cplusplus + #undef virtual +#endif + +#include "tnl/t_vertex.h" + +#define TAG(x) intel##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +#define DV_PF_555 (1<<8) +#define DV_PF_565 (2<<8) +#define DV_PF_8888 (3<<8) +#define DV_PF_4444 (8<<8) +#define DV_PF_1555 (9<<8) + +struct intel_region; +struct intel_context; + +typedef void (*intel_tri_func) (struct intel_context *, intelVertex *, + intelVertex *, intelVertex *); +typedef void (*intel_line_func) (struct intel_context *, intelVertex *, + intelVertex *); +typedef void (*intel_point_func) (struct intel_context *, intelVertex *); + +/** + * Bits for intel->Fallback field + */ +/*@{*/ +#define INTEL_FALLBACK_DRAW_BUFFER 0x1 +#define INTEL_FALLBACK_READ_BUFFER 0x2 +#define INTEL_FALLBACK_DEPTH_BUFFER 0x4 +#define INTEL_FALLBACK_STENCIL_BUFFER 0x8 +#define INTEL_FALLBACK_USER 0x10 +#define INTEL_FALLBACK_RENDERMODE 0x20 +#define INTEL_FALLBACK_TEXTURE 0x40 +#define INTEL_FALLBACK_DRIVER 0x1000 /**< first for drivers */ +/*@}*/ + +extern void intelFallback(struct intel_context *intel, GLbitfield bit, + bool mode); +#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode ) + + +#define INTEL_WRITE_PART 0x1 +#define INTEL_WRITE_FULL 0x2 +#define INTEL_READ 0x4 + +#define INTEL_MAX_FIXUP 64 + +#ifndef likely +#ifdef __GNUC__ +#define likely(expr) (__builtin_expect(expr, 1)) +#define unlikely(expr) (__builtin_expect(expr, 0)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#endif +#endif + +struct intel_sync_object { + struct gl_sync_object Base; + + /** Batch associated with this sync object */ + drm_intel_bo *bo; +}; + +struct brw_context; + +struct intel_batchbuffer { + /** Current batchbuffer being queued up. */ + drm_intel_bo *bo; + /** Last BO submitted to the hardware. Used for glFinish(). */ + drm_intel_bo *last_bo; + /** BO for post-sync nonzero writes for gen6 workaround. */ + drm_intel_bo *workaround_bo; + bool need_workaround_flush; + + struct cached_batch_item *cached_items; + + uint16_t emit, total; + uint16_t used, reserved_space; + uint32_t *map; + uint32_t *cpu_map; +#define BATCH_SZ (8192*sizeof(uint32_t)) + + uint32_t state_batch_offset; + bool is_blit; + bool needs_sol_reset; + + struct { + uint16_t used; + int reloc_count; + } saved; +}; + +/** + * intel_context is derived from Mesa's context class: struct gl_context. + */ +struct intel_context +{ + struct gl_context ctx; /**< base class, must be first field */ + + struct + { + void (*destroy) (struct intel_context * intel); + void (*emit_state) (struct intel_context * intel); + void (*finish_batch) (struct intel_context * intel); + void (*new_batch) (struct intel_context * intel); + void (*emit_invarient_state) (struct intel_context * intel); + void (*update_texture_state) (struct intel_context * intel); + + void (*render_start) (struct intel_context * intel); + void (*render_prevalidate) (struct intel_context * intel); + void (*set_draw_region) (struct intel_context * intel, + struct intel_region * draw_regions[], + struct intel_region * depth_region, + GLuint num_regions); + void (*update_draw_buffer)(struct intel_context *intel); + + void (*reduced_primitive_state) (struct intel_context * intel, + GLenum rprim); + + bool (*check_vertex_size) (struct intel_context * intel, + GLuint expected); + void (*invalidate_state) (struct intel_context *intel, + GLuint new_state); + + void (*assert_not_dirty) (struct intel_context *intel); + + void (*debug_batch)(struct intel_context *intel); + void (*annotate_aub)(struct intel_context *intel); + bool (*render_target_supported)(struct intel_context *intel, + struct gl_renderbuffer *rb); + + /** Can HiZ be enabled on a depthbuffer of the given format? */ + bool (*is_hiz_depth_format)(struct intel_context *intel, + gl_format format); + + /** + * Surface state operations (i965+ only) + * \{ + */ + void (*update_texture_surface)(struct gl_context *ctx, + unsigned unit, + uint32_t *binding_table, + unsigned surf_index); + void (*update_renderbuffer_surface)(struct brw_context *brw, + struct gl_renderbuffer *rb, + bool layered, + unsigned unit); + void (*update_null_renderbuffer_surface)(struct brw_context *brw, + unsigned unit); + void (*create_constant_surface)(struct brw_context *brw, + drm_intel_bo *bo, + uint32_t offset, + uint32_t size, + uint32_t *out_offset, + bool dword_pitch); + /** \} */ + + /** + * Send the appropriate state packets to configure depth, stencil, and + * HiZ buffers (i965+ only) + */ + void (*emit_depth_stencil_hiz)(struct brw_context *brw, + struct intel_mipmap_tree *depth_mt, + uint32_t depth_offset, + uint32_t depthbuffer_format, + uint32_t depth_surface_type, + struct intel_mipmap_tree *stencil_mt, + bool hiz, bool separate_stencil, + uint32_t width, uint32_t height, + uint32_t tile_x, uint32_t tile_y); + + } vtbl; + + GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ + GLuint NewGLState; + + dri_bufmgr *bufmgr; + unsigned int maxBatchSize; + + /** + * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965. + */ + int gen; + int gt; + bool needs_ff_sync; + bool is_haswell; + bool is_baytrail; + bool is_g4x; + bool is_945; + bool has_separate_stencil; + bool must_use_separate_stencil; + bool has_hiz; + bool has_llc; + bool has_swizzling; + + int urb_size; + + drm_intel_context *hw_ctx; + + struct intel_batchbuffer batch; + + drm_intel_bo *first_post_swapbuffers_batch; + bool need_throttle; + bool no_batch_wrap; + bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */ + + /** + * Set if we're either a debug context or the INTEL_DEBUG=perf environment + * variable is set, this is the flag indicating to do expensive work that + * might lead to a perf_debug() call. + */ + bool perf_debug; + + struct + { + GLuint id; + uint32_t start_ptr; /**< for i8xx */ + uint32_t primitive; /**< Current hardware primitive type */ + void (*flush) (struct intel_context *); + drm_intel_bo *vb_bo; + uint8_t *vb; + unsigned int start_offset; /**< Byte offset of primitive sequence */ + unsigned int current_offset; /**< Byte offset of next vertex */ + unsigned int count; /**< Number of vertices in current primitive */ + } prim; + + struct { + drm_intel_bo *bo; + GLuint offset; + uint32_t buffer_len; + uint32_t buffer_offset; + char buffer[4096]; + } upload; + + uint32_t max_gtt_map_object_size; + + GLuint stats_wm; + + /* Offsets of fields within the current vertex: + */ + GLuint coloroffset; + GLuint specoffset; + GLuint wpos_offset; + + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + + GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */ + + bool hw_stencil; + bool hw_stipple; + bool no_rast; + bool always_flush_batch; + bool always_flush_cache; + bool disable_throttling; + + /* State for intelvb.c and inteltris.c. + */ + GLuint RenderIndex; + GLmatrix ViewportMatrix; + GLenum render_primitive; + GLenum reduced_primitive; /*< Only gen < 6 */ + GLuint vertex_size; + GLubyte *verts; /* points to tnl->clipspace.vertex_buf */ + + /* Fallback rasterization functions + */ + intel_point_func draw_point; + intel_line_func draw_line; + intel_tri_func draw_tri; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + bool front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + bool is_front_buffer_rendering; + /** + * Track whether front-buffer is the current read target. + * + * This is closely associated with is_front_buffer_rendering, but may + * be set separately. The DRI2 fake front buffer must be referenced + * either way. + */ + bool is_front_buffer_reading; + + bool use_early_z; + + int driFd; + + __DRIcontext *driContext; + struct intel_screen *intelScreen; + void (*saved_viewport)(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height); + + /** + * Configuration cache + */ + driOptionCache optionCache; +}; + +extern char *__progname; + + +#define SUBPIXEL_X 0.125 +#define SUBPIXEL_Y 0.125 + +/** + * Align a value down to an alignment value + * + * If \c value is not already aligned to the requested alignment value, it + * will be rounded down. + * + * \param value Value to be rounded + * \param alignment Alignment value to be used. This must be a power of two. + * + * \sa ALIGN() + */ +#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1)) + +static INLINE uint32_t +U_FIXED(float value, uint32_t frac_bits) +{ + value *= (1 << frac_bits); + return value < 0 ? 0 : value; +} + +static INLINE uint32_t +S_FIXED(float value, uint32_t frac_bits) +{ + return value * (1 << frac_bits); +} + +#define INTEL_FIREVERTICES(intel) \ +do { \ + if ((intel)->prim.flush) \ + (intel)->prim.flush(intel); \ +} while (0) + +/* ================================================================ + * From linux kernel i386 header files, copes with odd sizes better + * than COPY_DWORDS would: + * XXX Put this in src/mesa/main/imports.h ??? + */ +#if defined(i386) || defined(__i386__) +static INLINE void * __memcpy(void * to, const void * from, size_t n) +{ + int d0, d1, d2; + __asm__ __volatile__( + "rep ; movsl\n\t" + "testb $2,%b4\n\t" + "je 1f\n\t" + "movsw\n" + "1:\ttestb $1,%b4\n\t" + "je 2f\n\t" + "movsb\n" + "2:" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) + : "memory"); + return (to); +} +#else +#define __memcpy(a,b,c) memcpy(a,b,c) +#endif + + +/* ================================================================ + * Debugging: + */ +extern int INTEL_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BLIT 0x8 +#define DEBUG_MIPTREE 0x10 +#define DEBUG_PERF 0x20 +#define DEBUG_BATCH 0x80 +#define DEBUG_PIXEL 0x100 +#define DEBUG_BUFMGR 0x200 +#define DEBUG_REGION 0x400 +#define DEBUG_FBO 0x800 +#define DEBUG_GS 0x1000 +#define DEBUG_SYNC 0x2000 +#define DEBUG_PRIMS 0x4000 +#define DEBUG_VERTS 0x8000 +#define DEBUG_DRI 0x10000 +#define DEBUG_SF 0x20000 +#define DEBUG_STATS 0x100000 +#define DEBUG_WM 0x400000 +#define DEBUG_URB 0x800000 +#define DEBUG_VS 0x1000000 +#define DEBUG_CLIP 0x2000000 +#define DEBUG_AUB 0x4000000 +#define DEBUG_SHADER_TIME 0x8000000 +#define DEBUG_BLORP 0x10000000 +#define DEBUG_NO16 0x20000000 + +#ifdef HAVE_ANDROID_PLATFORM +#define LOG_TAG "INTEL-MESA" +#include +#ifndef ALOGW +#define ALOGW LOGW +#endif +#define dbg_printf(...) ALOGW(__VA_ARGS__) +#else +#define dbg_printf(...) printf(__VA_ARGS__) +#endif /* HAVE_ANDROID_PLATFORM */ + +#define DBG(...) do { \ + if (unlikely(INTEL_DEBUG & FILE_DEBUG_FLAG)) \ + dbg_printf(__VA_ARGS__); \ +} while(0) + +#define perf_debug(...) do { \ + static GLuint msg_id = 0; \ + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \ + dbg_printf(__VA_ARGS__); \ + if (intel->perf_debug) \ + _mesa_gl_debug(&intel->ctx, &msg_id, \ + MESA_DEBUG_TYPE_PERFORMANCE, \ + MESA_DEBUG_SEVERITY_MEDIUM, \ + __VA_ARGS__); \ +} while(0) + +#define WARN_ONCE(cond, fmt...) do { \ + if (unlikely(cond)) { \ + static bool _warned = false; \ + static GLuint msg_id = 0; \ + if (!_warned) { \ + fprintf(stderr, "WARNING: "); \ + fprintf(stderr, fmt); \ + _warned = true; \ + \ + _mesa_gl_debug(ctx, &msg_id, \ + MESA_DEBUG_TYPE_OTHER, \ + MESA_DEBUG_SEVERITY_HIGH, fmt); \ + } \ + } \ +} while (0) + +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +/* ================================================================ + * intel_context.c: + */ + +extern bool intelInitContext(struct intel_context *intel, + int api, + unsigned major_version, + unsigned minor_version, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + void *sharedContextPrivate, + struct dd_function_table *functions, + unsigned *dri_ctx_error); + +extern void intelFinish(struct gl_context * ctx); +extern void intel_flush_rendering_to_batch(struct gl_context *ctx); +extern void _intel_flush(struct gl_context * ctx, const char *file, int line); + +#define intel_flush(ctx) _intel_flush(ctx, __FILE__, __LINE__) + +extern void intelInitDriverFunctions(struct dd_function_table *functions); + +void intel_init_syncobj_functions(struct dd_function_table *functions); + + +/* ================================================================ + * intel_state.c: + */ + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +#define STENCILOP_KEEP 0 +#define STENCILOP_ZERO 0x1 +#define STENCILOP_REPLACE 0x2 +#define STENCILOP_INCRSAT 0x3 +#define STENCILOP_DECRSAT 0x4 +#define STENCILOP_INCR 0x5 +#define STENCILOP_DECR 0x6 +#define STENCILOP_INVERT 0x7 + +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +enum { + DRI_CONF_BO_REUSE_DISABLED, + DRI_CONF_BO_REUSE_ALL +}; + +extern int intel_translate_shadow_compare_func(GLenum func); +extern int intel_translate_compare_func(GLenum func); +extern int intel_translate_stencil_op(GLenum op); +extern int intel_translate_blend_factor(GLenum factor); +extern int intel_translate_logic_op(GLenum opcode); + +void intel_update_renderbuffers(__DRIcontext *context, + __DRIdrawable *drawable); +void intel_prepare_render(struct intel_context *intel); + +void +intel_resolve_for_dri2_flush(struct intel_context *intel, + __DRIdrawable *drawable); + +void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id); +void intel_init_texture_formats(struct gl_context *ctx); + +/*====================================================================== + * Inline conversion functions. + * These are better-typed than the macros used previously: + */ +static INLINE struct intel_context * +intel_context(struct gl_context * ctx) +{ + return (struct intel_context *) ctx; +} + +static INLINE bool +is_power_of_two(uint32_t value) +{ + return (value & (value - 1)) == 0; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c deleted file mode 120000 index a2f3e8c..0000000 --- a/src/mesa/drivers/dri/i915/intel_extensions.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_extensions.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c new file mode 100644 index 0000000..5cb2fa3 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_extensions.c @@ -0,0 +1,188 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/version.h" + +#include "intel_chipset.h" +#include "intel_context.h" +#include "intel_extensions.h" +#include "intel_reg.h" +#include "utils.h" + +/** + * Initializes potential list of extensions if ctx == NULL, or actually enables + * extensions for a context. + */ +void +intelInitExtensions(struct gl_context *ctx) +{ + struct intel_context *intel = intel_context(ctx); + + ctx->Extensions.ARB_draw_elements_base_vertex = true; + ctx->Extensions.ARB_explicit_attrib_location = true; + ctx->Extensions.ARB_framebuffer_object = true; + ctx->Extensions.ARB_half_float_pixel = true; + ctx->Extensions.ARB_internalformat_query = true; + ctx->Extensions.ARB_map_buffer_range = true; + ctx->Extensions.ARB_point_sprite = true; + ctx->Extensions.ARB_shader_objects = true; + ctx->Extensions.ARB_shading_language_100 = true; + ctx->Extensions.ARB_sync = true; + ctx->Extensions.ARB_texture_border_clamp = true; + ctx->Extensions.ARB_texture_cube_map = true; + ctx->Extensions.ARB_texture_env_combine = true; + ctx->Extensions.ARB_texture_env_crossbar = true; + ctx->Extensions.ARB_texture_env_dot3 = true; + ctx->Extensions.ARB_texture_storage = true; + ctx->Extensions.ARB_vertex_program = true; + ctx->Extensions.ARB_vertex_shader = true; + ctx->Extensions.EXT_blend_color = true; + ctx->Extensions.EXT_blend_equation_separate = true; + ctx->Extensions.EXT_blend_func_separate = true; + ctx->Extensions.EXT_blend_minmax = true; + ctx->Extensions.EXT_framebuffer_blit = true; + ctx->Extensions.EXT_framebuffer_object = true; + ctx->Extensions.EXT_fog_coord = true; + ctx->Extensions.EXT_gpu_program_parameters = true; + ctx->Extensions.EXT_packed_depth_stencil = true; + ctx->Extensions.EXT_pixel_buffer_object = true; + ctx->Extensions.EXT_point_parameters = true; + ctx->Extensions.EXT_provoking_vertex = true; + ctx->Extensions.EXT_secondary_color = true; + ctx->Extensions.EXT_separate_shader_objects = true; + ctx->Extensions.EXT_texture_env_dot3 = true; + ctx->Extensions.EXT_texture_filter_anisotropic = true; + ctx->Extensions.APPLE_object_purgeable = true; + ctx->Extensions.MESA_pack_invert = true; + ctx->Extensions.MESA_ycbcr_texture = true; + ctx->Extensions.NV_blend_square = true; + ctx->Extensions.NV_texture_rectangle = true; + ctx->Extensions.TDFX_texture_compression_FXT1 = true; + ctx->Extensions.OES_EGL_image = true; + ctx->Extensions.OES_draw_texture = true; + + if (intel->gen >= 6) + ctx->Const.GLSLVersion = 140; + else + ctx->Const.GLSLVersion = 120; + _mesa_override_glsl_version(ctx); + + if (intel->gen >= 6) { + ctx->Extensions.EXT_framebuffer_multisample = true; + ctx->Extensions.EXT_transform_feedback = true; + ctx->Extensions.ARB_blend_func_extended = !driQueryOptionb(&intel->optionCache, "disable_blend_func_extended"); + ctx->Extensions.ARB_draw_buffers_blend = true; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_uniform_buffer_object = true; + ctx->Extensions.ARB_texture_buffer_object = true; + ctx->Extensions.ARB_texture_buffer_object_rgb32 = true; + ctx->Extensions.ARB_texture_cube_map_array = true; + ctx->Extensions.OES_depth_texture_cube_map = true; + ctx->Extensions.ARB_shading_language_packing = true; + ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_storage_multisample = true; + } + + if (intel->gen >= 5) { + ctx->Extensions.ARB_texture_query_lod = true; + ctx->Extensions.EXT_timer_query = true; + } + + if (intel->gen >= 6) { + uint64_t dummy; + /* Test if the kernel has the ioctl. */ + if (drm_intel_reg_read(intel->bufmgr, TIMESTAMP, &dummy) == 0) + ctx->Extensions.ARB_timer_query = true; + } + + if (intel->gen >= 4) { + if (ctx->API == API_OPENGL_CORE) + ctx->Extensions.ARB_base_instance = true; + if (ctx->API != API_OPENGL_CORE) + ctx->Extensions.ARB_color_buffer_float = true; + ctx->Extensions.ARB_depth_buffer_float = true; + ctx->Extensions.ARB_depth_clamp = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_instanced_arrays = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_program_shadow = true; + ctx->Extensions.ARB_fragment_shader = true; + ctx->Extensions.ARB_half_float_vertex = true; + ctx->Extensions.ARB_occlusion_query = true; + ctx->Extensions.ARB_occlusion_query2 = true; + ctx->Extensions.ARB_point_sprite = true; + ctx->Extensions.ARB_seamless_cube_map = true; + ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shader_texture_lod = true; + ctx->Extensions.ARB_texture_float = true; + ctx->Extensions.EXT_texture_shared_exponent = true; + ctx->Extensions.EXT_packed_float = true; + ctx->Extensions.ARB_texture_compression_rgtc = true; + ctx->Extensions.ARB_texture_rg = true; + ctx->Extensions.ARB_texture_rgb10_a2ui = true; + ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true; + ctx->Extensions.EXT_draw_buffers2 = true; + ctx->Extensions.EXT_framebuffer_sRGB = true; + ctx->Extensions.EXT_texture_array = true; + ctx->Extensions.EXT_texture_integer = true; + ctx->Extensions.EXT_texture_snorm = true; + ctx->Extensions.EXT_texture_swizzle = true; + ctx->Extensions.EXT_vertex_array_bgra = true; + ctx->Extensions.ATI_envmap_bumpmap = true; + ctx->Extensions.MESA_texture_array = true; + ctx->Extensions.NV_conditional_render = true; + ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true; + ctx->Extensions.OES_standard_derivatives = true; + } + + if (intel->gen >= 3) { + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_depth_texture = true; + ctx->Extensions.ARB_fragment_program = true; + ctx->Extensions.ARB_shadow = true; + ctx->Extensions.ARB_texture_non_power_of_two = true; + ctx->Extensions.EXT_texture_sRGB = true; + ctx->Extensions.EXT_texture_sRGB_decode = true; + ctx->Extensions.EXT_shadow_funcs = true; + ctx->Extensions.EXT_stencil_two_side = true; + ctx->Extensions.ATI_separate_stencil = true; + ctx->Extensions.ATI_texture_env_combine3 = true; + ctx->Extensions.NV_texture_env_combine4 = true; + ctx->Extensions.ARB_fragment_shader = true; + ctx->Extensions.ARB_occlusion_query = true; + } + + if (intel->ctx.Mesa_DXTn + || driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) + ctx->Extensions.EXT_texture_compression_s3tc = true; + + ctx->Extensions.ANGLE_texture_compression_dxt = true; + + if (intel->gen >= 4) { + ctx->Extensions.NV_primitive_restart = true; + } +} diff --git a/src/mesa/drivers/dri/i915/intel_extensions.h b/src/mesa/drivers/dri/i915/intel_extensions.h new file mode 100644 index 0000000..9991c00 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_extensions.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_EXTENSIONS_H +#define INTEL_EXTENSIONS_H + + +extern void +intelInitExtensions(struct gl_context *ctx); + +extern void +intelInitExtensionsES1(struct gl_context *ctx); + +extern void +intelInitExtensionsES2(struct gl_context *ctx); + + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c deleted file mode 120000 index a19f86d..0000000 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_fbo.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c new file mode 100644 index 0000000..d16523b --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -0,0 +1,949 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/enums.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/context.h" +#include "main/teximage.h" +#include "main/image.h" + +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_blit.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_tex.h" +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_FBO + +static struct gl_renderbuffer * +intel_new_renderbuffer(struct gl_context * ctx, GLuint name); + +struct intel_region* +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex) +{ + struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex); + if (irb && irb->mt) { + if (attIndex == BUFFER_STENCIL && irb->mt->stencil_mt) + return irb->mt->stencil_mt->region; + else + return irb->mt->region; + } else + return NULL; +} + +/** + * Create a new framebuffer object. + */ +static struct gl_framebuffer * +intel_new_framebuffer(struct gl_context * ctx, GLuint name) +{ + /* Only drawable state in intel_framebuffer at this time, just use Mesa's + * class + */ + return _mesa_new_framebuffer(ctx, name); +} + + +/** Called by gl_renderbuffer::Delete() */ +static void +intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + ASSERT(irb); + + intel_miptree_release(&irb->mt); + + _mesa_delete_renderbuffer(ctx, rb); +} + +/** + * \see dd_function_table::MapRenderbuffer + */ +static void +intel_map_renderbuffer(struct gl_context *ctx, + struct gl_renderbuffer *rb, + GLuint x, GLuint y, GLuint w, GLuint h, + GLbitfield mode, + GLubyte **out_map, + GLint *out_stride) +{ + struct intel_context *intel = intel_context(ctx); + struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + void *map; + int stride; + + if (srb->Buffer) { + /* this is a malloc'd renderbuffer (accum buffer), not an irb */ + GLint bpp = _mesa_get_format_bytes(rb->Format); + GLint rowStride = srb->RowStride; + *out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp; + *out_stride = rowStride; + return; + } + + intel_prepare_render(intel); + + /* For a window-system renderbuffer, we need to flip the mapping we receive + * upside-down. So we need to ask for a rectangle on flipped vertically, and + * we then return a pointer to the bottom of it with a negative stride. + */ + if (rb->Name == 0) { + y = rb->Height - y - h; + } + + intel_miptree_map(intel, irb->mt, irb->mt_level, irb->mt_layer, + x, y, w, h, mode, &map, &stride); + + if (rb->Name == 0) { + map += (h - 1) * stride; + stride = -stride; + } + + DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%d\n", + __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format), + x, y, w, h, map, stride); + + *out_map = map; + *out_stride = stride; +} + +/** + * \see dd_function_table::UnmapRenderbuffer + */ +static void +intel_unmap_renderbuffer(struct gl_context *ctx, + struct gl_renderbuffer *rb) +{ + struct intel_context *intel = intel_context(ctx); + struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + DBG("%s: rb %d (%s)\n", __FUNCTION__, + rb->Name, _mesa_get_format_name(rb->Format)); + + if (srb->Buffer) { + /* this is a malloc'd renderbuffer (accum buffer) */ + /* nothing to do */ + return; + } + + intel_miptree_unmap(intel, irb->mt, irb->mt_level, irb->mt_layer); +} + + +/** + * Round up the requested multisample count to the next supported sample size. + */ +unsigned +intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples) +{ + switch (intel->gen) { + case 6: + /* Gen6 supports only 4x multisampling. */ + if (num_samples > 0) + return 4; + else + return 0; + case 7: + /* Gen7 supports 4x and 8x multisampling. */ + if (num_samples > 4) + return 8; + else if (num_samples > 0) + return 4; + else + return 0; + return 0; + default: + /* MSAA unsupported. */ + return 0; + } +} + + +/** + * Called via glRenderbufferStorageEXT() to set the format and allocate + * storage for a user-created renderbuffer. + */ +static GLboolean +intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_screen *screen = intel->intelScreen; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + rb->NumSamples = intel_quantize_num_samples(screen, rb->NumSamples); + + switch (internalFormat) { + default: + /* Use the same format-choice logic as for textures. + * Renderbuffers aren't any different from textures for us, + * except they're less useful because you can't texture with + * them. + */ + rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D, + internalFormat, + GL_NONE, GL_NONE); + break; + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + /* These aren't actual texture formats, so force them here. */ + if (intel->has_separate_stencil) { + rb->Format = MESA_FORMAT_S8; + } else { + assert(!intel->must_use_separate_stencil); + rb->Format = MESA_FORMAT_S8_Z24; + } + break; + } + + rb->Width = width; + rb->Height = height; + rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + + intel_miptree_release(&irb->mt); + + DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(internalFormat), + _mesa_get_format_name(rb->Format), width, height); + + if (width == 0 || height == 0) + return true; + + irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format, + width, height, + rb->NumSamples); + if (!irb->mt) + return false; + + return true; +} + + +static void +intel_image_target_renderbuffer_storage(struct gl_context *ctx, + struct gl_renderbuffer *rb, + void *image_handle) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *irb; + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = screen->dri2.image->lookupEGLImage(screen, image_handle, + screen->loaderPrivate); + if (image == NULL) + return; + + /* __DRIimage is opaque to the core so it has to be checked here */ + switch (image->format) { + case MESA_FORMAT_RGBA8888_REV: + _mesa_error(&intel->ctx, GL_INVALID_OPERATION, + "glEGLImageTargetRenderbufferStorage(unsupported image format"); + return; + break; + default: + break; + } + + irb = intel_renderbuffer(rb); + intel_miptree_release(&irb->mt); + irb->mt = intel_miptree_create_for_bo(intel, + image->region->bo, + image->format, + image->offset, + image->region->width, + image->region->height, + image->region->pitch, + image->region->tiling); + if (!irb->mt) + return; + + rb->InternalFormat = image->internal_format; + rb->Width = image->region->width; + rb->Height = image->region->height; + rb->Format = image->format; + rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx, + image->internal_format); + rb->NeedsFinishRenderTexture = true; +} + +/** + * Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a + * window system framebuffer is resized. + * + * Any actual buffer reallocations for hardware renderbuffers (which would + * have triggered _mesa_resize_framebuffer()) were done by + * intel_process_dri2_buffer(). + */ +static GLboolean +intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + ASSERT(rb->Name == 0); + rb->Width = width; + rb->Height = height; + rb->InternalFormat = internalFormat; + + return true; +} + +/** Dummy function for gl_renderbuffer::AllocStorage() */ +static GLboolean +intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + _mesa_problem(ctx, "intel_op_alloc_storage should never be called."); + return false; +} + +/** + * Create a new intel_renderbuffer which corresponds to an on-screen window, + * not a user-created renderbuffer. + * + * \param num_samples must be quantized. + */ +struct intel_renderbuffer * +intel_create_renderbuffer(gl_format format, unsigned num_samples) +{ + struct intel_renderbuffer *irb; + struct gl_renderbuffer *rb; + + GET_CURRENT_CONTEXT(ctx); + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + rb = &irb->Base.Base; + + _mesa_init_renderbuffer(rb, 0); + rb->ClassID = INTEL_RB_CLASS; + rb->_BaseFormat = _mesa_get_format_base_format(format); + rb->Format = format; + rb->InternalFormat = rb->_BaseFormat; + rb->NumSamples = num_samples; + + /* intel-specific methods */ + rb->Delete = intel_delete_renderbuffer; + rb->AllocStorage = intel_alloc_window_storage; + + return irb; +} + +/** + * Private window-system buffers (as opposed to ones shared with the display + * server created with intel_create_renderbuffer()) are most similar in their + * handling to user-created renderbuffers, but they have a resize handler that + * may be called at intel_update_renderbuffers() time. + * + * \param num_samples must be quantized. + */ +struct intel_renderbuffer * +intel_create_private_renderbuffer(gl_format format, unsigned num_samples) +{ + struct intel_renderbuffer *irb; + + irb = intel_create_renderbuffer(format, num_samples); + irb->Base.Base.AllocStorage = intel_alloc_renderbuffer_storage; + + return irb; +} + +/** + * Create a new renderbuffer object. + * Typically called via glBindRenderbufferEXT(). + */ +static struct gl_renderbuffer * +intel_new_renderbuffer(struct gl_context * ctx, GLuint name) +{ + /*struct intel_context *intel = intel_context(ctx); */ + struct intel_renderbuffer *irb; + struct gl_renderbuffer *rb; + + irb = CALLOC_STRUCT(intel_renderbuffer); + if (!irb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer"); + return NULL; + } + + rb = &irb->Base.Base; + + _mesa_init_renderbuffer(rb, name); + rb->ClassID = INTEL_RB_CLASS; + + /* intel-specific methods */ + rb->Delete = intel_delete_renderbuffer; + rb->AllocStorage = intel_alloc_renderbuffer_storage; + /* span routines set in alloc_storage function */ + + return rb; +} + + +/** + * Called via glBindFramebufferEXT(). + */ +static void +intel_bind_framebuffer(struct gl_context * ctx, GLenum target, + struct gl_framebuffer *fb, struct gl_framebuffer *fbread) +{ + if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { + intel_draw_buffer(ctx); + } + else { + /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */ + } +} + + +/** + * Called via glFramebufferRenderbufferEXT(). + */ +static void +intel_framebuffer_renderbuffer(struct gl_context * ctx, + struct gl_framebuffer *fb, + GLenum attachment, struct gl_renderbuffer *rb) +{ + DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0); + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + intel_draw_buffer(ctx); +} + +static bool +intel_renderbuffer_update_wrapper(struct intel_context *intel, + struct intel_renderbuffer *irb, + struct gl_texture_image *image, + uint32_t layer) +{ + struct gl_renderbuffer *rb = &irb->Base.Base; + struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_mipmap_tree *mt = intel_image->mt; + int level = image->Level; + + rb->Depth = image->Depth; + + rb->AllocStorage = intel_nop_alloc_storage; + + intel_miptree_check_level_layer(mt, level, layer); + irb->mt_level = level; + + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + irb->mt_layer = layer * mt->num_samples; + break; + + default: + irb->mt_layer = layer; + } + + intel_miptree_reference(&irb->mt, mt); + + intel_renderbuffer_set_draw_offset(irb); + + if (mt->hiz_mt == NULL && + intel->vtbl.is_hiz_depth_format(intel, rb->Format)) { + intel_miptree_alloc_hiz(intel, mt); + if (!mt->hiz_mt) + return false; + } + + return true; +} + +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb) +{ + unsigned int dst_x, dst_y; + + /* compute offset of the particular 2D image within the texture region */ + intel_miptree_get_image_offset(irb->mt, + irb->mt_level, + irb->mt_layer, + &dst_x, &dst_y); + + irb->draw_x = dst_x; + irb->draw_y = dst_y; +} + +/** + * Called by glFramebufferTexture[123]DEXT() (and other places) to + * prepare for rendering into texture memory. This might be called + * many times to choose different texture levels, cube faces, etc + * before intel_finish_render_texture() is ever called. + */ +static void +intel_render_texture(struct gl_context * ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_renderbuffer *rb = att->Renderbuffer; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct gl_texture_image *image = rb->TexImage; + struct intel_texture_image *intel_image = intel_texture_image(image); + struct intel_mipmap_tree *mt = intel_image->mt; + int layer; + + (void) fb; + + if (att->CubeMapFace > 0) { + assert(att->Zoffset == 0); + layer = att->CubeMapFace; + } else { + layer = att->Zoffset; + } + + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) + */ + _swrast_render_texture(ctx, fb, att); + return; + } + + intel_miptree_check_level_layer(mt, att->TextureLevel, layer); + + if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) { + _swrast_render_texture(ctx, fb, att); + return; + } + + DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", + _mesa_get_format_name(image->TexFormat), + att->Texture->Name, image->Width, image->Height, image->Depth, + rb->RefCount); + + /* update drawing region, etc */ + intel_draw_buffer(ctx); +} + + +/** + * Called by Mesa when rendering to a texture is done. + */ +static void +intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb) +{ + struct intel_context *intel = intel_context(ctx); + + DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format)); + + /* Since we've (probably) rendered to the texture and will (likely) use + * it in the texture domain later on in this batchbuffer, flush the + * batch. Once again, we wish for a domain tracker in libdrm to cover + * usage inside of a batchbuffer like GEM does in the kernel. + */ + intel_batchbuffer_emit_mi_flush(intel); +} + +#define fbo_incomplete(fb, ...) do { \ + static GLuint msg_id = 0; \ + if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \ + _mesa_gl_debug(ctx, &msg_id, \ + MESA_DEBUG_TYPE_OTHER, \ + MESA_DEBUG_SEVERITY_MEDIUM, \ + __VA_ARGS__); \ + } \ + DBG(__VA_ARGS__); \ + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \ + } while (0) + +/** + * Do additional "completeness" testing of a framebuffer object. + */ +static void +intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_renderbuffer *depthRb = + intel_get_renderbuffer(fb, BUFFER_DEPTH); + struct intel_renderbuffer *stencilRb = + intel_get_renderbuffer(fb, BUFFER_STENCIL); + struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL; + int i; + + DBG("%s() on fb %p (%s)\n", __FUNCTION__, + fb, (fb == ctx->DrawBuffer ? "drawbuffer" : + (fb == ctx->ReadBuffer ? "readbuffer" : "other buffer"))); + + if (depthRb) + depth_mt = depthRb->mt; + if (stencilRb) { + stencil_mt = stencilRb->mt; + if (stencil_mt->stencil_mt) + stencil_mt = stencil_mt->stencil_mt; + } + + if (depth_mt && stencil_mt) { + if (depth_mt == stencil_mt) { + /* For true packed depth/stencil (not faked on prefers-separate-stencil + * hardware) we need to be sure they're the same level/layer, since + * we'll be emitting a single packet describing the packed setup. + */ + if (depthRb->mt_level != stencilRb->mt_level || + depthRb->mt_layer != stencilRb->mt_layer) { + fbo_incomplete(fb, + "FBO incomplete: depth image level/layer %d/%d != " + "stencil image %d/%d\n", + depthRb->mt_level, + depthRb->mt_layer, + stencilRb->mt_level, + stencilRb->mt_layer); + } + } else { + if (!intel->has_separate_stencil) { + fbo_incomplete(fb, "FBO incomplete: separate stencil " + "unsupported\n"); + } + if (stencil_mt->format != MESA_FORMAT_S8) { + fbo_incomplete(fb, "FBO incomplete: separate stencil is %s " + "instead of S8\n", + _mesa_get_format_name(stencil_mt->format)); + } + if (intel->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) { + /* Before Gen7, separate depth and stencil buffers can be used + * only if HiZ is enabled. From the Sandybridge PRM, Volume 2, + * Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable: + * [DevSNB]: This field must be set to the same value (enabled + * or disabled) as Hierarchical Depth Buffer Enable. + */ + fbo_incomplete(fb, "FBO incomplete: separate stencil " + "without HiZ\n"); + } + } + } + + for (i = 0; i < Elements(fb->Attachment); i++) { + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + if (fb->Attachment[i].Type == GL_NONE) + continue; + + /* A supported attachment will have a Renderbuffer set either + * from being a Renderbuffer or being a texture that got the + * intel_wrap_texture() treatment. + */ + rb = fb->Attachment[i].Renderbuffer; + if (rb == NULL) { + fbo_incomplete(fb, "FBO incomplete: attachment without " + "renderbuffer\n"); + continue; + } + + if (fb->Attachment[i].Type == GL_TEXTURE) { + if (rb->TexImage->Border) { + fbo_incomplete(fb, "FBO incomplete: texture with border\n"); + continue; + } + } + + irb = intel_renderbuffer(rb); + if (irb == NULL) { + fbo_incomplete(fb, "FBO incomplete: software rendering " + "renderbuffer\n"); + continue; + } + + if (!intel->vtbl.render_target_supported(intel, rb)) { + fbo_incomplete(fb, "FBO incomplete: Unsupported HW " + "texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(intel_rb_format(irb))); + } + } +} + +/** + * Try to do a glBlitFramebuffer using glCopyTexSubImage2D + * We can do this when the dst renderbuffer is actually a texture and + * there is no scaling, mirroring or scissoring. + * + * \return new buffer mask indicating the buffers left to blit using the + * normal path. + */ +static GLbitfield +intel_blit_framebuffer_with_blitter(struct gl_context *ctx, + GLint srcX0, GLint srcY0, + GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, + GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct intel_context *intel = intel_context(ctx); + + if (mask & GL_COLOR_BUFFER_BIT) { + GLint i; + const struct gl_framebuffer *drawFb = ctx->DrawBuffer; + const struct gl_framebuffer *readFb = ctx->ReadBuffer; + struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; + struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb); + + if (!src_irb) { + perf_debug("glBlitFramebuffer(): missing src renderbuffer. " + "Falling back to software rendering.\n"); + return mask; + } + + /* If the source and destination are the same size with no mirroring, + * the rectangles are within the size of the texture and there is no + * scissor, then we can probably use the blit engine. + */ + if (!(srcX0 - srcX1 == dstX0 - dstX1 && + srcY0 - srcY1 == dstY0 - dstY1 && + srcX1 >= srcX0 && + srcY1 >= srcY0 && + srcX0 >= 0 && srcX1 <= readFb->Width && + srcY0 >= 0 && srcY1 <= readFb->Height && + dstX0 >= 0 && dstX1 <= drawFb->Width && + dstY0 >= 0 && dstY1 <= drawFb->Height && + !ctx->Scissor.Enabled)) { + perf_debug("glBlitFramebuffer(): non-1:1 blit. " + "Falling back to software rendering.\n"); + return mask; + } + + /* Blit to all active draw buffers. We don't do any pre-checking, + * because we assume that copying to MRTs is rare, and failure midway + * through copying is even more rare. Even if it was to occur, it's + * safe to let meta start the copy over from scratch, because + * glBlitFramebuffer completely overwrites the destination pixels, and + * results are undefined if any destination pixels have a dependency on + * source pixels. + */ + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *dst_rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *dst_irb = intel_renderbuffer(dst_rb); + + if (!dst_irb) { + perf_debug("glBlitFramebuffer(): missing dst renderbuffer. " + "Falling back to software rendering.\n"); + return mask; + } + + gl_format src_format = _mesa_get_srgb_format_linear(src_rb->Format); + gl_format dst_format = _mesa_get_srgb_format_linear(dst_rb->Format); + if (src_format != dst_format) { + perf_debug("glBlitFramebuffer(): unsupported blit from %s to %s. " + "Falling back to software rendering.\n", + _mesa_get_format_name(src_format), + _mesa_get_format_name(dst_format)); + return mask; + } + + if (!intel_miptree_blit(intel, + src_irb->mt, + src_irb->mt_level, src_irb->mt_layer, + srcX0, srcY0, src_rb->Name == 0, + dst_irb->mt, + dst_irb->mt_level, dst_irb->mt_layer, + dstX0, dstY0, dst_rb->Name == 0, + dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) { + perf_debug("glBlitFramebuffer(): unknown blit failure. " + "Falling back to software rendering.\n"); + return mask; + } + } + + mask &= ~GL_COLOR_BUFFER_BIT; + } + + return mask; +} + +static void +intel_blit_framebuffer(struct gl_context *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ +#ifndef I915 + mask = brw_blorp_framebuffer(intel_context(ctx), + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; +#endif + + /* Try using the BLT engine. */ + mask = intel_blit_framebuffer_with_blitter(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; + + + _mesa_meta_BlitFramebuffer(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); +} + +/** + * This is a no-op except on multisample buffers shared with DRI2. + */ +void +intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb) +{ + if (irb->mt && irb->mt->singlesample_mt) + irb->mt->need_downsample = true; +} + +/** + * Does the renderbuffer have hiz enabled? + */ +bool +intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb) +{ + return intel_miptree_slice_has_hiz(irb->mt, irb->mt_level, irb->mt_layer); +} + +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_hiz_resolve(irb->mt, + irb->mt_level, + irb->mt_layer); + } +} + +void +intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb) +{ + if (irb->mt) { + intel_miptree_slice_set_needs_depth_resolve(irb->mt, + irb->mt_level, + irb->mt_layer); + } +} + +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_hiz(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + + return false; +} + +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb) +{ + if (irb->mt) + return intel_miptree_slice_resolve_depth(intel, + irb->mt, + irb->mt_level, + irb->mt_layer); + + return false; +} + +void +intel_renderbuffer_move_to_temp(struct intel_context *intel, + struct intel_renderbuffer *irb, + bool invalidate) +{ + struct gl_renderbuffer *rb =&irb->Base.Base; + struct intel_texture_image *intel_image = intel_texture_image(rb->TexImage); + struct intel_mipmap_tree *new_mt; + int width, height, depth; + + intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth); + + new_mt = intel_miptree_create(intel, rb->TexImage->TexObject->Target, + intel_image->base.Base.TexFormat, + intel_image->base.Base.Level, + intel_image->base.Base.Level, + width, height, depth, + true, + irb->mt->num_samples, + INTEL_MIPTREE_TILING_ANY); + + if (intel->vtbl.is_hiz_depth_format(intel, new_mt->format)) { + intel_miptree_alloc_hiz(intel, new_mt); + } + + intel_miptree_copy_teximage(intel, intel_image, new_mt, invalidate); + + intel_miptree_reference(&irb->mt, intel_image->mt); + intel_renderbuffer_set_draw_offset(irb); + intel_miptree_release(&new_mt); +} + +/** + * Do one-time context initializations related to GL_EXT_framebuffer_object. + * Hook in device driver functions. + */ +void +intel_fbo_init(struct intel_context *intel) +{ + intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer; + intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer; + intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer; + intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer; + intel->ctx.Driver.BindFramebuffer = intel_bind_framebuffer; + intel->ctx.Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer; + intel->ctx.Driver.RenderTexture = intel_render_texture; + intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; + intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; + intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; + intel->ctx.Driver.EGLImageTargetRenderbufferStorage = + intel_image_target_renderbuffer_storage; +} diff --git a/src/mesa/drivers/dri/i915/intel_fbo.h b/src/mesa/drivers/dri/i915/intel_fbo.h new file mode 100644 index 0000000..e1b4df5 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_fbo.h @@ -0,0 +1,212 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_FBO_H +#define INTEL_FBO_H + +#include +#include +#include "main/formats.h" +#include "main/macros.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_screen.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct intel_context; +struct intel_mipmap_tree; +struct intel_texture_image; + +/** + * Intel renderbuffer, derived from gl_renderbuffer. + */ +struct intel_renderbuffer +{ + struct swrast_renderbuffer Base; + struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */ + drm_intel_bo *map_bo; + + /** + * \name Miptree view + * \{ + * + * Multiple renderbuffers may simultaneously wrap a single texture and each + * provide a different view into that texture. The fields below indicate + * which miptree slice is wrapped by this renderbuffer. The fields' values + * are consistent with the 'level' and 'layer' parameters of + * glFramebufferTextureLayer(). + * + * For renderbuffers not created with glFramebufferTexture*(), mt_level and + * mt_layer are 0. + */ + unsigned int mt_level; + unsigned int mt_layer; + /** \} */ + + GLuint draw_x, draw_y; /**< Offset of drawing within the region */ +}; + + +/** + * gl_renderbuffer is a base class which we subclass. The Class field + * is used for simple run-time type checking. + */ +#define INTEL_RB_CLASS 0x12345678 + + +/** + * Return a gl_renderbuffer ptr casted to intel_renderbuffer. + * NULL will be returned if the rb isn't really an intel_renderbuffer. + * This is determined by checking the ClassID. + */ +static INLINE struct intel_renderbuffer * +intel_renderbuffer(struct gl_renderbuffer *rb) +{ + struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; + if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS) { + /*_mesa_warning(NULL, "Returning non-intel Rb\n");*/ + return irb; + } + else + return NULL; +} + + +/** + * \brief Return the framebuffer attachment specified by attIndex. + * + * If the framebuffer lacks the specified attachment, then return null. + * + * If the attached renderbuffer is a wrapper, then return wrapped + * renderbuffer. + */ +static INLINE struct intel_renderbuffer * +intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex) +{ + struct gl_renderbuffer *rb; + + assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment)); + + rb = fb->Attachment[attIndex].Renderbuffer; + if (!rb) + return NULL; + + return intel_renderbuffer(rb); +} + + +static INLINE gl_format +intel_rb_format(const struct intel_renderbuffer *rb) +{ + return rb->Base.Base.Format; +} + +extern struct intel_renderbuffer * +intel_create_renderbuffer(gl_format format, unsigned num_samples); + +struct intel_renderbuffer * +intel_create_private_renderbuffer(gl_format format, unsigned num_samples); + +struct gl_renderbuffer* +intel_create_wrapped_renderbuffer(struct gl_context * ctx, + int width, int height, + gl_format format); + +extern void +intel_fbo_init(struct intel_context *intel); + + +extern void +intel_flip_renderbuffers(struct gl_framebuffer *fb); + +void +intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb); + +static inline uint32_t +intel_renderbuffer_get_tile_offsets(struct intel_renderbuffer *irb, + uint32_t *tile_x, + uint32_t *tile_y) +{ + return intel_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer, + tile_x, tile_y); +} + +struct intel_region* +intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex); + +void +intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb); + +bool +intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb); + +void +intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb); + +void +intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb); + + +/** + * \brief Perform a HiZ resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_hiz(struct intel_context *intel, + struct intel_renderbuffer *irb); + +/** + * \brief Perform a depth resolve on the renderbuffer. + * + * It is safe to call this function on a renderbuffer without HiZ. In that + * case, the function is a no-op. + * + * \return false if no resolve was needed + */ +bool +intel_renderbuffer_resolve_depth(struct intel_context *intel, + struct intel_renderbuffer *irb); + +void intel_renderbuffer_move_to_temp(struct intel_context *intel, + struct intel_renderbuffer *irb, + bool invalidate); + +unsigned +intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples); + +#ifdef __cplusplus +} +#endif + +#endif /* INTEL_FBO_H */ diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c deleted file mode 120000 index 242fed0..0000000 --- a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_mipmap_tree.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c new file mode 100644 index 0000000..1776a4b --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c @@ -0,0 +1,2349 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include + +#include "intel_batchbuffer.h" +#include "intel_chipset.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_resolve_map.h" +#include "intel_tex_layout.h" +#include "intel_tex.h" +#include "intel_blit.h" + +#ifndef I915 +#include "brw_blorp.h" +#endif + +#include "main/enums.h" +#include "main/formats.h" +#include "main/glformats.h" +#include "main/texcompress_etc.h" +#include "main/teximage.h" + +#define FILE_DEBUG_FLAG DEBUG_MIPTREE + +static GLenum +target_to_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return GL_TEXTURE_CUBE_MAP_ARB; + default: + return target; + } +} + + +/** + * Determine which MSAA layout should be used by the MSAA surface being + * created, based on the chip generation and the surface type. + */ +static enum intel_msaa_layout +compute_msaa_layout(struct intel_context *intel, gl_format format, GLenum target) +{ + /* Prior to Gen7, all MSAA surfaces used IMS layout. */ + if (intel->gen < 7) + return INTEL_MSAA_LAYOUT_IMS; + + /* In Gen7, IMS layout is only used for depth and stencil buffers. */ + switch (_mesa_get_format_base_format(format)) { + case GL_DEPTH_COMPONENT: + case GL_STENCIL_INDEX: + case GL_DEPTH_STENCIL: + return INTEL_MSAA_LAYOUT_IMS; + default: + /* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): + * + * This field must be set to 0 for all SINT MSRTs when all RT channels + * are not written + * + * In practice this means that we have to disable MCS for all signed + * integer MSAA buffers. The alternative, to disable MCS only when one + * of the render target channels is disabled, is impractical because it + * would require converting between CMS and UMS MSAA layouts on the fly, + * which is expensive. + */ + if (_mesa_get_format_datatype(format) == GL_INT) { + /* TODO: is this workaround needed for future chipsets? */ + assert(intel->gen == 7); + return INTEL_MSAA_LAYOUT_UMS; + } else { + /* For now, if we're going to be texturing from this surface, + * force UMS, so that the shader doesn't have to do different things + * based on whether there's a multisample control surface needing sampled first. + * We can't just blindly read the MCS surface in all cases because: + * + * From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"): + * + * If this field is disabled and the sampling engine message + * is issued on this surface, the MCS surface may be accessed. Software + * must ensure that the surface is defined to avoid GTT errors. + */ + if (target == GL_TEXTURE_2D_MULTISAMPLE || + target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + return INTEL_MSAA_LAYOUT_UMS; + } else { + return INTEL_MSAA_LAYOUT_CMS; + } + } + } +} + + +/** + * For single-sampled render targets ("non-MSRT"), the MCS buffer is a + * scaled-down bitfield representation of the color buffer which is capable of + * recording when blocks of the color buffer are equal to the clear value. + * This function returns the block size that will be used by the MCS buffer + * corresponding to a certain color miptree. + * + * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", + * beneath the "Fast Color Clear" bullet (p327): + * + * The following table describes the RT alignment + * + * Pixels Lines + * TiledY RT CL + * bpp + * 32 8 4 + * 64 4 4 + * 128 2 4 + * TiledX RT CL + * bpp + * 32 16 2 + * 64 8 2 + * 128 4 2 + * + * This alignment has the following uses: + * + * - For figuring out the size of the MCS buffer. Each 4k tile in the MCS + * buffer contains 128 blocks horizontally and 256 blocks vertically. + * + * - For figuring out alignment restrictions for a fast clear operation. Fast + * clear operations must always clear aligned multiples of 16 blocks + * horizontally and 32 blocks vertically. + * + * - For scaling down the coordinates sent through the render pipeline during + * a fast clear. X coordinates must be scaled down by 8 times the block + * width, and Y coordinates by 16 times the block height. + * + * - For scaling down the coordinates sent through the render pipeline during + * a "Render Target Resolve" operation. X coordinates must be scaled down + * by half the block width, and Y coordinates by half the block height. + */ +void +intel_get_non_msrt_mcs_alignment(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned *width_px, unsigned *height) +{ + switch (mt->region->tiling) { + default: + assert(!"Non-MSRT MCS requires X or Y tiling"); + /* In release builds, fall through */ + case I915_TILING_Y: + *width_px = 32 / mt->cpp; + *height = 4; + break; + case I915_TILING_X: + *width_px = 64 / mt->cpp; + *height = 2; + } +} + + +/** + * For a single-sampled render target ("non-MSRT"), determine if an MCS buffer + * can be used. + * + * From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)", + * beneath the "Fast Color Clear" bullet (p326): + * + * - Support is limited to tiled render targets. + * - Support is for non-mip-mapped and non-array surface types only. + * + * And then later, on p327: + * + * - MCS buffer for non-MSRT is supported only for RT formats 32bpp, + * 64bpp, and 128bpp. + */ +bool +intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* MCS is not supported on the i915 (pre-Gen4) driver */ + return false; +#else + struct brw_context *brw = brw_context(&intel->ctx); + + /* MCS support does not exist prior to Gen7 */ + if (intel->gen < 7) + return false; + + /* MCS is only supported for color buffers */ + switch (_mesa_get_format_base_format(mt->format)) { + case GL_DEPTH_COMPONENT: + case GL_DEPTH_STENCIL: + case GL_STENCIL_INDEX: + return false; + } + + if (mt->region->tiling != I915_TILING_X && + mt->region->tiling != I915_TILING_Y) + return false; + if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16) + return false; + if (mt->first_level != 0 || mt->last_level != 0) + return false; + if (mt->physical_depth0 != 1) + return false; + + /* There's no point in using an MCS buffer if the surface isn't in a + * renderable format. + */ + if (!brw->format_supported_as_render_target[mt->format]) + return false; + + return true; +#endif +} + + +/** + * @param for_bo Indicates that the caller is + * intel_miptree_create_for_bo(). If true, then do not create + * \c stencil_mt. + */ +struct intel_mipmap_tree * +intel_miptree_create_layout(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool for_bo, + GLuint num_samples) +{ + struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); + + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + _mesa_get_format_name(format), + first_level, last_level, mt); + + mt->target = target_to_target(target); + mt->format = format; + mt->first_level = first_level; + mt->last_level = last_level; + mt->logical_width0 = width0; + mt->logical_height0 = height0; + mt->logical_depth0 = depth0; +#ifndef I915 + mt->mcs_state = INTEL_MCS_STATE_NONE; +#endif + + /* The cpp is bytes per (1, blockheight)-sized block for compressed + * textures. This is why you'll see divides by blockheight all over + */ + unsigned bw, bh; + _mesa_get_format_block_size(format, &bw, &bh); + assert(_mesa_get_format_bytes(mt->format) % bw == 0); + mt->cpp = _mesa_get_format_bytes(mt->format) / bw; + + mt->num_samples = num_samples; + mt->compressed = _mesa_is_format_compressed(format); + mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE; + mt->refcount = 1; + + if (num_samples > 1) { + /* Adjust width/height/depth for MSAA */ + mt->msaa_layout = compute_msaa_layout(intel, format, mt->target); + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) { + /* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says: + * + * "Any of the other messages (sample*, LOD, load4) used with a + * (4x) multisampled surface will in-effect sample a surface with + * double the height and width as that indicated in the surface + * state. Each pixel position on the original-sized surface is + * replaced with a 2x2 of samples with the following arrangement: + * + * sample 0 sample 2 + * sample 1 sample 3" + * + * Thus, when sampling from a multisampled texture, it behaves as + * though the layout in memory for (x,y,sample) is: + * + * (0,0,0) (0,0,2) (1,0,0) (1,0,2) + * (0,0,1) (0,0,3) (1,0,1) (1,0,3) + * + * (0,1,0) (0,1,2) (1,1,0) (1,1,2) + * (0,1,1) (0,1,3) (1,1,1) (1,1,3) + * + * However, the actual layout of multisampled data in memory is: + * + * (0,0,0) (1,0,0) (0,0,1) (1,0,1) + * (0,1,0) (1,1,0) (0,1,1) (1,1,1) + * + * (0,0,2) (1,0,2) (0,0,3) (1,0,3) + * (0,1,2) (1,1,2) (0,1,3) (1,1,3) + * + * This pattern repeats for each 2x2 pixel block. + * + * As a result, when calculating the size of our 4-sample buffer for + * an odd width or height, we have to align before scaling up because + * sample 3 is in that bottom right 2x2 block. + */ + switch (num_samples) { + case 4: + width0 = ALIGN(width0, 2) * 2; + height0 = ALIGN(height0, 2) * 2; + break; + case 8: + width0 = ALIGN(width0, 2) * 4; + height0 = ALIGN(height0, 2) * 2; + break; + default: + /* num_samples should already have been quantized to 0, 1, 4, or + * 8. + */ + assert(false); + } + } else { + /* Non-interleaved */ + depth0 *= num_samples; + } + } + + /* array_spacing_lod0 is only used for non-IMS MSAA surfaces. TODO: can we + * use it elsewhere? + */ + switch (mt->msaa_layout) { + case INTEL_MSAA_LAYOUT_NONE: + case INTEL_MSAA_LAYOUT_IMS: + mt->array_spacing_lod0 = false; + break; + case INTEL_MSAA_LAYOUT_UMS: + case INTEL_MSAA_LAYOUT_CMS: + mt->array_spacing_lod0 = true; + break; + } + + if (target == GL_TEXTURE_CUBE_MAP) { + assert(depth0 == 1); + depth0 = 6; + } + + mt->physical_width0 = width0; + mt->physical_height0 = height0; + mt->physical_depth0 = depth0; + + if (!for_bo && + _mesa_get_format_base_format(format) == GL_DEPTH_STENCIL && + (intel->must_use_separate_stencil || + (intel->has_separate_stencil && + intel->vtbl.is_hiz_depth_format(intel, format)))) { + mt->stencil_mt = intel_miptree_create(intel, + mt->target, + MESA_FORMAT_S8, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + num_samples, + INTEL_MIPTREE_TILING_ANY); + if (!mt->stencil_mt) { + intel_miptree_release(&mt); + return NULL; + } + + /* Fix up the Z miptree format for how we're splitting out separate + * stencil. Gen7 expects there to be no stencil bits in its depth buffer. + */ + if (mt->format == MESA_FORMAT_S8_Z24) { + mt->format = MESA_FORMAT_X8_Z24; + } else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) { + mt->format = MESA_FORMAT_Z32_FLOAT; + mt->cpp = 4; + } else { + _mesa_problem(NULL, "Unknown format %s in separate stencil mt\n", + _mesa_get_format_name(mt->format)); + } + } + + intel_get_texture_alignment_unit(intel, mt->format, + &mt->align_w, &mt->align_h); + +#ifdef I915 + (void) intel; + if (intel->is_945) + i945_miptree_layout(mt); + else + i915_miptree_layout(mt); +#else + brw_miptree_layout(intel, mt); +#endif + + return mt; +} + +/** + * \brief Helper function for intel_miptree_create(). + */ +static uint32_t +intel_miptree_choose_tiling(struct intel_context *intel, + gl_format format, + uint32_t width0, + uint32_t num_samples, + enum intel_miptree_tiling_mode requested, + struct intel_mipmap_tree *mt) +{ + + if (format == MESA_FORMAT_S8) { + /* The stencil buffer is W tiled. However, we request from the kernel a + * non-tiled buffer because the GTT is incapable of W fencing. + */ + return I915_TILING_NONE; + } + + /* Some usages may want only one type of tiling, like depth miptrees (Y + * tiled), or temporary BOs for uploading data once (linear). + */ + switch (requested) { + case INTEL_MIPTREE_TILING_ANY: + break; + case INTEL_MIPTREE_TILING_Y: + return I915_TILING_Y; + case INTEL_MIPTREE_TILING_NONE: + return I915_TILING_NONE; + } + + if (num_samples > 1) { + /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled + * Surface"): + * + * [DevSNB+]: For multi-sample render targets, this field must be + * 1. MSRTs can only be tiled. + * + * Our usual reason for preferring X tiling (fast blits using the + * blitting engine) doesn't apply to MSAA, since we'll generally be + * downsampling or upsampling when blitting between the MSAA buffer + * and another buffer, and the blitting engine doesn't support that. + * So use Y tiling, since it makes better use of the cache. + */ + return I915_TILING_Y; + } + + GLenum base_format = _mesa_get_format_base_format(format); + if (intel->gen >= 4 && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT)) + return I915_TILING_Y; + + int minimum_pitch = mt->total_width * mt->cpp; + + /* If the width is much smaller than a tile, don't bother tiling. */ + if (minimum_pitch < 64) + return I915_TILING_NONE; + + if (ALIGN(minimum_pitch, 512) >= 32768) { + perf_debug("%dx%d miptree too large to blit, falling back to untiled", + mt->total_width, mt->total_height); + return I915_TILING_NONE; + } + + /* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */ + if (intel->gen < 6) + return I915_TILING_X; + + return I915_TILING_Y | I915_TILING_X; +} + +struct intel_mipmap_tree * +intel_miptree_create(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool expect_accelerated_upload, + GLuint num_samples, + enum intel_miptree_tiling_mode requested_tiling) +{ + struct intel_mipmap_tree *mt; + gl_format tex_format = format; + gl_format etc_format = MESA_FORMAT_NONE; + GLuint total_width, total_height; + + if (!intel->is_baytrail) { + switch (format) { + case MESA_FORMAT_ETC1_RGB8: + format = MESA_FORMAT_RGBX8888_REV; + break; + case MESA_FORMAT_ETC2_RGB8: + format = MESA_FORMAT_RGBX8888_REV; + break; + case MESA_FORMAT_ETC2_SRGB8: + case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC: + case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1: + format = MESA_FORMAT_SARGB8; + break; + case MESA_FORMAT_ETC2_RGBA8_EAC: + case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1: + format = MESA_FORMAT_RGBA8888_REV; + break; + case MESA_FORMAT_ETC2_R11_EAC: + format = MESA_FORMAT_R16; + break; + case MESA_FORMAT_ETC2_SIGNED_R11_EAC: + format = MESA_FORMAT_SIGNED_R16; + break; + case MESA_FORMAT_ETC2_RG11_EAC: + format = MESA_FORMAT_GR1616; + break; + case MESA_FORMAT_ETC2_SIGNED_RG11_EAC: + format = MESA_FORMAT_SIGNED_GR1616; + break; + default: + /* Non ETC1 / ETC2 format */ + break; + } + } + + etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE; + + mt = intel_miptree_create_layout(intel, target, format, + first_level, last_level, width0, + height0, depth0, + false, num_samples); + /* + * pitch == 0 || height == 0 indicates the null texture + */ + if (!mt || !mt->total_width || !mt->total_height) { + intel_miptree_release(&mt); + return NULL; + } + + total_width = mt->total_width; + total_height = mt->total_height; + + if (format == MESA_FORMAT_S8) { + /* Align to size of W tile, 64x64. */ + total_width = ALIGN(total_width, 64); + total_height = ALIGN(total_height, 64); + } + + uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0, + num_samples, requested_tiling, + mt); + bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X); + + mt->etc_format = etc_format; + mt->region = intel_region_alloc(intel->intelScreen, + y_or_x ? I915_TILING_Y : tiling, + mt->cpp, + total_width, + total_height, + expect_accelerated_upload); + + /* If the region is too large to fit in the aperture, we need to use the + * BLT engine to support it. The BLT paths can't currently handle Y-tiling, + * so we need to fall back to X. + */ + if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) { + perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n", + mt->total_width, mt->total_height); + intel_region_release(&mt->region); + + mt->region = intel_region_alloc(intel->intelScreen, + I915_TILING_X, + mt->cpp, + total_width, + total_height, + expect_accelerated_upload); + } + + mt->offset = 0; + + if (!mt->region) { + intel_miptree_release(&mt); + return NULL; + } + +#ifndef I915 + /* If this miptree is capable of supporting fast color clears, set + * mcs_state appropriately to ensure that fast clears will occur. + * Allocation of the MCS miptree will be deferred until the first fast + * clear actually occurs. + */ + if (intel_is_non_msrt_mcs_buffer_supported(intel, mt)) + mt->mcs_state = INTEL_MCS_STATE_RESOLVED; +#endif + + return mt; +} + +struct intel_mipmap_tree * +intel_miptree_create_for_bo(struct intel_context *intel, + drm_intel_bo *bo, + gl_format format, + uint32_t offset, + uint32_t width, + uint32_t height, + int pitch, + uint32_t tiling) +{ + struct intel_mipmap_tree *mt; + + struct intel_region *region = calloc(1, sizeof(*region)); + if (!region) + return NULL; + + /* Nothing will be able to use this miptree with the BO if the offset isn't + * aligned. + */ + if (tiling != I915_TILING_NONE) + assert(offset % 4096 == 0); + + /* miptrees can't handle negative pitch. If you need flipping of images, + * that's outside of the scope of the mt. + */ + assert(pitch >= 0); + + mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format, + 0, 0, + width, height, 1, + true, 0 /* num_samples */); + if (!mt) + return mt; + + region->cpp = mt->cpp; + region->width = width; + region->height = height; + region->pitch = pitch; + region->refcount = 1; + drm_intel_bo_reference(bo); + region->bo = bo; + region->tiling = tiling; + + mt->region = region; + mt->offset = offset; + + return mt; +} + + +/** + * For a singlesample DRI2 buffer, this simply wraps the given region with a miptree. + * + * For a multisample DRI2 buffer, this wraps the given region with + * a singlesample miptree, then creates a multisample miptree into which the + * singlesample miptree is embedded as a child. + */ +struct intel_mipmap_tree* +intel_miptree_create_for_dri2_buffer(struct intel_context *intel, + unsigned dri_attachment, + gl_format format, + uint32_t num_samples, + struct intel_region *region) +{ + struct intel_mipmap_tree *singlesample_mt = NULL; + struct intel_mipmap_tree *multisample_mt = NULL; + + /* Only the front and back buffers, which are color buffers, are shared + * through DRI2. + */ + assert(dri_attachment == __DRI_BUFFER_BACK_LEFT || + dri_attachment == __DRI_BUFFER_FRONT_LEFT || + dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT); + assert(_mesa_get_format_base_format(format) == GL_RGB || + _mesa_get_format_base_format(format) == GL_RGBA); + + singlesample_mt = intel_miptree_create_for_bo(intel, + region->bo, + format, + 0, + region->width, + region->height, + region->pitch, + region->tiling); + if (!singlesample_mt) + return NULL; + singlesample_mt->region->name = region->name; + +#ifndef I915 + /* If this miptree is capable of supporting fast color clears, set + * mcs_state appropriately to ensure that fast clears will occur. + * Allocation of the MCS miptree will be deferred until the first fast + * clear actually occurs. + */ + if (intel_is_non_msrt_mcs_buffer_supported(intel, singlesample_mt)) + singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED; +#endif + + if (num_samples == 0) + return singlesample_mt; + + multisample_mt = intel_miptree_create_for_renderbuffer(intel, + format, + region->width, + region->height, + num_samples); + if (!multisample_mt) { + intel_miptree_release(&singlesample_mt); + return NULL; + } + + multisample_mt->singlesample_mt = singlesample_mt; + multisample_mt->need_downsample = false; + + if (intel->is_front_buffer_rendering && + (dri_attachment == __DRI_BUFFER_FRONT_LEFT || + dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) { + intel_miptree_upsample(intel, multisample_mt); + } + + return multisample_mt; +} + +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t width, + uint32_t height, + uint32_t num_samples) +{ + struct intel_mipmap_tree *mt; + uint32_t depth = 1; + bool ok; + + mt = intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0, + width, height, depth, true, num_samples, + INTEL_MIPTREE_TILING_ANY); + if (!mt) + goto fail; + + if (intel->vtbl.is_hiz_depth_format(intel, format)) { + ok = intel_miptree_alloc_hiz(intel, mt); + if (!ok) + goto fail; + } + + if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) { + ok = intel_miptree_alloc_mcs(intel, mt, num_samples); + if (!ok) + goto fail; + } + + return mt; + +fail: + intel_miptree_release(&mt); + return NULL; +} + +void +intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src) +{ + if (*dst == src) + return; + + intel_miptree_release(dst); + + if (src) { + src->refcount++; + DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount); + } + + *dst = src; +} + + +void +intel_miptree_release(struct intel_mipmap_tree **mt) +{ + if (!*mt) + return; + + DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1); + if (--(*mt)->refcount <= 0) { + GLuint i; + + DBG("%s deleting %p\n", __FUNCTION__, *mt); + + intel_region_release(&((*mt)->region)); + intel_miptree_release(&(*mt)->stencil_mt); + intel_miptree_release(&(*mt)->hiz_mt); +#ifndef I915 + intel_miptree_release(&(*mt)->mcs_mt); +#endif + intel_miptree_release(&(*mt)->singlesample_mt); + intel_resolve_map_clear(&(*mt)->hiz_map); + + for (i = 0; i < MAX_TEXTURE_LEVELS; i++) { + free((*mt)->level[i].slice); + } + + free(*mt); + } + *mt = NULL; +} + +void +intel_miptree_get_dimensions_for_image(struct gl_texture_image *image, + int *width, int *height, int *depth) +{ + switch (image->TexObject->Target) { + case GL_TEXTURE_1D_ARRAY: + *width = image->Width; + *height = 1; + *depth = image->Height; + break; + default: + *width = image->Width; + *height = image->Height; + *depth = image->Depth; + break; + } +} + +/** + * Can the image be pulled into a unified mipmap tree? This mirrors + * the completeness test in a lot of ways. + * + * Not sure whether I want to pass gl_texture_image here. + */ +bool +intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image) +{ + struct intel_texture_image *intelImage = intel_texture_image(image); + GLuint level = intelImage->base.Base.Level; + int width, height, depth; + + /* glTexImage* choose the texture object based on the target passed in, and + * objects can't change targets over their lifetimes, so this should be + * true. + */ + assert(target_to_target(image->TexObject->Target) == mt->target); + + gl_format mt_format = mt->format; + if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt) + mt_format = MESA_FORMAT_S8_Z24; + if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt) + mt_format = MESA_FORMAT_Z32_FLOAT_X24S8; + if (mt->etc_format != MESA_FORMAT_NONE) + mt_format = mt->etc_format; + + if (image->TexFormat != mt_format) + return false; + + intel_miptree_get_dimensions_for_image(image, &width, &height, &depth); + + if (mt->target == GL_TEXTURE_CUBE_MAP) + depth = 6; + + /* Test image dimensions against the base level image adjusted for + * minification. This will also catch images not present in the + * tree, changed targets, etc. + */ + if (mt->target == GL_TEXTURE_2D_MULTISAMPLE || + mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) { + /* nonzero level here is always bogus */ + assert(level == 0); + + if (width != mt->logical_width0 || + height != mt->logical_height0 || + depth != mt->logical_depth0) { + return false; + } + } + else { + /* all normal textures, renderbuffers, etc */ + if (width != mt->level[level].width || + height != mt->level[level].height || + depth != mt->level[level].depth) { + return false; + } + } + + if (image->NumSamples != mt->num_samples) + return false; + + return true; +} + + +void +intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + mt->level[level].width = w; + mt->level[level].height = h; + mt->level[level].depth = d; + mt->level[level].level_x = x; + mt->level[level].level_y = y; + + DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__, + level, w, h, d, x, y); + + assert(mt->level[level].slice == NULL); + + mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice)); + mt->level[level].slice[0].x_offset = mt->level[level].level_x; + mt->level[level].slice[0].y_offset = mt->level[level].level_y; +} + + +void +intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint img, + GLuint x, GLuint y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < mt->level[level].depth); + + mt->level[level].slice[img].x_offset = mt->level[level].level_x + x; + mt->level[level].slice[img].y_offset = mt->level[level].level_y + y; + + DBG("%s level %d img %d pos %d,%d\n", + __FUNCTION__, level, img, + mt->level[level].slice[img].x_offset, + mt->level[level].slice[img].y_offset); +} + +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + GLuint *x, GLuint *y) +{ + assert(slice < mt->level[level].depth); + + *x = mt->level[level].slice[slice].x_offset; + *y = mt->level[level].slice[slice].y_offset; +} + +/** + * Rendering with tiled buffers requires that the base address of the buffer + * be aligned to a page boundary. For renderbuffers, and sometimes with + * textures, we may want the surface to point at a texture image level that + * isn't at a page boundary. + * + * This function returns an appropriately-aligned base offset + * according to the tiling restrictions, plus any required x/y offset + * from there. + */ +uint32_t +intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + uint32_t *tile_x, + uint32_t *tile_y) +{ + struct intel_region *region = mt->region; + uint32_t x, y; + uint32_t mask_x, mask_y; + + intel_region_get_tile_masks(region, &mask_x, &mask_y, false); + intel_miptree_get_image_offset(mt, level, slice, &x, &y); + + *tile_x = x & mask_x; + *tile_y = y & mask_y; + + return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y, + false); +} + +static void +intel_miptree_copy_slice_sw(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int slice, + int width, + int height) +{ + void *src, *dst; + int src_stride, dst_stride; + int cpp = dst_mt->cpp; + + intel_miptree_map(intel, src_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, + &src, &src_stride); + + intel_miptree_map(intel, dst_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | + BRW_MAP_DIRECT_BIT, + &dst, &dst_stride); + + DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n", + _mesa_get_format_name(src_mt->format), + src_mt, src, src_stride, + _mesa_get_format_name(dst_mt->format), + dst_mt, dst, dst_stride, + width, height); + + int row_size = cpp * width; + if (src_stride == row_size && + dst_stride == row_size) { + memcpy(dst, src, row_size * height); + } else { + for (int i = 0; i < height; i++) { + memcpy(dst, src, row_size); + dst += dst_stride; + src += src_stride; + } + } + + intel_miptree_unmap(intel, dst_mt, level, slice); + intel_miptree_unmap(intel, src_mt, level, slice); + + /* Don't forget to copy the stencil data over, too. We could have skipped + * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map + * shuffling the two data sources in/out of temporary storage instead of + * the direct mapping we get this way. + */ + if (dst_mt->stencil_mt) { + assert(src_mt->stencil_mt); + intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt, + level, slice, width, height); + } +} + +static void +intel_miptree_copy_slice(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int face, + int depth) + +{ + gl_format format = src_mt->format; + uint32_t width = src_mt->level[level].width; + uint32_t height = src_mt->level[level].height; + int slice; + + if (face > 0) + slice = face; + else + slice = depth; + + assert(depth < src_mt->level[level].depth); + assert(src_mt->format == dst_mt->format); + + if (dst_mt->compressed) { + height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; + width = ALIGN(width, dst_mt->align_w); + } + + /* If it's a packed depth/stencil buffer with separate stencil, the blit + * below won't apply since we can't do the depth's Y tiling or the + * stencil's W tiling in the blitter. + */ + if (src_mt->stencil_mt) { + intel_miptree_copy_slice_sw(intel, + dst_mt, src_mt, + level, slice, + width, height); + return; + } + + uint32_t dst_x, dst_y, src_x, src_y; + intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y); + intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y); + + DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n", + _mesa_get_format_name(src_mt->format), + src_mt, src_x, src_y, src_mt->region->pitch, + _mesa_get_format_name(dst_mt->format), + dst_mt, dst_x, dst_y, dst_mt->region->pitch, + width, height); + + if (!intel_miptree_blit(intel, + src_mt, level, slice, 0, 0, false, + dst_mt, level, slice, 0, 0, false, + width, height, GL_COPY)) { + perf_debug("miptree validate blit for %s failed\n", + _mesa_get_format_name(format)); + + intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice, + width, height); + } +} + +/** + * Copies the image's current data to the given miptree, and associates that + * miptree with the image. + * + * If \c invalidate is true, then the actual image data does not need to be + * copied, but the image still needs to be associated to the new miptree (this + * is set to true if we're about to clear the image). + */ +void +intel_miptree_copy_teximage(struct intel_context *intel, + struct intel_texture_image *intelImage, + struct intel_mipmap_tree *dst_mt, + bool invalidate) +{ + struct intel_mipmap_tree *src_mt = intelImage->mt; + struct intel_texture_object *intel_obj = + intel_texture_object(intelImage->base.Base.TexObject); + int level = intelImage->base.Base.Level; + int face = intelImage->base.Base.Face; + GLuint depth = intelImage->base.Base.Depth; + + if (!invalidate) { + for (int slice = 0; slice < depth; slice++) { + intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice); + } + } + + intel_miptree_reference(&intelImage->mt, dst_mt); + intel_obj->needs_validate = true; +} + +bool +intel_miptree_alloc_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt, + GLuint num_samples) +{ + assert(intel->gen >= 7); /* MCS only used on Gen7+ */ +#ifdef I915 + return false; +#else + assert(mt->mcs_mt == NULL); + + /* Choose the correct format for the MCS buffer. All that really matters + * is that we allocate the right buffer size, since we'll always be + * accessing this miptree using MCS-specific hardware mechanisms, which + * infer the correct format based on num_samples. + */ + gl_format format; + switch (num_samples) { + case 4: + /* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for + * each sample). + */ + format = MESA_FORMAT_R8; + break; + case 8: + /* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits + * for each sample, plus 8 padding bits). + */ + format = MESA_FORMAT_R_UINT32; + break; + default: + assert(!"Unrecognized sample count in intel_miptree_alloc_mcs"); + return false; + }; + + /* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address": + * + * "The MCS surface must be stored as Tile Y." + */ + mt->mcs_state = INTEL_MCS_STATE_MSAA; + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + /* From the Ivy Bridge PRM, Vol 2 Part 1 p326: + * + * When MCS buffer is enabled and bound to MSRT, it is required that it + * is cleared prior to any rendering. + * + * Since we don't use the MCS buffer for any purpose other than rendering, + * it makes sense to just clear it immediately upon allocation. + * + * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. + */ + void *data = intel_miptree_map_raw(intel, mt->mcs_mt); + memset(data, 0xff, mt->mcs_mt->region->bo->size); + intel_miptree_unmap_raw(intel, mt->mcs_mt); + + return mt->mcs_mt; +#endif +} + + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + assert(!"MCS not supported on i915"); + return false; +#else + assert(mt->mcs_mt == NULL); + + /* The format of the MCS buffer is opaque to the driver; all that matters + * is that we get its size and pitch right. We'll pretend that the format + * is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled + * R32 buffer is 32 pixels across, we'll need to scale the width down by + * the block width and then a further factor of 4. Since an MCS tile + * covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high, + * we'll need to scale the height down by the block height and then a + * further factor of 8. + */ + const gl_format format = MESA_FORMAT_R_UINT32; + unsigned block_width_px; + unsigned block_height; + intel_get_non_msrt_mcs_alignment(intel, mt, &block_width_px, &block_height); + unsigned width_divisor = block_width_px * 4; + unsigned height_divisor = block_height * 8; + unsigned mcs_width = + ALIGN(mt->logical_width0, width_divisor) / width_divisor; + unsigned mcs_height = + ALIGN(mt->logical_height0, height_divisor) / height_divisor; + assert(mt->logical_depth0 == 1); + mt->mcs_mt = intel_miptree_create(intel, + mt->target, + format, + mt->first_level, + mt->last_level, + mcs_width, + mcs_height, + mt->logical_depth0, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_Y); + + return mt->mcs_mt; +#endif +} + + +/** + * Helper for intel_miptree_alloc_hiz() that sets + * \c mt->level[level].slice[layer].has_hiz. Return true if and only if + * \c has_hiz was set. + */ +static bool +intel_miptree_slice_enable_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + assert(mt->hiz_mt); + + if (intel->is_haswell) { + /* Disable HiZ for some slices to work around a hardware bug. + * + * Haswell hardware fails to respect + * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y when during HiZ + * ambiguate operations. The failure is inconsistent and affected by + * other GPU contexts. Running a heavy GPU workload in a separate + * process causes the failure rate to drop to nearly 0. + * + * To workaround the bug, we enable HiZ only when we can guarantee that + * the Depth Coordinate Offset fields will be set to 0. The function + * brw_get_depthstencil_tile_masks() is used to calculate the fields, + * and the function is sometimes called in such a way that the presence + * of an attached stencil buffer changes the fuction's return value. + * + * The largest tile size considered by brw_get_depthstencil_tile_masks() + * is that of the stencil buffer. Therefore, if this hiz slice's + * corresponding depth slice has an offset that is aligned to the + * stencil buffer tile size, 64x64 pixels, then + * 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0. + */ + uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset; + uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset; + if ((depth_x_offset & 63) || (depth_y_offset & 63)) { + return false; + } + } + + mt->level[level].slice[layer].has_hiz = true; + return true; +} + + + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + assert(mt->hiz_mt == NULL); + mt->hiz_mt = intel_miptree_create(intel, + mt->target, + mt->format, + mt->first_level, + mt->last_level, + mt->logical_width0, + mt->logical_height0, + mt->logical_depth0, + true, + mt->num_samples, + INTEL_MIPTREE_TILING_ANY); + + if (!mt->hiz_mt) + return false; + + /* Mark that all slices need a HiZ resolve. */ + struct intel_resolve_map *head = &mt->hiz_map; + for (int level = mt->first_level; level <= mt->last_level; ++level) { + for (int layer = 0; layer < mt->level[level].depth; ++layer) { + if (!intel_miptree_slice_enable_hiz(intel, mt, level, layer)) + continue; + + head->next = malloc(sizeof(*head->next)); + head->next->prev = head; + head->next->next = NULL; + head = head->next; + + head->level = level; + head->layer = layer; + head->need = GEN6_HIZ_OP_HIZ_RESOLVE; + } + } + + return true; +} + +/** + * Does the miptree slice have hiz enabled? + */ +bool +intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + intel_miptree_check_level_layer(mt, level, layer); + return mt->level[level].slice[layer].has_hiz; +} + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + if (!intel_miptree_slice_has_hiz(mt, level, layer)) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, GEN6_HIZ_OP_HIZ_RESOLVE); +} + + +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + if (!intel_miptree_slice_has_hiz(mt, level, layer)) + return; + + intel_resolve_map_set(&mt->hiz_map, + level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE); +} + +static bool +intel_miptree_slice_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need) +{ + intel_miptree_check_level_layer(mt, level, layer); + + struct intel_resolve_map *item = + intel_resolve_map_get(&mt->hiz_map, level, layer); + + if (!item || item->need != need) + return false; + + intel_hiz_exec(intel, mt, level, layer, need); + intel_resolve_map_remove(item); + return true; +} + +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + GEN6_HIZ_OP_HIZ_RESOLVE); +} + +bool +intel_miptree_slice_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + return intel_miptree_slice_resolve(intel, mt, level, layer, + GEN6_HIZ_OP_DEPTH_RESOLVE); +} + +static bool +intel_miptree_all_slices_resolve(struct intel_context *intel, + struct intel_mipmap_tree *mt, + enum gen6_hiz_op need) +{ + bool did_resolve = false; + struct intel_resolve_map *i, *next; + + for (i = mt->hiz_map.next; i; i = next) { + next = i->next; + if (i->need != need) + continue; + + intel_hiz_exec(intel, mt, i->level, i->layer, need); + intel_resolve_map_remove(i); + did_resolve = true; + } + + return did_resolve; +} + +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + GEN6_HIZ_OP_HIZ_RESOLVE); +} + +bool +intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + return intel_miptree_all_slices_resolve(intel, mt, + GEN6_HIZ_OP_DEPTH_RESOLVE); +} + + +void +intel_miptree_resolve_color(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Fast color clear is not supported on the i915 (pre-Gen4) driver */ +#else + switch (mt->mcs_state) { + case INTEL_MCS_STATE_NONE: + case INTEL_MCS_STATE_MSAA: + case INTEL_MCS_STATE_RESOLVED: + /* No resolve needed */ + break; + case INTEL_MCS_STATE_UNRESOLVED: + case INTEL_MCS_STATE_CLEAR: + brw_blorp_resolve_color(intel, mt); + break; + } +#endif +} + + +/** + * Make it possible to share the region backing the given miptree with another + * process or another miptree. + * + * Fast color clears are unsafe with shared buffers, so we need to resolve and + * then discard the MCS buffer, if present. We also set the mcs_state to + * INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the + * future. + */ +void +intel_miptree_make_shareable(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Nothing needs to be done for I915 */ + (void) intel; + (void) mt; +#else + /* MCS buffers are also used for multisample buffers, but we can't resolve + * away a multisample MCS buffer because it's an integral part of how the + * pixel data is stored. Fortunately this code path should never be + * reached for multisample buffers. + */ + assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE); + + if (mt->mcs_mt) { + intel_miptree_resolve_color(intel, mt); + intel_miptree_release(&mt->mcs_mt); + mt->mcs_state = INTEL_MCS_STATE_NONE; + } +#endif +} + + +/** + * \brief Get pointer offset into stencil buffer. + * + * The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we + * must decode the tile's layout in software. + * + * See + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile + * Format. + * - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm + * + * Even though the returned offset is always positive, the return type is + * signed due to + * commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137 + * mesa: Fix return type of _mesa_get_format_bytes() (#37351) + */ +static intptr_t +intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled) +{ + uint32_t tile_size = 4096; + uint32_t tile_width = 64; + uint32_t tile_height = 64; + uint32_t row_size = 64 * stride; + + uint32_t tile_x = x / tile_width; + uint32_t tile_y = y / tile_height; + + /* The byte's address relative to the tile's base addres. */ + uint32_t byte_x = x % tile_width; + uint32_t byte_y = y % tile_height; + + uintptr_t u = tile_y * row_size + + tile_x * tile_size + + 512 * (byte_x / 8) + + 64 * (byte_y / 8) + + 32 * ((byte_y / 4) % 2) + + 16 * ((byte_x / 4) % 2) + + 8 * ((byte_y / 2) % 2) + + 4 * ((byte_x / 2) % 2) + + 2 * (byte_y % 2) + + 1 * (byte_x % 2); + + if (swizzled) { + /* adjust for bit6 swizzling */ + if (((byte_x / 8) % 2) == 1) { + if (((byte_y / 8) % 2) == 0) { + u += 64; + } else { + u -= 64; + } + } + } + + return u; +} + +static void +intel_miptree_updownsample(struct intel_context *intel, + struct intel_mipmap_tree *src, + struct intel_mipmap_tree *dst, + unsigned width, + unsigned height) +{ +#ifndef I915 + int src_x0 = 0; + int src_y0 = 0; + int dst_x0 = 0; + int dst_y0 = 0; + + brw_blorp_blit_miptrees(intel, + src, 0 /* level */, 0 /* layer */, + dst, 0 /* level */, 0 /* layer */, + src_x0, src_y0, + width, height, + dst_x0, dst_y0, + width, height, + false, false /*mirror x, y*/); + + if (src->stencil_mt) { + brw_blorp_blit_miptrees(intel, + src->stencil_mt, 0 /* level */, 0 /* layer */, + dst->stencil_mt, 0 /* level */, 0 /* layer */, + src_x0, src_y0, + width, height, + dst_x0, dst_y0, + width, height, + false, false /*mirror x, y*/); + } +#endif /* I915 */ +} + +static void +assert_is_flat(struct intel_mipmap_tree *mt) +{ + assert(mt->target == GL_TEXTURE_2D); + assert(mt->first_level == 0); + assert(mt->last_level == 0); +} + +/** + * \brief Downsample from mt to mt->singlesample_mt. + * + * If the miptree needs no downsample, then skip. + */ +void +intel_miptree_downsample(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + /* Only flat, renderbuffer-like miptrees are supported. */ + assert_is_flat(mt); + + if (!mt->need_downsample) + return; + intel_miptree_updownsample(intel, + mt, mt->singlesample_mt, + mt->logical_width0, + mt->logical_height0); + mt->need_downsample = false; +} + +/** + * \brief Upsample from mt->singlesample_mt to mt. + * + * The upsample is done unconditionally. + */ +void +intel_miptree_upsample(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + /* Only flat, renderbuffer-like miptrees are supported. */ + assert_is_flat(mt); + assert(!mt->need_downsample); + + intel_miptree_updownsample(intel, + mt->singlesample_mt, mt, + mt->logical_width0, + mt->logical_height0); +} + +void * +intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt) +{ + /* CPU accesses to color buffers don't understand fast color clears, so + * resolve any pending fast color clears before we map. + */ + intel_miptree_resolve_color(intel, mt); + + drm_intel_bo *bo = mt->region->bo; + + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) { + if (drm_intel_bo_busy(bo)) { + perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); + } + } + + intel_flush(&intel->ctx); + + if (mt->region->tiling != I915_TILING_NONE) + drm_intel_gem_bo_map_gtt(bo); + else + drm_intel_bo_map(bo, true); + + return bo->virtual; +} + +void +intel_miptree_unmap_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt) +{ + drm_intel_bo_unmap(mt->region->bo); +} + +static void +intel_miptree_map_gtt(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + unsigned int bw, bh; + void *base; + unsigned int image_x, image_y; + int x = map->x; + int y = map->y; + + /* For compressed formats, the stride is the number of bytes per + * row of blocks. intel_miptree_get_image_offset() already does + * the divide. + */ + _mesa_get_format_block_size(mt->format, &bw, &bh); + assert(y % bh == 0); + y /= bh; + + base = intel_miptree_map_raw(intel, mt) + mt->offset; + + if (base == NULL) + map->ptr = NULL; + else { + /* Note that in the case of cube maps, the caller must have passed the + * slice number referencing the face. + */ + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + x += image_x; + y += image_y; + + map->stride = mt->region->pitch; + map->ptr = base + y * map->stride + x * mt->cpp; + } + + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, _mesa_get_format_name(mt->format), + x, y, map->ptr, map->stride); +} + +static void +intel_miptree_unmap_gtt(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + intel_miptree_unmap_raw(intel, mt); +} + +static void +intel_miptree_map_blit(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format, + 0, 0, + map->w, map->h, 1, + false, 0, + INTEL_MIPTREE_TILING_NONE); + if (!map->mt) { + fprintf(stderr, "Failed to allocate blit temporary\n"); + goto fail; + } + map->stride = map->mt->region->pitch; + + if (!intel_miptree_blit(intel, + mt, level, slice, + map->x, map->y, false, + map->mt, 0, 0, + 0, 0, false, + map->w, map->h, GL_COPY)) { + fprintf(stderr, "Failed to blit\n"); + goto fail; + } + + intel_batchbuffer_flush(intel); + map->ptr = intel_miptree_map_raw(intel, map->mt); + + DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, _mesa_get_format_name(mt->format), + level, slice, map->ptr, map->stride); + + return; + +fail: + intel_miptree_release(&map->mt); + map->ptr = NULL; + map->stride = 0; +} + +static void +intel_miptree_unmap_blit(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + struct gl_context *ctx = &intel->ctx; + + intel_miptree_unmap_raw(intel, map->mt); + + if (map->mode & GL_MAP_WRITE_BIT) { + bool ok = intel_miptree_blit(intel, + map->mt, 0, 0, + 0, 0, false, + mt, level, slice, + map->x, map->y, false, + map->w, map->h, GL_COPY); + WARN_ONCE(!ok, "Failed to blit from linear temporary mapping"); + } + + intel_miptree_release(&map->mt); +} + +static void +intel_miptree_map_s8(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + map->stride = map->w; + map->buffer = map->ptr = malloc(map->stride * map->h); + if (!map->buffer) + return; + + /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no + * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless + * invalidate is set, since we'll be writing the whole rectangle from our + * temporary buffer back out. + */ + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { + uint8_t *untiled_s8_map = map->ptr; + uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + unsigned int image_x, image_y; + + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t offset = intel_offset_S8(mt->region->pitch, + x + image_x + map->x, + y + image_y + map->y, + intel->has_swizzling); + untiled_s8_map[y * map->w + x] = tiled_s8_map[offset]; + } + } + + intel_miptree_unmap_raw(intel, mt); + + DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->x + image_x, map->y + image_y, map->ptr, map->stride); + } else { + DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->ptr, map->stride); + } +} + +static void +intel_miptree_unmap_s8(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + if (map->mode & GL_MAP_WRITE_BIT) { + unsigned int image_x, image_y; + uint8_t *untiled_s8_map = map->ptr; + uint8_t *tiled_s8_map = intel_miptree_map_raw(intel, mt); + + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t offset = intel_offset_S8(mt->region->pitch, + x + map->x, + y + map->y, + intel->has_swizzling); + tiled_s8_map[offset] = untiled_s8_map[y * map->w + x]; + } + } + + intel_miptree_unmap_raw(intel, mt); + } + + free(map->buffer); +} + +static void +intel_miptree_map_etc(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + assert(mt->etc_format != MESA_FORMAT_NONE); + if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) { + assert(mt->format == MESA_FORMAT_RGBX8888_REV); + } + + assert(map->mode & GL_MAP_WRITE_BIT); + assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT); + + map->stride = _mesa_format_row_stride(mt->etc_format, map->w); + map->buffer = malloc(_mesa_format_image_size(mt->etc_format, + map->w, map->h, 1)); + map->ptr = map->buffer; +} + +static void +intel_miptree_unmap_etc(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + uint32_t image_x; + uint32_t image_y; + intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y); + + image_x += map->x; + image_y += map->y; + + uint8_t *dst = intel_miptree_map_raw(intel, mt) + + image_y * mt->region->pitch + + image_x * mt->region->cpp; + + if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) + _mesa_etc1_unpack_rgba8888(dst, mt->region->pitch, + map->ptr, map->stride, + map->w, map->h); + else + _mesa_unpack_etc2_format(dst, mt->region->pitch, + map->ptr, map->stride, + map->w, map->h, mt->etc_format); + + intel_miptree_unmap_raw(intel, mt); + free(map->buffer); +} + +/** + * Mapping function for packed depth/stencil miptrees backed by real separate + * miptrees for depth and stencil. + * + * On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer + * separate from the depth buffer. Yet at the GL API level, we have to expose + * packed depth/stencil textures and FBO attachments, and Mesa core expects to + * be able to map that memory for texture storage and glReadPixels-type + * operations. We give Mesa core that access by mallocing a temporary and + * copying the data between the actual backing store and the temporary. + */ +static void +intel_miptree_map_depthstencil(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, unsigned int slice) +{ + struct intel_mipmap_tree *z_mt = mt; + struct intel_mipmap_tree *s_mt = mt->stencil_mt; + bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT; + int packed_bpp = map_z32f_x24s8 ? 8 : 4; + + map->stride = map->w * packed_bpp; + map->buffer = map->ptr = malloc(map->stride * map->h); + if (!map->buffer) + return; + + /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no + * INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless + * invalidate is set, since we'll be writing the whole rectangle from our + * temporary buffer back out. + */ + if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) { + uint32_t *packed_map = map->ptr; + uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); + uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + unsigned int s_image_x, s_image_y; + unsigned int z_image_x, z_image_y; + + intel_miptree_get_image_offset(s_mt, level, slice, + &s_image_x, &s_image_y); + intel_miptree_get_image_offset(z_mt, level, slice, + &z_image_x, &z_image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + int map_x = map->x + x, map_y = map->y + y; + ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, + map_x + s_image_x, + map_y + s_image_y, + intel->has_swizzling); + ptrdiff_t z_offset = ((map_y + z_image_y) * + (z_mt->region->pitch / 4) + + (map_x + z_image_x)); + uint8_t s = s_map[s_offset]; + uint32_t z = z_map[z_offset]; + + if (map_z32f_x24s8) { + packed_map[(y * map->w + x) * 2 + 0] = z; + packed_map[(y * map->w + x) * 2 + 1] = s; + } else { + packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff); + } + } + } + + intel_miptree_unmap_raw(intel, s_mt); + intel_miptree_unmap_raw(intel, z_mt); + + DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n", + __FUNCTION__, + map->x, map->y, map->w, map->h, + z_mt, map->x + z_image_x, map->y + z_image_y, + s_mt, map->x + s_image_x, map->y + s_image_y, + map->ptr, map->stride); + } else { + DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__, + map->x, map->y, map->w, map->h, + mt, map->ptr, map->stride); + } +} + +static void +intel_miptree_unmap_depthstencil(struct intel_context *intel, + struct intel_mipmap_tree *mt, + struct intel_miptree_map *map, + unsigned int level, + unsigned int slice) +{ + struct intel_mipmap_tree *z_mt = mt; + struct intel_mipmap_tree *s_mt = mt->stencil_mt; + bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT; + + if (map->mode & GL_MAP_WRITE_BIT) { + uint32_t *packed_map = map->ptr; + uint8_t *s_map = intel_miptree_map_raw(intel, s_mt); + uint32_t *z_map = intel_miptree_map_raw(intel, z_mt); + unsigned int s_image_x, s_image_y; + unsigned int z_image_x, z_image_y; + + intel_miptree_get_image_offset(s_mt, level, slice, + &s_image_x, &s_image_y); + intel_miptree_get_image_offset(z_mt, level, slice, + &z_image_x, &z_image_y); + + for (uint32_t y = 0; y < map->h; y++) { + for (uint32_t x = 0; x < map->w; x++) { + ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch, + x + s_image_x + map->x, + y + s_image_y + map->y, + intel->has_swizzling); + ptrdiff_t z_offset = ((y + z_image_y) * + (z_mt->region->pitch / 4) + + (x + z_image_x)); + + if (map_z32f_x24s8) { + z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; + s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1]; + } else { + uint32_t packed = packed_map[y * map->w + x]; + s_map[s_offset] = packed >> 24; + z_map[z_offset] = packed; + } + } + } + + intel_miptree_unmap_raw(intel, s_mt); + intel_miptree_unmap_raw(intel, z_mt); + + DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n", + __FUNCTION__, + map->x, map->y, map->w, map->h, + z_mt, _mesa_get_format_name(z_mt->format), + map->x + z_image_x, map->y + z_image_y, + s_mt, map->x + s_image_x, map->y + s_image_y, + map->ptr, map->stride); + } + + free(map->buffer); +} + +/** + * Create and attach a map to the miptree at (level, slice). Return the + * attached map. + */ +static struct intel_miptree_map* +intel_miptree_attach_map(struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode) +{ + struct intel_miptree_map *map = calloc(1, sizeof(*map)); + + if (!map) + return NULL; + + assert(mt->level[level].slice[slice].map == NULL); + mt->level[level].slice[slice].map = map; + + map->mode = mode; + map->x = x; + map->y = y; + map->w = w; + map->h = h; + + return map; +} + +/** + * Release the map at (level, slice). + */ +static void +intel_miptree_release_map(struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map **map; + + map = &mt->level[level].slice[slice].map; + free(*map); + *map = NULL; +} + +static void +intel_miptree_map_singlesample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + struct intel_miptree_map *map; + + assert(mt->num_samples <= 1); + + map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode); + if (!map){ + *out_ptr = NULL; + *out_stride = 0; + return; + } + + intel_miptree_slice_resolve_depth(intel, mt, level, slice); + if (map->mode & GL_MAP_WRITE_BIT) { + intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice); + } + + if (mt->format == MESA_FORMAT_S8) { + intel_miptree_map_s8(intel, mt, map, level, slice); + } else if (mt->etc_format != MESA_FORMAT_NONE && + !(mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_etc(intel, mt, map, level, slice); + } else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_map_depthstencil(intel, mt, map, level, slice); + } + /* See intel_miptree_blit() for details on the 32k pitch limit. */ + else if (intel->has_llc && + !(mode & GL_MAP_WRITE_BIT) && + !mt->compressed && + (mt->region->tiling == I915_TILING_X || + (intel->gen >= 6 && mt->region->tiling == I915_TILING_Y)) && + mt->region->pitch < 32768) { + intel_miptree_map_blit(intel, mt, map, level, slice); + } else if (mt->region->tiling != I915_TILING_NONE && + mt->region->bo->size >= intel->max_gtt_map_object_size) { + assert(mt->region->pitch < 32768); + intel_miptree_map_blit(intel, mt, map, level, slice); + } else { + intel_miptree_map_gtt(intel, mt, map, level, slice); + } + + *out_ptr = map->ptr; + *out_stride = map->stride; + + if (map->ptr == NULL) + intel_miptree_release_map(mt, level, slice); +} + +static void +intel_miptree_unmap_singlesample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map *map = mt->level[level].slice[slice].map; + + assert(mt->num_samples <= 1); + + if (!map) + return; + + DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__, + mt, _mesa_get_format_name(mt->format), level, slice); + + if (mt->format == MESA_FORMAT_S8) { + intel_miptree_unmap_s8(intel, mt, map, level, slice); + } else if (mt->etc_format != MESA_FORMAT_NONE && + !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_unmap_etc(intel, mt, map, level, slice); + } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { + intel_miptree_unmap_depthstencil(intel, mt, map, level, slice); + } else if (map->mt) { + intel_miptree_unmap_blit(intel, mt, map, level, slice); + } else { + intel_miptree_unmap_gtt(intel, mt, map, level, slice); + } + + intel_miptree_release_map(mt, level, slice); +} + +static void +intel_miptree_map_multisample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + struct intel_miptree_map *map; + + assert(mt->num_samples > 1); + + /* Only flat, renderbuffer-like miptrees are supported. */ + if (mt->target != GL_TEXTURE_2D || + mt->first_level != 0 || + mt->last_level != 0) { + _mesa_problem(&intel->ctx, "attempt to map a multisample miptree for " + "which (target, first_level, last_level != " + "(GL_TEXTURE_2D, 0, 0)"); + goto fail; + } + + map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode); + if (!map) + goto fail; + + if (!mt->singlesample_mt) { + mt->singlesample_mt = + intel_miptree_create_for_renderbuffer(intel, + mt->format, + mt->logical_width0, + mt->logical_height0, + 0 /*num_samples*/); + if (!mt->singlesample_mt) + goto fail; + + map->singlesample_mt_is_tmp = true; + mt->need_downsample = true; + } + + intel_miptree_downsample(intel, mt); + intel_miptree_map_singlesample(intel, mt->singlesample_mt, + level, slice, + x, y, w, h, + mode, + out_ptr, out_stride); + return; + +fail: + intel_miptree_release_map(mt, level, slice); + *out_ptr = NULL; + *out_stride = 0; +} + +static void +intel_miptree_unmap_multisample(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + struct intel_miptree_map *map = mt->level[level].slice[slice].map; + + assert(mt->num_samples > 1); + + if (!map) + return; + + intel_miptree_unmap_singlesample(intel, mt->singlesample_mt, level, slice); + + mt->need_downsample = false; + if (map->mode & GL_MAP_WRITE_BIT) + intel_miptree_upsample(intel, mt); + + if (map->singlesample_mt_is_tmp) + intel_miptree_release(&mt->singlesample_mt); + + intel_miptree_release_map(mt, level, slice); +} + +void +intel_miptree_map(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride) +{ + if (mt->num_samples <= 1) + intel_miptree_map_singlesample(intel, mt, + level, slice, + x, y, w, h, + mode, + out_ptr, out_stride); + else + intel_miptree_map_multisample(intel, mt, + level, slice, + x, y, w, h, + mode, + out_ptr, out_stride); +} + +void +intel_miptree_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice) +{ + if (mt->num_samples <= 1) + intel_miptree_unmap_singlesample(intel, mt, level, slice); + else + intel_miptree_unmap_multisample(intel, mt, level, slice); +} diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.h b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h new file mode 100644 index 0000000..6dab092 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.h @@ -0,0 +1,788 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_MIPMAP_TREE_H +#define INTEL_MIPMAP_TREE_H + +#include + +#include "intel_regions.h" +#include "intel_resolve_map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* A layer on top of the intel_regions code which adds: + * + * - Code to size and layout a region to hold a set of mipmaps. + * - Query to determine if a new image fits in an existing tree. + * - More refcounting + * - maybe able to remove refcounting from intel_region? + * - ? + * + * The fixed mipmap layout of intel hardware where one offset + * specifies the position of all images in a mipmap hierachy + * complicates the implementation of GL texture image commands, + * compared to hardware where each image is specified with an + * independent offset. + * + * In an ideal world, each texture object would be associated with a + * single bufmgr buffer or 2d intel_region, and all the images within + * the texture object would slot into the tree as they arrive. The + * reality can be a little messier, as images can arrive from the user + * with sizes that don't fit in the existing tree, or in an order + * where the tree layout cannot be guessed immediately. + * + * This structure encodes an idealized mipmap tree. The GL image + * commands build these where possible, otherwise store the images in + * temporary system buffers. + */ + +struct intel_resolve_map; +struct intel_texture_image; + +/** + * When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a + * depthstencil-split-to-separate-stencil miptree, we'll normally make a + * tmeporary and recreate the kind of data requested by Mesa core, since we're + * satisfying some glGetTexImage() request or something. + * + * However, occasionally you want to actually map the miptree's current data + * without transcoding back. This flag to intel_miptree_map() gets you that. + */ +#define BRW_MAP_DIRECT_BIT 0x80000000 + +struct intel_miptree_map { + /** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */ + GLbitfield mode; + /** Region of interest for the map. */ + int x, y, w, h; + /** Possibly malloced temporary buffer for the mapping. */ + void *buffer; + /** Possible pointer to a temporary linear miptree for the mapping. */ + struct intel_mipmap_tree *mt; + /** Pointer to the start of (map_x, map_y) returned by the mapping. */ + void *ptr; + /** Stride of the mapping. */ + int stride; + + /** + * intel_mipmap_tree::singlesample_mt is temporary storage that persists + * only for the duration of the map. + */ + bool singlesample_mt_is_tmp; +}; + +/** + * Describes the location of each texture image within a texture region. + */ +struct intel_mipmap_level +{ + /** Offset to this miptree level, used in computing x_offset. */ + GLuint level_x; + /** Offset to this miptree level, used in computing y_offset. */ + GLuint level_y; + GLuint width; + GLuint height; + + /** + * \brief Number of 2D slices in this miplevel. + * + * The exact semantics of depth varies according to the texture target: + * - For GL_TEXTURE_CUBE_MAP, depth is 6. + * - For GL_TEXTURE_2D_ARRAY, depth is the number of array slices. It is + * identical for all miplevels in the texture. + * - For GL_TEXTURE_3D, it is the texture's depth at this miplevel. Its + * value, like width and height, varies with miplevel. + * - For other texture types, depth is 1. + */ + GLuint depth; + + /** + * \brief List of 2D images in this mipmap level. + * + * This may be a list of cube faces, array slices in 2D array texture, or + * layers in a 3D texture. The list's length is \c depth. + */ + struct intel_mipmap_slice { + /** + * \name Offset to slice + * \{ + * + * Hardware formats are so diverse that that there is no unified way to + * compute the slice offsets, so we store them in this table. + * + * The (x, y) offset to slice \c s at level \c l relative the miptrees + * base address is + * \code + * x = mt->level[l].slice[s].x_offset + * y = mt->level[l].slice[s].y_offset + */ + GLuint x_offset; + GLuint y_offset; + /** \} */ + + /** + * Mapping information. Persistent for the duration of + * intel_miptree_map/unmap on this slice. + */ + struct intel_miptree_map *map; + + /** + * \brief Is HiZ enabled for this slice? + * + * If \c mt->level[l].slice[s].has_hiz is set, then (1) \c mt->hiz_mt + * has been allocated and (2) the HiZ memory corresponding to this slice + * resides at \c mt->hiz_mt->level[l].slice[s]. + */ + bool has_hiz; + } *slice; +}; + +/** + * Enum for keeping track of the different MSAA layouts supported by Gen7. + */ +enum intel_msaa_layout +{ + /** + * Ordinary surface with no MSAA. + */ + INTEL_MSAA_LAYOUT_NONE, + + /** + * Interleaved Multisample Surface. The additional samples are + * accommodated by scaling up the width and the height of the surface so + * that all the samples corresponding to a pixel are located at nearby + * memory locations. + */ + INTEL_MSAA_LAYOUT_IMS, + + /** + * Uncompressed Multisample Surface. The surface is stored as a 2D array, + * with array slice n containing all pixel data for sample n. + */ + INTEL_MSAA_LAYOUT_UMS, + + /** + * Compressed Multisample Surface. The surface is stored as in + * INTEL_MSAA_LAYOUT_UMS, but there is an additional buffer called the MCS + * (Multisample Control Surface) buffer. Each pixel in the MCS buffer + * indicates the mapping from sample number to array slice. This allows + * the common case (where all samples constituting a pixel have the same + * color value) to be stored efficiently by just using a single array + * slice. + */ + INTEL_MSAA_LAYOUT_CMS, +}; + + +#ifndef I915 +/** + * Enum for keeping track of the state of an MCS buffer associated with a + * miptree. This determines when fast clear related operations are needed. + * + * Fast clear works by deferring the memory writes that would be used to clear + * the buffer, so that instead of performing them at the time of the clear + * operation, the hardware automatically performs them at the time that the + * buffer is later accessed for rendering. The MCS buffer keeps track of + * which regions of the buffer still have pending clear writes. + * + * This enum keeps track of the driver's knowledge of the state of the MCS + * buffer. + * + * MCS buffers only exist on Gen7+. + */ +enum intel_mcs_state +{ + /** + * There is no MCS buffer for this miptree, and one should never be + * allocated. + */ + INTEL_MCS_STATE_NONE, + + /** + * An MCS buffer exists for this miptree, and it is used for MSAA purposes. + */ + INTEL_MCS_STATE_MSAA, + + /** + * No deferred clears are pending for this miptree, and the contents of the + * color buffer are entirely correct. An MCS buffer may or may not exist + * for this miptree. If it does exist, it is entirely in the "no deferred + * clears pending" state. If it does not exist, it will be created the + * first time a fast color clear is executed. + * + * In this state, the color buffer can be used for purposes other than + * rendering without needing a render target resolve. + */ + INTEL_MCS_STATE_RESOLVED, + + /** + * An MCS buffer exists for this miptree, and deferred clears are pending + * for some regions of the color buffer, as indicated by the MCS buffer. + * The contents of the color buffer are only correct for the regions where + * the MCS buffer doesn't indicate a deferred clear. + * + * In this state, a render target resolve must be performed before the + * color buffer can be used for purposes other than rendering. + */ + INTEL_MCS_STATE_UNRESOLVED, + + /** + * An MCS buffer exists for this miptree, and deferred clears are pending + * for the entire color buffer, and the contents of the MCS buffer reflect + * this. The contents of the color buffer are undefined. + * + * In this state, a render target resolve must be performed before the + * color buffer can be used for purposes other than rendering. + * + * If the client attempts to clear a buffer which is already in this state, + * the clear can be safely skipped, since the buffer is already clear. + */ + INTEL_MCS_STATE_CLEAR, +}; +#endif + +struct intel_mipmap_tree +{ + /* Effectively the key: + */ + GLenum target; + + /** + * Generally, this is just the same as the gl_texture_image->TexFormat or + * gl_renderbuffer->Format. + * + * However, for textures and renderbuffers with packed depth/stencil formats + * on hardware where we want or need to use separate stencil, there will be + * two miptrees for storing the data. If the depthstencil texture or rb is + * MESA_FORMAT_Z32_FLOAT_X24S8, then mt->format will be + * MESA_FORMAT_Z32_FLOAT, otherwise for MESA_FORMAT_S8_Z24 objects it will be + * MESA_FORMAT_X8_Z24. + * + * For ETC1/ETC2 textures, this is one of the uncompressed mesa texture + * formats if the hardware lacks support for ETC1/ETC2. See @ref wraps_etc. + */ + gl_format format; + + /** This variable stores the value of ETC compressed texture format */ + gl_format etc_format; + + /** + * The X offset of each image in the miptree must be aligned to this. See + * the "Alignment Unit Size" section of the BSpec. + */ + unsigned int align_w; + unsigned int align_h; /**< \see align_w */ + + GLuint first_level; + GLuint last_level; + + /** + * Level zero image dimensions. These dimensions correspond to the + * physical layout of data in memory. Accordingly, they account for the + * extra width, height, and or depth that must be allocated in order to + * accommodate multisample formats, and they account for the extra factor + * of 6 in depth that must be allocated in order to accommodate cubemap + * textures. + */ + GLuint physical_width0, physical_height0, physical_depth0; + + GLuint cpp; + GLuint num_samples; + bool compressed; + + /** + * Level zero image dimensions. These dimensions correspond to the + * logical width, height, and depth of the region as seen by client code. + * Accordingly, they do not account for the extra width, height, and/or + * depth that must be allocated in order to accommodate multisample + * formats, nor do they account for the extra factor of 6 in depth that + * must be allocated in order to accommodate cubemap textures. + */ + uint32_t logical_width0, logical_height0, logical_depth0; + + /** + * For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true + * if the surface only contains LOD 0, and hence no space is for LOD's + * other than 0 in between array slices. + * + * Corresponds to the surface_array_spacing bit in gen7_surface_state. + */ + bool array_spacing_lod0; + + /** + * MSAA layout used by this buffer. + */ + enum intel_msaa_layout msaa_layout; + + /* Derived from the above: + */ + GLuint total_width; + GLuint total_height; + + /* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to + * this depth mipmap tree, if any. + */ + uint32_t depth_clear_value; + + /* Includes image offset tables: + */ + struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct intel_region *region; + + /* Offset into region bo where miptree starts: + */ + uint32_t offset; + + /** + * \brief Singlesample miptree. + * + * This is used under two cases. + * + * --- Case 1: As persistent singlesample storage for multisample window + * system front and back buffers --- + * + * Suppose that the window system FBO was created with a multisample + * config. Let `back_irb` be the `intel_renderbuffer` for the FBO's back + * buffer. Then `back_irb` contains two miptrees: a parent multisample + * miptree (back_irb->mt) and a child singlesample miptree + * (back_irb->mt->singlesample_mt). The DRM buffer shared with DRI2 + * belongs to `back_irb->mt->singlesample_mt` and contains singlesample + * data. The singlesample miptree is created at the same time as and + * persists for the lifetime of its parent multisample miptree. + * + * When access to the singlesample data is needed, such as at + * eglSwapBuffers and glReadPixels, an automatic downsample occurs from + * `back_rb->mt` to `back_rb->mt->singlesample_mt` when necessary. + * + * This description of the back buffer applies analogously to the front + * buffer. + * + * + * --- Case 2: As temporary singlesample storage for mapping multisample + * miptrees --- + * + * Suppose the intel_miptree_map is called on a multisample miptree, `mt`, + * for which case 1 does not apply (that is, `mt` does not belong to + * a front or back buffer). Then `mt->singlesample_mt` is null at the + * start of the call. intel_miptree_map will create a temporary + * singlesample miptree, store it at `mt->singlesample_mt`, downsample from + * `mt` to `mt->singlesample_mt` if necessary, then map + * `mt->singlesample_mt`. The temporary miptree is later deleted during + * intel_miptree_unmap. + */ + struct intel_mipmap_tree *singlesample_mt; + + /** + * \brief A downsample is needed from this miptree to singlesample_mt. + */ + bool need_downsample; + + /** + * \brief HiZ miptree + * + * The hiz miptree contains the miptree's hiz buffer. To allocate the hiz + * miptree, use intel_miptree_alloc_hiz(). + * + * To determine if hiz is enabled, do not check this pointer. Instead, use + * intel_miptree_slice_has_hiz(). + */ + struct intel_mipmap_tree *hiz_mt; + + /** + * \brief Map of miptree slices to needed resolves. + * + * This is used only when the miptree has a child HiZ miptree. + * + * Let \c mt be a depth miptree with HiZ enabled. Then the resolve map is + * \c mt->hiz_map. The resolve map of the child HiZ miptree, \c + * mt->hiz_mt->hiz_map, is unused. + */ + struct intel_resolve_map hiz_map; + + /** + * \brief Stencil miptree for depthstencil textures. + * + * This miptree is used for depthstencil textures and renderbuffers that + * require separate stencil. It always has the true copy of the stencil + * bits, regardless of mt->format. + * + * \see intel_miptree_map_depthstencil() + * \see intel_miptree_unmap_depthstencil() + */ + struct intel_mipmap_tree *stencil_mt; + +#ifndef I915 + /** + * \brief MCS miptree. + * + * This miptree contains the "multisample control surface", which stores + * the necessary information to implement compressed MSAA + * (INTEL_MSAA_FORMAT_CMS) and "fast color clear" behaviour on Gen7+. + * + * NULL if no MCS miptree is in use for this surface. + */ + struct intel_mipmap_tree *mcs_mt; + + /** + * MCS state for this buffer. + */ + enum intel_mcs_state mcs_state; +#endif + + /** + * The SURFACE_STATE bits associated with the last fast color clear to this + * color mipmap tree, if any. + * + * This value will only ever contain ones in bits 28-31, so it is safe to + * OR into dword 7 of SURFACE_STATE. + */ + uint32_t fast_clear_color_value; + + /* These are also refcounted: + */ + GLuint refcount; +}; + +enum intel_miptree_tiling_mode { + INTEL_MIPTREE_TILING_ANY, + INTEL_MIPTREE_TILING_Y, + INTEL_MIPTREE_TILING_NONE, +}; + +bool +intel_is_non_msrt_mcs_buffer_supported(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_get_non_msrt_mcs_alignment(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned *width_px, unsigned *height); + +bool +intel_miptree_alloc_non_msrt_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool expect_accelerated_upload, + GLuint num_samples, + enum intel_miptree_tiling_mode); + +struct intel_mipmap_tree * +intel_miptree_create_layout(struct intel_context *intel, + GLenum target, + gl_format format, + GLuint first_level, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + bool for_bo, + GLuint num_samples); + +struct intel_mipmap_tree * +intel_miptree_create_for_bo(struct intel_context *intel, + drm_intel_bo *bo, + gl_format format, + uint32_t offset, + uint32_t width, + uint32_t height, + int pitch, + uint32_t tiling); + +struct intel_mipmap_tree* +intel_miptree_create_for_dri2_buffer(struct intel_context *intel, + unsigned dri_attachment, + gl_format format, + uint32_t num_samples, + struct intel_region *region); + +/** + * Create a miptree appropriate as the storage for a non-texture renderbuffer. + * The miptree has the following properties: + * - The target is GL_TEXTURE_2D. + * - There are no levels other than the base level 0. + * - Depth is 1. + */ +struct intel_mipmap_tree* +intel_miptree_create_for_renderbuffer(struct intel_context *intel, + gl_format format, + uint32_t width, + uint32_t height, + uint32_t num_samples); + +/** \brief Assert that the level and layer are valid for the miptree. */ +static inline void +intel_miptree_check_level_layer(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer) +{ + assert(level >= mt->first_level); + assert(level <= mt->last_level); + assert(layer < mt->level[level].depth); +} + +int intel_miptree_pitch_align (struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling, + int pitch); + +void intel_miptree_reference(struct intel_mipmap_tree **dst, + struct intel_mipmap_tree *src); + +void intel_miptree_release(struct intel_mipmap_tree **mt); + +/* Check if an image fits an existing mipmap tree layout + */ +bool intel_miptree_match_image(struct intel_mipmap_tree *mt, + struct gl_texture_image *image); + +void +intel_miptree_get_image_offset(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + GLuint *x, GLuint *y); + +void +intel_miptree_get_dimensions_for_image(struct gl_texture_image *image, + int *width, int *height, int *depth); + +uint32_t +intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, + GLuint level, GLuint slice, + uint32_t *tile_x, + uint32_t *tile_y); + +void intel_miptree_set_level_info(struct intel_mipmap_tree *mt, + GLuint level, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d); + +void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt, + GLuint level, + GLuint img, GLuint x, GLuint y); + +void +intel_miptree_copy_teximage(struct intel_context *intel, + struct intel_texture_image *intelImage, + struct intel_mipmap_tree *dst_mt, bool invalidate); + +/** + * Copy the stencil data from \c mt->stencil_mt->region to \c mt->region for + * the given miptree slice. + * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_scatter(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t slice); + +/** + * Copy the stencil data in \c mt->stencil_mt->region to \c mt->region for the + * given miptree slice. + * + * \see intel_mipmap_tree::stencil_mt + */ +void +intel_miptree_s8z24_gather(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +bool +intel_miptree_alloc_mcs(struct intel_context *intel, + struct intel_mipmap_tree *mt, + GLuint num_samples); + +/** + * \name Miptree HiZ functions + * \{ + * + * It is safe to call the "slice_set_need_resolve" and "slice_resolve" + * functions on a miptree without HiZ. In that case, each function is a no-op. + */ + +/** + * \brief Allocate the miptree's embedded HiZ miptree. + * \see intel_mipmap_tree:hiz_mt + * \return false if allocation failed + */ + +bool +intel_miptree_alloc_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +bool +intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t layer); + +void +intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); +void +intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt, + uint32_t level, + uint32_t depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_slice_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int depth); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_all_slices_resolve_hiz(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/** + * \return false if no resolve was needed + */ +bool +intel_miptree_all_slices_resolve_depth(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/**\}*/ + +/** + * Update the fast clear state for a miptree to indicate that it has been used + * for rendering. + */ +static inline void +intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt) +{ +#ifdef I915 + /* Nothing needs to be done for I915, since it doesn't support fast + * clear. + */ +#else + /* If the buffer was previously in fast clear state, change it to + * unresolved state, since it won't be guaranteed to be clear after + * rendering occurs. + */ + if (mt->mcs_state == INTEL_MCS_STATE_CLEAR) + mt->mcs_state = INTEL_MCS_STATE_UNRESOLVED; +#endif +} + +void +intel_miptree_resolve_color(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_make_shareable(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_downsample(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_upsample(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +/* i915_mipmap_tree.c: + */ +void i915_miptree_layout(struct intel_mipmap_tree *mt); +void i945_miptree_layout(struct intel_mipmap_tree *mt); +void brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void *intel_miptree_map_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void intel_miptree_unmap_raw(struct intel_context *intel, + struct intel_mipmap_tree *mt); + +void +intel_miptree_map(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice, + unsigned int x, + unsigned int y, + unsigned int w, + unsigned int h, + GLbitfield mode, + void **out_ptr, + int *out_stride); + +void +intel_miptree_unmap(struct intel_context *intel, + struct intel_mipmap_tree *mt, + unsigned int level, + unsigned int slice); + +#ifdef I915 +static inline void +intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt, + unsigned int level, unsigned int layer, enum gen6_hiz_op op) +{ + /* Stub on i915. It would be nice if we didn't execute resolve code at all + * there. + */ +} +#else +void +intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt, + unsigned int level, unsigned int layer, enum gen6_hiz_op op); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c deleted file mode 120000 index d733c5e..0000000 --- a/src/mesa/drivers/dri/i915/intel_pixel.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_pixel.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c new file mode 100644 index 0000000..f58cb85 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel.c @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/accum.h" +#include "main/enums.h" +#include "main/state.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "swrast/swrast.h" + +#include "intel_context.h" +#include "intel_pixel.h" +#include "intel_regions.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +static GLenum +effective_func(GLenum func, bool src_alpha_is_one) +{ + if (src_alpha_is_one) { + if (func == GL_SRC_ALPHA) + return GL_ONE; + if (func == GL_ONE_MINUS_SRC_ALPHA) + return GL_ZERO; + } + + return func; +} + +/** + * Check if any fragment operations are in effect which might effect + * glDraw/CopyPixels. + */ +bool +intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one) +{ + if (ctx->NewState) + _mesa_update_state(ctx); + + if (ctx->FragmentProgram._Enabled) { + DBG("fallback due to fragment program\n"); + return false; + } + + if (ctx->Color.BlendEnabled && + (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || + effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { + DBG("fallback due to blend\n"); + return false; + } + + if (ctx->Texture._EnabledUnits) { + DBG("fallback due to texturing\n"); + return false; + } + + if (!(ctx->Color.ColorMask[0][0] && + ctx->Color.ColorMask[0][1] && + ctx->Color.ColorMask[0][2] && + ctx->Color.ColorMask[0][3])) { + DBG("fallback due to color masking\n"); + return false; + } + + if (ctx->Color.AlphaEnabled) { + DBG("fallback due to alpha\n"); + return false; + } + + if (ctx->Depth.Test) { + DBG("fallback due to depth test\n"); + return false; + } + + if (ctx->Fog.Enabled) { + DBG("fallback due to fog\n"); + return false; + } + + if (ctx->_ImageTransferState) { + DBG("fallback due to image transfer\n"); + return false; + } + + if (ctx->Stencil._Enabled) { + DBG("fallback due to image stencil\n"); + return false; + } + + if (ctx->RenderMode != GL_RENDER) { + DBG("fallback due to render mode\n"); + return false; + } + + return true; +} + +void +intelInitPixelFuncs(struct dd_function_table *functions) +{ + functions->Accum = _mesa_accum; + functions->Bitmap = intelBitmap; + functions->CopyPixels = intelCopyPixels; + functions->DrawPixels = intelDrawPixels; + functions->ReadPixels = intelReadPixels; +} + diff --git a/src/mesa/drivers/dri/i915/intel_pixel.h b/src/mesa/drivers/dri/i915/intel_pixel.h new file mode 100644 index 0000000..7578e6f --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_PIXEL_H +#define INTEL_PIXEL_H + +#include "main/mtypes.h" + +void intelInitPixelFuncs(struct dd_function_table *functions); +bool intel_check_blit_fragment_ops(struct gl_context * ctx, + bool src_alpha_is_one); + +void intelReadPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid * pixels); + +void intelDrawPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels); + +void intelCopyPixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint destx, GLint desty, GLenum type); + +void intelBitmap(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels); + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c deleted file mode 120000 index 9085c7b..0000000 --- a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_pixel_bitmap.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c new file mode 100644 index 0000000..8c0edf2 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/colormac.h" +#include "main/condrender.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/pbo.h" +#include "main/bufferobj.h" +#include "main/state.h" +#include "main/texobj.h" +#include "main/context.h" +#include "main/fbobject.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_fbo.h" +#include "intel_regions.h" +#include "intel_buffers.h" +#include "intel_pixel.h" +#include "intel_reg.h" + + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + + +/* Unlike the other intel_pixel_* functions, the expectation here is + * that the incoming data is not in a PBO. With the XY_TEXT blit + * method, there's no benefit haveing it in a PBO, but we could + * implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit + * PBO bitmaps. I think they are probably pretty rare though - I + * wonder if Xgl uses them? + */ +static const GLubyte *map_pbo( struct gl_context *ctx, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + GLubyte *buf; + + if (!_mesa_validate_pbo_access(2, unpack, width, height, 1, + GL_COLOR_INDEX, GL_BITMAP, + INT_MAX, (const GLvoid *) bitmap)) { + _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)"); + return NULL; + } + + buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size, + GL_MAP_READ_BIT, + unpack->BufferObj); + if (!buf) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)"); + return NULL; + } + + return ADD_POINTERS(buf, bitmap); +} + +static bool test_bit( const GLubyte *src, GLuint bit ) +{ + return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0; +} + +static void set_bit( GLubyte *dest, GLuint bit ) +{ + dest[bit/8] |= 1 << (bit % 8); +} + +/* Extract a rectangle's worth of data from the bitmap. Called + * per chunk of HW-sized bitmap. + */ +static GLuint get_bitmap_rect(GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + GLuint x, GLuint y, + GLuint w, GLuint h, + GLubyte *dest, + GLuint row_align, + bool invert) +{ + GLuint src_offset = (x + unpack->SkipPixels) & 0x7; + GLuint mask = unpack->LsbFirst ? 0 : 7; + GLuint bit = 0; + GLint row, col; + GLint first, last; + GLint incr; + GLuint count = 0; + + DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n", + __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask); + + if (invert) { + first = h-1; + last = 0; + incr = -1; + } + else { + first = 0; + last = h-1; + incr = 1; + } + + /* Require that dest be pre-zero'd. + */ + for (row = first; row != (last+incr); row += incr) { + const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, + width, height, + GL_COLOR_INDEX, GL_BITMAP, + y + row, x); + + for (col = 0; col < w; col++, bit++) { + if (test_bit(rowsrc, (col + src_offset) ^ mask)) { + set_bit(dest, bit ^ 7); + count++; + } + } + + if (row_align) + bit = ALIGN(bit, row_align); + } + + return count; +} + +/** + * Returns the low Y value of the vertical range given, flipped according to + * whether the framebuffer is or not. + */ +static INLINE int +y_flip(struct gl_framebuffer *fb, int y, int height) +{ + if (_mesa_is_user_fbo(fb)) + return y; + else + return fb->Height - y - height; +} + +/* + * Render a bitmap. + */ +static bool +do_blit_bitmap( struct gl_context *ctx, + GLint dstx, GLint dsty, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irb; + GLfloat tmpColor[4]; + GLubyte ubcolor[4]; + GLuint color; + GLsizei bitmap_width = width; + GLsizei bitmap_height = height; + GLint px, py; + GLuint stipple[32]; + GLint orig_dstx = dstx; + GLint orig_dsty = dsty; + + /* Update draw buffer bounds */ + _mesa_update_state(ctx); + + if (ctx->Depth.Test) { + /* The blit path produces incorrect results when depth testing is on. + * It seems the blit Z coord is always 1.0 (the far plane) so fragments + * will likely be obscured by other, closer geometry. + */ + return false; + } + + intel_prepare_render(intel); + + if (fb->_NumColorDrawBuffers != 1) { + perf_debug("accelerated glBitmap() only supports rendering to a " + "single color buffer\n"); + return false; + } + + irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + + if (_mesa_is_bufferobj(unpack->BufferObj)) { + bitmap = map_pbo(ctx, width, height, unpack, bitmap); + if (bitmap == NULL) + return true; /* even though this is an error, we're done */ + } + + COPY_4V(tmpColor, ctx->Current.RasterColor); + + if (_mesa_need_secondary_color(ctx)) { + ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); + } + + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); + UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); + + switch (irb->mt->format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]); + break; + case MESA_FORMAT_RGB565: + color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]); + break; + default: + perf_debug("Unsupported format %s in accelerated glBitmap()\n", + _mesa_get_format_name(irb->mt->format)); + return false; + } + + if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) + return false; + + /* Clip to buffer bounds and scissor. */ + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + + dsty = y_flip(fb, dsty, height); + +#define DY 32 +#define DX 32 + + /* The blitter has no idea about fast color clears, so we need to resolve + * the miptree before we do anything. + */ + intel_miptree_resolve_color(intel, irb->mt); + + /* Chop it all into chunks that can be digested by hardware: */ + for (py = 0; py < height; py += DY) { + for (px = 0; px < width; px += DX) { + int h = MIN2(DY, height - py); + int w = MIN2(DX, width - px); + GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8; + GLenum logic_op = ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY; + + assert(sz <= sizeof(stipple)); + memset(stipple, 0, sz); + + /* May need to adjust this when padding has been introduced in + * sz above: + * + * Have to translate destination coordinates back into source + * coordinates. + */ + int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack, + bitmap, + -orig_dstx + (dstx + px), + -orig_dsty + y_flip(fb, dsty + py, h), + w, h, + (GLubyte *)stipple, + 8, + _mesa_is_winsys_fbo(fb)); + if (count == 0) + continue; + + if (!intelEmitImmediateColorExpandBlit(intel, + irb->mt->cpp, + (GLubyte *)stipple, + sz, + color, + irb->mt->region->pitch, + irb->mt->region->bo, + 0, + irb->mt->region->tiling, + dstx + px, + dsty + py, + w, h, + logic_op)) { + return false; + } + + if (ctx->Query.CurrentOcclusionObject) + ctx->Query.CurrentOcclusionObject->Result += count; + } + } +out: + + if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) + intel_batchbuffer_flush(intel); + + if (_mesa_is_bufferobj(unpack->BufferObj)) { + /* done with PBO so unmap it now */ + ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj); + } + + intel_check_front_buffer_rendering(intel); + + return true; +} + + +/* There are a large number of possible ways to implement bitmap on + * this hardware, most of them have some sort of drawback. Here are a + * few that spring to mind: + * + * Blit: + * - XY_MONO_SRC_BLT_CMD + * - use XY_SETUP_CLIP_BLT for cliprect clipping. + * - XY_TEXT_BLT + * - XY_TEXT_IMMEDIATE_BLT + * - blit per cliprect, subject to maximum immediate data size. + * - XY_COLOR_BLT + * - per pixel or run of pixels + * - XY_PIXEL_BLT + * - good for sparse bitmaps + * + * 3D engine: + * - Point per pixel + * - Translate bitmap to an alpha texture and render as a quad + * - Chop bitmap up into 32x32 squares and render w/polygon stipple. + */ +void +intelBitmap(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte * pixels) +{ + if (!_mesa_check_conditional_render(ctx)) + return; + + if (do_blit_bitmap(ctx, x, y, width, height, + unpack, pixels)) + return; + + _mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels); +} diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c deleted file mode 120000 index ee43360..0000000 --- a/src/mesa/drivers/dri/i915/intel_pixel_copy.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_pixel_copy.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c new file mode 100644 index 0000000..ba8e06f --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/image.h" +#include "main/state.h" +#include "main/mtypes.h" +#include "main/condrender.h" +#include "main/fbobject.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_buffers.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_fbo.h" +#include "intel_blit.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +/** + * CopyPixels with the blitter. Don't support zooming, pixel transfer, etc. + */ +static bool +do_blit_copypixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type) +{ + struct intel_context *intel = intel_context(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_framebuffer *read_fb = ctx->ReadBuffer; + GLint orig_dstx; + GLint orig_dsty; + GLint orig_srcx; + GLint orig_srcy; + struct intel_renderbuffer *draw_irb = NULL; + struct intel_renderbuffer *read_irb = NULL; + + /* Update draw buffer bounds */ + _mesa_update_state(ctx); + + switch (type) { + case GL_COLOR: + if (fb->_NumColorDrawBuffers != 1) { + perf_debug("glCopyPixels() fallback: MRT\n"); + return false; + } + + draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]); + read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer); + break; + case GL_DEPTH_STENCIL_EXT: + draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); + read_irb = + intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer); + break; + case GL_DEPTH: + perf_debug("glCopyPixels() fallback: GL_DEPTH\n"); + return false; + case GL_STENCIL: + perf_debug("glCopyPixels() fallback: GL_STENCIL\n"); + return false; + default: + perf_debug("glCopyPixels(): Unknown type\n"); + return false; + } + + if (!draw_irb) { + perf_debug("glCopyPixels() fallback: missing draw buffer\n"); + return false; + } + + if (!read_irb) { + perf_debug("glCopyPixels() fallback: missing read buffer\n"); + return false; + } + + if (ctx->_ImageTransferState) { + perf_debug("glCopyPixels(): Unsupported image transfer state\n"); + return false; + } + + if (ctx->Depth.Test) { + perf_debug("glCopyPixels(): Unsupported depth test state\n"); + return false; + } + + if (ctx->Stencil._Enabled) { + perf_debug("glCopyPixels(): Unsupported stencil test state\n"); + return false; + } + + if (ctx->Fog.Enabled || + ctx->Texture._EnabledUnits || + ctx->FragmentProgram._Enabled) { + perf_debug("glCopyPixels(): Unsupported fragment shader state\n"); + return false; + } + + if (ctx->Color.AlphaEnabled || + ctx->Color.BlendEnabled) { + perf_debug("glCopyPixels(): Unsupported blend state\n"); + return false; + } + + if (!ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3]) { + perf_debug("glCopyPixels(): Unsupported color mask state\n"); + return false; + } + + if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) { + perf_debug("glCopyPixles(): Unsupported pixel zoom\n"); + return false; + } + + intel_prepare_render(intel); + + intel_flush(&intel->ctx); + + /* Clip to destination buffer. */ + orig_dstx = dstx; + orig_dsty = dsty; + if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, + fb->_Xmax, fb->_Ymax, + &dstx, &dsty, &width, &height)) + goto out; + /* Adjust src coords for our post-clipped destination origin */ + srcx += dstx - orig_dstx; + srcy += dsty - orig_dsty; + + /* Clip to source buffer. */ + orig_srcx = srcx; + orig_srcy = srcy; + if (!_mesa_clip_to_region(0, 0, + read_fb->Width, read_fb->Height, + &srcx, &srcy, &width, &height)) + goto out; + /* Adjust dst coords for our post-clipped source origin */ + dstx += srcx - orig_srcx; + dsty += srcy - orig_srcy; + + if (!intel_miptree_blit(intel, + read_irb->mt, read_irb->mt_level, read_irb->mt_layer, + srcx, srcy, _mesa_is_winsys_fbo(read_fb), + draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer, + dstx, dsty, _mesa_is_winsys_fbo(fb), + width, height, + (ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY))) { + DBG("%s: blit failure\n", __FUNCTION__); + return false; + } + + if (ctx->Query.CurrentOcclusionObject) + ctx->Query.CurrentOcclusionObject->Result += width * height; + +out: + intel_check_front_buffer_rendering(intel); + + DBG("%s: success\n", __FUNCTION__); + return true; +} + + +void +intelCopyPixels(struct gl_context * ctx, + GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint destx, GLint desty, GLenum type) +{ + DBG("%s\n", __FUNCTION__); + + if (!_mesa_check_conditional_render(ctx)) + return; + + if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type)) + return; + + /* this will use swrast if needed */ + _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type); +} diff --git a/src/mesa/drivers/dri/i915/intel_pixel_draw.c b/src/mesa/drivers/dri/i915/intel_pixel_draw.c deleted file mode 120000 index 8431a24..0000000 --- a/src/mesa/drivers/dri/i915/intel_pixel_draw.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_pixel_draw.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_draw.c b/src/mesa/drivers/dri/i915/intel_pixel_draw.c new file mode 100644 index 0000000..2ec7ed8 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel_draw.c @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portionsalloc + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "swrast/swrast.h" +#include "drivers/common/meta.h" + +#include "intel_context.h" +#include "intel_pixel.h" + +void +intelDrawPixels(struct gl_context * ctx, + GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, + GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid * pixels) +{ + if (format == GL_STENCIL_INDEX) { + _swrast_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); + return; + } + + _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type, + unpack, pixels); +} diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c deleted file mode 120000 index cc4589f..0000000 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_pixel_read.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c new file mode 100644 index 0000000..26eb496 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/fbobject.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/readpix.h" +#include "main/state.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_buffer_objects.h" + +#define FILE_DEBUG_FLAG DEBUG_PIXEL + +/* For many applications, the new ability to pull the source buffers + * back out of the GTT and then do the packing/conversion operations + * in software will be as much of an improvement as trying to get the + * blitter and/or texture engine to do the work. + * + * This step is gated on private backbuffers. + * + * Obviously the frontbuffer can't be pulled back, so that is either + * an argument for blit/texture readpixels, or for blitting to a + * temporary and then pulling that back. + * + * When the destination is a pbo, however, it's not clear if it is + * ever going to be pulled to main memory (though the access param + * will be a good hint). So it sounds like we do want to be able to + * choose between blit/texture implementation on the gpu and pullback + * and cpu-based copying. + * + * Unless you can magically turn client memory into a PBO for the + * duration of this call, there will be a cpu-based copying step in + * any case. + */ + +static bool +do_blit_readpixels(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); + GLuint dst_offset; + drm_intel_bo *dst_buffer; + bool all; + GLint dst_x, dst_y; + GLuint dirty; + + DBG("%s\n", __FUNCTION__); + + assert(_mesa_is_bufferobj(pack->BufferObj)); + + struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + if (ctx->_ImageTransferState || + !_mesa_format_matches_format_and_type(irb->mt->format, format, type, + false)) { + DBG("%s - bad format for blit\n", __FUNCTION__); + return false; + } + + if (pack->SwapBytes || pack->LsbFirst) { + DBG("%s: bad packing params\n", __FUNCTION__); + return false; + } + + int dst_stride = _mesa_image_row_stride(pack, width, format, type); + bool dst_flip = false; + /* Mesa flips the dst_stride for pack->Invert, but we want our mt to have a + * normal dst_stride. + */ + if (pack->Invert) { + dst_stride = -dst_stride; + dst_flip = true; + } + + dst_offset = (GLintptr)pixels; + dst_offset += _mesa_image_offset(2, pack, width, height, + format, type, 0, 0, 0); + + if (!_mesa_clip_copytexsubimage(ctx, + &dst_x, &dst_y, + &x, &y, + &width, &height)) { + return true; + } + + dirty = intel->front_buffer_dirty; + intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; + + all = (width * height * irb->mt->cpp == dst->Base.Size && + x == 0 && dst_offset == 0); + + dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); + + struct intel_mipmap_tree *pbo_mt = + intel_miptree_create_for_bo(intel, + dst_buffer, + irb->mt->format, + dst_offset, + width, height, + dst_stride, I915_TILING_NONE); + + if (!intel_miptree_blit(intel, + irb->mt, irb->mt_level, irb->mt_layer, + x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer), + pbo_mt, 0, 0, + 0, 0, dst_flip, + width, height, GL_COPY)) { + return false; + } + + intel_miptree_release(&pbo_mt); + + DBG("%s - DONE\n", __FUNCTION__); + + return true; +} + +void +intelReadPixels(struct gl_context * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + bool dirty; + + intel_flush_rendering_to_batch(ctx); + + DBG("%s\n", __FUNCTION__); + + if (_mesa_is_bufferobj(pack->BufferObj)) { + /* Using PBOs, so try the BLT based path. */ + if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, + pixels)) { + return; + } + + perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__); + } + + /* glReadPixels() wont dirty the front buffer, so reset the dirty + * flag after calling intel_prepare_render(). */ + dirty = intel->front_buffer_dirty; + intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; + + /* Update Mesa state before calling _mesa_readpixels(). + * XXX this may not be needed since ReadPixels no longer uses the + * span code. + */ + + if (ctx->NewState) + _mesa_update_state(ctx); + + _mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels); + + /* There's an intel_prepare_render() call in intelSpanRenderStart(). */ + intel->front_buffer_dirty = dirty; +} diff --git a/src/mesa/drivers/dri/i915/intel_reg.h b/src/mesa/drivers/dri/i915/intel_reg.h new file mode 100644 index 0000000..dd91a15 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_reg.h @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_NOOP (CMD_MI | 0) + +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) + +#define MI_FLUSH (CMD_MI | (4 << 23)) +#define FLUSH_MAP_CACHE (1 << 0) +#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) + +#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23)) + +#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2) + +/* Stalls command execution waiting for the given events to have occurred. */ +#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23)) +# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3) + +/** @{ + * + * PIPE_CONTROL operation, a combination MI_FLUSH and register write with + * additional flushing control. + */ +#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24)) +#define PIPE_CONTROL_CS_STALL (1 << 20) +#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19) +#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18) +#define PIPE_CONTROL_SYNC_GFDT (1 << 17) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16) +#define PIPE_CONTROL_NO_WRITE (0 << 14) +#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WRITE_FLUSH (1 << 12) +#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11) +#define PIPE_CONTROL_TC_FLUSH (1 << 10) /* GM45+ only */ +#define PIPE_CONTROL_ISP_DIS (1 << 9) +#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +/* GT */ +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) +#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) +#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) + +/** @} */ + +/** @{ + * 915 definitions + * + * 915 documents say that bits 31:28 and 1 are "undefined, must be zero." + */ +#define S0_VB_OFFSET_MASK 0x0ffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) +/** @} */ + +/** @{ + * 830 definitions + */ +#define S0_VB_OFFSET_MASK_830 0xffffff80 +#define S0_VB_PITCH_SHIFT_830 1 +#define S0_VB_ENABLE_830 (1<<0) +/** @} */ + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) +#define S2_TEX_COUNT_SHIFT_830 12 +#define S2_VERTEX_1_WIDTH_SHIFT_830 0 +#define S2_VERTEX_0_WIDTH_SHIFT_830 6 +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* Primitive dispatch on 830-945 */ +#define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) +#define PRIM_INDIRECT (1<<23) +#define PRIM_INLINE (0<<23) +#define PRIM_INDIRECT_SEQUENTIAL (0<<17) +#define PRIM_INDIRECT_ELTS (1<<17) + +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22)) + +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22)) + +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22)) + +#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22)) +# define XY_TEXT_BYTE_PACKED (1 << 16) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_8 (0x0 << 24) +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 + +/* Pipeline Statistics Counter Registers */ +#define IA_VERTICES_COUNT 0x2310 +#define IA_PRIMITIVES_COUNT 0x2318 +#define VS_INVOCATION_COUNT 0x2320 +#define HS_INVOCATION_COUNT 0x2300 +#define DS_INVOCATION_COUNT 0x2308 +#define GS_INVOCATION_COUNT 0x2328 +#define GS_PRIMITIVES_COUNT 0x2330 +#define CL_INVOCATION_COUNT 0x2338 +#define CL_PRIMITIVES_COUNT 0x2340 +#define PS_INVOCATION_COUNT 0x2348 +#define PS_DEPTH_COUNT 0x2350 + +#define SO_NUM_PRIM_STORAGE_NEEDED 0x2280 +#define SO_PRIM_STORAGE_NEEDED0_IVB 0x5240 +#define SO_PRIM_STORAGE_NEEDED1_IVB 0x5248 +#define SO_PRIM_STORAGE_NEEDED2_IVB 0x5250 +#define SO_PRIM_STORAGE_NEEDED3_IVB 0x5258 + +#define SO_NUM_PRIMS_WRITTEN 0x2288 +#define SO_NUM_PRIMS_WRITTEN0_IVB 0x5200 +#define SO_NUM_PRIMS_WRITTEN1_IVB 0x5208 +#define SO_NUM_PRIMS_WRITTEN2_IVB 0x5210 +#define SO_NUM_PRIMS_WRITTEN3_IVB 0x5218 + +#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4) + +#define TIMESTAMP 0x2358 + +#define BCS_SWCTRL 0x22200 +# define BCS_SWCTRL_SRC_Y (1 << 0) +# define BCS_SWCTRL_DST_Y (1 << 1) diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c deleted file mode 120000 index 89b2f15..0000000 --- a/src/mesa/drivers/dri/i915/intel_regions.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_regions.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c new file mode 100644 index 0000000..44f7030 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_regions.c @@ -0,0 +1,353 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Provide additional functionality on top of bufmgr buffers: + * - 2d semantics and blit operations + * - refcounting of buffers for multiple images in a buffer. + * - refcounting of buffer mappings. + * - some logic for moving the buffers to the best memory pools for + * given operations. + * + * Most of this is to make it easier to implement the fixed-layout + * mipmap tree required by intel hardware in the face of GL's + * programming interface where each image can be specifed in random + * order and it isn't clear what layout the tree should have until the + * last moment. + */ + +#include +#include + +#include "main/hash.h" +#include "intel_context.h" +#include "intel_regions.h" +#include "intel_blit.h" +#include "intel_buffer_objects.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" + +#define FILE_DEBUG_FLAG DEBUG_REGION + +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. + */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + +static struct intel_region * +intel_region_alloc_internal(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + uint32_t tiling, drm_intel_bo *buffer) +{ + struct intel_region *region; + + region = calloc(sizeof(*region), 1); + if (region == NULL) + return region; + + region->cpp = cpp; + region->width = width; + region->height = height; + region->pitch = pitch; + region->refcount = 1; + region->bo = buffer; + region->tiling = tiling; + + _DBG("%s <-- %p\n", __FUNCTION__, region); + return region; +} + +struct intel_region * +intel_region_alloc(struct intel_screen *screen, + uint32_t tiling, + GLuint cpp, GLuint width, GLuint height, + bool expect_accelerated_upload) +{ + drm_intel_bo *buffer; + unsigned long flags = 0; + unsigned long aligned_pitch; + struct intel_region *region; + + if (expect_accelerated_upload) + flags |= BO_ALLOC_FOR_RENDER; + + buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region", + width, height, cpp, + &tiling, &aligned_pitch, flags); + if (buffer == NULL) + return NULL; + + region = intel_region_alloc_internal(screen, cpp, width, height, + aligned_pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; +} + +bool +intel_region_flink(struct intel_region *region, uint32_t *name) +{ + if (region->name == 0) { + if (drm_intel_bo_flink(region->bo, ®ion->name)) + return false; + } + + *name = region->name; + + return true; +} + +struct intel_region * +intel_region_alloc_for_handle(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + GLuint handle, const char *name) +{ + struct intel_region *region; + drm_intel_bo *buffer; + int ret; + uint32_t bit_6_swizzle, tiling; + + buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle); + if (buffer == NULL) + return NULL; + ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n", + handle, name, strerror(-ret)); + drm_intel_bo_unreference(buffer); + return NULL; + } + + region = intel_region_alloc_internal(screen, cpp, + width, height, pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + region->name = handle; + + return region; +} + +struct intel_region * +intel_region_alloc_for_fd(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + int fd, const char *name) +{ + struct intel_region *region; + drm_intel_bo *buffer; + int ret; + uint32_t bit_6_swizzle, tiling; + + buffer = drm_intel_bo_gem_create_from_prime(screen->bufmgr, + fd, height * pitch); + if (buffer == NULL) + return NULL; + ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle); + if (ret != 0) { + fprintf(stderr, "Couldn't get tiling of buffer (%s): %s\n", + name, strerror(-ret)); + drm_intel_bo_unreference(buffer); + return NULL; + } + + region = intel_region_alloc_internal(screen, cpp, + width, height, pitch, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; +} + +void +intel_region_reference(struct intel_region **dst, struct intel_region *src) +{ + _DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__, + *dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0); + + if (src != *dst) { + if (*dst) + intel_region_release(dst); + + if (src) + src->refcount++; + *dst = src; + } +} + +void +intel_region_release(struct intel_region **region_handle) +{ + struct intel_region *region = *region_handle; + + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); + return; + } + + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + + ASSERT(region->refcount > 0); + region->refcount--; + + if (region->refcount == 0) { + drm_intel_bo_unreference(region->bo); + + free(region); + } + *region_handle = NULL; +} + +/** + * This function computes masks that may be used to select the bits of the X + * and Y coordinates that indicate the offset within a tile. If the region is + * untiled, the masks are set to 0. + */ +void +intel_region_get_tile_masks(struct intel_region *region, + uint32_t *mask_x, uint32_t *mask_y, + bool map_stencil_as_y_tiled) +{ + int cpp = region->cpp; + uint32_t tiling = region->tiling; + + if (map_stencil_as_y_tiled) + tiling = I915_TILING_Y; + + switch (tiling) { + default: + assert(false); + case I915_TILING_NONE: + *mask_x = *mask_y = 0; + break; + case I915_TILING_X: + *mask_x = 512 / cpp - 1; + *mask_y = 7; + break; + case I915_TILING_Y: + *mask_x = 128 / cpp - 1; + *mask_y = 31; + break; + } +} + +/** + * Compute the offset (in bytes) from the start of the region to the given x + * and y coordinate. For tiled regions, caller must ensure that x and y are + * multiples of the tile size. + */ +uint32_t +intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, + uint32_t y, bool map_stencil_as_y_tiled) +{ + int cpp = region->cpp; + uint32_t pitch = region->pitch; + uint32_t tiling = region->tiling; + + if (map_stencil_as_y_tiled) { + tiling = I915_TILING_Y; + + /* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile + * gets transformed into a 32-high Y-tile. Accordingly, the pitch of + * the resulting region is twice the pitch of the original region, since + * each row in the Y-tiled view corresponds to two rows in the actual + * W-tiled surface. So we need to correct the pitch before computing + * the offsets. + */ + pitch *= 2; + } + + switch (tiling) { + default: + assert(false); + case I915_TILING_NONE: + return y * pitch + x * cpp; + case I915_TILING_X: + assert((x % (512 / cpp)) == 0); + assert((y % 8) == 0); + return y * pitch + x / (512 / cpp) * 4096; + case I915_TILING_Y: + assert((x % (128 / cpp)) == 0); + assert((y % 32) == 0); + return y * pitch + x / (128 / cpp) * 4096; + } +} diff --git a/src/mesa/drivers/dri/i915/intel_regions.h b/src/mesa/drivers/dri/i915/intel_regions.h new file mode 100644 index 0000000..1fb6b27 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_regions.h @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_REGIONS_H +#define INTEL_REGIONS_H + +/** @file intel_regions.h + * + * Structure definitions and prototypes for intel_region handling, + * which is the basic structure for rectangular collections of pixels + * stored in a drm_intel_bo. + */ + +#include +#include + +#include "main/mtypes.h" +#include "intel_bufmgr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct intel_context; +struct intel_screen; +struct intel_buffer_object; + +/** + * A layer on top of the bufmgr buffers that adds a few useful things: + * + * - Refcounting for local buffer references. + * - Refcounting for buffer maps + * - Buffer dimensions - pitch and height. + * - Blitter commands for copying 2D regions between buffers. (really???) + */ +struct intel_region +{ + drm_intel_bo *bo; /**< buffer manager's buffer */ + GLuint refcount; /**< Reference count for region */ + GLuint cpp; /**< bytes per pixel */ + GLuint width; /**< in pixels */ + GLuint height; /**< in pixels */ + GLuint pitch; /**< in bytes */ + + uint32_t tiling; /**< Which tiling mode the region is in */ + + uint32_t name; /**< Global name for the bo */ +}; + + +/* Allocate a refcounted region. Pointers to regions should only be + * copied by calling intel_reference_region(). + */ +struct intel_region *intel_region_alloc(struct intel_screen *screen, + uint32_t tiling, + GLuint cpp, GLuint width, + GLuint height, + bool expect_accelerated_upload); + +struct intel_region * +intel_region_alloc_for_handle(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + unsigned int handle, const char *name); + +struct intel_region * +intel_region_alloc_for_fd(struct intel_screen *screen, + GLuint cpp, + GLuint width, GLuint height, GLuint pitch, + int fd, const char *name); + +bool +intel_region_flink(struct intel_region *region, uint32_t *name); + +void intel_region_reference(struct intel_region **dst, + struct intel_region *src); + +void intel_region_release(struct intel_region **ib); + +void intel_recreate_static_regions(struct intel_context *intel); + +void +intel_region_get_tile_masks(struct intel_region *region, + uint32_t *mask_x, uint32_t *mask_y, + bool map_stencil_as_y_tiled); + +uint32_t +intel_region_get_aligned_offset(struct intel_region *region, uint32_t x, + uint32_t y, bool map_stencil_as_y_tiled); + +/** + * Used with images created with image_from_names + * to help support planar images. + */ +struct intel_image_format { + int fourcc; + int components; + int nplanes; + struct { + int buffer_index; + int width_shift; + int height_shift; + uint32_t dri_format; + int cpp; + } planes[3]; +}; + +struct __DRIimageRec { + struct intel_region *region; + GLenum internal_format; + uint32_t dri_format; + GLuint format; + uint32_t offset; + + /* + * Need to save these here between calls to + * image_from_names and calls to image_from_planar. + */ + uint32_t strides[3]; + uint32_t offsets[3]; + struct intel_image_format *planar_format; + + /* particular miptree level */ + GLuint width; + GLuint height; + GLuint tile_x; + GLuint tile_y; + bool has_depthstencil; + + void *data; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_resolve_map.c b/src/mesa/drivers/dri/i915/intel_resolve_map.c deleted file mode 120000 index 77e50fb..0000000 --- a/src/mesa/drivers/dri/i915/intel_resolve_map.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_resolve_map.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_resolve_map.c b/src/mesa/drivers/dri/i915/intel_resolve_map.c new file mode 100644 index 0000000..04b5c94 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_resolve_map.c @@ -0,0 +1,111 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "intel_resolve_map.h" + +#include +#include + +/** + * \brief Set that the miptree slice at (level, layer) needs a resolve. + * + * If a map element already exists with the given key, then the value is + * changed to the given value of \c need. + */ +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need) +{ + struct intel_resolve_map **tail = &head->next; + struct intel_resolve_map *prev = head; + + while (*tail) { + if ((*tail)->level == level && (*tail)->layer == layer) { + (*tail)->need = need; + return; + } + prev = *tail; + tail = &(*tail)->next; + } + + *tail = malloc(sizeof(**tail)); + (*tail)->prev = prev; + (*tail)->next = NULL; + (*tail)->level = level; + (*tail)->layer = layer; + (*tail)->need = need; +} + +/** + * \brief Get an element from the map. + * \return null if element is not contained in map. + */ +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer) +{ + struct intel_resolve_map *item = head->next; + + while (item) { + if (item->level == level && item->layer == layer) + break; + else + item = item->next; + } + + return item; +} + +/** + * \brief Remove and free an element from the map. + */ +void +intel_resolve_map_remove(struct intel_resolve_map *elem) +{ + if (elem->prev) + elem->prev->next = elem->next; + if (elem->next) + elem->next->prev = elem->prev; + free(elem); +} + +/** + * \brief Remove and free all elements of the map. + */ +void +intel_resolve_map_clear(struct intel_resolve_map *head) +{ + struct intel_resolve_map *next = head->next; + struct intel_resolve_map *trash; + + while (next) { + trash = next; + next = next->next; + free(trash); + } + + head->next = NULL; +} diff --git a/src/mesa/drivers/dri/i915/intel_resolve_map.h b/src/mesa/drivers/dri/i915/intel_resolve_map.h new file mode 100644 index 0000000..8504271 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_resolve_map.h @@ -0,0 +1,104 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * For an overview of the HiZ operations, see the following sections of the + * Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + * + * Of these, two get entered in the resolve map as needing to be done to the + * buffer: depth resolve and hiz resolve. + */ +enum gen6_hiz_op { + GEN6_HIZ_OP_DEPTH_CLEAR, + GEN6_HIZ_OP_DEPTH_RESOLVE, + GEN6_HIZ_OP_HIZ_RESOLVE, + GEN6_HIZ_OP_NONE, +}; + +/** + * \brief Map of miptree slices to needed resolves. + * + * The map is implemented as a linear doubly-linked list. + * + * In the intel_resolve_map*() functions, the \c head argument is not + * inspected for its data. It only serves as an anchor for the list. + * + * \par Design Discussion + * + * There are two possible ways to record which miptree slices need + * resolves. 1) Maintain a flag for every miptree slice in the texture, + * likely in intel_mipmap_level::slice, or 2) maintain a list of only + * those slices that need a resolve. + * + * Immediately before drawing, a full depth resolve performed on each + * enabled depth texture. If design 1 were chosen, then at each draw call + * it would be necessary to iterate over each miptree slice of each + * enabled depth texture in order to query if each slice needed a resolve. + * In the worst case, this would require 2^16 iterations: 16 texture + * units, 16 miplevels, and 256 depth layers (assuming maximums for OpenGL + * 2.1). + * + * By choosing design 2, the number of iterations is exactly the minimum + * necessary. + */ +struct intel_resolve_map { + uint32_t level; + uint32_t layer; + enum gen6_hiz_op need; + + struct intel_resolve_map *next; + struct intel_resolve_map *prev; +}; + +void +intel_resolve_map_set(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer, + enum gen6_hiz_op need); + +struct intel_resolve_map* +intel_resolve_map_get(struct intel_resolve_map *head, + uint32_t level, + uint32_t layer); + +void +intel_resolve_map_remove(struct intel_resolve_map *elem); + +void +intel_resolve_map_clear(struct intel_resolve_map *head); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c deleted file mode 120000 index f2db482..0000000 --- a/src/mesa/drivers/dri/i915/intel_screen.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_screen.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c new file mode 100644 index 0000000..60a69a6 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_screen.c @@ -0,0 +1,1409 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include +#include +#include "main/glheader.h" +#include "main/context.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/texobj.h" +#include "main/hash.h" +#include "main/fbobject.h" +#include "main/version.h" +#include "swrast/s_renderbuffer.h" + +#include "utils.h" +#include "xmlpool.h" + +PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN + DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC) + /* Options correspond to DRI_CONF_BO_REUSE_DISABLED, + * DRI_CONF_BO_REUSE_ALL + */ + DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1") + DRI_CONF_DESC_BEGIN(en, "Buffer object reuse") + DRI_CONF_ENUM(0, "Disable buffer object reuse") + DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") + DRI_CONF_DESC_END + DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(hiz, "true") + DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+") + DRI_CONF_OPT_END + + DRI_CONF_OPT_BEGIN_B(early_z, "false") + DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") + DRI_CONF_OPT_END + + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY + DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_ALLOW_LARGE_TEXTURES(2) + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST("false") + DRI_CONF_ALWAYS_FLUSH_BATCH("false") + DRI_CONF_ALWAYS_FLUSH_CACHE("false") + DRI_CONF_DISABLE_THROTTLING("false") + DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false") + DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false") + DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false") + + DRI_CONF_OPT_BEGIN_B(shader_precompile, "true") + DRI_CONF_DESC(en, "Perform code generation at shader link time.") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END +DRI_CONF_END; + +const GLuint __driNConfigOptions = 14; + +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" +#include "intel_screen.h" +#include "intel_tex.h" +#include "intel_regions.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +#include "i915_drm.h" + +#ifdef USE_NEW_INTERFACE +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; +#endif /*USE_NEW_INTERFACE */ + +/** + * For debugging purposes, this returns a time in seconds. + */ +double +get_time(void) +{ + struct timespec tp; + + clock_gettime(CLOCK_MONOTONIC, &tp); + + return tp.tv_sec + tp.tv_nsec / 1000000000.0; +} + +void +aub_dump_bmp(struct gl_context *ctx) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + + for (int i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct intel_renderbuffer *irb = + intel_renderbuffer(fb->_ColorDrawBuffers[i]); + + if (irb && irb->mt) { + enum aub_dump_bmp_format format; + + switch (irb->Base.Base.Format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + format = AUB_DUMP_BMP_FORMAT_ARGB_8888; + break; + default: + continue; + } + + assert(irb->mt->region->pitch % irb->mt->region->cpp == 0); + drm_intel_gem_bo_aub_dump_bmp(irb->mt->region->bo, + irb->draw_x, + irb->draw_y, + irb->Base.Base.Width, + irb->Base.Base.Height, + format, + irb->mt->region->pitch, + 0); + } + } +} + +static const __DRItexBufferExtension intelTexBufferExtension = { + .base = { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + + .setTexBuffer = intelSetTexBuffer, + .setTexBuffer2 = intelSetTexBuffer2, + .releaseTexBuffer = NULL, +}; + +static void +intelDRI2Flush(__DRIdrawable *drawable) +{ + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); + if (intel == NULL) + return; + + if (intel->gen < 4) + INTEL_FIREVERTICES(intel); + + intel_resolve_for_dri2_flush(intel, drawable); + intel->need_throttle = true; + + if (intel->batch.used) + intel_batchbuffer_flush(intel); + + if (INTEL_DEBUG & DEBUG_AUB) { + aub_dump_bmp(ctx); + } +} + +static const struct __DRI2flushExtensionRec intelFlushExtension = { + .base = { __DRI2_FLUSH, 3 }, + + .flush = intelDRI2Flush, + .invalidate = dri2InvalidateDrawable, +}; + +static struct intel_image_format intel_image_formats[] = { + { __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }, + + { __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } }, + + { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } }, + + { __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + + { __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, + { 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } }, + + /* For YUYV buffers, we set up two overlapping DRI images and treat + * them as planar buffers in the compositors. Plane 0 is GR88 and + * samples YU or YV pairs and places Y into the R component, while + * plane 1 is ARGB and samples YUYV clusters and places pairs and + * places U into the G component and V into A. This lets the + * texture sampler interpolate the Y components correctly when + * sampling from plane 0, and interpolate U and V correctly when + * sampling from plane 1. */ + { __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2, + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, + { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } } +}; + +static __DRIimage * +intel_allocate_image(int dri_format, void *loaderPrivate) +{ + __DRIimage *image; + + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + image->dri_format = dri_format; + image->offset = 0; + + switch (dri_format) { + case __DRI_IMAGE_FORMAT_RGB565: + image->format = MESA_FORMAT_RGB565; + break; + case __DRI_IMAGE_FORMAT_XRGB8888: + image->format = MESA_FORMAT_XRGB8888; + break; + case __DRI_IMAGE_FORMAT_ARGB8888: + image->format = MESA_FORMAT_ARGB8888; + break; + case __DRI_IMAGE_FORMAT_ABGR8888: + image->format = MESA_FORMAT_RGBA8888_REV; + break; + case __DRI_IMAGE_FORMAT_XBGR8888: + image->format = MESA_FORMAT_RGBX8888_REV; + break; + case __DRI_IMAGE_FORMAT_R8: + image->format = MESA_FORMAT_R8; + break; + case __DRI_IMAGE_FORMAT_GR88: + image->format = MESA_FORMAT_GR88; + break; + case __DRI_IMAGE_FORMAT_NONE: + image->format = MESA_FORMAT_NONE; + break; + default: + free(image); + return NULL; + } + + image->internal_format = _mesa_get_format_base_format(image->format); + image->data = loaderPrivate; + + return image; +} + +/** + * Sets up a DRIImage structure to point to our shared image in a region + */ +static void +intel_setup_image_from_mipmap_tree(struct intel_context *intel, __DRIimage *image, + struct intel_mipmap_tree *mt, GLuint level, + GLuint zoffset) +{ + unsigned int draw_x, draw_y; + uint32_t mask_x, mask_y; + + intel_miptree_make_shareable(intel, mt); + + intel_miptree_check_level_layer(mt, level, zoffset); + + intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false); + intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y); + + image->width = mt->level[level].width; + image->height = mt->level[level].height; + image->tile_x = draw_x & mask_x; + image->tile_y = draw_y & mask_y; + + image->offset = intel_region_get_aligned_offset(mt->region, + draw_x & ~mask_x, + draw_y & ~mask_y, + false); + + intel_region_reference(&image->region, mt->region); +} + +static void +intel_setup_image_from_dimensions(__DRIimage *image) +{ + image->width = image->region->width; + image->height = image->region->height; + image->tile_x = 0; + image->tile_y = 0; + image->has_depthstencil = false; +} + +static inline uint32_t +intel_dri_format(GLuint format) +{ + switch (format) { + case MESA_FORMAT_RGB565: + return __DRI_IMAGE_FORMAT_RGB565; + case MESA_FORMAT_XRGB8888: + return __DRI_IMAGE_FORMAT_XRGB8888; + case MESA_FORMAT_ARGB8888: + return __DRI_IMAGE_FORMAT_ARGB8888; + case MESA_FORMAT_RGBA8888_REV: + return __DRI_IMAGE_FORMAT_ABGR8888; + case MESA_FORMAT_R8: + return __DRI_IMAGE_FORMAT_R8; + case MESA_FORMAT_RG88: + return __DRI_IMAGE_FORMAT_GR88; + } + + return MESA_FORMAT_NONE; +} + +static __DRIimage * +intel_create_image_from_name(__DRIscreen *screen, + int width, int height, int format, + int name, int pitch, void *loaderPrivate) +{ + struct intel_screen *intelScreen = screen->driverPrivate; + __DRIimage *image; + int cpp; + + image = intel_allocate_image(format, loaderPrivate); + if (image == NULL) + return NULL; + + if (image->format == MESA_FORMAT_NONE) + cpp = 1; + else + cpp = _mesa_get_format_bytes(image->format); + image->region = intel_region_alloc_for_handle(intelScreen, + cpp, width, height, + pitch * cpp, name, "image"); + if (image->region == NULL) { + free(image); + return NULL; + } + + intel_setup_image_from_dimensions(image); + + return image; +} + +static __DRIimage * +intel_create_image_from_renderbuffer(__DRIcontext *context, + int renderbuffer, void *loaderPrivate) +{ + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; + + rb = _mesa_lookup_renderbuffer(&intel->ctx, renderbuffer); + if (!rb) { + _mesa_error(&intel->ctx, + GL_INVALID_OPERATION, "glRenderbufferExternalMESA"); + return NULL; + } + + irb = intel_renderbuffer(rb); + intel_miptree_make_shareable(intel, irb->mt); + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + image->internal_format = rb->InternalFormat; + image->format = rb->Format; + image->offset = 0; + image->data = loaderPrivate; + intel_region_reference(&image->region, irb->mt->region); + intel_setup_image_from_dimensions(image); + image->dri_format = intel_dri_format(image->format); + image->has_depthstencil = irb->mt->stencil_mt? true : false; + + rb->NeedsFinishRenderTexture = true; + return image; +} + +static __DRIimage * +intel_create_image_from_texture(__DRIcontext *context, int target, + unsigned texture, int zoffset, + int level, + unsigned *error, + void *loaderPrivate) +{ + __DRIimage *image; + struct intel_context *intel = context->driverPrivate; + struct gl_texture_object *obj; + struct intel_texture_object *iobj; + GLuint face = 0; + + obj = _mesa_lookup_texture(&intel->ctx, texture); + if (!obj || obj->Target != target) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (target == GL_TEXTURE_CUBE_MAP) + face = zoffset; + + _mesa_test_texobj_completeness(&intel->ctx, obj); + iobj = intel_texture_object(obj); + if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + return NULL; + } + + if (level < obj->BaseLevel || level > obj->_MaxLevel) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + + if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) { + *error = __DRI_IMAGE_ERROR_BAD_MATCH; + return NULL; + } + image = calloc(1, sizeof *image); + if (image == NULL) { + *error = __DRI_IMAGE_ERROR_BAD_ALLOC; + return NULL; + } + + image->internal_format = obj->Image[face][level]->InternalFormat; + image->format = obj->Image[face][level]->TexFormat; + image->data = loaderPrivate; + intel_setup_image_from_mipmap_tree(intel, image, iobj->mt, level, zoffset); + image->dri_format = intel_dri_format(image->format); + image->has_depthstencil = iobj->mt->stencil_mt? true : false; + if (image->dri_format == MESA_FORMAT_NONE) { + *error = __DRI_IMAGE_ERROR_BAD_PARAMETER; + free(image); + return NULL; + } + + *error = __DRI_IMAGE_ERROR_SUCCESS; + return image; +} + +static void +intel_destroy_image(__DRIimage *image) +{ + intel_region_release(&image->region); + free(image); +} + +static __DRIimage * +intel_create_image(__DRIscreen *screen, + int width, int height, int format, + unsigned int use, + void *loaderPrivate) +{ + __DRIimage *image; + struct intel_screen *intelScreen = screen->driverPrivate; + uint32_t tiling; + int cpp; + + tiling = I915_TILING_X; + if (use & __DRI_IMAGE_USE_CURSOR) { + if (width != 64 || height != 64) + return NULL; + tiling = I915_TILING_NONE; + } + + image = intel_allocate_image(format, loaderPrivate); + if (image == NULL) + return NULL; + + cpp = _mesa_get_format_bytes(image->format); + image->region = + intel_region_alloc(intelScreen, tiling, cpp, width, height, true); + if (image->region == NULL) { + free(image); + return NULL; + } + + intel_setup_image_from_dimensions(image); + + return image; +} + +static GLboolean +intel_query_image(__DRIimage *image, int attrib, int *value) +{ + switch (attrib) { + case __DRI_IMAGE_ATTRIB_STRIDE: + *value = image->region->pitch; + return true; + case __DRI_IMAGE_ATTRIB_HANDLE: + *value = image->region->bo->handle; + return true; + case __DRI_IMAGE_ATTRIB_NAME: + return intel_region_flink(image->region, (uint32_t *) value); + case __DRI_IMAGE_ATTRIB_FORMAT: + *value = image->dri_format; + return true; + case __DRI_IMAGE_ATTRIB_WIDTH: + *value = image->region->width; + return true; + case __DRI_IMAGE_ATTRIB_HEIGHT: + *value = image->region->height; + return true; + case __DRI_IMAGE_ATTRIB_COMPONENTS: + if (image->planar_format == NULL) + return false; + *value = image->planar_format->components; + return true; + case __DRI_IMAGE_ATTRIB_FD: + if (drm_intel_bo_gem_export_to_prime(image->region->bo, value) == 0) + return true; + return false; + default: + return false; + } +} + +static __DRIimage * +intel_dup_image(__DRIimage *orig_image, void *loaderPrivate) +{ + __DRIimage *image; + + image = calloc(1, sizeof *image); + if (image == NULL) + return NULL; + + intel_region_reference(&image->region, orig_image->region); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->internal_format = orig_image->internal_format; + image->planar_format = orig_image->planar_format; + image->dri_format = orig_image->dri_format; + image->format = orig_image->format; + image->offset = orig_image->offset; + image->width = orig_image->width; + image->height = orig_image->height; + image->tile_x = orig_image->tile_x; + image->tile_y = orig_image->tile_y; + image->has_depthstencil = orig_image->has_depthstencil; + image->data = loaderPrivate; + + memcpy(image->strides, orig_image->strides, sizeof(image->strides)); + memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets)); + + return image; +} + +static GLboolean +intel_validate_usage(__DRIimage *image, unsigned int use) +{ + if (use & __DRI_IMAGE_USE_CURSOR) { + if (image->region->width != 64 || image->region->height != 64) + return GL_FALSE; + } + + return GL_TRUE; +} + +static __DRIimage * +intel_create_image_from_names(__DRIscreen *screen, + int width, int height, int fourcc, + int *names, int num_names, + int *strides, int *offsets, + void *loaderPrivate) +{ + struct intel_image_format *f = NULL; + __DRIimage *image; + int i, index; + + if (screen == NULL || names == NULL || num_names != 1) + return NULL; + + for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + if (intel_image_formats[i].fourcc == fourcc) { + f = &intel_image_formats[i]; + } + } + + if (f == NULL) + return NULL; + + image = intel_create_image_from_name(screen, width, height, + __DRI_IMAGE_FORMAT_NONE, + names[0], strides[0], + loaderPrivate); + + if (image == NULL) + return NULL; + + image->planar_format = f; + for (i = 0; i < f->nplanes; i++) { + index = f->planes[i].buffer_index; + image->offsets[index] = offsets[index]; + image->strides[index] = strides[index]; + } + + return image; +} + +static __DRIimage * +intel_create_image_from_fds(__DRIscreen *screen, + int width, int height, int fourcc, + int *fds, int num_fds, int *strides, int *offsets, + void *loaderPrivate) +{ + struct intel_screen *intelScreen = screen->driverPrivate; + struct intel_image_format *f = NULL; + __DRIimage *image; + int i, index; + + if (fds == NULL || num_fds != 1) + return NULL; + + for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) { + if (intel_image_formats[i].fourcc == fourcc) { + f = &intel_image_formats[i]; + } + } + + if (f == NULL) + return NULL; + + image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate); + if (image == NULL) + return NULL; + + image->region = intel_region_alloc_for_fd(intelScreen, + 1, width, height, + strides[0], fds[0], "image"); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->planar_format = f; + for (i = 0; i < f->nplanes; i++) { + index = f->planes[i].buffer_index; + image->offsets[index] = offsets[index]; + image->strides[index] = strides[index]; + } + + return image; +} + + +static __DRIimage * +intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate) +{ + int width, height, offset, stride, dri_format, index; + struct intel_image_format *f; + uint32_t mask_x, mask_y; + __DRIimage *image; + + if (parent == NULL || parent->planar_format == NULL) + return NULL; + + f = parent->planar_format; + + if (plane >= f->nplanes) + return NULL; + + width = parent->region->width >> f->planes[plane].width_shift; + height = parent->region->height >> f->planes[plane].height_shift; + dri_format = f->planes[plane].dri_format; + index = f->planes[plane].buffer_index; + offset = parent->offsets[index]; + stride = parent->strides[index]; + + image = intel_allocate_image(dri_format, loaderPrivate); + if (image == NULL) + return NULL; + + if (offset + height * stride > parent->region->bo->size) { + _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds"); + free(image); + return NULL; + } + + image->region = calloc(sizeof(*image->region), 1); + if (image->region == NULL) { + free(image); + return NULL; + } + + image->region->cpp = _mesa_get_format_bytes(image->format); + image->region->width = width; + image->region->height = height; + image->region->pitch = stride; + image->region->refcount = 1; + image->region->bo = parent->region->bo; + drm_intel_bo_reference(image->region->bo); + image->region->tiling = parent->region->tiling; + image->offset = offset; + intel_setup_image_from_dimensions(image); + + intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false); + if (offset & mask_x) + _mesa_warning(NULL, + "intel_create_sub_image: offset not on tile boundary"); + + return image; +} + +static struct __DRIimageExtensionRec intelImageExtension = { + .base = { __DRI_IMAGE, 7 }, + + .createImageFromName = intel_create_image_from_name, + .createImageFromRenderbuffer = intel_create_image_from_renderbuffer, + .destroyImage = intel_destroy_image, + .createImage = intel_create_image, + .queryImage = intel_query_image, + .dupImage = intel_dup_image, + .validateUsage = intel_validate_usage, + .createImageFromNames = intel_create_image_from_names, + .fromPlanar = intel_from_planar, + .createImageFromTexture = intel_create_image_from_texture, + .createImageFromFds = intel_create_image_from_fds +}; + +static const __DRIextension *intelScreenExtensions[] = { + &intelTexBufferExtension.base, + &intelFlushExtension.base, + &intelImageExtension.base, + &dri2ConfigQueryExtension.base, + NULL +}; + +static bool +intel_get_param(__DRIscreen *psp, int param, int *value) +{ + int ret; + struct drm_i915_getparam gp; + + memset(&gp, 0, sizeof(gp)); + gp.param = param; + gp.value = value; + + ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + if (ret != -EINVAL) + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); + return false; + } + + return true; +} + +static bool +intel_get_boolean(__DRIscreen *psp, int param) +{ + int value = 0; + return intel_get_param(psp, param, &value) && value; +} + +static void +intelDestroyScreen(__DRIscreen * sPriv) +{ + struct intel_screen *intelScreen = sPriv->driverPrivate; + + dri_bufmgr_destroy(intelScreen->bufmgr); + driDestroyOptionInfo(&intelScreen->optionCache); + + free(intelScreen); + sPriv->driverPrivate = NULL; +} + + +/** + * This is called when we need to set up GL rendering to a new X window. + */ +static GLboolean +intelCreateBuffer(__DRIscreen * driScrnPriv, + __DRIdrawable * driDrawPriv, + const struct gl_config * mesaVis, GLboolean isPixmap) +{ + struct intel_renderbuffer *rb; + struct intel_screen *screen = (struct intel_screen*) driScrnPriv->driverPrivate; + gl_format rgbFormat; + unsigned num_samples = intel_quantize_num_samples(screen, mesaVis->samples); + struct gl_framebuffer *fb; + + if (isPixmap) + return false; + + fb = CALLOC_STRUCT(gl_framebuffer); + if (!fb) + return false; + + _mesa_initialize_window_framebuffer(fb, mesaVis); + + if (mesaVis->redBits == 5) + rgbFormat = MESA_FORMAT_RGB565; + else if (mesaVis->sRGBCapable) + rgbFormat = MESA_FORMAT_SARGB8; + else if (mesaVis->alphaBits == 0) + rgbFormat = MESA_FORMAT_XRGB8888; + else { + if (screen->gen >= 4) { + rgbFormat = MESA_FORMAT_SARGB8; + fb->Visual.sRGBCapable = true; + } else { + rgbFormat = MESA_FORMAT_ARGB8888; + } + + } + + /* setup the hardware-based renderbuffers */ + rb = intel_create_renderbuffer(rgbFormat, num_samples); + _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base); + + if (mesaVis->doubleBufferMode) { + rb = intel_create_renderbuffer(rgbFormat, num_samples); + _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base); + } + + /* + * Assert here that the gl_config has an expected depth/stencil bit + * combination: one of d24/s8, d16/s0, d0/s0. (See intelInitScreen2(), + * which constructs the advertised configs.) + */ + if (mesaVis->depthBits == 24) { + assert(mesaVis->stencilBits == 8); + + if (screen->hw_has_separate_stencil) { + rb = intel_create_private_renderbuffer(MESA_FORMAT_X8_Z24, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + rb = intel_create_private_renderbuffer(MESA_FORMAT_S8, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + } else { + /* + * Use combined depth/stencil. Note that the renderbuffer is + * attached to two attachment points. + */ + rb = intel_create_private_renderbuffer(MESA_FORMAT_S8_Z24, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base); + } + } + else if (mesaVis->depthBits == 16) { + assert(mesaVis->stencilBits == 0); + rb = intel_create_private_renderbuffer(MESA_FORMAT_Z16, + num_samples); + _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base); + } + else { + assert(mesaVis->depthBits == 0); + assert(mesaVis->stencilBits == 0); + } + + /* now add any/all software-based renderbuffers we may need */ + _swrast_add_soft_renderbuffers(fb, + false, /* never sw color */ + false, /* never sw depth */ + false, /* never sw stencil */ + mesaVis->accumRedBits > 0, + false, /* never sw alpha */ + false /* never sw aux */ ); + driDrawPriv->driverPrivate = fb; + + return true; +} + +static void +intelDestroyBuffer(__DRIdrawable * driDrawPriv) +{ + struct gl_framebuffer *fb = driDrawPriv->driverPrivate; + + _mesa_reference_framebuffer(&fb, NULL); +} + +/* There are probably better ways to do this, such as an + * init-designated function to register chipids and createcontext + * functions. + */ +extern bool +i830CreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + unsigned *error, + void *sharedContextPrivate); + +extern bool +i915CreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + unsigned *error, + void *sharedContextPrivate); +extern bool +brwCreateContext(int api, + const struct gl_config *mesaVis, + __DRIcontext *driContextPriv, + unsigned major_version, + unsigned minor_version, + uint32_t flags, + unsigned *error, + void *sharedContextPrivate); + +static GLboolean +intelCreateContext(gl_api api, + const struct gl_config * mesaVis, + __DRIcontext * driContextPriv, + unsigned major_version, + unsigned minor_version, + uint32_t flags, + unsigned *error, + void *sharedContextPrivate) +{ + bool success = false; + +#ifdef I915 + __DRIscreen *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = sPriv->driverPrivate; + + if (IS_9XX(intelScreen->deviceID)) { + success = i915CreateContext(api, mesaVis, driContextPriv, + major_version, minor_version, error, + sharedContextPrivate); + } else { + intelScreen->no_vbo = true; + success = i830CreateContext(api, mesaVis, driContextPriv, + major_version, minor_version, error, + sharedContextPrivate); + } +#else + success = brwCreateContext(api, mesaVis, + driContextPriv, + major_version, minor_version, flags, + error, sharedContextPrivate); +#endif + + if (success) + return true; + + if (driContextPriv->driverPrivate != NULL) + intelDestroyContext(driContextPriv); + + return false; +} + +static bool +intel_init_bufmgr(struct intel_screen *intelScreen) +{ + __DRIscreen *spriv = intelScreen->driScrnPriv; + + intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL; + + intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ); + if (intelScreen->bufmgr == NULL) { + fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n", + __func__, __LINE__); + return false; + } + + drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr); + + if (!intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA)) { + fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__); + return false; + } + + return true; +} + +/** + * Override intel_screen.hw_has_separate_stencil with environment variable + * INTEL_SEPARATE_STENCIL. + * + * Valid values for INTEL_SEPARATE_STENCIL are "0" and "1". If an invalid + * valid value is encountered, a warning is emitted and INTEL_SEPARATE_STENCIL + * is ignored. + */ +static void +intel_override_separate_stencil(struct intel_screen *screen) +{ + const char *s = getenv("INTEL_SEPARATE_STENCIL"); + if (!s) { + return; + } else if (!strncmp("0", s, 2)) { + screen->hw_has_separate_stencil = false; + } else if (!strncmp("1", s, 2)) { + screen->hw_has_separate_stencil = true; + } else { + fprintf(stderr, + "warning: env variable INTEL_SEPARATE_STENCIL=\"%s\" has " + "invalid value and is ignored", s); + } +} + +static bool +intel_detect_swizzling(struct intel_screen *screen) +{ + drm_intel_bo *buffer; + unsigned long flags = 0; + unsigned long aligned_pitch; + uint32_t tiling = I915_TILING_X; + uint32_t swizzle_mode = 0; + + buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test", + 64, 64, 4, + &tiling, &aligned_pitch, flags); + if (buffer == NULL) + return false; + + drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode); + drm_intel_bo_unreference(buffer); + + if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE) + return false; + else + return true; +} + +static __DRIconfig** +intel_screen_make_configs(__DRIscreen *dri_screen) +{ + static const gl_format formats[] = { + MESA_FORMAT_RGB565, + MESA_FORMAT_ARGB8888 + }; + + /* GLX_SWAP_COPY_OML is not supported due to page flipping. */ + static const GLenum back_buffer_modes[] = { + GLX_SWAP_UNDEFINED_OML, GLX_NONE, + }; + + static const uint8_t singlesample_samples[1] = {0}; + static const uint8_t multisample_samples[2] = {4, 8}; + + struct intel_screen *screen = dri_screen->driverPrivate; + uint8_t depth_bits[4], stencil_bits[4]; + __DRIconfig **configs = NULL; + + /* Generate singlesample configs without accumulation buffer. */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + __DRIconfig **new_configs; + int num_depth_stencil_bits = 2; + + /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil + * buffer that has a different number of bits per pixel than the color + * buffer, gen >= 6 supports this. + */ + depth_bits[0] = 0; + stencil_bits[0] = 0; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + if (screen->gen >= 6) { + depth_bits[2] = 24; + stencil_bits[2] = 8; + num_depth_stencil_bits = 3; + } + } else { + depth_bits[1] = 24; + stencil_bits[1] = 8; + } + + new_configs = driCreateConfigs(formats[i], + depth_bits, + stencil_bits, + num_depth_stencil_bits, + back_buffer_modes, 2, + singlesample_samples, 1, + false); + configs = driConcatConfigs(configs, new_configs); + } + + /* Generate the minimum possible set of configs that include an + * accumulation buffer. + */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + __DRIconfig **new_configs; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[0] = 16; + stencil_bits[0] = 0; + } else { + depth_bits[0] = 24; + stencil_bits[0] = 8; + } + + new_configs = driCreateConfigs(formats[i], + depth_bits, stencil_bits, 1, + back_buffer_modes, 1, + singlesample_samples, 1, + true); + configs = driConcatConfigs(configs, new_configs); + } + + /* Generate multisample configs. + * + * This loop breaks early, and hence is a no-op, on gen < 6. + * + * Multisample configs must follow the singlesample configs in order to + * work around an X server bug present in 1.12. The X server chooses to + * associate the first listed RGBA888-Z24S8 config, regardless of its + * sample count, with the 32-bit depth visual used for compositing. + * + * Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are + * supported. Singlebuffer configs are not supported because no one wants + * them. + */ + for (int i = 0; i < ARRAY_SIZE(formats); i++) { + if (screen->gen < 6) + break; + + __DRIconfig **new_configs; + const int num_depth_stencil_bits = 2; + int num_msaa_modes = 0; + + depth_bits[0] = 0; + stencil_bits[0] = 0; + + if (formats[i] == MESA_FORMAT_RGB565) { + depth_bits[1] = 16; + stencil_bits[1] = 0; + } else { + depth_bits[1] = 24; + stencil_bits[1] = 8; + } + + if (screen->gen >= 7) + num_msaa_modes = 2; + else if (screen->gen == 6) + num_msaa_modes = 1; + + new_configs = driCreateConfigs(formats[i], + depth_bits, + stencil_bits, + num_depth_stencil_bits, + back_buffer_modes, 1, + multisample_samples, + num_msaa_modes, + false); + configs = driConcatConfigs(configs, new_configs); + } + + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + return configs; +} + +static void +set_max_gl_versions(struct intel_screen *screen) +{ + int gl_version_override = _mesa_get_gl_version_override(); + + switch (screen->gen) { + case 7: + screen->max_gl_core_version = 31; + screen->max_gl_compat_version = 30; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 30; + break; + case 6: + screen->max_gl_core_version = 31; + screen->max_gl_compat_version = 30; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 30; + break; + case 5: + case 4: + screen->max_gl_core_version = 0; + screen->max_gl_compat_version = 21; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 20; + break; + case 3: { + screen->max_gl_core_version = 0; + screen->max_gl_es1_version = 11; + screen->max_gl_compat_version = 21; + screen->max_gl_es2_version = 20; + + break; + } + case 2: + screen->max_gl_core_version = 0; + screen->max_gl_compat_version = 13; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 0; + break; + default: + assert(!"unrecognized intel_screen::gen"); + break; + } + + if (gl_version_override >= 31) { + screen->max_gl_core_version = MAX2(screen->max_gl_core_version, + gl_version_override); + } else { + screen->max_gl_compat_version = MAX2(screen->max_gl_compat_version, + gl_version_override); + } + +#ifndef FEATURE_ES1 + screen->max_gl_es1_version = 0; +#endif + +#ifndef FEATURE_ES2 + screen->max_gl_es2_version = 0; +#endif +} + +/** + * This is the driver specific part of the createNewScreen entry point. + * Called when using DRI2. + * + * \return the struct gl_config supported by this driver + */ +static const +__DRIconfig **intelInitScreen2(__DRIscreen *psp) +{ + struct intel_screen *intelScreen; + + if (psp->dri2.loader->base.version <= 2 || + psp->dri2.loader->getBuffersWithFormat == NULL) { + fprintf(stderr, + "\nERROR! DRI2 loader with getBuffersWithFormat() " + "support required\n"); + return false; + } + + /* Allocate the private area */ + intelScreen = calloc(1, sizeof *intelScreen); + if (!intelScreen) { + fprintf(stderr, "\nERROR! Allocating private area failed\n"); + return false; + } + /* parse information in __driConfigOptions */ + driParseOptionInfo(&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + intelScreen->driScrnPriv = psp; + psp->driverPrivate = (void *) intelScreen; + + if (!intel_init_bufmgr(intelScreen)) + return false; + + intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr); + + if (IS_GEN7(intelScreen->deviceID)) { + intelScreen->gen = 7; + } else if (IS_GEN6(intelScreen->deviceID)) { + intelScreen->gen = 6; + } else if (IS_GEN5(intelScreen->deviceID)) { + intelScreen->gen = 5; + } else if (IS_965(intelScreen->deviceID)) { + intelScreen->gen = 4; + } else if (IS_9XX(intelScreen->deviceID)) { + intelScreen->gen = 3; + } else { + intelScreen->gen = 2; + } + + intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6; + intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7; + + int has_llc = 0; + bool success = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_HAS_LLC, + &has_llc); + if (success && has_llc) + intelScreen->hw_has_llc = true; + else if (!success && intelScreen->gen >= 6) + intelScreen->hw_has_llc = true; + + intel_override_separate_stencil(intelScreen); + + intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); + + set_max_gl_versions(intelScreen); + + psp->api_mask = (1 << __DRI_API_OPENGL); + if (intelScreen->max_gl_core_version > 0) + psp->api_mask |= (1 << __DRI_API_OPENGL_CORE); + if (intelScreen->max_gl_es1_version > 0) + psp->api_mask |= (1 << __DRI_API_GLES); + if (intelScreen->max_gl_es2_version > 0) + psp->api_mask |= (1 << __DRI_API_GLES2); + if (intelScreen->max_gl_es2_version >= 30) + psp->api_mask |= (1 << __DRI_API_GLES3); + + psp->extensions = intelScreenExtensions; + + return (const __DRIconfig**) intel_screen_make_configs(psp); +} + +struct intel_buffer { + __DRIbuffer base; + struct intel_region *region; +}; + +static __DRIbuffer * +intelAllocateBuffer(__DRIscreen *screen, + unsigned attachment, unsigned format, + int width, int height) +{ + struct intel_buffer *intelBuffer; + struct intel_screen *intelScreen = screen->driverPrivate; + + assert(attachment == __DRI_BUFFER_FRONT_LEFT || + attachment == __DRI_BUFFER_BACK_LEFT); + + intelBuffer = calloc(1, sizeof *intelBuffer); + if (intelBuffer == NULL) + return NULL; + + /* The front and back buffers are color buffers, which are X tiled. */ + intelBuffer->region = intel_region_alloc(intelScreen, + I915_TILING_X, + format / 8, + width, + height, + true); + + if (intelBuffer->region == NULL) { + free(intelBuffer); + return NULL; + } + + intel_region_flink(intelBuffer->region, &intelBuffer->base.name); + + intelBuffer->base.attachment = attachment; + intelBuffer->base.cpp = intelBuffer->region->cpp; + intelBuffer->base.pitch = intelBuffer->region->pitch; + + return &intelBuffer->base; +} + +static void +intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer) +{ + struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer; + + intel_region_release(&intelBuffer->region); + free(intelBuffer); +} + + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = intelInitScreen2, + .DestroyScreen = intelDestroyScreen, + .CreateContext = intelCreateContext, + .DestroyContext = intelDestroyContext, + .CreateBuffer = intelCreateBuffer, + .DestroyBuffer = intelDestroyBuffer, + .MakeCurrent = intelMakeCurrent, + .UnbindContext = intelUnbindContext, + .AllocateBuffer = intelAllocateBuffer, + .ReleaseBuffer = intelReleaseBuffer +}; + +/* This is the table of extensions that the loader will dlsym() for. */ +PUBLIC const __DRIextension *__driDriverExtensions[] = { + &driCoreExtension.base, + &driDRI2Extension.base, + NULL +}; diff --git a/src/mesa/drivers/dri/i915/intel_screen.h b/src/mesa/drivers/dri/i915/intel_screen.h new file mode 100644 index 0000000..188e2c0 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_screen.h @@ -0,0 +1,89 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_INIT_H_ +#define _INTEL_INIT_H_ + +#include +#include +#include "dri_util.h" +#include "intel_bufmgr.h" +#include "i915_drm.h" +#include "xmlconfig.h" + +struct intel_screen +{ + int deviceID; + int gen; + + int max_gl_core_version; + int max_gl_compat_version; + int max_gl_es1_version; + int max_gl_es2_version; + + __DRIscreen *driScrnPriv; + + bool no_hw; + + /* + * The hardware hiz and separate stencil fields are needed in intel_screen, + * rather than solely in intel_context, because glXCreatePbuffer and + * glXCreatePixmap are not passed a GLXContext. + */ + bool hw_has_separate_stencil; + bool hw_must_use_separate_stencil; + + bool hw_has_llc; + bool hw_has_swizzling; + + bool no_vbo; + dri_bufmgr *bufmgr; + + /** + * A unique ID for shader programs. + */ + unsigned program_id; + + /** + * Configuration cache with default values for all contexts + */ + driOptionCache optionCache; +}; + +extern void intelDestroyContext(__DRIcontext * driContextPriv); + +extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv); + +extern GLboolean +intelMakeCurrent(__DRIcontext * driContextPriv, + __DRIdrawable * driDrawPriv, + __DRIdrawable * driReadPriv); + +double get_time(void); +void aub_dump_bmp(struct gl_context *ctx); + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c deleted file mode 120000 index 519672f..0000000 --- a/src/mesa/drivers/dri/i915/intel_state.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_state.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c new file mode 100644 index 0000000..6a817cd --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_state.c @@ -0,0 +1,195 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/dd.h" + +#include "intel_screen.h" +#include "intel_context.h" + +int +intel_translate_shadow_compare_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return COMPAREFUNC_ALWAYS; + case GL_LESS: + return COMPAREFUNC_LEQUAL; + case GL_LEQUAL: + return COMPAREFUNC_LESS; + case GL_GREATER: + return COMPAREFUNC_GEQUAL; + case GL_GEQUAL: + return COMPAREFUNC_GREATER; + case GL_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case GL_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_ALWAYS: + return COMPAREFUNC_NEVER; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +int +intel_translate_compare_func(GLenum func) +{ + switch (func) { + case GL_NEVER: + return COMPAREFUNC_NEVER; + case GL_LESS: + return COMPAREFUNC_LESS; + case GL_LEQUAL: + return COMPAREFUNC_LEQUAL; + case GL_GREATER: + return COMPAREFUNC_GREATER; + case GL_GEQUAL: + return COMPAREFUNC_GEQUAL; + case GL_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case GL_EQUAL: + return COMPAREFUNC_EQUAL; + case GL_ALWAYS: + return COMPAREFUNC_ALWAYS; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_ALWAYS; +} + +int +intel_translate_stencil_op(GLenum op) +{ + switch (op) { + case GL_KEEP: + return STENCILOP_KEEP; + case GL_ZERO: + return STENCILOP_ZERO; + case GL_REPLACE: + return STENCILOP_REPLACE; + case GL_INCR: + return STENCILOP_INCRSAT; + case GL_DECR: + return STENCILOP_DECRSAT; + case GL_INCR_WRAP: + return STENCILOP_INCR; + case GL_DECR_WRAP: + return STENCILOP_DECR; + case GL_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +int +intel_translate_blend_factor(GLenum factor) +{ + switch (factor) { + case GL_ZERO: + return BLENDFACT_ZERO; + case GL_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case GL_ONE: + return BLENDFACT_ONE; + case GL_SRC_COLOR: + return BLENDFACT_SRC_COLR; + case GL_ONE_MINUS_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case GL_DST_COLOR: + return BLENDFACT_DST_COLR; + case GL_ONE_MINUS_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case GL_ONE_MINUS_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return BLENDFACT_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return BLENDFACT_CONST_ALPHA; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + } + + fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor); + return BLENDFACT_ZERO; +} + +int +intel_translate_logic_op(GLenum opcode) +{ + switch (opcode) { + case GL_CLEAR: + return LOGICOP_CLEAR; + case GL_AND: + return LOGICOP_AND; + case GL_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case GL_COPY: + return LOGICOP_COPY; + case GL_COPY_INVERTED: + return LOGICOP_COPY_INV; + case GL_AND_INVERTED: + return LOGICOP_AND_INV; + case GL_NOOP: + return LOGICOP_NOOP; + case GL_XOR: + return LOGICOP_XOR; + case GL_OR: + return LOGICOP_OR; + case GL_OR_INVERTED: + return LOGICOP_OR_INV; + case GL_NOR: + return LOGICOP_NOR; + case GL_EQUIV: + return LOGICOP_EQUIV; + case GL_INVERT: + return LOGICOP_INV; + case GL_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case GL_NAND: + return LOGICOP_NAND; + case GL_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} diff --git a/src/mesa/drivers/dri/i915/intel_syncobj.c b/src/mesa/drivers/dri/i915/intel_syncobj.c deleted file mode 120000 index 0b2e56a..0000000 --- a/src/mesa/drivers/dri/i915/intel_syncobj.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_syncobj.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_syncobj.c b/src/mesa/drivers/dri/i915/intel_syncobj.c new file mode 100644 index 0000000..9657d9a --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_syncobj.c @@ -0,0 +1,124 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file intel_syncobj.c + * + * Support for ARB_sync + * + * ARB_sync is implemented by flushing the current batchbuffer and keeping a + * reference on it. We can then check for completion or wait for completion + * using the normal buffer object mechanisms. This does mean that if an + * application is using many sync objects, it will emit small batchbuffers + * which may end up being a significant overhead. In other tests of removing + * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant + * performance bottleneck, though. + */ + +#include "main/simple_list.h" +#include "main/imports.h" + +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +static struct gl_sync_object * +intel_new_sync_object(struct gl_context *ctx, GLuint id) +{ + struct intel_sync_object *sync; + + sync = calloc(1, sizeof(struct intel_sync_object)); + + return &sync->Base; +} + +static void +intel_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + drm_intel_bo_unreference(sync->bo); + free(sync); +} + +static void +intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, + GLenum condition, GLbitfield flags) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); + intel_batchbuffer_emit_mi_flush(intel); + + sync->bo = intel->batch.bo; + drm_intel_bo_reference(sync->bo); + + intel_flush(ctx); +} + +static void intel_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) { + s->StatusFlag = 1; + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + } +} + +/* We have nothing to do for WaitSync. Our GL command stream is sequential, + * so given that the sync object has already flushed the batchbuffer, + * any batchbuffers coming after this waitsync will naturally not occur until + * the previous one is done. + */ +static void intel_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s, + GLbitfield flags, GLuint64 timeout) +{ +} + +static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s) +{ + struct intel_sync_object *sync = (struct intel_sync_object *)s; + + if (sync->bo && !drm_intel_bo_busy(sync->bo)) { + drm_intel_bo_unreference(sync->bo); + sync->bo = NULL; + s->StatusFlag = 1; + } +} + +void intel_init_syncobj_functions(struct dd_function_table *functions) +{ + functions->NewSyncObject = intel_new_sync_object; + functions->DeleteSyncObject = intel_delete_sync_object; + functions->FenceSync = intel_fence_sync; + functions->CheckSync = intel_check_sync; + functions->ClientWaitSync = intel_client_wait_sync; + functions->ServerWaitSync = intel_server_wait_sync; +} diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c deleted file mode 120000 index d77ce74..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c new file mode 100644 index 0000000..24f13df --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex.c @@ -0,0 +1,189 @@ +#include "swrast/swrast.h" +#include "main/renderbuffer.h" +#include "main/texobj.h" +#include "main/teximage.h" +#include "main/mipmap.h" +#include "drivers/common/meta.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_tex.h" +#include "intel_fbo.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +static struct gl_texture_image * +intelNewTextureImage(struct gl_context * ctx) +{ + DBG("%s\n", __FUNCTION__); + (void) ctx; + return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image); +} + +static void +intelDeleteTextureImage(struct gl_context * ctx, struct gl_texture_image *img) +{ + /* nothing special (yet) for intel_texture_image */ + _mesa_delete_texture_image(ctx, img); +} + + +static struct gl_texture_object * +intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target) +{ + struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object); + + (void) ctx; + + DBG("%s\n", __FUNCTION__); + + if (obj == NULL) + return NULL; + + _mesa_initialize_texture_object(&obj->base, name, target); + + obj->needs_validate = true; + + return &obj->base; +} + +static void +intelDeleteTextureObject(struct gl_context *ctx, + struct gl_texture_object *texObj) +{ + struct intel_texture_object *intelObj = intel_texture_object(texObj); + + intel_miptree_release(&intelObj->mt); + _mesa_delete_texture_object(ctx, texObj); +} + +static GLboolean +intel_alloc_texture_image_buffer(struct gl_context *ctx, + struct gl_texture_image *image) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(image); + struct gl_texture_object *texobj = image->TexObject; + struct intel_texture_object *intel_texobj = intel_texture_object(texobj); + + assert(image->Border == 0); + + /* Quantize sample count */ + if (image->NumSamples) { + image->NumSamples = intel_quantize_num_samples(intel->intelScreen, image->NumSamples); + if (!image->NumSamples) + return false; + } + + /* Because the driver uses AllocTextureImageBuffer() internally, it may end + * up mismatched with FreeTextureImageBuffer(), but that is safe to call + * multiple times. + */ + ctx->Driver.FreeTextureImageBuffer(ctx, image); + + if (!_swrast_init_texture_image(image)) + return false; + + if (intel_texobj->mt && + intel_miptree_match_image(intel_texobj->mt, image)) { + intel_miptree_reference(&intel_image->mt, intel_texobj->mt); + DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n", + __FUNCTION__, texobj, image->Level, + image->Width, image->Height, image->Depth, intel_texobj->mt); + } else { + intel_image->mt = intel_miptree_create_for_teximage(intel, intel_texobj, + intel_image, + false); + + /* Even if the object currently has a mipmap tree associated + * with it, this one is a more likely candidate to represent the + * whole object since our level didn't fit what was there + * before, and any lower levels would fit into our miptree. + */ + intel_miptree_reference(&intel_texobj->mt, intel_image->mt); + + DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n", + __FUNCTION__, texobj, image->Level, + image->Width, image->Height, image->Depth, intel_image->mt); + } + + intel_texobj->needs_validate = true; + + return true; +} + +static void +intel_free_texture_image_buffer(struct gl_context * ctx, + struct gl_texture_image *texImage) +{ + struct intel_texture_image *intelImage = intel_texture_image(texImage); + + DBG("%s\n", __FUNCTION__); + + intel_miptree_release(&intelImage->mt); + + _swrast_free_texture_image_buffer(ctx, texImage); +} + +/** + * Map texture memory/buffer into user space. + * Note: the region of interest parameters are ignored here. + * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT + * \param mapOut returns start of mapping of region of interest + * \param rowStrideOut returns row stride in bytes + */ +static void +intel_map_texture_image(struct gl_context *ctx, + struct gl_texture_image *tex_image, + GLuint slice, + GLuint x, GLuint y, GLuint w, GLuint h, + GLbitfield mode, + GLubyte **map, + GLint *stride) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(tex_image); + struct intel_mipmap_tree *mt = intel_image->mt; + + /* Our texture data is always stored in a miptree. */ + assert(mt); + + /* Check that our caller wasn't confused about how to map a 1D texture. */ + assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY || + h == 1); + + /* intel_miptree_map operates on a unified "slice" number that references the + * cube face, since it's all just slices to the miptree code. + */ + if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) + slice = tex_image->Face; + + intel_miptree_map(intel, mt, tex_image->Level, slice, x, y, w, h, mode, + (void **)map, stride); +} + +static void +intel_unmap_texture_image(struct gl_context *ctx, + struct gl_texture_image *tex_image, GLuint slice) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(tex_image); + struct intel_mipmap_tree *mt = intel_image->mt; + + if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP) + slice = tex_image->Face; + + intel_miptree_unmap(intel, mt, tex_image->Level, slice); +} + +void +intelInitTextureFuncs(struct dd_function_table *functions) +{ + functions->NewTextureObject = intelNewTextureObject; + functions->NewTextureImage = intelNewTextureImage; + functions->DeleteTextureImage = intelDeleteTextureImage; + functions->DeleteTexture = intelDeleteTextureObject; + functions->AllocTextureImageBuffer = intel_alloc_texture_image_buffer; + functions->FreeTextureImageBuffer = intel_free_texture_image_buffer; + functions->MapTextureImage = intel_map_texture_image; + functions->UnmapTextureImage = intel_unmap_texture_image; +} diff --git a/src/mesa/drivers/dri/i915/intel_tex.h b/src/mesa/drivers/dri/i915/intel_tex.h new file mode 100644 index 0000000..a08fdf4 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex.h @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTELTEX_INC +#define INTELTEX_INC + +#include "main/mtypes.h" +#include "main/formats.h" +#include "intel_context.h" + +struct intel_renderbuffer; + +void intelInitTextureFuncs(struct dd_function_table *functions); + +void intelInitTextureImageFuncs(struct dd_function_table *functions); + +void intelInitTextureSubImageFuncs(struct dd_function_table *functions); + +void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); + +void intelSetTexBuffer(__DRIcontext *pDRICtx, + GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); + +struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + bool expect_accelerated_upload); + +GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); + +void intel_tex_map_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level, + GLbitfield mode); + +void intel_tex_unmap_level_images(struct intel_context *intel, + struct intel_texture_object *intelObj, + int level); + +bool +intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel, + struct intel_texture_image *image); + +bool +intel_texsubimage_tiled_memcpy(struct gl_context *ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + bool for_glTexImage); + +#endif diff --git a/src/mesa/drivers/dri/i915/intel_tex_copy.c b/src/mesa/drivers/dri/i915/intel_tex_copy.c deleted file mode 120000 index 87196c5..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex_copy.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex_copy.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex_copy.c b/src/mesa/drivers/dri/i915/intel_tex_copy.c new file mode 100644 index 0000000..d018cec --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_copy.c @@ -0,0 +1,132 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/mtypes.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/teximage.h" +#include "main/texstate.h" +#include "main/fbobject.h" + +#include "drivers/common/meta.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_regions.h" +#include "intel_fbo.h" +#include "intel_tex.h" +#include "intel_blit.h" +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + + +static bool +intel_copy_texsubimage(struct intel_context *intel, + struct intel_texture_image *intelImage, + GLint dstx, GLint dsty, GLint slice, + struct intel_renderbuffer *irb, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + const GLenum internalFormat = intelImage->base.Base.InternalFormat; + + intel_prepare_render(intel); + + /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if + * that renderbuffer is associated with the window system framebuffer), + * however the hardware blitter can't handle this case, so fall back to + * meta (which can, since it uses ReadPixels). + */ + if (irb->Base.Base.NumSamples != 0) + return false; + + /* glCopyTexSubImage() can't be called on a multisampled texture. */ + assert(intelImage->base.Base.NumSamples == 0); + + if (!intelImage->mt || !irb || !irb->mt) { + if (unlikely(INTEL_DEBUG & DEBUG_PERF)) + fprintf(stderr, "%s fail %p %p (0x%08x)\n", + __FUNCTION__, intelImage->mt, irb, internalFormat); + return false; + } + + /* blit from src buffer to texture */ + if (!intel_miptree_blit(intel, + irb->mt, irb->mt_level, irb->mt_layer, + x, y, irb->Base.Base.Name == 0, + intelImage->mt, intelImage->base.Base.Level, + intelImage->base.Base.Face + slice, + dstx, dsty, false, + width, height, GL_COPY)) { + return false; + } + + return true; +} + + +static void +intelCopyTexSubImage(struct gl_context *ctx, GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint slice, + struct gl_renderbuffer *rb, + GLint x, GLint y, + GLsizei width, GLsizei height) +{ + struct intel_context *intel = intel_context(ctx); + +#ifndef I915 + /* Try BLORP first. It can handle almost everything. */ + if (brw_blorp_copytexsubimage(intel, rb, texImage, slice, x, y, + xoffset, yoffset, width, height)) + return; +#endif + + /* Next, try the BLT engine. */ + if (intel_copy_texsubimage(intel, + intel_texture_image(texImage), + xoffset, yoffset, slice, + intel_renderbuffer(rb), x, y, width, height)) { + return; + } + + /* Finally, fall back to meta. This will likely be slow. */ + perf_debug("%s - fallback to swrast\n", __FUNCTION__); + _mesa_meta_CopyTexSubImage(ctx, dims, texImage, + xoffset, yoffset, slice, + rb, x, y, width, height); +} + + +void +intelInitTextureCopyImageFuncs(struct dd_function_table *functions) +{ + functions->CopyTexSubImage = intelCopyTexSubImage; +} diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c deleted file mode 120000 index 567abe4..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex_image.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex_image.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c new file mode 100644 index 0000000..b91b2b5 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_image.c @@ -0,0 +1,399 @@ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/enums.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/formats.h" +#include "main/image.h" +#include "main/pbo.h" +#include "main/renderbuffer.h" +#include "main/texcompress.h" +#include "main/texgetimage.h" +#include "main/texobj.h" +#include "main/teximage.h" +#include "main/texstore.h" + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_buffer_objects.h" +#include "intel_batchbuffer.h" +#include "intel_tex.h" +#include "intel_blit.h" +#include "intel_fbo.h" + +#ifndef I915 +#include "brw_context.h" +#endif + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/* Work back from the specified level of the image to the baselevel and create a + * miptree of that size. + */ +struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + bool expect_accelerated_upload) +{ + GLuint firstLevel; + GLuint lastLevel; + int width, height, depth; + GLuint i; + + intel_miptree_get_dimensions_for_image(&intelImage->base.Base, + &width, &height, &depth); + + DBG("%s\n", __FUNCTION__); + + if (intelImage->base.Base.Level > intelObj->base.BaseLevel && + (width == 1 || + (intelObj->base.Target != GL_TEXTURE_1D && height == 1) || + (intelObj->base.Target == GL_TEXTURE_3D && depth == 1))) { + /* For this combination, we're at some lower mipmap level and + * some important dimension is 1. We can't extrapolate up to a + * likely base level width/height/depth for a full mipmap stack + * from this info, so just allocate this one level. + */ + firstLevel = intelImage->base.Base.Level; + lastLevel = intelImage->base.Base.Level; + } else { + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if (intelImage->base.Base.Level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + /* Figure out image dimensions at start level. */ + for (i = intelImage->base.Base.Level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } + + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. + */ + if ((intelObj->base.Sampler.MinFilter == GL_NEAREST || + intelObj->base.Sampler.MinFilter == GL_LINEAR) && + intelImage->base.Base.Level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { + lastLevel = firstLevel; + } else { + lastLevel = (firstLevel + + _mesa_get_tex_max_num_levels(intelObj->base.Target, + width, height, depth) - 1); + } + } + + return intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base.Base.TexFormat, + firstLevel, + lastLevel, + width, + height, + depth, + expect_accelerated_upload, + intelImage->base.Base.NumSamples, + INTEL_MIPTREE_TILING_ANY); +} + +/* XXX: Do this for TexSubImage also: + */ +static bool +try_pbo_upload(struct gl_context *ctx, + struct gl_texture_image *image, + const struct gl_pixelstore_attrib *unpack, + GLenum format, GLenum type, const void *pixels) +{ + struct intel_texture_image *intelImage = intel_texture_image(image); + struct intel_context *intel = intel_context(ctx); + struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj); + GLuint src_offset; + drm_intel_bo *src_buffer; + + if (!_mesa_is_bufferobj(unpack->BufferObj)) + return false; + + DBG("trying pbo upload\n"); + + if (intel->ctx._ImageTransferState || + unpack->SkipPixels || unpack->SkipRows) { + DBG("%s: image transfer\n", __FUNCTION__); + return false; + } + + ctx->Driver.AllocTextureImageBuffer(ctx, image); + + if (!intelImage->mt) { + DBG("%s: no miptree\n", __FUNCTION__); + return false; + } + + if (!_mesa_format_matches_format_and_type(intelImage->mt->format, + format, type, false)) { + DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n", + __FUNCTION__, _mesa_get_format_name(intelImage->mt->format), + format, type); + return false; + } + + if (image->TexObject->Target == GL_TEXTURE_1D_ARRAY || + image->TexObject->Target == GL_TEXTURE_2D_ARRAY) { + DBG("%s: no support for array textures\n", __FUNCTION__); + return false; + } + + src_buffer = intel_bufferobj_source(intel, pbo, 64, &src_offset); + /* note: potential 64-bit ptr to 32-bit int cast */ + src_offset += (GLuint) (unsigned long) pixels; + + int src_stride = + _mesa_image_row_stride(unpack, image->Width, format, type); + + struct intel_mipmap_tree *pbo_mt = + intel_miptree_create_for_bo(intel, + src_buffer, + intelImage->mt->format, + src_offset, + image->Width, image->Height, + src_stride, I915_TILING_NONE); + if (!pbo_mt) + return false; + + if (!intel_miptree_blit(intel, + pbo_mt, 0, 0, + 0, 0, false, + intelImage->mt, image->Level, image->Face, + 0, 0, false, + image->Width, image->Height, GL_COPY)) { + DBG("%s: blit failed\n", __FUNCTION__); + return false; + } + + intel_miptree_release(&pbo_mt); + + DBG("%s: success\n", __FUNCTION__); + return true; +} + +static void +intelTexImage(struct gl_context * ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack) +{ + bool ok; + + DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + texImage->Level, texImage->Width, texImage->Height, texImage->Depth); + + ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, + 0, 0, 0, /*x,y,z offsets*/ + texImage->Width, + texImage->Height, + texImage->Depth, + format, type, pixels, unpack, + true /*for_glTexImage*/); + if (ok) + return; + + /* Attempt to use the blitter for PBO image uploads. + */ + if (dims <= 2 && + try_pbo_upload(ctx, texImage, unpack, format, type, pixels)) { + return; + } + + DBG("%s: upload image %dx%dx%d pixels %p\n", + __FUNCTION__, texImage->Width, texImage->Height, texImage->Depth, + pixels); + + _mesa_store_teximage(ctx, dims, texImage, + format, type, pixels, unpack); +} + + +/** + * Binds a region to a texture image, like it was uploaded by glTexImage2D(). + * + * Used for GLX_EXT_texture_from_pixmap and EGL image extensions, + */ +static void +intel_set_texture_image_region(struct gl_context *ctx, + struct gl_texture_image *image, + struct intel_region *region, + GLenum target, + GLenum internalFormat, + gl_format format, + uint32_t offset, + GLuint width, + GLuint height, + GLuint tile_x, + GLuint tile_y) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intel_image = intel_texture_image(image); + struct gl_texture_object *texobj = image->TexObject; + struct intel_texture_object *intel_texobj = intel_texture_object(texobj); + bool has_surface_tile_offset = false; + uint32_t draw_x, draw_y; + + _mesa_init_teximage_fields(&intel->ctx, image, + width, height, 1, + 0, internalFormat, format); + + ctx->Driver.FreeTextureImageBuffer(ctx, image); + + intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat, + 0, 0, + width, height, 1, + true, 0 /* num_samples */); + if (intel_image->mt == NULL) + return; + intel_region_reference(&intel_image->mt->region, region); + intel_image->mt->total_width = width; + intel_image->mt->total_height = height; + intel_image->mt->level[0].slice[0].x_offset = tile_x; + intel_image->mt->level[0].slice[0].y_offset = tile_y; + + intel_miptree_get_tile_offsets(intel_image->mt, 0, 0, &draw_x, &draw_y); +#ifndef I915 + has_surface_tile_offset = brw_context(ctx)->has_surface_tile_offset; +#endif + + /* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION + * for EGL images from non-tile aligned sufaces in gen4 hw and earlier which has + * trouble resolving back to destination image due to alignment issues. + */ + if (!has_surface_tile_offset && + (draw_x != 0 || draw_y != 0)) { + _mesa_error(ctx, GL_INVALID_OPERATION, __func__); + intel_miptree_release(&intel_image->mt); + return; + } + + intel_texobj->needs_validate = true; + + intel_image->mt->offset = offset; + assert(region->pitch % region->cpp == 0); + intel_image->base.RowStride = region->pitch / region->cpp; + + /* Immediately validate the image to the object. */ + intel_miptree_reference(&intel_texobj->mt, intel_image->mt); +} + +void +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint texture_format, + __DRIdrawable *dPriv) +{ + struct gl_framebuffer *fb = dPriv->driverPrivate; + struct intel_context *intel = pDRICtx->driverPrivate; + struct gl_context *ctx = &intel->ctx; + struct intel_texture_object *intelObj; + struct intel_renderbuffer *rb; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + int level = 0, internalFormat = 0; + gl_format texFormat = MESA_FORMAT_NONE; + + texObj = _mesa_get_current_tex_object(ctx, target); + intelObj = intel_texture_object(texObj); + + if (!intelObj) + return; + + if (dPriv->lastStamp != dPriv->dri2.stamp || + !pDRICtx->driScreenPriv->dri2.useInvalidate) + intel_update_renderbuffers(pDRICtx, dPriv); + + rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); + /* If the region isn't set, then intel_update_renderbuffers was unable + * to get the buffers for the drawable. + */ + if (!rb || !rb->mt) + return; + + if (rb->mt->cpp == 4) { + if (texture_format == __DRI_TEXTURE_FORMAT_RGB) { + internalFormat = GL_RGB; + texFormat = MESA_FORMAT_XRGB8888; + } + else { + internalFormat = GL_RGBA; + texFormat = MESA_FORMAT_ARGB8888; + } + } else if (rb->mt->cpp == 2) { + internalFormat = GL_RGB; + texFormat = MESA_FORMAT_RGB565; + } + + _mesa_lock_texture(&intel->ctx, texObj); + texImage = _mesa_get_tex_image(ctx, texObj, target, level); + intel_miptree_make_shareable(intel, rb->mt); + intel_set_texture_image_region(ctx, texImage, rb->mt->region, target, + internalFormat, texFormat, 0, + rb->mt->region->width, + rb->mt->region->height, + 0, 0); + _mesa_unlock_texture(&intel->ctx, texObj); +} + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); +} + +static void +intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLeglImageOES image_handle) +{ + struct intel_context *intel = intel_context(ctx); + __DRIscreen *screen; + __DRIimage *image; + + screen = intel->intelScreen->driScrnPriv; + image = screen->dri2.image->lookupEGLImage(screen, image_handle, + screen->loaderPrivate); + if (image == NULL) + return; + + /* Disallow depth/stencil textures: we don't have a way to pass the + * separate stencil miptree of a GL_DEPTH_STENCIL texture through. + */ + if (image->has_depthstencil) { + _mesa_error(ctx, GL_INVALID_OPERATION, __func__); + return; + } + + intel_set_texture_image_region(ctx, texImage, image->region, + target, image->internal_format, + image->format, image->offset, + image->width, image->height, + image->tile_x, image->tile_y); +} + +void +intelInitTextureImageFuncs(struct dd_function_table *functions) +{ + functions->TexImage = intelTexImage; + functions->EGLImageTargetTexture2D = intel_image_target_texture_2d; +} diff --git a/src/mesa/drivers/dri/i915/intel_tex_layout.c b/src/mesa/drivers/dri/i915/intel_tex_layout.c deleted file mode 120000 index fe61b44..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex_layout.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex_layout.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex_layout.c b/src/mesa/drivers/dri/i915/intel_tex_layout.c new file mode 100644 index 0000000..fbb6520 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_layout.c @@ -0,0 +1,214 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "intel_mipmap_tree.h" +#include "intel_tex_layout.h" +#include "intel_context.h" + +#include "main/image.h" +#include "main/macros.h" + +static unsigned int +intel_horizontal_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. + * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit width ("i") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 | + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 | + * | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 | + * | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 | + * | All Others | 4 | 4 | 4 | 4 | 4 | + * +----------------------------------------------------------------------+ + * + * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8. + */ + if (_mesa_is_format_compressed(format)) { + /* The hardware alignment requirements for compressed textures + * happen to match the block boundaries. + */ + unsigned int i, j; + _mesa_get_format_block_size(format, &i, &j); + return i; + } + + if (format == MESA_FORMAT_S8) + return 8; + + /* The depth alignment requirements in the table above are for rendering to + * depth miplevels using the LOD control fields. We don't use LOD control + * fields, and instead use page offsets plus intra-tile x/y offsets, which + * require that the low 3 bits are zero. To reduce the number of x/y + * offset workaround blits we do, align the X to 8, which depth texturing + * can handle (sadly, it can't handle 8 in the Y direction). + */ + if (intel->gen >= 7 && + _mesa_get_format_base_format(format) == GL_DEPTH_COMPONENT) + return 8; + + return 4; +} + +static unsigned int +intel_vertical_texture_alignment_unit(struct intel_context *intel, + gl_format format) +{ + /** + * From the "Alignment Unit Size" section of various specs, namely: + * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4 + * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4. + * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4 + * - BSpec (for Ivybridge and slight variations in separate stencil) + * + * +----------------------------------------------------------------------+ + * | | alignment unit height ("j") | + * | Surface Property |-----------------------------| + * | | 915 | 965 | ILK | SNB | IVB | + * +----------------------------------------------------------------------+ + * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 | + * | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 | + * | Depth Buffer | 2 | 2 | 2 | 4 | 4 | + * | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 | + * | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 | + * | All Others | 2 | 2 | 2 | 2 | 2 | + * +----------------------------------------------------------------------+ + * + * On SNB+, non-special cases can be overridden by setting the SURFACE_STATE + * "Surface Vertical Alignment" field to VALIGN_2 or VALIGN_4. + * + * We currently don't support multisampling. + */ + if (_mesa_is_format_compressed(format)) + return 4; + + if (format == MESA_FORMAT_S8) + return intel->gen >= 7 ? 8 : 4; + + GLenum base_format = _mesa_get_format_base_format(format); + + if (intel->gen >= 6 && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) { + return 4; + } + + return 2; +} + +void +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, + unsigned int *w, unsigned int *h) +{ + *w = intel_horizontal_texture_alignment_unit(intel, format); + *h = intel_vertical_texture_alignment_unit(intel, format); +} + +void i945_miptree_layout_2d(struct intel_mipmap_tree *mt) +{ + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->physical_width0; + GLuint height = mt->physical_height0; + GLuint depth = mt->physical_depth0; /* number of array layers. */ + + mt->total_width = mt->physical_width0; + + if (mt->compressed) { + mt->total_width = ALIGN(mt->physical_width0, mt->align_w); + } + + /* May need to adjust width to accomodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + + ALIGN(minify(mt->physical_width0, 2), mt->align_w); + } else { + mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) + + minify(mt->physical_width0, 2); + } + + if (mip1_width > mt->total_width) { + mt->total_width = mip1_width; + } + } + + mt->total_height = 0; + + for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + GLuint img_height; + + intel_miptree_set_level_info(mt, level, x, y, width, + height, depth); + + img_height = ALIGN(height, mt->align_h); + if (mt->compressed) + img_height /= mt->align_h; + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + mt->total_height = MAX2(mt->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == mt->first_level + 1) { + x += ALIGN(width, mt->align_w); + } + else { + y += img_height; + } + + width = minify(width, 1); + height = minify(height, 1); + } +} diff --git a/src/mesa/drivers/dri/i915/intel_tex_layout.h b/src/mesa/drivers/dri/i915/intel_tex_layout.h new file mode 100644 index 0000000..f353cf4 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_layout.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "main/macros.h" + +extern void i945_miptree_layout_2d(struct intel_mipmap_tree *mt); + +void +intel_get_texture_alignment_unit(struct intel_context *intel, + gl_format format, + unsigned int *w, unsigned int *h); diff --git a/src/mesa/drivers/dri/i915/intel_tex_obj.h b/src/mesa/drivers/dri/i915/intel_tex_obj.h new file mode 100644 index 0000000..e30dd8a --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_obj.h @@ -0,0 +1,84 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_TEX_OBJ_H +#define _INTEL_TEX_OBJ_H + +#include "swrast/s_context.h" + + +struct intel_texture_object +{ + struct gl_texture_object base; + + /* This is a mirror of base._MaxLevel, updated at validate time, + * except that we don't bother with the non-base levels for + * non-mipmapped textures. + */ + unsigned int _MaxLevel; + + /* On validation any active images held in main memory or in other + * regions will be copied to this region and the old storage freed. + */ + struct intel_mipmap_tree *mt; + + /** + * Set when mipmap trees in the texture images of this texture object + * might not all be the mipmap tree above. + */ + bool needs_validate; +}; + + +/** + * intel_texture_image is a subclass of swrast_texture_image because we + * sometimes fall back to using the swrast module for software rendering. + */ +struct intel_texture_image +{ + struct swrast_texture_image base; + + /* If intelImage->mt != NULL, image data is stored here. + * Else if intelImage->base.Buffer != NULL, image is stored there. + * Else there is no image data. + */ + struct intel_mipmap_tree *mt; +}; + +static INLINE struct intel_texture_object * +intel_texture_object(struct gl_texture_object *obj) +{ + return (struct intel_texture_object *) obj; +} + +static INLINE struct intel_texture_image * +intel_texture_image(struct gl_texture_image *img) +{ + return (struct intel_texture_image *) img; +} + +#endif /* _INTEL_TEX_OBJ_H */ diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c deleted file mode 120000 index b3a8a3d..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex_subimage.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex_subimage.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c new file mode 100644 index 0000000..f936e9b --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c @@ -0,0 +1,335 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/pbo.h" +#include "main/texobj.h" +#include "main/texstore.h" +#include "main/texcompress.h" +#include "main/enums.h" + +#include "intel_batchbuffer.h" +#include "intel_context.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" +#include "intel_blit.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +static bool +intel_blit_texsubimage(struct gl_context * ctx, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, + GLint width, GLint height, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *intelImage = intel_texture_image(texImage); + + /* Try to do a blit upload of the subimage if the texture is + * currently busy. + */ + if (!intelImage->mt) + return false; + + /* The blitter can't handle Y tiling */ + if (intelImage->mt->region->tiling == I915_TILING_Y) + return false; + + if (texImage->TexObject->Target != GL_TEXTURE_2D) + return false; + + /* On gen6, it's probably not worth swapping to the blit ring to do + * this because of all the overhead involved. + */ + if (intel->gen >= 6) + return false; + + if (!drm_intel_bo_busy(intelImage->mt->region->bo)) + return false; + + DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n", + __FUNCTION__, + _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + texImage->Level, xoffset, yoffset, width, height); + + pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1, + format, type, pixels, packing, + "glTexSubImage"); + if (!pixels) + return false; + + struct intel_mipmap_tree *temp_mt = + intel_miptree_create(intel, GL_TEXTURE_2D, texImage->TexFormat, + 0, 0, + width, height, 1, + false, 0, INTEL_MIPTREE_TILING_NONE); + if (!temp_mt) + goto err; + + GLubyte *dst = intel_miptree_map_raw(intel, temp_mt); + if (!dst) + goto err; + + if (!_mesa_texstore(ctx, 2, texImage->_BaseFormat, + texImage->TexFormat, + temp_mt->region->pitch, + &dst, + width, height, 1, + format, type, pixels, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + } + + intel_miptree_unmap_raw(intel, temp_mt); + + bool ret; + + ret = intel_miptree_blit(intel, + temp_mt, 0, 0, + 0, 0, false, + intelImage->mt, texImage->Level, texImage->Face, + xoffset, yoffset, false, + width, height, GL_COPY); + assert(ret); + + intel_miptree_release(&temp_mt); + _mesa_unmap_teximage_pbo(ctx, packing); + + return ret; + +err: + _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); + intel_miptree_release(&temp_mt); + _mesa_unmap_teximage_pbo(ctx, packing); + return false; +} + +/** + * \brief A fast path for glTexImage and glTexSubImage. + * + * \param for_glTexImage Was this called from glTexImage or glTexSubImage? + * + * This fast path is taken when the hardware natively supports the texture + * format (such as GL_BGRA) and when the texture memory is X-tiled. It uploads + * the texture data by mapping the texture memory without a GTT fence, thus + * acquiring a tiled view of the memory, and then memcpy'ing sucessive + * subspans within each tile. + * + * This is a performance win over the conventional texture upload path because + * it avoids the performance penalty of writing through the write-combine + * buffer. In the conventional texture upload path, + * texstore.c:store_texsubimage(), the texture memory is mapped through a GTT + * fence, thus acquiring a linear view of the memory, then each row in the + * image is memcpy'd. In this fast path, we replace each row's memcpy with + * a sequence of memcpy's over each bit6 swizzle span in the row. + * + * This fast path's use case is Google Chrome's paint rectangles. Chrome (as + * of version 21) renders each page as a tiling of 256x256 GL_BGRA textures. + * Each page's content is initially uploaded with glTexImage2D and damaged + * regions are updated with glTexSubImage2D. On some workloads, the + * performance gain of this fastpath on Sandybridge is over 5x. + */ +bool +intel_texsubimage_tiled_memcpy(struct gl_context * ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid *pixels, + const struct gl_pixelstore_attrib *packing, + bool for_glTexImage) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_image *image = intel_texture_image(texImage); + + /* The miptree's buffer. */ + drm_intel_bo *bo; + + int error = 0; + + /* This fastpath is restricted to a specific texture type: level 0 of + * a 2D BGRA texture. It could be generalized to support more types by + * varying the arithmetic loop below. + */ + if (!intel->has_llc || + format != GL_BGRA || + type != GL_UNSIGNED_BYTE || + texImage->TexFormat != MESA_FORMAT_ARGB8888 || + texImage->TexObject->Target != GL_TEXTURE_2D || + texImage->Level != 0 || + pixels == NULL || + _mesa_is_bufferobj(packing->BufferObj) || + packing->Alignment > 4 || + packing->SkipPixels > 0 || + packing->SkipRows > 0 || + (packing->RowLength != 0 && packing->RowLength != width) || + packing->SwapBytes || + packing->LsbFirst || + packing->Invert) + return false; + + if (for_glTexImage) + ctx->Driver.AllocTextureImageBuffer(ctx, texImage); + + if (!image->mt || + image->mt->region->tiling != I915_TILING_X) { + /* The algorithm below is written only for X-tiled memory. */ + return false; + } + + /* Since we are going to write raw data to the miptree, we need to resolve + * any pending fast color clears before we start. + */ + intel_miptree_resolve_color(intel, image->mt); + + bo = image->mt->region->bo; + + if (drm_intel_bo_references(intel->batch.bo, bo)) { + perf_debug("Flushing before mapping a referenced bo.\n"); + intel_batchbuffer_flush(intel); + } + + if (unlikely(intel->perf_debug)) { + if (drm_intel_bo_busy(bo)) { + perf_debug("Mapping a busy BO, causing a stall on the GPU.\n"); + } + } + + error = drm_intel_bo_map(bo, true /*write_enable*/); + if (error || bo->virtual == NULL) { + DBG("%s: failed to map bo\n", __FUNCTION__); + return false; + } + + /* We postponed printing this message until having committed to executing + * the function. + */ + DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d)\n", + __FUNCTION__, texImage->Level, xoffset, yoffset, width, height); + + /* In the tiling algorithm below, some variables are in units of pixels, + * others are in units of bytes, and others (such as height) are unitless. + * Each variable name is suffixed with its units. + */ + + const uint32_t x_max_pixels = xoffset + width; + const uint32_t y_max_pixels = yoffset + height; + + const uint32_t tile_size_bytes = 4096; + + const uint32_t tile_width_bytes = 512; + const uint32_t tile_width_pixels = 128; + + const uint32_t tile_height = 8; + + const uint32_t cpp = 4; /* chars per pixel of GL_BGRA */ + const uint32_t swizzle_width_pixels = 16; + + const uint32_t stride_bytes = image->mt->region->pitch; + const uint32_t width_tiles = stride_bytes / tile_width_bytes; + + for (uint32_t y_pixels = yoffset; y_pixels < y_max_pixels; ++y_pixels) { + const uint32_t y_offset_bytes = (y_pixels / tile_height) * width_tiles * tile_size_bytes + + (y_pixels % tile_height) * tile_width_bytes; + + for (uint32_t x_pixels = xoffset; x_pixels < x_max_pixels; x_pixels += swizzle_width_pixels) { + const uint32_t x_offset_bytes = (x_pixels / tile_width_pixels) * tile_size_bytes + + (x_pixels % tile_width_pixels) * cpp; + + intptr_t offset_bytes = y_offset_bytes + x_offset_bytes; + if (intel->has_swizzling) { +#if 0 + /* Clear, unoptimized version. */ + bool bit6 = (offset_bytes >> 6) & 1; + bool bit9 = (offset_bytes >> 9) & 1; + bool bit10 = (offset_bytes >> 10) & 1; + + if (bit9 ^ bit10) + offset_bytes ^= (1 << 6); +#else + /* Optimized, obfuscated version. */ + offset_bytes ^= ((offset_bytes >> 3) ^ (offset_bytes >> 4)) + & (1 << 6); +#endif + } + + const uint32_t swizzle_bound_pixels = ALIGN(x_pixels + 1, swizzle_width_pixels); + const uint32_t memcpy_bound_pixels = MIN2(x_max_pixels, swizzle_bound_pixels); + const uint32_t copy_size = cpp * (memcpy_bound_pixels - x_pixels); + + memcpy(bo->virtual + offset_bytes, pixels, copy_size); + pixels += copy_size; + x_pixels -= (x_pixels % swizzle_width_pixels); + } + } + + drm_intel_bo_unmap(bo); + return true; +} + +static void +intelTexSubImage(struct gl_context * ctx, + GLuint dims, + struct gl_texture_image *texImage, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing) +{ + bool ok; + + ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, + false /*for_glTexImage*/); + if (ok) + return; + + /* The intel_blit_texsubimage() function only handles 2D images */ + if (dims != 2 || !intel_blit_texsubimage(ctx, texImage, + xoffset, yoffset, + width, height, + format, type, pixels, packing)) { + _mesa_store_texsubimage(ctx, dims, texImage, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing); + } +} + +void +intelInitTextureSubImageFuncs(struct dd_function_table *functions) +{ + functions->TexSubImage = intelTexSubImage; +} diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c deleted file mode 120000 index 41a7567..0000000 --- a/src/mesa/drivers/dri/i915/intel_tex_validate.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_tex_validate.c \ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c new file mode 100644 index 0000000..a8a8647 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c @@ -0,0 +1,141 @@ +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/samplerobj.h" +#include "main/texobj.h" + +#include "intel_context.h" +#include "intel_mipmap_tree.h" +#include "intel_blit.h" +#include "intel_tex.h" +#include "intel_tex_layout.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE + +/** + * When validating, we only care about the texture images that could + * be seen, so for non-mipmapped modes we want to ignore everything + * but BaseLevel. + */ +static void +intel_update_max_level(struct intel_texture_object *intelObj, + struct gl_sampler_object *sampler) +{ + struct gl_texture_object *tObj = &intelObj->base; + int maxlevel; + + if (sampler->MinFilter == GL_NEAREST || + sampler->MinFilter == GL_LINEAR) { + maxlevel = tObj->BaseLevel; + } else { + maxlevel = tObj->_MaxLevel; + } + + if (intelObj->_MaxLevel != maxlevel) { + intelObj->_MaxLevel = maxlevel; + intelObj->needs_validate = true; + } +} + +/* + */ +GLuint +intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) +{ + struct gl_context *ctx = &intel->ctx; + struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit); + GLuint face, i; + GLuint nr_faces = 0; + struct intel_texture_image *firstImage; + int width, height, depth; + + /* TBOs require no validation -- they always just point to their BO. */ + if (tObj->Target == GL_TEXTURE_BUFFER) + return true; + + /* We know/require this is true by now: + */ + assert(intelObj->base._BaseComplete); + + /* What levels must the tree include at a minimum? + */ + intel_update_max_level(intelObj, sampler); + if (intelObj->mt && intelObj->mt->first_level != tObj->BaseLevel) + intelObj->needs_validate = true; + + if (!intelObj->needs_validate) + return true; + + firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); + + /* Check tree can hold all active levels. Check tree matches + * target, imageFormat, etc. + * + * For pre-gen4, we have to match first_level == tObj->BaseLevel, + * because we don't have the control that gen4 does to make min/mag + * determination happen at a nonzero (hardware) baselevel. Because + * of that, we just always relayout on baselevel change. + */ + if (intelObj->mt && + (!intel_miptree_match_image(intelObj->mt, &firstImage->base.Base) || + intelObj->mt->first_level != tObj->BaseLevel || + intelObj->mt->last_level < intelObj->_MaxLevel)) { + intel_miptree_release(&intelObj->mt); + } + + + /* May need to create a new tree: + */ + if (!intelObj->mt) { + intel_miptree_get_dimensions_for_image(&firstImage->base.Base, + &width, &height, &depth); + + perf_debug("Creating new %s %dx%dx%d %d..%d miptree to handle finalized " + "texture miptree.\n", + _mesa_get_format_name(firstImage->base.Base.TexFormat), + width, height, depth, tObj->BaseLevel, intelObj->_MaxLevel); + + intelObj->mt = intel_miptree_create(intel, + intelObj->base.Target, + firstImage->base.Base.TexFormat, + tObj->BaseLevel, + intelObj->_MaxLevel, + width, + height, + depth, + true, + 0 /* num_samples */, + INTEL_MIPTREE_TILING_ANY); + if (!intelObj->mt) + return false; + } + + /* Pull in any images not in the object's tree: + */ + nr_faces = _mesa_num_tex_faces(intelObj->base.Target); + for (face = 0; face < nr_faces; face++) { + for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) { + struct intel_texture_image *intelImage = + intel_texture_image(intelObj->base.Image[face][i]); + /* skip too small size mipmap */ + if (intelImage == NULL) + break; + + if (intelObj->mt != intelImage->mt) { + intel_miptree_copy_teximage(intel, intelImage, intelObj->mt, + false /* invalidate */); + } + + /* After we're done, we'd better agree that our layout is + * appropriate, or we'll end up hitting this function again on the + * next draw + */ + assert(intel_miptree_match_image(intelObj->mt, &intelImage->base.Base)); + } + } + + intelObj->needs_validate = false; + + return true; +} -- 2.7.4