From 973ddc897d48548fe66f739d703d2e3808de04d1 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 31 Oct 2012 14:42:39 -0700 Subject: [PATCH] intel: Fix software copying of miptree faces for weird formats. Now that we have W-tiled S8, we can't just region_map and poke at bits -- there has to be some swizzling. Rely on intel_miptree_map to get that job done. This should also get the highest performance path we know of for the mapping (interesting if I get around to finishing movntdqa some day). v2: Fix stale name of the bit in a comment. Reviewed-by: Chad Versace --- src/mesa/drivers/dri/intel/intel_mipmap_tree.c | 95 +++++++++++++++++++++----- src/mesa/drivers/dri/intel/intel_regions.c | 33 --------- src/mesa/drivers/dri/intel/intel_regions.h | 10 --- 3 files changed, 77 insertions(+), 61 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index b36a1eb..ee058a1 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -702,6 +702,68 @@ intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt, } static void +intel_miptree_copy_slice_sw(struct intel_context *intel, + struct intel_mipmap_tree *dst_mt, + struct intel_mipmap_tree *src_mt, + int level, + int slice, + int width, + int height) +{ + void *src, *dst; + int src_stride, dst_stride; + int cpp = dst_mt->cpp; + + intel_miptree_map(intel, src_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT, + &src, &src_stride); + + intel_miptree_map(intel, dst_mt, + level, slice, + 0, 0, + width, height, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | + BRW_MAP_DIRECT_BIT, + &dst, &dst_stride); + + DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n", + _mesa_get_format_name(src_mt->format), + src_mt, src, src_stride, + _mesa_get_format_name(dst_mt->format), + dst_mt, dst, dst_stride, + width, height); + + int row_size = cpp * width; + if (src_stride == row_size && + dst_stride == row_size) { + memcpy(dst, src, row_size * height); + } else { + for (int i = 0; i < height; i++) { + memcpy(dst, src, row_size); + dst += dst_stride; + src += src_stride; + } + } + + intel_miptree_unmap(intel, dst_mt, level, slice); + intel_miptree_unmap(intel, src_mt, level, slice); + + /* Don't forget to copy the stencil data over, too. We could have skipped + * passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map + * shuffling the two data sources in/out of temporary storage instead of + * the direct mapping we get this way. + */ + if (dst_mt->stencil_mt) { + assert(src_mt->stencil_mt); + intel_miptree_copy_slice_sw(intel, dst_mt->stencil_mt, src_mt->stencil_mt, + level, slice, width, height); + } +} + +static void intel_miptree_copy_slice(struct intel_context *intel, struct intel_mipmap_tree *dst_mt, struct intel_mipmap_tree *src_mt, @@ -721,12 +783,25 @@ intel_miptree_copy_slice(struct intel_context *intel, slice = depth; assert(depth < src_mt->level[level].depth); + assert(src_mt->format == dst_mt->format); if (dst_mt->compressed) { height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h; width = ALIGN(width, dst_mt->align_w); } + /* If it's a packed depth/stencil buffer with separate stencil, the blit + * below won't apply since we can't do the depth's Y tiling or the + * stencil's W tiling in the blitter. + */ + if (src_mt->stencil_mt) { + intel_miptree_copy_slice_sw(intel, + dst_mt, src_mt, + level, slice, + width, height); + return; + } + uint32_t dst_x, dst_y, src_x, src_y; intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y); intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y); @@ -751,25 +826,9 @@ intel_miptree_copy_slice(struct intel_context *intel, fallback_debug("miptree validate blit for %s failed\n", _mesa_get_format_name(format)); - void *dst = intel_region_map(intel, dst_mt->region, GL_MAP_WRITE_BIT); - void *src = intel_region_map(intel, src_mt->region, GL_MAP_READ_BIT); - - _mesa_copy_rect(dst, - dst_mt->cpp, - dst_mt->region->pitch, - dst_x, dst_y, - width, height, - src, src_mt->region->pitch, - src_x, src_y); - - intel_region_unmap(intel, dst_mt->region); - intel_region_unmap(intel, src_mt->region); - } - if (src_mt->stencil_mt) { - intel_miptree_copy_slice(intel, - dst_mt->stencil_mt, src_mt->stencil_mt, - level, face, depth); + intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice, + width, height); } } diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 89d91b0..a5e49d9 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -328,39 +328,6 @@ intel_region_release(struct intel_region **region_handle) *region_handle = NULL; } -/* - * XXX Move this into core Mesa? - */ -void -_mesa_copy_rect(GLubyte * dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte * src, - GLuint src_pitch, GLuint src_x, GLuint src_y) -{ - GLuint i; - - dst += dst_x * cpp; - src += src_x * cpp; - dst += dst_y * dst_pitch; - src += src_y * src_pitch; - width *= cpp; - - if (width == dst_pitch && width == src_pitch) - memcpy(dst, src, height * width); - else { - for (i = 0; i < height; i++) { - memcpy(dst, src, width); - dst += dst_pitch; - src += src_pitch; - } - } -} - /* Copy rectangular sub-regions. Need better logic about when to * push buffers into AGP - will currently do so whenever possible. */ diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 28d581c..9318141 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -123,16 +123,6 @@ intel_region_copy(struct intel_context *intel, bool flip, GLenum logicop); -void _mesa_copy_rect(GLubyte * dst, - GLuint cpp, - GLuint dst_pitch, - GLuint dst_x, - GLuint dst_y, - GLuint width, - GLuint height, - const GLubyte * src, - GLuint src_pitch, GLuint src_x, GLuint src_y); - void intel_region_get_tile_masks(struct intel_region *region, uint32_t *mask_x, uint32_t *mask_y, -- 2.7.4