From 6c487ff3bd40acdfc553b328e8327bfa90c49340 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 17 Mar 2014 01:18:43 +0100 Subject: [PATCH] r600g: deobfuscate async DMA code MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/r600/evergreen_hw_context.c | 10 +++++----- src/gallium/drivers/r600/evergreen_state.c | 20 ++++++++++---------- src/gallium/drivers/r600/evergreend.h | 4 ++++ src/gallium/drivers/r600/r600_hw_context.c | 13 ++++++------- src/gallium/drivers/r600/r600_state.c | 18 +++++++++--------- src/gallium/drivers/r600/r600d.h | 1 + 6 files changed, 35 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index e43eacc..a47461c 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -50,19 +50,19 @@ void evergreen_dma_copy(struct r600_context *rctx, src_offset += r600_resource_va(&rctx->screen->b.b, src); /* see if we use dword or byte copy */ - if (!(dst_offset & 0x3) && !(src_offset & 0x3) && !(size & 0x3)) { + if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { size >>= 2; - sub_cmd = 0x00; + sub_cmd = EG_DMA_COPY_DWORD_ALIGNED; shift = 2; } else { - sub_cmd = 0x40; + sub_cmd = EG_DMA_COPY_BYTE_ALIGNED; shift = 0; } - ncopy = (size / 0x000fffff) + !!(size % 0x000fffff); + ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); r600_need_dma_space(&rctx->b, ncopy * 5); for (i = 0; i < ncopy; i++) { - csize = size < 0x000fffff ? size : 0x000fffff; + csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE; /* emit reloc before writting cs so that cs is always in consistent state */ r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_MIN); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index fb34506..11cd767 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -3233,15 +3233,15 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, non_disp_tiling = 1; y = 0; - sub_cmd = 0x8; + sub_cmd = EG_DMA_COPY_TILED; lbpp = util_logbase2(bpp); - pitch_tile_max = ((pitch / bpp) >> 3) - 1; + pitch_tile_max = ((pitch / bpp) / 8) - 1; nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks); if (dst_mode == RADEON_SURF_MODE_LINEAR) { /* T2L */ array_mode = evergreen_array_mode(src_mode); - slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) >> 6; + slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even * if the linear destination/source have smaller heigh as the size of the @@ -3266,7 +3266,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, } else { /* L2T */ array_mode = evergreen_array_mode(dst_mode); - slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) >> 6; + slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even * if the linear destination/source have smaller heigh as the size of the @@ -3290,16 +3290,16 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx, addr += r600_resource_va(&rctx->screen->b.b, src); } - size = (copy_height * pitch) >> 2; - ncopy = (size / 0x000fffff) + !!(size % 0x000fffff); + size = (copy_height * pitch) / 4; + ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE); r600_need_dma_space(&rctx->b, ncopy * 9); for (i = 0; i < ncopy; i++) { cheight = copy_height; - if (((cheight * pitch) >> 2) > 0x000fffff) { - cheight = (0x000fffff << 2) / pitch; + if (((cheight * pitch) / 4) > EG_DMA_COPY_MAX_SIZE) { + cheight = (EG_DMA_COPY_MAX_SIZE * 4) / pitch; } - size = (cheight * pitch) >> 2; + size = (cheight * pitch) / 4; /* emit reloc before writting cs so that cs is always in consistent state */ r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_MIN); @@ -3381,7 +3381,7 @@ static void evergreen_dma_blit(struct pipe_context *ctx, /* the x test here are currently useless (because we don't support partial blit) * but keep them around so we don't forget about those */ - if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) { + if (src_pitch % 8 || src_box->x % 8 || dst_x % 8 || src_box->y % 8 || dst_y % 8) { goto fallback; } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 986fd16..9fde184 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -2445,6 +2445,10 @@ /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 +#define EG_DMA_COPY_MAX_SIZE 0xfffff +#define EG_DMA_COPY_DWORD_ALIGNED 0x00 +#define EG_DMA_COPY_BYTE_ALIGNED 0x40 +#define EG_DMA_COPY_TILED 0x8 #define DMA_PACKET_INDIRECT_BUFFER 0x4 #define DMA_PACKET_SEMAPHORE 0x5 #define DMA_PACKET_FENCE 0x6 diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index eb5500e..267b326 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -448,7 +448,7 @@ void r600_dma_copy(struct r600_context *rctx, uint64_t size) { struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs; - unsigned i, ncopy, csize, shift; + unsigned i, ncopy, csize; struct r600_resource *rdst = (struct r600_resource*)dst; struct r600_resource *rsrc = (struct r600_resource*)src; @@ -458,13 +458,12 @@ void r600_dma_copy(struct r600_context *rctx, util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); - size >>= 2; - shift = 2; - ncopy = (size / 0xffff) + !!(size % 0xffff); + size >>= 2; /* convert to dwords */ + ncopy = (size / R600_DMA_COPY_MAX_SIZE_DW) + !!(size % R600_DMA_COPY_MAX_SIZE_DW); r600_need_dma_space(&rctx->b, ncopy * 5); for (i = 0; i < ncopy; i++) { - csize = size < 0xffff ? size : 0xffff; + csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW; /* emit reloc before writting cs so that cs is always in consistent state */ r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ, RADEON_PRIO_MIN); @@ -475,8 +474,8 @@ void r600_dma_copy(struct r600_context *rctx, cs->buf[cs->cdw++] = src_offset & 0xfffffffc; cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff; cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff; - dst_offset += csize << shift; - src_offset += csize << shift; + dst_offset += csize << 2; + src_offset += csize << 2; size -= csize; } } diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 4aa3798..d87caf8 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -2802,12 +2802,12 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, y = 0; lbpp = util_logbase2(bpp); - pitch_tile_max = ((pitch / bpp) >> 3) - 1; + pitch_tile_max = ((pitch / bpp) / 8) - 1; if (dst_mode == RADEON_SURF_MODE_LINEAR) { /* T2L */ array_mode = r600_array_mode(src_mode); - slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) >> 6; + slice_tile_max = (rsrc->surface.level[src_level].nblk_x * rsrc->surface.level[src_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even * if the linear destination/source have smaller heigh as the size of the @@ -2826,7 +2826,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, } else { /* L2T */ array_mode = r600_array_mode(dst_mode); - slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) >> 6; + slice_tile_max = (rdst->surface.level[dst_level].nblk_x * rdst->surface.level[dst_level].nblk_y) / (8*8); slice_tile_max = slice_tile_max ? slice_tile_max - 1 : 0; /* linear height must be the same as the slice tile max height, it's ok even * if the linear destination/source have smaller heigh as the size of the @@ -2844,20 +2844,20 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx, addr += src_y * pitch + src_x * bpp; } /* check that we are in dw/base alignment constraint */ - if ((addr & 0x3) || (base & 0xff)) { + if (addr % 4 || base % 256) { return FALSE; } /* It's a r6xx/r7xx limitation, the blit must be on 8 boundary for number * line in the blit. Compute max 8 line we can copy in the size limit */ - cheight = ((0x0000ffff << 2) / pitch) & 0xfffffff8; + cheight = ((R600_DMA_COPY_MAX_SIZE_DW * 4) / pitch) & 0xfffffff8; ncopy = (copy_height / cheight) + !!(copy_height % cheight); r600_need_dma_space(&rctx->b, ncopy * 7); for (i = 0; i < ncopy; i++) { cheight = cheight > copy_height ? copy_height : cheight; - size = (cheight * pitch) >> 2; + size = (cheight * pitch) / 4; /* emit reloc before writting cs so that cs is always in consistent state */ r600_context_bo_reloc(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ, RADEON_PRIO_MIN); @@ -2930,11 +2930,11 @@ static void r600_dma_blit(struct pipe_context *ctx, dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode; if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) { - /* strick requirement on r6xx/r7xx */ + /* strict requirement on r6xx/r7xx */ goto fallback; } /* lot of constraint on alignment this should capture them all */ - if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) { + if (src_pitch % 8 || src_box->y % 8 || dst_y % 8) { goto fallback; } @@ -2954,7 +2954,7 @@ static void r600_dma_blit(struct pipe_context *ctx, dst_offset += dst_y * dst_pitch + dst_x * bpp; size = src_box->height * src_pitch; /* must be dw aligned */ - if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) { + if (dst_offset % 4 || src_offset % 4 || size % 4) { goto fallback; } r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size); diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h index 05d1f0a..f787803 100644 --- a/src/gallium/drivers/r600/r600d.h +++ b/src/gallium/drivers/r600/r600d.h @@ -3743,6 +3743,7 @@ /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 +#define R600_DMA_COPY_MAX_SIZE_DW 0xffff #define DMA_PACKET_INDIRECT_BUFFER 0x4 #define DMA_PACKET_SEMAPHORE 0x5 #define DMA_PACKET_FENCE 0x6 -- 2.7.4