From: Chris Wilson
Date: Wed, 6 Nov 2013 14:51:42 +0000 (+0000)
Subject: sna: Use tiling BLT fallback for BLT composite operations
X-Git-Tag: 2.99.906~23
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ae380a960df6b3a9714d78eb6cb42249764488ba;p=platform%2Fupstream%2Fxf86-video-intel.git

sna: Use tiling BLT fallback for BLT composite operations

This avoids a circuitous route through the render pathways and multiple
levels of tiling fallbacks to accomplish the same copy.

Signed-off-by: Chris Wilson
---
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 39ff0f6..693fedc 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1734,8 +1734,8 @@ blt_composite_copy_with_alpha(struct sna *sna,
 	x2 = x1 + r->width;
 	y2 = y1 + r->height;
 
-	src_x = r->src.x - x1;
-	src_y = r->src.y - y1;
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
 
 	/* clip against dst */
 	if (x1 < 0)
@@ -1811,7 +1811,9 @@ prepare_blt_copy(struct sna *sna,
 		if (!kgem_check_many_bo_fenced(&sna->kgem,
 					       op->dst.bo, bo, NULL)) {
 			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-			return false;
+			return sna_tiling_blt_composite(sna, op, bo,
+							src->drawable.bitsPerPixel,
+							alpha_fixup);
 		}
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
@@ -2682,25 +2684,27 @@ sna_blt_composite__convert(struct sna *sna,
 		return false;
 	}
 
+	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
+	     __FUNCTION__,
+	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
+
+	tmp->u.blt.src_pixmap = NULL;
+	tmp->u.blt.sx = sx;
+	tmp->u.blt.sy = sy;
+
 	kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
 	if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_many_bo_fenced(&sna->kgem,
 					       tmp->dst.bo, tmp->src.bo, NULL)) {
 			DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-			return false;
+			return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
+							PICT_FORMAT_BPP(tmp->src.pict_format),
+							alpha_fixup);
 		}
 		_kgem_set_mode(&sna->kgem, KGEM_BLT);
 	}
 
-	DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
-	     __FUNCTION__,
-	     tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
-
-	tmp->u.blt.src_pixmap = NULL;
-	tmp->u.blt.sx = sx;
-	tmp->u.blt.sy = sy;
-
 	if (alpha_fixup) {
 		tmp->blt   = blt_composite_copy_with_alpha;
 		tmp->box   = blt_composite_copy_box_with_alpha;
@@ -3531,6 +3535,148 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	return true;
 }
 
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				    int bpp, int alpha_fixup,
+				    const BoxRec *box, int nbox)
+{
+	struct kgem *kgem = &sna->kgem;
+	unsigned src_pitch, br13, cmd;
+
+#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
+	return false;
+#endif
+
+	DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
+	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
+	     src_bo->tiling, dst_bo->tiling,
+	     src_bo->pitch, dst_bo->pitch));
+
+	if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
+		DBG(("%s: cannot blt to src? %d or dst? %d\n",
+		     __FUNCTION__,
+		     kgem_bo_can_blt(kgem, src_bo),
+		     kgem_bo_can_blt(kgem, dst_bo)));
+		return false;
+	}
+
+	cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
+	src_pitch = src_bo->pitch;
+	if (kgem->gen >= 040 && src_bo->tiling) {
+		cmd |= BLT_SRC_TILED;
+		src_pitch >>= 2;
+	}
+	assert(src_pitch <= MAXSHORT);
+
+	br13 = dst_bo->pitch;
+	if (kgem->gen >= 040 && dst_bo->tiling) {
+		cmd |= BLT_DST_TILED;
+		br13 >>= 2;
+	}
+	assert(br13 <= MAXSHORT);
+
+	br13 |= copy_ROP[alu] << 16;
+	switch (bpp) {
+	default: assert(0);
+	case 32: br13 |= 1 << 25; /* RGB8888 */
+	case 16: br13 |= 1 << 24; /* RGB565 */
+	case 8: break;
+	}
+
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+	if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
+		DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
+		return false;
+	}
+
+	/* Compare first box against a previous fill */
+	if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
+	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
+		if (kgem->gen >= 0100) {
+			if (kgem->nbatch >= 7 &&
+			    kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
+			    kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+				DBG(("%s: deleting last fill\n", __FUNCTION__));
+				kgem->nbatch -= 7;
+				kgem->nreloc--;
+			}
+		} else {
+			if (kgem->nbatch >= 6 &&
+			    kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
+			    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+			    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+				DBG(("%s: deleting last fill\n", __FUNCTION__));
+				kgem->nbatch -= 6;
+				kgem->nreloc--;
+			}
+		}
+	}
+
+	while (nbox--) {
+		uint32_t *b;
+
+		if (!kgem_check_batch(kgem, 14) ||
+		    !kgem_check_reloc(kgem, 2)) {
+			_kgem_submit(kgem);
+			_kgem_set_mode(kgem, KGEM_BLT);
+		}
+
+		assert(sna->kgem.mode == KGEM_BLT);
+		b = kgem->batch + kgem->nbatch;
+		b[0] = cmd;
+		b[1] = br13;
+		b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+		b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+		if (sna->kgem.gen >= 0100) {
+			*(uint64_t *)(b+4) =
+				kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+						 I915_GEM_DOMAIN_RENDER << 16 |
+						 I915_GEM_DOMAIN_RENDER |
+						 KGEM_RELOC_FENCED,
+						 0);
+			b[6] = src_pitch;
+			b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+			*(uint64_t *)(b+8) =
+				kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+						 I915_GEM_DOMAIN_RENDER << 16 |
+						 KGEM_RELOC_FENCED,
+						 0);
+			b[10] = alpha_fixup;
+			b[11] = alpha_fixup;
+			b[12] = 0;
+			b[13] = 0;
+			kgem->nbatch += 14;
+		} else {
+			b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      I915_GEM_DOMAIN_RENDER |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[5] = src_pitch;
+			b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+			b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[8] = alpha_fixup;
+			b[9] = alpha_fixup;
+			b[10] = 0;
+			b[11] = 0;
+			kgem->nbatch += 12;
+		}
+		assert(kgem->nbatch < kgem->surface);
+		box++;
+	}
+
+	if (kgem->nexec > 1 && __kgem_ring_empty(kgem))
+		_kgem_submit(kgem);
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static void box_extents(const BoxRec *box, int n, BoxRec *extents)
 {
 	*extents = *box;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 38bde39..d64d652 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -592,6 +592,12 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			       struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 			       int bpp, const BoxRec *box, int nbox);
 
+bool sna_tiling_blt_composite(struct sna *sna,
+			      struct sna_composite_op *op,
+			      struct kgem_bo *bo,
+			      int bpp,
+			      uint32_t alpha_fixup);
+
 bool sna_blt_composite(struct sna *sna,
 		       uint32_t op,
 		       PicturePtr src,
@@ -629,6 +635,11 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
 			int bpp, const BoxRec *box, int n);
 
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				    struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				    struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				    int bpp, int alpha_fixup,
+				    const BoxRec *box, int nbox);
 bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
 				 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 				 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index d23fb00..ae9c84b 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -689,6 +689,317 @@ done:
 	return ret;
 }
 
+fastcall static void
+tiling_blt(struct sna *sna,
+	   const struct sna_composite_op *op,
+	   const struct sna_composite_rectangles *r)
+{
+	int x1, x2, y1, y2;
+	int src_x, src_y;
+	BoxRec box;
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+	     __FUNCTION__,
+	     r->src.x, r->src.y,
+	     r->dst.x, r->dst.y,
+	     r->width, r->height));
+
+	/* XXX higher layer should have clipped? */
+
+	x1 = r->dst.x + op->dst.x;
+	y1 = r->dst.y + op->dst.y;
+	x2 = x1 + r->width;
+	y2 = y1 + r->height;
+
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
+
+	/* clip against dst */
+	if (x1 < 0)
+		x1 = 0;
+	if (y1 < 0)
+		y1 = 0;
+
+	if (x2 > op->dst.width)
+		x2 = op->dst.width;
+
+	if (y2 > op->dst.height)
+		y2 = op->dst.height;
+
+	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+	if (x2 <= x1 || y2 <= y1)
+		return;
+
+	box.x1 = x1; box.y1 = y1;
+	box.x2 = x2; box.y2 = y2;
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, src_x, src_y,
+				  op->dst.bo, 0, 0,
+				  op->u.blt.bpp,
+				  &box, 1);
+}
+
+fastcall static void
+tiling_blt_box(struct sna *sna,
+	       const struct sna_composite_op *op,
+	       const BoxRec *box)
+{
+	DBG(("%s: box (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, op->u.blt.sx, op->u.blt.sy,
+				  op->dst.bo, op->dst.x, op->dst.y,
+				  op->u.blt.bpp,
+				  box, 1);
+}
+
+static void
+tiling_blt_boxes(struct sna *sna,
+		 const struct sna_composite_op *op,
+		 const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+	sna_tiling_blt_copy_boxes(sna, GXcopy,
+				  op->src.bo, op->u.blt.sx, op->u.blt.sy,
+				  op->dst.bo, op->dst.x, op->dst.y,
+				  op->u.blt.bpp,
+				  box, nbox);
+}
+
+static bool
+sna_tiling_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+				      struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+				      struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+				      int bpp, int alpha_fixup,
+				      const BoxRec *box, int nbox)
+{
+	RegionRec region, tile, this;
+	struct kgem_bo *bo;
+	int max_size, step;
+	bool ret = false;
+
+	if (wedged(sna) ||
+	    !kgem_bo_can_blt(&sna->kgem, src_bo) ||
+	    !kgem_bo_can_blt(&sna->kgem, dst_bo)) {
+		/* XXX */
+		DBG(("%s: tiling blt fail: src?=%d, dst?=%d\n",
+		     __FUNCTION__,
+		     kgem_bo_can_blt(&sna->kgem, src_bo),
+		     kgem_bo_can_blt(&sna->kgem, dst_bo)));
+		return false;
+	}
+
+	max_size = sna->kgem.aperture_high * PAGE_SIZE;
+	max_size -= MAX(kgem_bo_size(src_bo), kgem_bo_size(dst_bo));
+	if (max_size <= 0) {
+		DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+		return false;
+	}
+	if (max_size > sna->kgem.max_copy_tile_size)
+		max_size = sna->kgem.max_copy_tile_size;
+
+	pixman_region_init_rects(&region, box, nbox);
+
+	/* Use a small step to accommodate enlargement through tile alignment */
+	step = sna->render.max_3d_size;
+	if (region.extents.x1 & (8*512 / bpp - 1) || region.extents.y1 & 63)
+		step /= 2;
+	while (step * step * 4 > max_size)
+		step /= 2;
+	if (sna->kgem.gen < 033)
+		step /= 2; /* accommodate severe fence restrictions */
+	if (step == 0) {
+		DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+		goto done; /* region is initialised by now, do not leak it */
+	}
+
+	DBG(("%s (alu=%d), tile.size=%d, box=%dx[(%d, %d), (%d, %d)])\n",
+	     __FUNCTION__, alu, step, nbox,
+	     region.extents.x1, region.extents.y1,
+	     region.extents.x2, region.extents.y2));
+
+	for (tile.extents.y1 = tile.extents.y2 = region.extents.y1;
+	     tile.extents.y2 < region.extents.y2;
+	     tile.extents.y1 = tile.extents.y2) {
+		int y2 = tile.extents.y1 + step;
+		if (y2 > region.extents.y2)
+			y2 = region.extents.y2;
+		tile.extents.y2 = y2;
+
+		for (tile.extents.x1 = tile.extents.x2 = region.extents.x1;
+		     tile.extents.x2 < region.extents.x2;
+		     tile.extents.x1 = tile.extents.x2) {
+			int w, h;
+			int x2 = tile.extents.x1 + step;
+			if (x2 > region.extents.x2)
+				x2 = region.extents.x2;
+			tile.extents.x2 = x2;
+
+			tile.data = NULL;
+
+			RegionNull(&this);
+			RegionIntersect(&this, &region, &tile);
+			if (RegionNil(&this))
+				continue;
+
+			w = this.extents.x2 - this.extents.x1;
+			h = this.extents.y2 - this.extents.y1;
+			bo = kgem_create_2d(&sna->kgem, w, h, bpp,
+					    kgem_choose_tiling(&sna->kgem,
+							       I915_TILING_X,
+							       w, h, bpp),
+					    CREATE_TEMPORARY);
+			if (bo) {
+				int16_t dx = this.extents.x1;
+				int16_t dy = this.extents.y1;
+
+				assert(bo->pitch <= 8192);
+				assert(bo->tiling != I915_TILING_Y);
+
+				/* stage the source verbatim, apply alu on writeback */
+				if (!sna_blt_copy_boxes(sna, GXcopy,
+							src_bo, src_dx, src_dy,
+							bo, -dx, -dy,
+							bpp, REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				if (!sna_blt_copy_boxes__with_alpha(sna, alu,
+								    bo, -dx, -dy,
+								    dst_bo, dst_dx, dst_dy,
+								    bpp, alpha_fixup,
+								    REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				kgem_bo_destroy(&sna->kgem, bo);
+			}
+			RegionUninit(&this);
+		}
+	}
+
+	ret = true;
+	goto done;
+err:
+	kgem_bo_destroy(&sna->kgem, bo);
+	RegionUninit(&this);
+done:
+	pixman_region_fini(&region);
+	return ret;
+}
+
+fastcall static void
+tiling_blt__with_alpha(struct sna *sna,
+		       const struct sna_composite_op *op,
+		       const struct sna_composite_rectangles *r)
+{
+	int x1, x2, y1, y2;
+	int src_x, src_y;
+	BoxRec box;
+
+	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+	     __FUNCTION__,
+	     r->src.x, r->src.y,
+	     r->dst.x, r->dst.y,
+	     r->width, r->height));
+
+	/* XXX higher layer should have clipped? */
+
+	x1 = r->dst.x + op->dst.x;
+	y1 = r->dst.y + op->dst.y;
+	x2 = x1 + r->width;
+	y2 = y1 + r->height;
+
+	src_x = r->src.x - x1 + op->u.blt.sx;
+	src_y = r->src.y - y1 + op->u.blt.sy;
+
+	/* clip against dst */
+	if (x1 < 0)
+		x1 = 0;
+	if (y1 < 0)
+		y1 = 0;
+
+	if (x2 > op->dst.width)
+		x2 = op->dst.width;
+
+	if (y2 > op->dst.height)
+		y2 = op->dst.height;
+
+	DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+	if (x2 <= x1 || y2 <= y1)
+		return;
+
+	box.x1 = x1; box.y1 = y1;
+	box.x2 = x2; box.y2 = y2;
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, src_x, src_y,
+					      op->dst.bo, 0, 0,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      &box, 1);
+}
+
+fastcall static void
+tiling_blt_box__with_alpha(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   const BoxRec *box)
+{
+	DBG(("%s: box (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, op->u.blt.sx, op->u.blt.sy,
+					      op->dst.bo, op->dst.x, op->dst.y,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      box, 1);
+}
+
+static void
+tiling_blt_boxes__with_alpha(struct sna *sna,
+			     const struct sna_composite_op *op,
+			     const BoxRec *box, int nbox)
+{
+	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+	sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+					      op->src.bo, op->u.blt.sx, op->u.blt.sy,
+					      op->dst.bo, op->dst.x, op->dst.y,
+					      op->u.blt.bpp, op->u.blt.pixel,
+					      box, nbox);
+}
+
+static void nop_done(struct sna *sna, const struct sna_composite_op *op)
+{
+	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
+	(void)op;
+}
+
+bool
+sna_tiling_blt_composite(struct sna *sna,
+			 struct sna_composite_op *op,
+			 struct kgem_bo *bo,
+			 int bpp,
+			 uint32_t alpha_fixup)
+{
+	assert(op->op == PictOpSrc);
+	assert(op->dst.bo);
+	assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
+	assert(kgem_bo_can_blt(&sna->kgem, bo));
+
+	op->src.bo = bo;
+	op->u.blt.bpp = bpp;
+	op->u.blt.pixel = alpha_fixup;
+
+	if (alpha_fixup) {
+		op->blt   = tiling_blt__with_alpha;
+		op->box   = tiling_blt_box__with_alpha;
+		op->boxes = tiling_blt_boxes__with_alpha;
+	} else {
+		op->blt   = tiling_blt;
+		op->box   = tiling_blt_box;
+		op->boxes = tiling_blt_boxes;
+	}
+	op->done = nop_done;
+
+	return true;
+}
+
 bool
 sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 			  struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
 			  struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
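
A note on the fallback structure: the tiling path added here decomposes one aperture-busting copy into per-tile staged copies. The tile walk performed by sna_tiling_blt_copy_boxes__with_alpha reduces to the standalone sketch below. This is an illustration only, not driver code: the extents and step values are hypothetical, and the real function additionally derives step from the aperture budget, intersects each tile with the damage region, and stages the copy through a temporary X-tiled bo.

#include <stdio.h>

/* Illustrative sketch: walk the region extents in step-sized tiles,
 * clamping the last row/column to the bounds. In the patch above,
 * step starts at max_3d_size and is halved until step*step*4 fits
 * the aperture budget (and halved again on old gens with severe
 * fence restrictions). */
int main(void)
{
	const int x1 = 0, y1 = 0, x2 = 1000, y2 = 700; /* hypothetical extents */
	const int step = 256;                          /* hypothetical tile edge */
	int tx1, ty1;

	for (ty1 = y1; ty1 < y2; ty1 += step) {
		const int ty2 = ty1 + step < y2 ? ty1 + step : y2;
		for (tx1 = x1; tx1 < x2; tx1 += step) {
			const int tx2 = tx1 + step < x2 ? tx1 + step : x2;
			/* per-tile work in the patch: blit src -> tmp with
			 * GXcopy, then tmp -> dst applying the alpha fixup */
			printf("tile (%d, %d)-(%d, %d)\n", tx1, ty1, tx2, ty2);
		}
	}
	return 0;
}

Staging each tile through a bounded temporary buffer keeps every individual blit within the blitter's fence and pitch limits, which is why this fallback can succeed where the single direct BLT copy could not fit the aperture.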