sna: Use tiling BLT fallback for BLT composite operations
authorChris Wilson <chris@chris-wilson.co.uk>
Wed, 6 Nov 2013 14:51:42 +0000 (14:51 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Wed, 6 Nov 2013 14:51:42 +0000 (14:51 +0000)
This avoids a circuitous route through the render pathways and multiple
levels of tiling fallbacks to accomplish the same copy.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
src/sna/sna_blt.c
src/sna/sna_render.h
src/sna/sna_tiling.c

index 39ff0f6..693fedc 100644 (file)
@@ -1734,8 +1734,8 @@ blt_composite_copy_with_alpha(struct sna *sna,
        x2 = x1 + r->width;
        y2 = y1 + r->height;
 
-       src_x = r->src.x - x1;
-       src_y = r->src.y - y1;
+       src_x = r->src.x - x1 + op->u.blt.sx;
+       src_y = r->src.y - y1 + op->u.blt.sy;
 
        /* clip against dst */
        if (x1 < 0)
@@ -1811,7 +1811,9 @@ prepare_blt_copy(struct sna *sna,
                if (!kgem_check_many_bo_fenced(&sna->kgem,
                                               op->dst.bo, bo, NULL)) {
                        DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-                       return false;
+                       return sna_tiling_blt_composite(sna, op, bo,
+                                                       src->drawable.bitsPerPixel,
+                                                       alpha_fixup);
                }
                _kgem_set_mode(&sna->kgem, KGEM_BLT);
        }
@@ -2682,25 +2684,27 @@ sna_blt_composite__convert(struct sna *sna,
                        return false;
        }
 
+       DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
+            __FUNCTION__,
+            tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
+
+       tmp->u.blt.src_pixmap = NULL;
+       tmp->u.blt.sx = sx;
+       tmp->u.blt.sy = sy;
+
        kgem_set_mode(&sna->kgem, KGEM_BLT, tmp->dst.bo);
        if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) {
                kgem_submit(&sna->kgem);
                if (!kgem_check_many_bo_fenced(&sna->kgem,
                                               tmp->dst.bo, tmp->src.bo, NULL)) {
                        DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__));
-                       return false;
+                       return sna_tiling_blt_composite(sna, tmp, tmp->src.bo,
+                                                       PICT_FORMAT_BPP(tmp->src.pict_format),
+                                                       alpha_fixup);
                }
                _kgem_set_mode(&sna->kgem, KGEM_BLT);
        }
 
-       DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
-            __FUNCTION__,
-            tmp->dst.x, tmp->dst.y, sx, sy, alpha_fixup));
-
-       tmp->u.blt.src_pixmap = NULL;
-       tmp->u.blt.sx = sx;
-       tmp->u.blt.sy = sy;
-
        if (alpha_fixup) {
                tmp->blt   = blt_composite_copy_with_alpha;
                tmp->box   = blt_composite_copy_box_with_alpha;
@@ -3531,6 +3535,148 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
        return true;
 }
 
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+                                   struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+                                   struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+                                   int bpp, int alpha_fixup,
+                                   const BoxRec *box, int nbox)
+{
+       struct kgem *kgem = &sna->kgem;
+       unsigned src_pitch, br13, cmd;
+
+#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES
+       return false;
+#endif
+
+       DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n",
+            __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox,
+           src_bo->tiling, dst_bo->tiling,
+           src_bo->pitch, dst_bo->pitch));
+
+       if (wedged(sna) || !kgem_bo_can_blt(kgem, src_bo) || !kgem_bo_can_blt(kgem, dst_bo)) {
+               DBG(("%s: cannot blt to src? %d or dst? %d\n",
+                    __FUNCTION__,
+                    kgem_bo_can_blt(kgem, src_bo),
+                    kgem_bo_can_blt(kgem, dst_bo)));
+               return false;
+       }
+
+       cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
+       src_pitch = src_bo->pitch;
+       if (kgem->gen >= 040 && src_bo->tiling) {
+               cmd |= BLT_SRC_TILED;
+               src_pitch >>= 2;
+       }
+       assert(src_pitch <= MAXSHORT);
+
+       br13 = dst_bo->pitch;
+       if (kgem->gen >= 040 && dst_bo->tiling) {
+               cmd |= BLT_DST_TILED;
+               br13 >>= 2;
+       }
+       assert(br13 <= MAXSHORT);
+
+       br13 |= copy_ROP[alu] << 16;
+       switch (bpp) {
+       default: assert(0);
+       case 32: br13 |= 1 << 25; /* RGB8888 */
+       case 16: br13 |= 1 << 24; /* RGB565 */
+       case 8: break;
+       }
+
+       kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+       if (!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
+               DBG(("%s: cannot fit src+dst into aperture\n", __FUNCTION__));
+               return false;
+       }
+
+       /* Compare first box against a previous fill */
+       if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
+           kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
+               if (kgem->gen >= 0100) {
+                       if (kgem->nbatch >= 7 &&
+                           kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
+                           kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+                           kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+                               DBG(("%s: deleting last fill\n", __FUNCTION__));
+                               kgem->nbatch -= 7;
+                               kgem->nreloc--;
+                       }
+               } else {
+                       if (kgem->nbatch >= 6 &&
+                           kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
+                           kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+                           kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+                               DBG(("%s: deleting last fill\n", __FUNCTION__));
+                               kgem->nbatch -= 6;
+                               kgem->nreloc--;
+                       }
+               }
+       }
+
+       while (nbox--) {
+               uint32_t *b;
+
+               if (!kgem_check_batch(kgem, 14) ||
+                   !kgem_check_reloc(kgem, 2)) {
+                       _kgem_submit(kgem);
+                       _kgem_set_mode(kgem, KGEM_BLT);
+               }
+
+               assert(sna->kgem.mode == KGEM_BLT);
+               b = kgem->batch + kgem->nbatch;
+               b[0] = cmd;
+               b[1] = br13;
+               b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+               b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+               if (sna->kgem.gen >= 0100) {
+                       *(uint64_t *)(b+4) =
+                               kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+                                                I915_GEM_DOMAIN_RENDER << 16 |
+                                                I915_GEM_DOMAIN_RENDER |
+                                                KGEM_RELOC_FENCED,
+                                                0);
+                       b[6] = src_pitch;
+                       b[7] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+                       *(uint64_t *)(b+8) =
+                               kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+                                                I915_GEM_DOMAIN_RENDER << 16 |
+                                                KGEM_RELOC_FENCED,
+                                                0);
+                       b[10] = alpha_fixup;
+                       b[11] = alpha_fixup;
+                       b[12] = 0;
+                       b[13] = 0;
+                       kgem->nbatch += 14;
+               } else {
+                       b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+                                             I915_GEM_DOMAIN_RENDER << 16 |
+                                             I915_GEM_DOMAIN_RENDER |
+                                             KGEM_RELOC_FENCED,
+                                             0);
+                       b[5] = src_pitch;
+                       b[6] = (box->y1 + src_dy) << 16 | (box->x1 + src_dx);
+                       b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+                                             I915_GEM_DOMAIN_RENDER << 16 |
+                                             KGEM_RELOC_FENCED,
+                                             0);
+                       b[8] = alpha_fixup;
+                       b[9] = alpha_fixup;
+                       b[10] = 0;
+                       b[11] = 0;
+                       kgem->nbatch += 12;
+               }
+               assert(kgem->nbatch < kgem->surface);
+               box++;
+       }
+
+       if (kgem->nexec > 1 && __kgem_ring_empty(kgem))
+               _kgem_submit(kgem);
+
+       sna->blt_state.fill_bo = 0;
+       return true;
+}
+
 static void box_extents(const BoxRec *box, int n, BoxRec *extents)
 {
        *extents = *box;
index 38bde39..d64d652 100644 (file)
@@ -592,6 +592,12 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
                               struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
                               int bpp, const BoxRec *box, int nbox);
 
+bool sna_tiling_blt_composite(struct sna *sna,
+                             struct sna_composite_op *op,
+                             struct kgem_bo *bo,
+                             int bpp,
+                             uint32_t alpha_fixup);
+
 bool sna_blt_composite(struct sna *sna,
                       uint32_t op,
                       PicturePtr src,
@@ -629,6 +635,11 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
                        struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
                        int bpp,
                        const BoxRec *box, int n);
+bool sna_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+                                   struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+                                   struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+                                   int bpp, int alpha_fixup,
+                                   const BoxRec *box, int nbox);
 bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
                                 PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
                                 PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
index d23fb00..ae9c84b 100644 (file)
@@ -689,6 +689,317 @@ done:
        return ret;
 }
 
+fastcall static void
+tiling_blt(struct sna *sna,
+          const struct sna_composite_op *op,
+          const struct sna_composite_rectangles *r)
+{      /*
+        * Composite-rectangle hook used by the tiling BLT fallback when no
+        * alpha fixup is required: turns r into a single box in destination
+        * coordinates, clips that box to the destination bounds and hands it
+        * to sna_tiling_blt_copy_boxes() as a GXcopy.  Nothing is emitted if
+        * the clipped box is empty.
+        */
+       int x1, x2, y1, y2;
+       int src_x, src_y;
+       BoxRec box;
+
+       DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+            __FUNCTION__,
+            r->src.x, r->src.y,
+            r->dst.x, r->dst.y,
+            r->width, r->height));
+
+       /* XXX higher layer should have clipped? */
+
+       x1 = r->dst.x + op->dst.x;
+       y1 = r->dst.y + op->dst.y;
+       x2 = x1 + r->width;
+       y2 = y1 + r->height;
+
+       src_x = r->src.x - x1 + op->u.blt.sx; /* dst->src delta, taken before x1 is clipped */
+       src_y = r->src.y - y1 + op->u.blt.sy; /* dst->src delta, taken before y1 is clipped */
+
+       /* clip against dst */
+       if (x1 < 0)
+               x1 = 0;
+       if (y1 < 0)
+               y1 = 0;
+
+       if (x2 > op->dst.width)
+               x2 = op->dst.width;
+
+       if (y2 > op->dst.height)
+               y2 = op->dst.height;
+
+       DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+       if (x2 <= x1 || y2 <= y1)
+               return;
+
+       box.x1 = x1; box.y1 = y1;
+       box.x2 = x2; box.y2 = y2; /* box is already absolute, hence the (0, 0) dst offset below */
+       sna_tiling_blt_copy_boxes(sna, GXcopy,
+                                 op->src.bo, src_x, src_y,
+                                 op->dst.bo, 0, 0,
+                                 op->u.blt.bpp,
+                                 &box, 1); /* result is discarded: this hook returns void */
+}
+
+fastcall static void
+tiling_blt_box(struct sna *sna,
+              const struct sna_composite_op *op,
+              const BoxRec *box)
+{      /*
+        * Single-box hook for the tiling BLT fallback without alpha fixup:
+        * forwards box unclipped to sna_tiling_blt_copy_boxes() as a GXcopy,
+        * offset by op->u.blt.sx/sy on the source side and op->dst.x/y on
+        * the destination side.
+        */
+       DBG(("%s: box (%d, %d), (%d, %d)\n",
+            __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+       sna_tiling_blt_copy_boxes(sna, GXcopy,
+                                 op->src.bo, op->u.blt.sx, op->u.blt.sy,
+                                 op->dst.bo, op->dst.x, op->dst.y,
+                                 op->u.blt.bpp,
+                                 box, 1); /* result is discarded: this hook returns void */
+}
+
+static void
+tiling_blt_boxes(struct sna *sna,
+                const struct sna_composite_op *op,
+                const BoxRec *box, int nbox)
+{      /*
+        * Multi-box hook for the tiling BLT fallback without alpha fixup:
+        * forwards all nbox boxes in one call to sna_tiling_blt_copy_boxes()
+        * as a GXcopy, using the source and destination offsets stored in op.
+        */
+       DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+       sna_tiling_blt_copy_boxes(sna, GXcopy,
+                                 op->src.bo, op->u.blt.sx, op->u.blt.sy,
+                                 op->dst.bo, op->dst.x, op->dst.y,
+                                 op->u.blt.bpp,
+                                 box, nbox); /* result is discarded: this hook returns void */
+}
+
+/*
+ * Copy boxes from src_bo to dst_bo with an alpha fixup when both buffers
+ * cannot be used by a single blit: the bounding extents of the boxes are cut
+ * into step x step tiles, each tile is staged from src_bo into a temporary
+ * bo and then blitted from the temporary into dst_bo through
+ * sna_blt_copy_boxes__with_alpha().
+ *
+ * Boxes are in destination-relative coordinates; (src_dx, src_dy) and
+ * (dst_dx, dst_dy) are added to reach the source and destination pixels.
+ *
+ * Returns false if the GPU is wedged, if either bo cannot be blitted, if no
+ * tile size fits into the aperture, or if one of the blits fails; tiles that
+ * were already copied are not undone.  A tile whose temporary bo cannot be
+ * allocated is skipped without failing the call.
+ */
+static bool
+sna_tiling_blt_copy_boxes__with_alpha(struct sna *sna, uint8_t alu,
+                                     struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+                                     struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+                                     int bpp, int alpha_fixup,
+                                     const BoxRec *box, int nbox)
+{
+       RegionRec region, tile, this;
+       struct kgem_bo *bo;
+       int max_size, step;
+       bool ret = false;
+
+       if (wedged(sna) ||
+           !kgem_bo_can_blt(&sna->kgem, src_bo) ||
+           !kgem_bo_can_blt(&sna->kgem, dst_bo)) {
+               /* XXX */
+               DBG(("%s: tiling blt fail: src?=%d, dst?=%d\n",
+                    __FUNCTION__,
+                    kgem_bo_can_blt(&sna->kgem, src_bo),
+                    kgem_bo_can_blt(&sna->kgem, dst_bo)));
+               return false;
+       }
+
+       /* Budget for one temporary tile: what is left of the aperture after
+        * the larger of src/dst, capped at the maximum copy tile size.
+        */
+       max_size = sna->kgem.aperture_high * PAGE_SIZE;
+       max_size -= MAX(kgem_bo_size(src_bo), kgem_bo_size(dst_bo));
+       if (max_size <= 0) {
+               DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+               return false;
+       }
+       if (max_size > sna->kgem.max_copy_tile_size)
+               max_size = sna->kgem.max_copy_tile_size;
+
+       pixman_region_init_rects(&region, box, nbox);
+
+       /* Use a small step to accommodate enlargement through tile alignment */
+       step = sna->render.max_3d_size;
+       if (region.extents.x1 & (8*512 / bpp - 1) || region.extents.y1 & 63)
+               step /= 2;
+       while (step * step * 4 > max_size)
+               step /= 2;
+       if (sna->kgem.gen < 033)
+               step /= 2; /* accommodate severe fence restrictions */
+       if (step == 0) {
+               DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
+               return false;
+       }
+
+       DBG(("%s (alu=%d), tile.size=%d, box=%dx[(%d, %d), (%d, %d)])\n",
+            __FUNCTION__, alu, step, nbox,
+            region.extents.x1, region.extents.y1,
+            region.extents.x2, region.extents.y2));
+
+       /* Walk the extents row by row, column by column, in step-sized tiles */
+       for (tile.extents.y1 = tile.extents.y2 = region.extents.y1;
+            tile.extents.y2 < region.extents.y2;
+            tile.extents.y1 = tile.extents.y2) {
+               int y2 = tile.extents.y1 + step;
+               if (y2 > region.extents.y2)
+                       y2 = region.extents.y2;
+               tile.extents.y2 = y2;
+
+               for (tile.extents.x1 = tile.extents.x2 = region.extents.x1;
+                    tile.extents.x2 < region.extents.x2;
+                    tile.extents.x1 = tile.extents.x2) {
+                       int w, h;
+                       int x2 = tile.extents.x1 + step;
+                       if (x2 > region.extents.x2)
+                               x2 = region.extents.x2;
+                       tile.extents.x2 = x2;
+
+                       tile.data = NULL;
+
+                       /* the part of the request that falls inside this tile */
+                       RegionNull(&this);
+                       RegionIntersect(&this, &region, &tile);
+                       if (RegionNil(&this))
+                               continue;
+
+                       w = this.extents.x2 - this.extents.x1;
+                       h = this.extents.y2 - this.extents.y1;
+                       bo = kgem_create_2d(&sna->kgem, w, h, bpp,
+                                           kgem_choose_tiling(&sna->kgem,
+                                                              I915_TILING_X,
+                                                              w, h, bpp),
+                                           CREATE_TEMPORARY);
+                       if (bo) {
+                               /* the temporary's origin is the tile's
+                                * top-left corner, hence the (-dx, -dy)
+                                */
+                               int16_t dx = this.extents.x1;
+                               int16_t dy = this.extents.y1;
+
+                               assert(bo->pitch <= 8192);
+                               assert(bo->tiling != I915_TILING_Y);
+
+                               /* Stage with a plain copy: the temporary is
+                                * freshly created, so any raster op that
+                                * reads the destination would blend in its
+                                * undefined contents.  The caller's alu is
+                                * applied once, on the final blit.
+                                */
+                               if (!sna_blt_copy_boxes(sna, GXcopy,
+                                                       src_bo, src_dx, src_dy,
+                                                       bo, -dx, -dy,
+                                                       bpp, REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+                                       goto err;
+
+                               if (!sna_blt_copy_boxes__with_alpha(sna, alu,
+                                                                   bo, -dx, -dy,
+                                                                   dst_bo, dst_dx, dst_dy,
+                                                                   bpp, alpha_fixup,
+                                                                   REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+                                       goto err;
+
+                               kgem_bo_destroy(&sna->kgem, bo);
+                       }
+                       RegionUninit(&this);
+               }
+       }
+
+       ret = true;
+       goto done;
+err:
+       /* only reached with a live temporary and an initialised tile region */
+       kgem_bo_destroy(&sna->kgem, bo);
+       RegionUninit(&this);
+done:
+       pixman_region_fini(&region);
+       return ret;
+}
+
+fastcall static void
+tiling_blt__with_alpha(struct sna *sna,
+                      const struct sna_composite_op *op,
+                      const struct sna_composite_rectangles *r)
+{      /*
+        * Composite-rectangle hook used by the tiling BLT fallback when an
+        * alpha fixup is required: turns r into a single box in destination
+        * coordinates, clips that box to the destination bounds and hands it
+        * to sna_tiling_blt_copy_boxes__with_alpha() as a GXcopy, passing
+        * op->u.blt.pixel as the alpha fixup.  Nothing is emitted if the
+        * clipped box is empty.
+        */
+       int x1, x2, y1, y2;
+       int src_x, src_y;
+       BoxRec box;
+
+       DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+            __FUNCTION__,
+            r->src.x, r->src.y,
+            r->dst.x, r->dst.y,
+            r->width, r->height));
+
+       /* XXX higher layer should have clipped? */
+
+       x1 = r->dst.x + op->dst.x;
+       y1 = r->dst.y + op->dst.y;
+       x2 = x1 + r->width;
+       y2 = y1 + r->height;
+
+       src_x = r->src.x - x1 + op->u.blt.sx; /* dst->src delta, taken before x1 is clipped */
+       src_y = r->src.y - y1 + op->u.blt.sy; /* dst->src delta, taken before y1 is clipped */
+
+       /* clip against dst */
+       if (x1 < 0)
+               x1 = 0;
+       if (y1 < 0)
+               y1 = 0;
+
+       if (x2 > op->dst.width)
+               x2 = op->dst.width;
+
+       if (y2 > op->dst.height)
+               y2 = op->dst.height;
+
+       DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+       if (x2 <= x1 || y2 <= y1)
+               return;
+
+       box.x1 = x1; box.y1 = y1;
+       box.x2 = x2; box.y2 = y2; /* box is already absolute, hence the (0, 0) dst offset below */
+       sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+                                             op->src.bo, src_x, src_y,
+                                             op->dst.bo, 0, 0,
+                                             op->u.blt.bpp, op->u.blt.pixel,
+                                             &box, 1); /* result is discarded: this hook returns void */
+}
+
+fastcall static void
+tiling_blt_box__with_alpha(struct sna *sna,
+                          const struct sna_composite_op *op,
+                          const BoxRec *box)
+{      /*
+        * Single-box hook for the tiling BLT fallback with alpha fixup:
+        * forwards box unclipped to sna_tiling_blt_copy_boxes__with_alpha()
+        * as a GXcopy, with op->u.blt.pixel as the alpha fixup and the
+        * source/destination offsets stored in op.
+        */
+       DBG(("%s: box (%d, %d), (%d, %d)\n",
+            __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+       sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+                                             op->src.bo, op->u.blt.sx, op->u.blt.sy,
+                                             op->dst.bo, op->dst.x, op->dst.y,
+                                             op->u.blt.bpp, op->u.blt.pixel,
+                                             box, 1); /* result is discarded: this hook returns void */
+}
+
+static void
+tiling_blt_boxes__with_alpha(struct sna *sna,
+                            const struct sna_composite_op *op,
+                            const BoxRec *box, int nbox)
+{      /*
+        * Multi-box hook for the tiling BLT fallback with alpha fixup:
+        * forwards all nbox boxes in one call to
+        * sna_tiling_blt_copy_boxes__with_alpha() as a GXcopy, with
+        * op->u.blt.pixel as the alpha fixup.
+        */
+       DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+       sna_tiling_blt_copy_boxes__with_alpha(sna, GXcopy,
+                                             op->src.bo, op->u.blt.sx, op->u.blt.sy,
+                                             op->dst.bo, op->dst.x, op->dst.y,
+                                             op->u.blt.bpp, op->u.blt.pixel,
+                                             box, nbox); /* result is discarded: this hook returns void */
+}
+
+static void nop_done(struct sna *sna, const struct sna_composite_op *op)
+{      /*
+        * Completion hook installed as op->done by sna_tiling_blt_composite():
+        * performs no work beyond asserting that the batch has not grown
+        * past KGEM_BATCH_SIZE.
+        */
+       assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
+       (void)op; /* unused */
+}
+
+bool
+sna_tiling_blt_composite(struct sna *sna,
+                        struct sna_composite_op *op,
+                        struct kgem_bo *bo,
+                        int bpp,
+                        uint32_t alpha_fixup)
+{      /*
+        * Configure op so that its blt/box/boxes hooks go through the tiling
+        * BLT copy paths.  bo becomes the source (op->src.bo), bpp is kept in
+        * op->u.blt.bpp and alpha_fixup in op->u.blt.pixel.  A non-zero
+        * alpha_fixup selects the __with_alpha hooks, zero the plain ones.
+        *
+        * The hooks also read op->dst.* and op->u.blt.sx/sy, none of which
+        * are written here, so the caller must have filled them in already.
+        * Only PictOpSrc is supported (asserted).  Always returns true.
+        */
+       assert(op->op == PictOpSrc);
+       assert(op->dst.bo);
+       assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo));
+       assert(kgem_bo_can_blt(&sna->kgem, bo));
+
+       op->src.bo = bo;
+       op->u.blt.bpp = bpp;
+       op->u.blt.pixel = alpha_fixup; /* read back by the __with_alpha hooks */
+
+       if (alpha_fixup) {
+               op->blt   = tiling_blt__with_alpha;
+               op->box   = tiling_blt_box__with_alpha;
+               op->boxes = tiling_blt_boxes__with_alpha;
+       } else {
+               op->blt   = tiling_blt;
+               op->box   = tiling_blt_box;
+               op->boxes = tiling_blt_boxes;
+       }
+       op->done  = nop_done;
+
+       return true;
+}
+
 bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
                               struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
                               struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,