sna: Emit points using the BLT primitive when appropriate
author: Chris Wilson <chris@chris-wilson.co.uk>
Fri, 7 Mar 2014 08:15:21 +0000 (08:15 +0000)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Mon, 10 Mar 2014 14:54:38 +0000 (14:54 +0000)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
src/sna/sna_accel.c
src/sna/sna_blt.c
src/sna/sna_reg.h
src/sna/sna_render.h

index 6359de1..aff7b34 100644 (file)
@@ -6586,10 +6586,14 @@ sna_poly_point__fill(DrawablePtr drawable, GCPtr gc,
        DDXPointRec last;
 
        DBG(("%s: count=%d\n", __FUNCTION__, n));
+       if (n == 0)
+               return;
 
        last.x = drawable->x + data->dx;
        last.y = drawable->y + data->dy;
-       while (n) {
+       if (op->points && mode != CoordModePrevious) {
+               op->points(data->sna, op, last.x, last.y, pt, n);
+       } else do {
                BoxRec *b = box;
                unsigned nbox = n;
                if (nbox > ARRAY_SIZE(box))
@@ -6608,7 +6612,7 @@ sna_poly_point__fill(DrawablePtr drawable, GCPtr gc,
                        b++;
                } while (--nbox);
                op->boxes(data->sna, op, box, b - box);
-       }
+       } while (n);
 }
 
 static void
@@ -8177,26 +8181,30 @@ sna_poly_point_blt(DrawablePtr drawable,
 
                assert_pixmap_contains_points(pixmap, pt, n, last.x, last.y);
                sna_damage_add_points(damage, pt, n, last.x, last.y);
-               do {
-                       unsigned nbox = n;
-                       if (nbox > ARRAY_SIZE(box))
-                               nbox = ARRAY_SIZE(box);
-                       n -= nbox;
+               if (fill.points && mode != CoordModePrevious) {
+                       fill.points(sna, &fill, last.x, last.y, pt, n);
+               } else {
                        do {
-                               *(DDXPointRec *)b = *pt++;
+                               unsigned nbox = n;
+                               if (nbox > ARRAY_SIZE(box))
+                                       nbox = ARRAY_SIZE(box);
+                               n -= nbox;
+                               do {
+                                       *(DDXPointRec *)b = *pt++;
 
-                               b->x1 += last.x;
-                               b->y1 += last.y;
-                               if (mode == CoordModePrevious)
-                                       last = *(DDXPointRec *)b;
+                                       b->x1 += last.x;
+                                       b->y1 += last.y;
+                                       if (mode == CoordModePrevious)
+                                               last = *(DDXPointRec *)b;
 
-                               b->x2 = b->x1 + 1;
-                               b->y2 = b->y1 + 1;
-                               b++;
-                       } while (--nbox);
-                       fill.boxes(sna, &fill, box, b - box);
-                       b = box;
-               } while (n);
+                                       b->x2 = b->x1 + 1;
+                                       b->y2 = b->y1 + 1;
+                                       b++;
+                               } while (--nbox);
+                               fill.boxes(sna, &fill, box, b - box);
+                               b = box;
+                       } while (n);
+               }
        } else {
                RegionPtr clip = gc->pCompositeClip;
 
index 4bbcdbc..26a0944 100644 (file)
@@ -2862,6 +2862,123 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
        _sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
 }
 
+/*
+ * Build the 64-bit batch entry for a single point.
+ *
+ * The point is translated by (dx, dy), reinterpreted as a uint32_t through
+ * a union and placed in the upper 32 bits of the result; XY_PIXEL_BLT is
+ * OR'ed into the lower bits.
+ */
+static inline uint64_t pt_add(const DDXPointRec *pt, int16_t dx, int16_t dy)
+{
+       union {
+               DDXPointRec pt;
+               uint32_t i;
+       } packed;
+       uint64_t cmd = XY_PIXEL_BLT;
+
+       packed.pt.y = pt->y + dy;
+       packed.pt.x = pt->x + dx;
+
+       cmd |= (uint64_t)packed.i << 32;
+       return cmd;
+}
+
+/*
+ * Emit a run of single-pixel fills, one pt_add() entry per point.
+ *
+ * sna:    driver state; its kgem batch receives the commands.
+ * op:     fill operation whose blt state (bo, pixel, alu) is used.
+ * dx, dy: translation added to every point before it is emitted.
+ * p:      array of n points.
+ * n:      number of points; must be non-zero (asserted via n_this_time).
+ *
+ * Every point consumes two 32-bit batch entries. When the remaining batch
+ * space cannot hold all outstanding points, as many as fit are written,
+ * sna_blt_fill_begin() is called again and the loop continues with the rest.
+ * NOTE(review): sna_blt_fill_begin() is presumed to make fresh batch space
+ * available and re-emit the fill setup -- confirm against its definition.
+ */
+fastcall static void sna_blt_fill_op_points(struct sna *sna,
+                                           const struct sna_fill_op *op,
+                                           int16_t dx, int16_t dy,
+                                           const DDXPointRec *p, int n)
+{
+       const struct sna_blt_state *blt = &op->base.u.blt;
+       struct kgem *kgem = &sna->kgem;
+
+       /* The point count parameter is 'n'; 'npoints' does not exist here. */
+       DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));
+
+       /* Re-emit the fill setup if the cached state targets another bo. */
+       if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
+               sna_blt_fill_begin(sna, blt);
+
+               sna->blt_state.fill_bo = blt->bo[0]->unique_id;
+               sna->blt_state.fill_pixel = blt->pixel;
+               sna->blt_state.fill_alu = blt->alu;
+       }
+
+       /* Need room for at least one point (two batch entries). */
+       if (!kgem_check_batch(kgem, 2))
+               sna_blt_fill_begin(sna, blt);
+
+       do {
+               uint32_t *b = kgem->batch + kgem->nbatch;
+               int n_this_time;
+
+               assert(sna->kgem.mode == KGEM_BLT);
+               /* Clamp this pass to what fits in the remaining batch. */
+               n_this_time = n;
+               if (2*n_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+                       n_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 2;
+               assert(n_this_time);
+               n -= n_this_time;
+
+               /* Reserve the space up front; 'b' still points at its start. */
+               kgem->nbatch += 2 * n_this_time;
+               assert(kgem->nbatch < kgem->surface);
+
+               /*
+                * Two copies of the same unrolled emitter: one with the
+                * offsets as literal zeros, one applying (dx, dy). Each
+                * handles groups of 8, then the 4/2/1 remainder bits.
+                */
+               if ((dx|dy) == 0) {
+                       while (n_this_time >= 8) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+                               *((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+                               *((uint64_t *)b + 2) = pt_add(p+2, 0, 0);
+                               *((uint64_t *)b + 3) = pt_add(p+3, 0, 0);
+                               *((uint64_t *)b + 4) = pt_add(p+4, 0, 0);
+                               *((uint64_t *)b + 5) = pt_add(p+5, 0, 0);
+                               *((uint64_t *)b + 6) = pt_add(p+6, 0, 0);
+                               *((uint64_t *)b + 7) = pt_add(p+7, 0, 0);
+                               b += 16;
+                               n_this_time -= 8;
+                               p += 8;
+                       }
+                       if (n_this_time & 4) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+                               *((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+                               *((uint64_t *)b + 2) = pt_add(p+2, 0, 0);
+                               *((uint64_t *)b + 3) = pt_add(p+3, 0, 0);
+                               b += 8;
+                               p += 4;
+                       }
+                       if (n_this_time & 2) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+                               *((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+                               b += 4;
+                               p += 2;
+                       }
+                       if (n_this_time & 1)
+                               *((uint64_t *)b + 0) = pt_add(p++, 0, 0);
+               } else {
+                       while (n_this_time >= 8) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+                               *((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+                               *((uint64_t *)b + 2) = pt_add(p+2, dx, dy);
+                               *((uint64_t *)b + 3) = pt_add(p+3, dx, dy);
+                               *((uint64_t *)b + 4) = pt_add(p+4, dx, dy);
+                               *((uint64_t *)b + 5) = pt_add(p+5, dx, dy);
+                               *((uint64_t *)b + 6) = pt_add(p+6, dx, dy);
+                               *((uint64_t *)b + 7) = pt_add(p+7, dx, dy);
+                               b += 16;
+                               n_this_time -= 8;
+                               p += 8;
+                       }
+                       if (n_this_time & 4) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+                               *((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+                               *((uint64_t *)b + 2) = pt_add(p+2, dx, dy);
+                               *((uint64_t *)b + 3) = pt_add(p+3, dx, dy);
+                               b += 8;
+                               /* Four points were consumed, so advance by 4 (not 8). */
+                               p += 4;
+                       }
+                       if (n_this_time & 2) {
+                               *((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+                               *((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+                               b += 4;
+                               p += 2;
+                       }
+                       if (n_this_time & 1)
+                               *((uint64_t *)b + 0) = pt_add(p++, dx, dy);
+               }
+
+               if (!n)
+                       return;
+
+               /* More points remain than fitted: start over in fresh space. */
+               sna_blt_fill_begin(sna, blt);
+       } while (1);
+}
+
 bool sna_blt_fill(struct sna *sna, uint8_t alu,
                  struct kgem_bo *bo, int bpp,
                  uint32_t pixel,
@@ -2886,6 +3003,7 @@ bool sna_blt_fill(struct sna *sna, uint8_t alu,
        fill->blt   = sna_blt_fill_op_blt;
        fill->box   = sna_blt_fill_op_box;
        fill->boxes = sna_blt_fill_op_boxes;
+       fill->points = sna_blt_fill_op_points;
        fill->done  =
                (void (*)(struct sna *, const struct sna_fill_op *))nop_done;
        return true;
index bda6ef6..92a1ae5 100644 (file)
@@ -47,6 +47,7 @@
 #define XY_SETUP_BLT                   (2<<29|0x01<<22)
 #define XY_SETUP_MONO_PATTERN_SL_BLT   (2<<29|0x11<<22)
 #define XY_SETUP_CLIP                  (2<<29|0x03<<22|1)
+#define XY_PIXEL_BLT                   (2<<29|0x24<<22)
 #define XY_SCANLINE_BLT                        (2<<29|0x25<<22|1)
 #define XY_TEXT_IMMEDIATE_BLT          (2<<29|0x31<<22|(1<<16))
 #define XY_SRC_COPY_BLT_CMD            (2<<29|0x53<<22)
index 67848b9..325b7cc 100644 (file)
@@ -199,6 +199,11 @@ struct sna_fill_op {
                               const struct sna_fill_op *op,
                               const BoxRec *box,
                               int count);
+       fastcall void (*points)(struct sna *sna,
+                              const struct sna_fill_op *op,
+                              int16_t dx, int16_t dy,
+                              const DDXPointRec *points,
+                              int count);
        void (*done)(struct sna *sna, const struct sna_fill_op *op);
 };