sna/trapezoids: Recompute num_threads to match range
authorChris Wilson <chris@chris-wilson.co.uk>
Mon, 7 Oct 2013 22:21:38 +0000 (23:21 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Mon, 7 Oct 2013 23:04:54 +0000 (00:04 +0100)
We need to be careful not to execute threads past the end of the alloted
buffer by making sure the clip extents correctly align.

Reported-by: Joseph Yasi <joe.yasi@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70204
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
src/sna/sna_threads.c
src/sna/sna_trapezoids.c
src/sna/sna_trapezoids_boxes.c
src/sna/sna_trapezoids_imprecise.c
src/sna/sna_trapezoids_mono.c
src/sna/sna_trapezoids_precise.c

index b771bda..3e10e5a 100644 (file)
@@ -274,6 +274,7 @@ void sna_image_composite(pixman_op_t        op,
 
                y = dst_y;
                dy = (height + num_threads - 1) / num_threads;
+               num_threads = (height + dy - 1) / dy;
 
                data[0].op = op;
                data[0].src = src;
@@ -298,6 +299,7 @@ void sna_image_composite(pixman_op_t        op,
                        sna_threads_run(thread_composite, &data[n]);
                }
 
+               assert(y < dst_y + height);
                if (y + dy > dst_y + height)
                        dy = dst_y + height - y;
 
index c9c031a..8827b9c 100644 (file)
@@ -390,6 +390,7 @@ trapezoids_fallback(struct sna *sna,
 
                                y = bounds.y1;
                                dy = (height + num_threads - 1) / num_threads;
+                               num_threads = (bounds.y2 - bounds.y1 + dy - 1) / dy;
 
                                for (n = 1; n < num_threads; n++) {
                                        threads[n] = threads[0];
@@ -400,9 +401,9 @@ trapezoids_fallback(struct sna *sna,
                                        sna_threads_run(rasterize_traps_thread, &threads[n]);
                                }
 
+                               assert(y < threads[0].bounds.y2);
                                threads[0].ptr += (y - bounds.y1) * threads[0].stride;
                                threads[0].bounds.y1 = y;
-                               threads[0].bounds.y2 = bounds.y2;
                                rasterize_traps_thread(&threads[0]);
 
                                sna_threads_wait();
index d7861d2..4b0346b 100644 (file)
@@ -1171,6 +1171,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
 
                        y = clip.extents.y1;
                        dy = (clip.extents.y2 - clip.extents.y1 + num_threads - 1) / num_threads;
+                       num_threads = (clip.extents.y2 - clip.extents.y1 + dy - 1) / dy;
 
                        for (i = 1; i < num_threads; i++) {
                                thread[i] = thread[0];
@@ -1179,6 +1180,7 @@ composite_unaligned_boxes_inplace(struct sna *sna,
                                sna_threads_run(rectilinear_inplace_thread, &thread[i]);
                        }
 
+                       assert(y < clip.extents.y2);
                        thread[0].y1 = y;
                        thread[0].y2 = clip.extents.y2;
                        rectilinear_inplace_thread(&thread[0]);
index 88cb660..4809e9c 100644 (file)
@@ -255,9 +255,7 @@ struct tor {
     struct active_list active[1];
     struct cell_list   coverages[1];
 
-    /* Clip box. */
-    int xmin, xmax;
-    int ymin, ymax;
+    BoxRec extents;
 };
 
 /* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -362,10 +360,7 @@ cell_list_alloc(struct cell_list *cells,
 inline static struct cell *
 cell_list_find(struct cell_list *cells, int x)
 {
-       struct cell *tail = cells->cursor;
-
-       if (tail->x == x)
-               return tail;
+       struct cell *tail;
 
        if (x >= cells->x2)
                return &cells->tail;
@@ -373,6 +368,10 @@ cell_list_find(struct cell_list *cells, int x)
        if (x < cells->x1)
                return &cells->head;
 
+       tail = cells->cursor;
+       if (tail->x == x)
+               return tail;
+
        do {
                if (tail->next->x > x)
                        break;
@@ -459,13 +458,9 @@ polygon_fini(struct polygon *polygon)
 }
 
 static bool
-polygon_init(struct polygon *polygon,
-            int num_edges,
-            int ymin,
-            int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
 {
-       unsigned num_buckets =
-               EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+       unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
 
        if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
                return false;
@@ -502,6 +497,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
 {
        unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
        struct edge **ptail = &polygon->y_buckets[ix];
+       assert(e->ytop < polygon->ymax);
        e->next = *ptail;
        *ptail = e;
 }
@@ -928,10 +924,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
               FAST_SAMPLES_X, FAST_SAMPLES_Y,
               num_edges));
 
-       converter->xmin = box->x1;
-       converter->ymin = box->y1;
-       converter->xmax = box->x2;
-       converter->ymax = box->y2;
+       converter->extents = *box;
 
        if (!cell_list_init(converter->coverages, box->x1, box->x2))
                return false;
@@ -1076,6 +1069,7 @@ tor_blt_span_mono_unbounded_clipped(struct sna *sna,
 
 static void
 tor_blt(struct sna *sna,
+       struct tor *converter,
        struct sna_composite_spans_op *op,
        pixman_region16_t *clip,
        void (*span)(struct sna *sna,
@@ -1083,18 +1077,18 @@ tor_blt(struct sna *sna,
                     pixman_region16_t *clip,
                     const BoxRec *box,
                     int coverage),
-       struct cell_list *cells,
        int y, int height,
-       int xmin, int xmax,
        int unbounded)
 {
+       struct cell_list *cells = converter->coverages;
        struct cell *cell;
        BoxRec box;
        int cover;
 
-       box.y1 = y;
-       box.y2 = y + height;
-       box.x1 = xmin;
+       box.y1 = y + converter->extents.y1;
+       box.y2 = box.y1 + height;
+       assert(box.y2 <= converter->extents.y2);
+       box.x1 = converter->extents.x1;
 
        /* Form the spans from the coverages and areas. */
        cover = cells->head.covered_height*FAST_SAMPLES_X*2;
@@ -1102,8 +1096,8 @@ tor_blt(struct sna *sna,
        for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
                int x = cell->x;
 
-               assert(x >= xmin);
-               assert(x < xmax);
+               assert(x >= converter->extents.x1);
+               assert(x < converter->extents.x2);
                __DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
                       cell->x, cell->covered_height, cell->uncovered_area,
                       cover, xmax));
@@ -1137,7 +1131,7 @@ tor_blt(struct sna *sna,
                }
        }
 
-       box.x2 = xmax;
+       box.x2 = converter->extents.x2;
        if (box.x2 > box.x1 && (unbounded || cover)) {
                __DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
                       box.x1, box.y1,
@@ -1148,28 +1142,6 @@ tor_blt(struct sna *sna,
        }
 }
 
-static void
-tor_blt_empty(struct sna *sna,
-             struct sna_composite_spans_op *op,
-             pixman_region16_t *clip,
-             void (*span)(struct sna *sna,
-                          struct sna_composite_spans_op *op,
-                          pixman_region16_t *clip,
-                          const BoxRec *box,
-                          int coverage),
-             int y, int height,
-             int xmin, int xmax)
-{
-       BoxRec box;
-
-       box.x1 = xmin;
-       box.x2 = xmax;
-       box.y1 = y;
-       box.y2 = y + height;
-
-       span(sna, op, clip, &box, 0);
-}
-
 flatten static void
 tor_render(struct sna *sna,
           struct tor *converter,
@@ -1182,14 +1154,11 @@ tor_render(struct sna *sna,
                        int coverage),
           int unbounded)
 {
-       int ymin = converter->ymin;
-       int xmin = converter->xmin;
-       int xmax = converter->xmax;
-       int i, j, h = converter->ymax - ymin;
        struct polygon *polygon = converter->polygon;
        struct cell_list *coverages = converter->coverages;
        struct active_list *active = converter->active;
        struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+       int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
 
        __DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
 
@@ -1201,15 +1170,23 @@ tor_render(struct sna *sna,
 
                /* Determine if we can ignore this row or use the full pixel
                 * stepper. */
-               if (!polygon->y_buckets[i]) {
+               if (polygon->y_buckets[i] == NULL) {
                        if (active->head.next == &active->tail) {
-                               for (; !polygon->y_buckets[j]; j++)
+                               for (; polygon->y_buckets[j] == NULL; j++)
                                        ;
                                __DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
                                       __FUNCTION__, i, j));
 
-                               if (unbounded)
-                                       tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+                               assert(j <= h);
+                               if (unbounded) {
+                                       BoxRec box;
+
+                                       box = converter->extents;
+                                       box.y1 += i;
+                                       box.y2 = converter->extents.y1 + j;
+
+                                       span(sna, op, clip, &box, 0);
+                               }
                                continue;
                        }
 
@@ -1228,6 +1205,7 @@ tor_render(struct sna *sna,
                                do_full_step -= FAST_SAMPLES_Y;
                                j++;
                        }
+                       assert(j >= i + 1 && j <= h);
                        if (j != i + 1)
                                step_edges(active, j - (i + 1));
 
@@ -1249,9 +1227,8 @@ tor_render(struct sna *sna,
                        }
                }
 
-               tor_blt(sna, op, clip, span, coverages,
-                       i+ymin, j-i, xmin, xmax,
-                       unbounded);
+               assert(j > i);
+               tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
                cell_list_reset(coverages);
        }
 }
@@ -1531,7 +1508,7 @@ inplace_end_subrows(struct active_list *active, uint8_t *row,
 static void
 tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 {
-       int i, j, h = converter->ymax;
+       int i, j, h = converter->extents.y2;
        struct polygon *polygon = converter->polygon;
        struct active_list *active = converter->active;
        struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
@@ -1541,8 +1518,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
 
        __DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
        assert(!mono);
-       assert(converter->ymin == 0);
-       assert(converter->xmin == 0);
+       assert(converter->extents.y1 == 0);
+       assert(converter->extents.x1 == 0);
        assert(scratch->drawable.depth == 8);
 
        /* Render each pixel row. */
@@ -2005,6 +1982,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
                y = clip.extents.y1;
                h = clip.extents.y2 - clip.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -2014,6 +1992,7 @@ imprecise_trapezoid_span_converter(struct sna *sna,
                        sna_threads_run(span_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
                span_thread(&threads[0]);
 
@@ -2862,6 +2841,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
                y = region.extents.y1;
                h = region.extents.y2 - region.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -2871,8 +2851,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
                        sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
-               threads[0].extents.y2 = region.extents.y2;
                inplace_x8r8g8b8_thread(&threads[0]);
 
                sna_threads_wait();
@@ -3132,6 +3112,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
                y = region.extents.y1;
                h = region.extents.y2 - region.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -3141,8 +3122,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
                        sna_threads_run(inplace_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
-               threads[0].extents.y2 = region.extents.y2;
                inplace_thread(&threads[0]);
 
                sna_threads_wait();
index e5cb7a6..790863e 100644 (file)
@@ -843,6 +843,7 @@ mono_trapezoids_span_converter(struct sna *sna,
                y = extents.y1;
                h = extents.y2 - extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
index b4d7592..3720d8b 100644 (file)
@@ -268,9 +268,7 @@ struct tor {
     struct active_list active[1];
     struct cell_list   coverages[1];
 
-    /* Clip box. */
-    int xmin, xmax;
-    int ymin, ymax;
+    BoxRec extents;
 };
 
 /* Compute the floored division a/b. Assumes / and % perform symmetric
@@ -473,13 +471,9 @@ polygon_fini(struct polygon *polygon)
 }
 
 static bool
-polygon_init(struct polygon *polygon,
-            int num_edges,
-            int ymin,
-            int ymax)
+polygon_init(struct polygon *polygon, int num_edges, int ymin, int ymax)
 {
-       unsigned num_buckets =
-               EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, ymin);
+       unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax-1, ymin) + 1;
 
        if (unlikely(ymax - ymin > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT))
                return false;
@@ -516,6 +510,7 @@ _polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e)
 {
        unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin);
        struct edge **ptail = &polygon->y_buckets[ix];
+       assert(e->ytop < polygon->ymax);
        e->next = *ptail;
        *ptail = e;
 }
@@ -534,7 +529,7 @@ polygon_add_edge(struct polygon *polygon,
        int ymin = polygon->ymin;
        int ymax = polygon->ymax;
 
-       assert (dy > 0);
+       assert(dy > 0);
 
        e->dy = dy;
        e->dir = dir;
@@ -934,10 +929,7 @@ tor_init(struct tor *converter, const BoxRec *box, int num_edges)
               SAMPLES_X, SAMPLES_Y,
               num_edges));
 
-       converter->xmin = box->x1;
-       converter->ymin = box->y1;
-       converter->xmax = box->x2;
-       converter->ymax = box->y2;
+       converter->extents = *box;
 
        if (!cell_list_init(converter->coverages, box->x1, box->x2))
                return false;
@@ -1033,6 +1025,7 @@ tor_blt_span_clipped(struct sna *sna,
 
 static void
 tor_blt(struct sna *sna,
+       struct tor *converter,
        struct sna_composite_spans_op *op,
        pixman_region16_t *clip,
        void (*span)(struct sna *sna,
@@ -1040,18 +1033,18 @@ tor_blt(struct sna *sna,
                     pixman_region16_t *clip,
                     const BoxRec *box,
                     int coverage),
-       struct cell_list *cells,
        int y, int height,
-       int xmin, int xmax,
        int unbounded)
 {
+       struct cell_list *cells = converter->coverages;
        struct cell *cell;
        BoxRec box;
        int cover;
 
-       box.y1 = y;
-       box.y2 = y + height;
-       box.x1 = xmin;
+       box.y1 = y + converter->extents.y1;
+       box.y2 = box.y1 + height;
+       assert(box.y2 <= converter->extents.y2);
+       box.x1 = converter->extents.x1;
 
        /* Form the spans from the coverages and areas. */
        cover = cells->head.covered_height*SAMPLES_X*2;
@@ -1059,8 +1052,8 @@ tor_blt(struct sna *sna,
        for (cell = cells->head.next; cell != &cells->tail; cell = cell->next) {
                int x = cell->x;
 
-               assert(x >= xmin);
-               assert(x < xmax);
+               assert(x >= converter->extents.x1);
+               assert(x < converter->extents.x2);
                __DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__,
                       cell->x, cell->covered_height, cell->uncovered_area,
                       cover, xmax));
@@ -1094,7 +1087,7 @@ tor_blt(struct sna *sna,
                }
        }
 
-       box.x2 = xmax;
+       box.x2 = converter->extents.x2;
        if (box.x2 > box.x1 && (unbounded || cover)) {
                __DBG(("%s: span (%d, %d)x(%d, %d) @ %d\n", __FUNCTION__,
                       box.x1, box.y1,
@@ -1105,28 +1098,6 @@ tor_blt(struct sna *sna,
        }
 }
 
-static void
-tor_blt_empty(struct sna *sna,
-             struct sna_composite_spans_op *op,
-             pixman_region16_t *clip,
-             void (*span)(struct sna *sna,
-                          struct sna_composite_spans_op *op,
-                          pixman_region16_t *clip,
-                          const BoxRec *box,
-                          int coverage),
-             int y, int height,
-             int xmin, int xmax)
-{
-       BoxRec box;
-
-       box.x1 = xmin;
-       box.x2 = xmax;
-       box.y1 = y;
-       box.y2 = y + height;
-
-       span(sna, op, clip, &box, 0);
-}
-
 flatten static void
 tor_render(struct sna *sna,
           struct tor *converter,
@@ -1139,14 +1110,11 @@ tor_render(struct sna *sna,
                        int coverage),
           int unbounded)
 {
-       int ymin = converter->ymin;
-       int xmin = converter->xmin;
-       int xmax = converter->xmax;
-       int i, j, h = converter->ymax - ymin;
        struct polygon *polygon = converter->polygon;
        struct cell_list *coverages = converter->coverages;
        struct active_list *active = converter->active;
        struct edge *buckets[SAMPLES_Y] = { 0 };
+       int16_t i, j, h = converter->extents.y2 - converter->extents.y1;
 
        __DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded));
 
@@ -1158,15 +1126,23 @@ tor_render(struct sna *sna,
 
                /* Determine if we can ignore this row or use the full pixel
                 * stepper. */
-               if (!polygon->y_buckets[i]) {
+               if (polygon->y_buckets[i] == NULL) {
                        if (active->head.next == &active->tail) {
-                               for (; !polygon->y_buckets[j]; j++)
+                               for (; polygon->y_buckets[j] == NULL; j++)
                                        ;
                                __DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
                                       __FUNCTION__, i, j));
 
-                               if (unbounded)
-                                       tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax);
+                               assert(j <= h);
+                               if (unbounded) {
+                                       BoxRec box;
+
+                                       box = converter->extents;
+                                       box.y1 += i;
+                                       box.y2 = converter->extents.y1 + j;
+
+                                       span(sna, op, clip, &box, 0);
+                               }
                                continue;
                        }
 
@@ -1185,6 +1161,7 @@ tor_render(struct sna *sna,
                                do_full_step -= SAMPLES_Y;
                                j++;
                        }
+                       assert(j >= i + 1 && j <= h);
                        if (j != i + 1)
                                step_edges(active, j - (i + 1));
 
@@ -1193,7 +1170,7 @@ tor_render(struct sna *sna,
                } else {
                        int suby;
 
-                       fill_buckets(active, polygon->y_buckets[i], (i+ymin)*SAMPLES_Y, buckets);
+                       fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
 
                        /* Subsample this row. */
                        for (suby = 0; suby < SAMPLES_Y; suby++) {
@@ -1206,9 +1183,8 @@ tor_render(struct sna *sna,
                        }
                }
 
-               tor_blt(sna, op, clip, span, coverages,
-                       i+ymin, j-i, xmin, xmax,
-                       unbounded);
+               assert(j > i);
+               tor_blt(sna, converter, op, clip, span, i, j-i, unbounded);
                cell_list_reset(coverages);
        }
 }
@@ -1478,7 +1454,7 @@ flatten static void
 tor_inplace(struct tor *converter, PixmapPtr scratch)
 {
        uint8_t buf[TOR_INPLACE_SIZE];
-       int i, j, h = converter->ymax - converter->ymin;
+       int i, j, h = converter->extents.y2 - converter->extents.y1;
        struct polygon *polygon = converter->polygon;
        struct active_list *active = converter->active;
        struct edge *buckets[SAMPLES_Y] = { 0 };
@@ -1487,10 +1463,10 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
        int width = scratch->drawable.width;
 
        __DBG(("%s: buf?=%d\n", __FUNCTION__, buf != NULL));
-       assert(converter->xmin == 0);
+       assert(converter->extents.x1 == 0);
        assert(scratch->drawable.depth == 8);
 
-       row += converter->ymin * stride;
+       row += converter->extents.y1 * stride;
 
        /* Render each pixel row. */
        for (i = 0; i < h; i = j) {
@@ -1540,7 +1516,7 @@ tor_inplace(struct tor *converter, PixmapPtr scratch)
                } else {
                        int suby;
 
-                       fill_buckets(active, polygon->y_buckets[i], (i+converter->ymin)*SAMPLES_Y, buckets);
+                       fill_buckets(active, polygon->y_buckets[i], (i+converter->extents.y1)*SAMPLES_Y, buckets);
 
                        /* Subsample this row. */
                        memset(ptr, 0, width);
@@ -1931,6 +1907,7 @@ precise_trapezoid_span_converter(struct sna *sna,
                y = clip.extents.y1;
                h = clip.extents.y2 - clip.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (clip.extents.y2 - clip.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -1940,6 +1917,7 @@ precise_trapezoid_span_converter(struct sna *sna,
                        sna_threads_run(span_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
                span_thread(&threads[0]);
 
@@ -2155,6 +2133,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
                y = extents.y1;
                h = extents.y2 - extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -2164,6 +2143,7 @@ precise_trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
                        sna_threads_run(mask_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
                mask_thread(&threads[0]);
 
@@ -2859,6 +2839,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
                y = region.extents.y1;
                h = region.extents.y2 - region.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -2868,8 +2849,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
                        sna_threads_run(inplace_x8r8g8b8_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
-               threads[0].extents.y2 = region.extents.y2;
                inplace_x8r8g8b8_thread(&threads[0]);
 
                sna_threads_wait();
@@ -3130,6 +3111,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
                y = region.extents.y1;
                h = region.extents.y2 - region.extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (region.extents.y2 - region.extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -3139,8 +3121,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
                        sna_threads_run(inplace_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
-               threads[0].extents.y2 = region.extents.y2;
                inplace_thread(&threads[0]);
 
                sna_threads_wait();
@@ -3273,6 +3255,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
                y = extents.y1;
                h = extents.y2 - extents.y1;
                h = (h + num_threads - 1) / num_threads;
+               num_threads = (extents.y2 - extents.y1 + h - 1) / h;
 
                for (n = 1; n < num_threads; n++) {
                        threads[n] = threads[0];
@@ -3282,6 +3265,7 @@ precise_trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
                        sna_threads_run(mask_thread, &threads[n]);
                }
 
+               assert(y < threads[0].extents.y2);
                threads[0].extents.y1 = y;
                mask_thread(&threads[0]);