take two
[profile/ivi/xorg-x11-drv-intel.git] / src / sna / gen3_render.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <chris@chris-wilson.co.uk>
25  *
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "sna.h"
33 #include "sna_render.h"
34 #include "sna_render_inline.h"
35 #include "sna_reg.h"
36 #include "sna_video.h"
37
38 #include "gen3_render.h"
39
/*
 * Compile-time switches, one per render entry point; all are 0 here.
 * NOTE(review): presumably a non-zero value disables the matching
 * accelerated path for debugging -- the code that tests these macros is
 * outside this view, confirm before relying on it.
 */
#define NO_COMPOSITE		0
#define NO_COMPOSITE_SPANS	0
#define NO_COPY			0
#define NO_COPY_BOXES		0
#define NO_FILL			0
#define NO_FILL_ONE		0
#define NO_FILL_BOXES		0

/* NOTE(review): presumably prefers the BLT engine for fills -- confirm at the use site. */
#define PREFER_BLT_FILL		1
49
/*
 * Shader kind of a composite channel (read from channel->u.gen3.type).
 * Only LINEAR, RADIAL and TEXTURE channels carry per-vertex texture
 * coordinates; see gen3_emit_composite_texcoord().
 */
enum {
	SHADER_NONE	= 0,
	SHADER_ZERO	= 1,
	SHADER_BLACK	= 2,
	SHADER_WHITE	= 3,
	SHADER_CONSTANT	= 4,
	SHADER_LINEAR	= 5,
	SHADER_RADIAL	= 6,
	SHADER_TEXTURE	= 7,
	SHADER_OPACITY	= 8,
};
61
/* Limits checked by too_large() and gen3_check_pitch_3d() respectively. */
#define MAX_3D_SIZE	2048
#define MAX_3D_PITCH	8192

/*
 * Emission shorthands.  Each one expands to a call that takes `sna` as
 * its first argument, so a variable named `sna` must be in scope wherever
 * they are used.
 */
#define OUT_BATCH(v)	batch_emit(sna, v)
#define OUT_BATCH_F(v)	batch_emit_float(sna, v)
#define OUT_VERTEX(v)	vertex_emit(sna, v)
68
/*
 * Selects which of the two radial-gradient formulas gen3_radial_coord()
 * emits (switched on channel->u.gen3.mode).
 */
enum gen3_radial_mode {
	RADIAL_ONE = 0,
	RADIAL_TWO = 1
};
73
74 static const struct blendinfo {
75         bool dst_alpha;
76         bool src_alpha;
77         uint32_t src_blend;
78         uint32_t dst_blend;
79 } gen3_blend_op[] = {
80         /* Clear */     {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
81         /* Src */       {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
82         /* Dst */       {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
83         /* Over */      {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
84         /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
85         /* In */        {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
86         /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
87         /* Out */       {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
88         /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
89         /* Atop */      {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
90         /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
91         /* Xor */       {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
92         /* Add */       {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
93 };
94
/*
 * S6 value used when no blending is wanted: colour writes are enabled and
 * the blend function is ADD with factors (ONE, ZERO), but -- unlike the
 * value built at the end of gen3_get_blend_cntl() -- S6_CBUF_BLEND_ENABLE
 * is not set.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
100
101 static const struct formatinfo {
102         unsigned int fmt, xfmt;
103         uint32_t card_fmt;
104         bool rb_reversed;
105 } gen3_tex_formats[] = {
106         {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
107         {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
108         {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
109         {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
110         {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
111         {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
112         {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
113         {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
114         {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
115         {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
116         {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
117         {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
118         {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
119 };
120
/* Local alias for pixman_fixed_to_double(). */
#define xFixedToDouble(f)	pixman_fixed_to_double(f)
122
123 static inline bool too_large(int width, int height)
124 {
125         return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
126 }
127
128 static inline uint32_t gen3_buf_tiling(uint32_t tiling)
129 {
130         uint32_t v = 0;
131         switch (tiling) {
132         case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
133         case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
134         case I915_TILING_NONE: break;
135         }
136         return v;
137 }
138
139 static inline bool
140 gen3_check_pitch_3d(struct kgem_bo *bo)
141 {
142         return bo->pitch <= MAX_3D_PITCH;
143 }
144
145 static uint32_t gen3_get_blend_cntl(int op,
146                                     bool has_component_alpha,
147                                     uint32_t dst_format)
148 {
149         uint32_t sblend = gen3_blend_op[op].src_blend;
150         uint32_t dblend = gen3_blend_op[op].dst_blend;
151
152         if (op <= PictOpSrc) /* for clear and src disable blending */
153                 return S6_COLOR_WRITE_ONLY;
154
155         /* If there's no dst alpha channel, adjust the blend op so that we'll
156          * treat it as always 1.
157          */
158         if (gen3_blend_op[op].dst_alpha) {
159                 if (PICT_FORMAT_A(dst_format) == 0) {
160                         if (sblend == BLENDFACT_DST_ALPHA)
161                                 sblend = BLENDFACT_ONE;
162                         else if (sblend == BLENDFACT_INV_DST_ALPHA)
163                                 sblend = BLENDFACT_ZERO;
164                 }
165
166                 /* gen3 engine reads 8bit color buffer into green channel
167                  * in cases like color buffer blending etc., and also writes
168                  * back green channel.  So with dst_alpha blend we should use
169                  * color factor. See spec on "8-bit rendering".
170                  */
171                 if (dst_format == PICT_a8) {
172                         if (sblend == BLENDFACT_DST_ALPHA)
173                                 sblend = BLENDFACT_DST_COLR;
174                         else if (sblend == BLENDFACT_INV_DST_ALPHA)
175                                 sblend = BLENDFACT_INV_DST_COLR;
176                 }
177         }
178
179         /* If the source alpha is being used, then we should only be in a case
180          * where the source blend factor is 0, and the source blend value is the
181          * mask channels multiplied by the source picture's alpha.
182          */
183         if (has_component_alpha && gen3_blend_op[op].src_alpha) {
184                 if (dblend == BLENDFACT_SRC_ALPHA)
185                         dblend = BLENDFACT_SRC_COLR;
186                 else if (dblend == BLENDFACT_INV_SRC_ALPHA)
187                         dblend = BLENDFACT_INV_SRC_COLR;
188         }
189
190         return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
191                 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
192                 sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
193                 dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
194 }
195
196 static bool gen3_check_dst_format(uint32_t format)
197 {
198         switch (format) {
199         case PICT_a8r8g8b8:
200         case PICT_x8r8g8b8:
201         case PICT_a8b8g8r8:
202         case PICT_x8b8g8r8:
203         case PICT_r5g6b5:
204         case PICT_b5g6r5:
205         case PICT_a1r5g5b5:
206         case PICT_x1r5g5b5:
207         case PICT_a1b5g5r5:
208         case PICT_x1b5g5r5:
209         case PICT_a2r10g10b10:
210         case PICT_x2r10g10b10:
211         case PICT_a2b10g10r10:
212         case PICT_x2b10g10r10:
213         case PICT_a8:
214         case PICT_a4r4g4b4:
215         case PICT_x4r4g4b4:
216         case PICT_a4b4g4r4:
217         case PICT_x4b4g4r4:
218                 return true;
219         default:
220                 return false;
221         }
222 }
223
224 static bool gen3_dst_rb_reversed(uint32_t format)
225 {
226         switch (format) {
227         case PICT_a8r8g8b8:
228         case PICT_x8r8g8b8:
229         case PICT_r5g6b5:
230         case PICT_a1r5g5b5:
231         case PICT_x1r5g5b5:
232         case PICT_a2r10g10b10:
233         case PICT_x2r10g10b10:
234         case PICT_a8:
235         case PICT_a4r4g4b4:
236         case PICT_x4r4g4b4:
237                 return false;
238         default:
239                 return true;
240         }
241 }
242
/* Bias fields used by gen3_get_dst_format(): horizontal at bit 20, vertical at bit 16. */
#define DSTORG_HORT_BIAS(x)	((x) << 20)
#define DSTORG_VERT_BIAS(x)	((x) << 16)
245
246 static uint32_t gen3_get_dst_format(uint32_t format)
247 {
248 #define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
249         switch (format) {
250         default:
251         case PICT_a8r8g8b8:
252         case PICT_x8r8g8b8:
253         case PICT_a8b8g8r8:
254         case PICT_x8b8g8r8:
255                 return BIAS | COLR_BUF_ARGB8888;
256         case PICT_r5g6b5:
257         case PICT_b5g6r5:
258                 return BIAS | COLR_BUF_RGB565;
259         case PICT_a1r5g5b5:
260         case PICT_x1r5g5b5:
261         case PICT_a1b5g5r5:
262         case PICT_x1b5g5r5:
263                 return BIAS | COLR_BUF_ARGB1555;
264         case PICT_a2r10g10b10:
265         case PICT_x2r10g10b10:
266         case PICT_a2b10g10r10:
267         case PICT_x2b10g10r10:
268                 return BIAS | COLR_BUF_ARGB2AAA;
269         case PICT_a8:
270                 return BIAS | COLR_BUF_8BIT;
271         case PICT_a4r4g4b4:
272         case PICT_x4r4g4b4:
273         case PICT_a4b4g4r4:
274         case PICT_x4b4g4r4:
275                 return BIAS | COLR_BUF_ARGB4444;
276         }
277 #undef BIAS
278 }
279
280 static bool gen3_check_format(PicturePtr p)
281 {
282         switch (p->format) {
283         case PICT_a8:
284         case PICT_a8r8g8b8:
285         case PICT_x8r8g8b8:
286         case PICT_a8b8g8r8:
287         case PICT_x8b8g8r8:
288         case PICT_a2r10g10b10:
289         case PICT_a2b10g10r10:
290         case PICT_r5g6b5:
291         case PICT_b5g6r5:
292         case PICT_a1r5g5b5:
293         case PICT_a1b5g5r5:
294         case PICT_a4r4g4b4:
295         case PICT_a4b4g4r4:
296                 return true;
297         default:
298                 return false;
299         }
300 }
301
302 static bool gen3_check_xformat(PicturePtr p)
303 {
304         switch (p->format) {
305         case PICT_a8r8g8b8:
306         case PICT_x8r8g8b8:
307         case PICT_a8b8g8r8:
308         case PICT_x8b8g8r8:
309         case PICT_r5g6b5:
310         case PICT_b5g6r5:
311         case PICT_a1r5g5b5:
312         case PICT_x1r5g5b5:
313         case PICT_a1b5g5r5:
314         case PICT_x1b5g5r5:
315         case PICT_a2r10g10b10:
316         case PICT_x2r10g10b10:
317         case PICT_a2b10g10r10:
318         case PICT_x2b10g10r10:
319         case PICT_a8:
320         case PICT_a4r4g4b4:
321         case PICT_x4r4g4b4:
322         case PICT_a4b4g4r4:
323         case PICT_x4b4g4r4:
324                 return true;
325         default:
326                 return false;
327         }
328 }
329
330 static uint32_t gen3_texture_repeat(uint32_t repeat)
331 {
332 #define REPEAT(x) \
333         (SS3_NORMALIZED_COORDS | \
334          TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \
335          TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT)
336         switch (repeat) {
337         default:
338         case RepeatNone:
339                 return REPEAT(CLAMP_BORDER);
340         case RepeatNormal:
341                 return REPEAT(WRAP);
342         case RepeatPad:
343                 return REPEAT(CLAMP_EDGE);
344         case RepeatReflect:
345                 return REPEAT(MIRROR);
346         }
347 #undef REPEAT
348 }
349
350 static uint32_t gen3_gradient_repeat(uint32_t repeat)
351 {
352 #define REPEAT(x) \
353         (SS3_NORMALIZED_COORDS | \
354          TEXCOORDMODE_##x  << SS3_TCX_ADDR_MODE_SHIFT | \
355          TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT)
356         switch (repeat) {
357         default:
358         case RepeatNone:
359                 return REPEAT(CLAMP_BORDER);
360         case RepeatNormal:
361                 return REPEAT(WRAP);
362         case RepeatPad:
363                 return REPEAT(CLAMP_EDGE);
364         case RepeatReflect:
365                 return REPEAT(MIRROR);
366         }
367 #undef REPEAT
368 }
369
370 static bool gen3_check_repeat(PicturePtr p)
371 {
372         if (!p->repeat)
373                 return true;
374
375         switch (p->repeatType) {
376         case RepeatNone:
377         case RepeatNormal:
378         case RepeatPad:
379         case RepeatReflect:
380                 return true;
381         default:
382                 return false;
383         }
384 }
385
386 static uint32_t gen3_filter(uint32_t filter)
387 {
388         switch (filter) {
389         default:
390                 assert(0);
391         case PictFilterNearest:
392                 return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
393                         FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
394                         MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
395         case PictFilterBilinear:
396                 return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
397                         FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
398                         MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
399         }
400 }
401
402 static bool gen3_check_filter(PicturePtr p)
403 {
404         switch (p->filter) {
405         case PictFilterNearest:
406         case PictFilterBilinear:
407                 return true;
408         default:
409                 return false;
410         }
411 }
412
/* Emit a destination position as two consecutive vertex values: x, then y. */
static inline void
gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY)
{
	vertex_emit(sna, dstX);
	vertex_emit(sna, dstY);
}
419
420 fastcall static void
421 gen3_emit_composite_primitive_constant(struct sna *sna,
422                                        const struct sna_composite_op *op,
423                                        const struct sna_composite_rectangles *r)
424 {
425         int16_t dst_x = r->dst.x + op->dst.x;
426         int16_t dst_y = r->dst.y + op->dst.y;
427
428         gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
429         gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
430         gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
431 }
432
433 fastcall static void
434 gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
435                                                 const struct sna_composite_op *op,
436                                                 const struct sna_composite_rectangles *r)
437 {
438         int16_t dst_x, dst_y;
439         int16_t src_x, src_y;
440
441         dst_x = r->dst.x + op->dst.x;
442         dst_y = r->dst.y + op->dst.y;
443         src_x = r->src.x + op->src.offset[0];
444         src_y = r->src.y + op->src.offset[1];
445
446         gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
447         OUT_VERTEX(src_x + r->width);
448         OUT_VERTEX(src_y + r->height);
449
450         gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
451         OUT_VERTEX(src_x);
452         OUT_VERTEX(src_y + r->height);
453
454         gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
455         OUT_VERTEX(src_x);
456         OUT_VERTEX(src_y);
457 }
458
459 fastcall static void
460 gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
461                                               const struct sna_composite_op *op,
462                                               const struct sna_composite_rectangles *r)
463 {
464         PictTransform *transform = op->src.transform;
465         int16_t dst_x, dst_y;
466         int16_t src_x, src_y;
467         float sx, sy;
468
469         dst_x = r->dst.x + op->dst.x;
470         dst_y = r->dst.y + op->dst.y;
471         src_x = r->src.x + op->src.offset[0];
472         src_y = r->src.y + op->src.offset[1];
473
474         sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
475                                         transform,
476                                         &sx, &sy);
477         gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
478         OUT_VERTEX(sx);
479         OUT_VERTEX(sy);
480
481         sna_get_transformed_coordinates(src_x, src_y + r->height,
482                                         transform,
483                                         &sx, &sy);
484         gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
485         OUT_VERTEX(sx);
486         OUT_VERTEX(sy);
487
488         sna_get_transformed_coordinates(src_x, src_y,
489                                         transform,
490                                         &sx, &sy);
491         gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
492         OUT_VERTEX(sx);
493         OUT_VERTEX(sy);
494 }
495
496 fastcall static void
497 gen3_emit_composite_primitive_identity_source(struct sna *sna,
498                                               const struct sna_composite_op *op,
499                                               const struct sna_composite_rectangles *r)
500 {
501         float w = r->width;
502         float h = r->height;
503         float *v;
504
505         v = sna->render.vertices + sna->render.vertex_used;
506         sna->render.vertex_used += 12;
507
508         v[8] = v[4] = r->dst.x + op->dst.x;
509         v[0] = v[4] + w;
510
511         v[9] = r->dst.y + op->dst.y;
512         v[5] = v[1] = v[9] + h;
513
514         v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
515         v[2] = v[6] + w * op->src.scale[0];
516
517         v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
518         v[7] = v[3] = v[11] + h * op->src.scale[1];
519 }
520
521 fastcall static void
522 gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
523                                                         const struct sna_composite_op *op,
524                                                         const struct sna_composite_rectangles *r)
525 {
526         float w = r->width;
527         float h = r->height;
528         float *v;
529
530         v = sna->render.vertices + sna->render.vertex_used;
531         sna->render.vertex_used += 12;
532
533         v[8] = v[4] = r->dst.x;
534         v[9] = r->dst.y;
535
536         v[0] = v[4] + w;
537         v[5] = v[1] = v[9] + h;
538
539         v[10] = v[6] = r->src.x * op->src.scale[0];
540         v[11] = r->src.y * op->src.scale[1];
541
542         v[2] = v[6] + w * op->src.scale[0];
543         v[7] = v[3] = v[11] + h * op->src.scale[1];
544 }
545
546 fastcall static void
547 gen3_emit_composite_primitive_affine_source(struct sna *sna,
548                                             const struct sna_composite_op *op,
549                                             const struct sna_composite_rectangles *r)
550 {
551         PictTransform *transform = op->src.transform;
552         int16_t dst_x = r->dst.x + op->dst.x;
553         int16_t dst_y = r->dst.y + op->dst.y;
554         int src_x = r->src.x + (int)op->src.offset[0];
555         int src_y = r->src.y + (int)op->src.offset[1];
556         float sx, sy;
557
558         _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
559                                          transform,
560                                          &sx, &sy);
561
562         gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
563         OUT_VERTEX(sx * op->src.scale[0]);
564         OUT_VERTEX(sy * op->src.scale[1]);
565
566         _sna_get_transformed_coordinates(src_x, src_y + r->height,
567                                          transform,
568                                          &sx, &sy);
569         gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
570         OUT_VERTEX(sx * op->src.scale[0]);
571         OUT_VERTEX(sy * op->src.scale[1]);
572
573         _sna_get_transformed_coordinates(src_x, src_y,
574                                          transform,
575                                          &sx, &sy);
576         gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
577         OUT_VERTEX(sx * op->src.scale[0]);
578         OUT_VERTEX(sy * op->src.scale[1]);
579 }
580
581 fastcall static void
582 gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
583                                                      const struct sna_composite_op *op,
584                                                      const struct sna_composite_rectangles *r)
585 {
586         float w = r->width;
587         float h = r->height;
588         float *v;
589
590         v = sna->render.vertices + sna->render.vertex_used;
591         sna->render.vertex_used += 12;
592
593         v[8] = v[4] = r->dst.x + op->dst.x;
594         v[0] = v[4] + w;
595
596         v[9] = r->dst.y + op->dst.y;
597         v[5] = v[1] = v[9] + h;
598
599         v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
600         v[2] = v[6] + w * op->mask.scale[0];
601
602         v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
603         v[7] = v[3] = v[11] + h * op->mask.scale[1];
604 }
605
606 fastcall static void
607 gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
608                                                                const struct sna_composite_op *op,
609                                                                const struct sna_composite_rectangles *r)
610 {
611         float w = r->width;
612         float h = r->height;
613         float *v;
614
615         v = sna->render.vertices + sna->render.vertex_used;
616         sna->render.vertex_used += 12;
617
618         v[8] = v[4] = r->dst.x;
619         v[9] = r->dst.y;
620
621         v[0] = v[4] + w;
622         v[5] = v[1] = v[9] + h;
623
624         v[10] = v[6] = r->mask.x * op->mask.scale[0];
625         v[11] = r->mask.y * op->mask.scale[1];
626
627         v[2] = v[6] + w * op->mask.scale[0];
628         v[7] = v[3] = v[11] + h * op->mask.scale[1];
629 }
630
631 fastcall static void
632 gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
633                                                    const struct sna_composite_op *op,
634                                                    const struct sna_composite_rectangles *r)
635 {
636         float dst_x, dst_y;
637         float src_x, src_y;
638         float msk_x, msk_y;
639         float w, h;
640         float *v;
641
642         dst_x = r->dst.x + op->dst.x;
643         dst_y = r->dst.y + op->dst.y;
644         src_x = r->src.x + op->src.offset[0];
645         src_y = r->src.y + op->src.offset[1];
646         msk_x = r->mask.x + op->mask.offset[0];
647         msk_y = r->mask.y + op->mask.offset[1];
648         w = r->width;
649         h = r->height;
650
651         v = sna->render.vertices + sna->render.vertex_used;
652         sna->render.vertex_used += 18;
653
654         v[0] = dst_x + w;
655         v[1] = dst_y + h;
656         v[2] = (src_x + w) * op->src.scale[0];
657         v[3] = (src_y + h) * op->src.scale[1];
658         v[4] = (msk_x + w) * op->mask.scale[0];
659         v[5] = (msk_y + h) * op->mask.scale[1];
660
661         v[6] = dst_x;
662         v[7] = v[1];
663         v[8] = src_x * op->src.scale[0];
664         v[9] = v[3];
665         v[10] = msk_x * op->mask.scale[0];
666         v[11] =v[5];
667
668         v[12] = v[6];
669         v[13] = dst_y;
670         v[14] = v[8];
671         v[15] = src_y * op->src.scale[1];
672         v[16] = v[10];
673         v[17] = msk_y * op->mask.scale[1];
674 }
675
676 fastcall static void
677 gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
678                                                  const struct sna_composite_op *op,
679                                                  const struct sna_composite_rectangles *r)
680 {
681         int16_t src_x, src_y;
682         float dst_x, dst_y;
683         float msk_x, msk_y;
684         float w, h;
685         float *v;
686
687         dst_x = r->dst.x + op->dst.x;
688         dst_y = r->dst.y + op->dst.y;
689         src_x = r->src.x + op->src.offset[0];
690         src_y = r->src.y + op->src.offset[1];
691         msk_x = r->mask.x + op->mask.offset[0];
692         msk_y = r->mask.y + op->mask.offset[1];
693         w = r->width;
694         h = r->height;
695
696         v = sna->render.vertices + sna->render.vertex_used;
697         sna->render.vertex_used += 18;
698
699         v[0] = dst_x + w;
700         v[1] = dst_y + h;
701         sna_get_transformed_coordinates(src_x + r->width, src_y + r->height,
702                                         op->src.transform,
703                                         &v[2], &v[3]);
704         v[2] *= op->src.scale[0];
705         v[3] *= op->src.scale[1];
706         v[4] = (msk_x + w) * op->mask.scale[0];
707         v[5] = (msk_y + h) * op->mask.scale[1];
708
709         v[6] = dst_x;
710         v[7] = v[1];
711         sna_get_transformed_coordinates(src_x, src_y + r->height,
712                                         op->src.transform,
713                                         &v[8], &v[9]);
714         v[8] *= op->src.scale[0];
715         v[9] *= op->src.scale[1];
716         v[10] = msk_x * op->mask.scale[0];
717         v[11] =v[5];
718
719         v[12] = v[6];
720         v[13] = dst_y;
721         sna_get_transformed_coordinates(src_x, src_y,
722                                         op->src.transform,
723                                         &v[14], &v[15]);
724         v[14] *= op->src.scale[0];
725         v[15] *= op->src.scale[1];
726         v[16] = v[10];
727         v[17] = msk_y * op->mask.scale[1];
728 }
729
730 static void
731 gen3_emit_composite_texcoord(struct sna *sna,
732                              const struct sna_composite_channel *channel,
733                              int16_t x, int16_t y)
734 {
735         float s = 0, t = 0, w = 1;
736
737         switch (channel->u.gen3.type) {
738         case SHADER_OPACITY:
739         case SHADER_NONE:
740         case SHADER_ZERO:
741         case SHADER_BLACK:
742         case SHADER_WHITE:
743         case SHADER_CONSTANT:
744                 break;
745
746         case SHADER_LINEAR:
747         case SHADER_RADIAL:
748         case SHADER_TEXTURE:
749                 x += channel->offset[0];
750                 y += channel->offset[1];
751                 if (channel->is_affine) {
752                         sna_get_transformed_coordinates(x, y,
753                                                         channel->transform,
754                                                         &s, &t);
755                         OUT_VERTEX(s * channel->scale[0]);
756                         OUT_VERTEX(t * channel->scale[1]);
757                 } else {
758                         sna_get_transformed_coordinates_3d(x, y,
759                                                            channel->transform,
760                                                            &s, &t, &w);
761                         OUT_VERTEX(s * channel->scale[0]);
762                         OUT_VERTEX(t * channel->scale[1]);
763                         OUT_VERTEX(0);
764                         OUT_VERTEX(w);
765                 }
766                 break;
767         }
768 }
769
770 static void
771 gen3_emit_composite_vertex(struct sna *sna,
772                            const struct sna_composite_op *op,
773                            int16_t srcX, int16_t srcY,
774                            int16_t maskX, int16_t maskY,
775                            int16_t dstX, int16_t dstY)
776 {
777         gen3_emit_composite_dstcoord(sna, dstX, dstY);
778         gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY);
779         gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY);
780 }
781
782 fastcall static void
783 gen3_emit_composite_primitive(struct sna *sna,
784                               const struct sna_composite_op *op,
785                               const struct sna_composite_rectangles *r)
786 {
787         gen3_emit_composite_vertex(sna, op,
788                                    r->src.x + r->width,
789                                    r->src.y + r->height,
790                                    r->mask.x + r->width,
791                                    r->mask.y + r->height,
792                                    op->dst.x + r->dst.x + r->width,
793                                    op->dst.y + r->dst.y + r->height);
794         gen3_emit_composite_vertex(sna, op,
795                                    r->src.x,
796                                    r->src.y + r->height,
797                                    r->mask.x,
798                                    r->mask.y + r->height,
799                                    op->dst.x + r->dst.x,
800                                    op->dst.y + r->dst.y + r->height);
801         gen3_emit_composite_vertex(sna, op,
802                                    r->src.x,
803                                    r->src.y,
804                                    r->mask.x,
805                                    r->mask.y,
806                                    op->dst.x + r->dst.x,
807                                    op->dst.y + r->dst.y);
808 }
809
810 static inline void
811 gen3_2d_perspective(struct sna *sna, int in, int out)
812 {
813         gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
814         gen3_fs_mul(out,
815                     gen3_fs_operand(in, X, Y, ZERO, ONE),
816                     gen3_fs_operand_reg(out));
817 }
818
819 static inline void
820 gen3_linear_coord(struct sna *sna,
821                   const struct sna_composite_channel *channel,
822                   int in, int out)
823 {
824         int c = channel->u.gen3.constants;
825
826         if (!channel->is_affine) {
827                 gen3_2d_perspective(sna, in, FS_U0);
828                 in = FS_U0;
829         }
830
831         gen3_fs_mov(out, gen3_fs_operand_zero());
832         gen3_fs_dp3(out, MASK_X,
833                     gen3_fs_operand(in, X, Y, ONE, ZERO),
834                     gen3_fs_operand_reg(c));
835 }
836
/*
 * Emit the fragment-shader instructions that compute the radial-gradient
 * parameter t from the coordinate in register 'in', leaving the result in
 * register 'out' for the caller's texture lookup.
 *
 * For a non-affine channel the coordinate is first perspective-divided into
 * FS_U0.  The formula used depends on channel->u.gen3.mode (RADIAL_ONE or
 * RADIAL_TWO); any other mode emits no further instructions.  The gradient
 * parameters are read from constant register 'c' (and 'c+1' for RADIAL_TWO).
 *
 * FS_U0 is used as scratch in both modes and FS_U1 additionally in
 * RADIAL_TWO, so their previous contents are overwritten.
 *
 * NOTE(review): the packing of the constants (which component holds the
 * centre, radius, scale, ...) is set up elsewhere; the per-instruction
 * comments below describe the intended layout -- confirm against the code
 * that fills the constants.
 */
837 static void
838 gen3_radial_coord(struct sna *sna,
839                   const struct sna_composite_channel *channel,
840                   int in, int out)
841 {
842         int c = channel->u.gen3.constants;
843
844         if (!channel->is_affine) {
                    /* Projective transform: divide through by w first. */
845                 gen3_2d_perspective(sna, in, FS_U0);
846                 in = FS_U0;
847         }
848
849         switch (channel->u.gen3.mode) {
850         case RADIAL_ONE:
851                 /*
852                    pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
853                    r² = pdx*pdx + pdy*pdy
854                    t = r²/sqrt(r²) - r1/dr;
855                    */
                    /* u0.xy = in.xy * c.z - c.xy */
856                 gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
857                             gen3_fs_operand(in, X, Y, ZERO, ZERO),
858                             gen3_fs_operand(c, Z, Z, ZERO, ZERO),
859                             gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
                    /* u0.x = u0.x*u0.x + u0.y*u0.y (r²) */
860                 gen3_fs_dp2add(FS_U0, MASK_X,
861                                gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
862                                gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
863                                gen3_fs_operand_zero());
                    /* out.x = 1/sqrt(r²); out = r² * out.x + c.w */
864                 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
865                 gen3_fs_mad(out, 0,
866                             gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
867                             gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
868                             gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
869                 break;
870
871         case RADIAL_TWO:
872                 /*
873                    pdx = x - c1x, pdy = y - c1y;
874                    A = dx² + dy² - dr²
875                    B = -2*(pdx*dx + pdy*dy + r1*dr);
876                    C = pdx² + pdy² - r1²;
877                    det = B*B - 4*A*C;
878                    t = (-B + sqrt (det)) / (2 * A)
879                    */
880
881                 /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
882                 gen3_fs_add(FS_U0,
883                             gen3_fs_operand(in, X, Y, ZERO, ZERO),
884                             gen3_fs_operand(c, X, Y, Z, ZERO));
885                 /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
886                 gen3_fs_dp3(FS_U0, MASK_W,
887                             gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
888                             gen3_fs_operand(c+1, X, Y, Z, ZERO));
889                 /* u1.x = pdx² + pdy² - r1²; [C] */
890                 gen3_fs_dp3(FS_U1, MASK_X,
891                             gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
892                             gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
893                 /* u1.x = C, u1.y = B, u1.z=-4*A; */
894                 gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
895                 gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
896                 /* u1.x = B² - 4*A*C */
897                 gen3_fs_dp2add(FS_U1, MASK_X,
898                                gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
899                                gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
900                                gen3_fs_operand_zero());
901                 /* out.x = -B + sqrt (B² - 4*A*C), */
902                 gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
903                 gen3_fs_mad(out, MASK_X,
904                             gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
905                             gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
906                             gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
907                 /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
                    /* c+1.w is used as the multiplier here, presumably a precomputed 1/(2*A). */
908                 gen3_fs_mul(out,
909                             gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
910                             gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
911                 break;
912         }
913 }
914
/*
 * Emit the pixel-shader program for a composite operation into the batch.
 *
 * The program is selected by the source and mask channel types and flags
 * (affine, alpha_fixup, rb_reversed), whether the destination format has no
 * RGB bits (alpha-only), whether the destination is rb_reversed, and -- when
 * a mask is present -- component alpha and whether the blend op uses the
 * source alpha.  These are packed into an id; if it equals
 * state->last_shader nothing is emitted.
 *
 * Otherwise one dword is reserved for the command header, the declarations
 * and instructions are emitted, and the header is written at 'done' with the
 * final program length.
 *
 * A mask of type SHADER_NONE is treated as no mask.  When the source is
 * opaque, the mask has component alpha and the blend op uses the source
 * alpha, the mask takes the place of the source and the program is built as
 * if there were no mask.
 *
 * NOTE(review): src_reg and mask_reg have no initialiser and are only
 * assigned for some channel types; see the notes at their uses below.
 */
915 static void
916 gen3_composite_emit_shader(struct sna *sna,
917                            const struct sna_composite_op *op,
918                            uint8_t blend)
919 {
920         bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
921         const struct sna_composite_channel *src, *mask;
922         struct gen3_render_state *state = &sna->render_state.gen3;
923         uint32_t shader_offset, id;
924         int src_reg, mask_reg;
925         int t, length;
926
927         src = &op->src;
928         mask = &op->mask;
929         if (mask->u.gen3.type == SHADER_NONE)
930                 mask = NULL;
931
            /* Opaque source under a CA mask with a src-alpha blend: only the mask matters. */
932         if (mask && src->is_opaque &&
933             gen3_blend_op[blend].src_alpha &&
934             op->has_component_alpha) {
935                 src = mask;
936                 mask = NULL;
937         }
938
            /* Pack everything that influences the generated code into a cache key. */
939         id = (src->u.gen3.type |
940               src->is_affine << 4 |
941               src->alpha_fixup << 5 |
942               src->rb_reversed << 6);
943         if (mask) {
944                 id |= (mask->u.gen3.type << 8 |
945                        mask->is_affine << 12 |
946                        gen3_blend_op[blend].src_alpha << 13 |
947                        op->has_component_alpha << 14 |
948                        mask->alpha_fixup << 15 |
949                        mask->rb_reversed << 16);
950         }
951         id |= dst_is_alpha << 24;
952         id |= op->rb_reversed << 25;
953
            /* The identical program is already the most recently emitted one. */
954         if (id == state->last_shader)
955                 return;
956
957         state->last_shader = id;
958
            /* Reserve a dword for the command header; it is filled in at 'done'. */
959         shader_offset = sna->kgem.nbatch++;
            /* Declare the source inputs; t counts the sampler/texcoord pair taken by the source. */
960         t = 0;
961         switch (src->u.gen3.type) {
962         case SHADER_NONE:
963         case SHADER_OPACITY:
964                 assert(0);
                    /* If assert() is compiled out, control falls through to the break below. */
965         case SHADER_ZERO:
966         case SHADER_BLACK:
967         case SHADER_WHITE:
968                 break;
969         case SHADER_CONSTANT:
970                 gen3_fs_dcl(FS_T8);
971                 src_reg = FS_T8;
972                 break;
973         case SHADER_TEXTURE:
974         case SHADER_RADIAL:
975         case SHADER_LINEAR:
976                 gen3_fs_dcl(FS_S0);
977                 gen3_fs_dcl(FS_T0);
978                 t++;
979                 break;
980         }
981
982         if (mask == NULL) {
                    /* Solid sources need a single move into the output colour. */
983                 switch (src->u.gen3.type) {
984                 case SHADER_ZERO:
985                         gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
986                         goto done;
987                 case SHADER_BLACK:
988                         if (dst_is_alpha)
989                                 gen3_fs_mov(FS_OC, gen3_fs_operand_one());
990                         else
991                                 gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
992                         goto done;
993                 case SHADER_WHITE:
994                         gen3_fs_mov(FS_OC, gen3_fs_operand_one());
995                         goto done;
996                 }
                    /* Alpha forced to one and only alpha is written: the result is constant one. */
997                 if (src->alpha_fixup && dst_is_alpha) {
998                         gen3_fs_mov(FS_OC, gen3_fs_operand_one());
999                         goto done;
1000                 }
1001                 /* No mask, so load directly to output color */
1002                 if (src->u.gen3.type != SHADER_CONSTANT) {
                             /* A swizzle is still required, so sample into a temporary first. */
1003                         if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
1004                                 src_reg = FS_R0;
1005                         else
1006                                 src_reg = FS_OC;
1007                 }
1008                 switch (src->u.gen3.type) {
1009                 case SHADER_LINEAR:
1010                         gen3_linear_coord(sna, src, FS_T0, FS_R0);
1011                         gen3_fs_texld(src_reg, FS_S0, FS_R0);
1012                         break;
1013
1014                 case SHADER_RADIAL:
1015                         gen3_radial_coord(sna, src, FS_T0, FS_R0);
1016                         gen3_fs_texld(src_reg, FS_S0, FS_R0);
1017                         break;
1018
1019                 case SHADER_TEXTURE:
1020                         if (src->is_affine)
1021                                 gen3_fs_texld(src_reg, FS_S0, FS_T0);
1022                         else
1023                                 gen3_fs_texldp(src_reg, FS_S0, FS_T0);
1024                         break;
1025
1026                 case SHADER_NONE:
1027                 case SHADER_WHITE:
1028                 case SHADER_BLACK:
1029                 case SHADER_ZERO:
1030                         assert(0);
1031                 case SHADER_CONSTANT:
1032                         break;
1033                 }
1034
                     /* Copy (with any swizzle or alpha fix-up) from the temporary to the output. */
1035                 if (src_reg != FS_OC) {
1036                         if (src->alpha_fixup)
1037                                 gen3_fs_mov(FS_OC,
1038                                             src->rb_reversed ^ op->rb_reversed ?
1039                                             gen3_fs_operand(src_reg, Z, Y, X, ONE) :
1040                                             gen3_fs_operand(src_reg, X, Y, Z, ONE));
1041                         else if (dst_is_alpha)
1042                                 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
1043                         else if (src->rb_reversed ^ op->rb_reversed)
1044                                 gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
1045                         else
1046                                 gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
1047                 } else if (src->alpha_fixup)
1048                         gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
1049         } else {
                     /*
                      * For an rb_reversed destination the result is built in FS_U0
                      * and swizzled into FS_OC at mask_done.
                      */
1050                 int out_reg = FS_OC;
1051                 if (op->rb_reversed)
1052                         out_reg = FS_U0;
1053
                     /* Declare the mask inputs, using the slot after the source's (index t). */
1054                 switch (mask->u.gen3.type) {
1055                 case SHADER_CONSTANT:
1056                         gen3_fs_dcl(FS_T9);
1057                         mask_reg = FS_T9;
1058                         break;
1059                 case SHADER_TEXTURE:
1060                 case SHADER_LINEAR:
1061                 case SHADER_RADIAL:
1062                         gen3_fs_dcl(FS_S0 + t);
1063                         /* fall through */
1064                 case SHADER_OPACITY:
1065                         gen3_fs_dcl(FS_T0 + t);
1066                         break;
1067                 case SHADER_ZERO:
1068                 case SHADER_BLACK:
1069                         assert(0);
1070                 case SHADER_NONE:
1071                 case SHADER_WHITE:
1072                         break;
1073                 }
1074
                     /* Load the source into FS_R0; t is recounted for the mask's slot index. */
1075                 t = 0;
1076                 switch (src->u.gen3.type) {
1077                 case SHADER_LINEAR:
1078                         gen3_linear_coord(sna, src, FS_T0, FS_R0);
1079                         gen3_fs_texld(FS_R0, FS_S0, FS_R0);
1080                         src_reg = FS_R0;
1081                         t++;
1082                         break;
1083
1084                 case SHADER_RADIAL:
1085                         gen3_radial_coord(sna, src, FS_T0, FS_R0);
1086                         gen3_fs_texld(FS_R0, FS_S0, FS_R0);
1087                         src_reg = FS_R0;
1088                         t++;
1089                         break;
1090
1091                 case SHADER_TEXTURE:
1092                         if (src->is_affine)
1093                                 gen3_fs_texld(FS_R0, FS_S0, FS_T0);
1094                         else
1095                                 gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
1096                         src_reg = FS_R0;
1097                         t++;
1098                         break;
1099
1100                 case SHADER_CONSTANT:
1101                 case SHADER_NONE:
1102                 case SHADER_ZERO:
1103                 case SHADER_BLACK:
1104                 case SHADER_WHITE:
1105                         break;
1106                 }
                     /*
                      * NOTE(review): src_reg is assigned only for CONSTANT, TEXTURE,
                      * LINEAR and RADIAL sources.  ZERO/BLACK/WHITE sources leave it
                      * unset; BLACK and WHITE are special-cased below, but a
                      * SHADER_ZERO source would reach the default cases and read it.
                      * Presumably callers never pair SHADER_ZERO with a mask, nor set
                      * alpha_fixup/rb_reversed on solid sources -- confirm.
                      */
1107                 if (src->alpha_fixup)
1108                         gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
1109                 if (src->rb_reversed)
1110                         gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
1111
                     /* Load the mask into FS_R1, or combine an opacity coordinate directly. */
1112                 switch (mask->u.gen3.type) {
1113                 case SHADER_LINEAR:
1114                         gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
1115                         gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
1116                         mask_reg = FS_R1;
1117                         break;
1118
1119                 case SHADER_RADIAL:
1120                         gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
1121                         gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
1122                         mask_reg = FS_R1;
1123                         break;
1124
1125                 case SHADER_TEXTURE:
1126                         if (mask->is_affine)
1127                                 gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
1128                         else
1129                                 gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
1130                         mask_reg = FS_R1;
1131                         break;
1132
1133                 case SHADER_OPACITY:
                             /* The opacity is the X channel of texcoord FS_T0 + t. */
1134                         switch (src->u.gen3.type) {
1135                         case SHADER_BLACK:
1136                         case SHADER_WHITE:
1137                                 if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
1138                                         gen3_fs_mov(out_reg,
1139                                                     gen3_fs_operand(FS_T0 + t, X, X, X, X));
1140                                 } else {
1141                                         gen3_fs_mov(out_reg,
1142                                                     gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
1143                                 }
1144                                 break;
1145                         default:
1146                                 if (dst_is_alpha) {
1147                                         gen3_fs_mul(out_reg,
1148                                                     gen3_fs_operand(src_reg, W, W, W, W),
1149                                                     gen3_fs_operand(FS_T0 + t, X, X, X, X));
1150                                 } else {
1151                                         gen3_fs_mul(out_reg,
1152                                                     gen3_fs_operand(src_reg, X, Y, Z, W),
1153                                                     gen3_fs_operand(FS_T0 + t, X, X, X, X));
1154                                 }
1155                         }
1156                         goto mask_done;
1157
1158                 case SHADER_CONSTANT:
1159                 case SHADER_ZERO:
1160                 case SHADER_BLACK:
1161                 case SHADER_WHITE:
1162                 case SHADER_NONE:
1163                         break;
1164                 }
                     /*
                      * NOTE(review): mask_reg is assigned only for CONSTANT, TEXTURE,
                      * LINEAR and RADIAL masks.  A SHADER_WHITE mask reaches the uses
                      * below with it unset; presumably such a mask is never passed
                      * here -- confirm.
                      */
1165                 if (mask->alpha_fixup)
1166                         gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
1167                 if (mask->rb_reversed)
1168                         gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
1169
1170                 if (dst_is_alpha) {
                             /* Alpha-only destination: only the alpha channels are combined. */
1171                         switch (src->u.gen3.type) {
1172                         case SHADER_BLACK:
1173                         case SHADER_WHITE:
1174                                 gen3_fs_mov(out_reg,
1175                                             gen3_fs_operand(mask_reg, W, W, W, W));
1176                                 break;
1177                         default:
1178                                 gen3_fs_mul(out_reg,
1179                                             gen3_fs_operand(src_reg, W, W, W, W),
1180                                             gen3_fs_operand(mask_reg, W, W, W, W));
1181                                 break;
1182                         }
1183                 } else {
1184                         /* If component alpha is active in the mask and the blend
1185                          * operation uses the source alpha, then we know we don't
1186                          * need the source value (otherwise we would have hit a
1187                          * fallback earlier), so we provide the source alpha (src.A *
1188                          * mask.X) as output color.
1189                          * Conversely, if CA is set and we don't need the source alpha,
1190                          * then we produce the source value (src.X * mask.X) and the
1191                          * source alpha is unused.  Otherwise, we provide the non-CA
1192                          * source value (src.X * mask.A).
1193                          */
1194                         if (op->has_component_alpha) {
1195                                 switch (src->u.gen3.type) {
1196                                 case SHADER_BLACK:
1197                                         if (gen3_blend_op[blend].src_alpha)
1198                                                 gen3_fs_mov(out_reg,
1199                                                             gen3_fs_operand_reg(mask_reg));
1200                                         else
1201                                                 gen3_fs_mov(out_reg,
1202                                                             gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
1203                                         break;
1204                                 case SHADER_WHITE:
1205                                         gen3_fs_mov(out_reg,
1206                                                     gen3_fs_operand_reg(mask_reg));
1207                                         break;
1208                                 default:
1209                                         if (gen3_blend_op[blend].src_alpha)
1210                                                 gen3_fs_mul(out_reg,
1211                                                             gen3_fs_operand(src_reg, W, W, W, W),
1212                                                             gen3_fs_operand_reg(mask_reg));
1213                                         else
1214                                                 gen3_fs_mul(out_reg,
1215                                                             gen3_fs_operand_reg(src_reg),
1216                                                             gen3_fs_operand_reg(mask_reg));
1217                                         break;
1218                                 }
1219                         } else {
1220                                 switch (src->u.gen3.type) {
1221                                 case SHADER_WHITE:
1222                                         gen3_fs_mov(out_reg,
1223                                                     gen3_fs_operand(mask_reg, W, W, W, W));
1224                                         break;
1225                                 case SHADER_BLACK:
1226                                         gen3_fs_mov(out_reg,
1227                                                     gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
1228                                         break;
1229                                 default:
1230                                         gen3_fs_mul(out_reg,
1231                                                     gen3_fs_operand_reg(src_reg),
1232                                                     gen3_fs_operand(mask_reg, W, W, W, W));
1233                                         break;
1234                                 }
1235                         }
1236                 }
1237 mask_done:
                     /* Swap red and blue while moving the FS_U0 result to the output colour. */
1238                 if (op->rb_reversed)
1239                         gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
1240         }
1241
1242 done:
             /* Patch the reserved header dword now that the program length is known. */
1243         length = sna->kgem.nbatch - shader_offset;
1244         sna->kgem.batch[shader_offset] =
1245                 _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
1246 }
1247
1248 static uint32_t gen3_ms_tiling(uint32_t tiling)
1249 {
1250         uint32_t v = 0;
1251         switch (tiling) {
1252         case I915_TILING_Y: v |= MS3_TILE_WALK;
1253         case I915_TILING_X: v |= MS3_TILED_SURFACE;
1254         case I915_TILING_NONE: break;
1255         }
1256         return v;
1257 }
1258
/*
 * Emit the batch commands for the state this backend treats as invariant:
 * independent alpha blend (func ADD, src ONE, dst ZERO), the identity mapping
 * of the eight texture coordinate set bindings, immediate state S3..S6,
 * scissor and depth-subrect disabled, and zeroed LOAD_INDIRECT and STIPPLE
 * commands.  Clears render_state.gen3.need_invariant afterwards so the caller
 * does not emit it again until the flag is set elsewhere.
 */
1259 static void gen3_emit_invariant(struct sna *sna)
1260 {
1261         /* Disable independent alpha blend */
1262         OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
1263                   IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
1264                   IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
1265                   IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
1266
             /* Bind texture coordinate set N to N for all eight sets. */
1267         OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
1268                   CSB_TCB(0, 0) |
1269                   CSB_TCB(1, 1) |
1270                   CSB_TCB(2, 2) |
1271                   CSB_TCB(3, 3) |
1272                   CSB_TCB(4, 4) |
1273                   CSB_TCB(5, 5) |
1274                   CSB_TCB(6, 6) |
1275                   CSB_TCB(7, 7));
1276
             /* Load S3, S4, S5 and S6; the four dwords that follow are their values. */
1277         OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
1278         OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
1279         OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
1280                   S4_LINE_WIDTH_ONE |
1281                   S4_CULLMODE_NONE |
1282                   S4_VFMT_XY);
1283         OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
1284         OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
1285
1286         OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
1287         OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
1288
1289         OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
1290         OUT_BATCH(0x00000000);
1291
1292         OUT_BATCH(_3DSTATE_STIPPLE);
1293         OUT_BATCH(0x00000000);
1294
1295         sna->render_state.gen3.need_invariant = false;
1296 }
1297
1298 #define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
1299
1300 static void
1301 gen3_get_batch(struct sna *sna)
1302 {
1303         kgem_set_mode(&sna->kgem, KGEM_RENDER);
1304
1305         if (!kgem_check_batch(&sna->kgem, 200)) {
1306                 DBG(("%s: flushing batch: size %d > %d\n",
1307                      __FUNCTION__, 200,
1308                      sna->kgem.surface-sna->kgem.nbatch));
1309                 kgem_submit(&sna->kgem);
1310                 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1311         }
1312
1313         if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
1314                 DBG(("%s: flushing batch: reloc %d >= %d\n",
1315                      __FUNCTION__,
1316                      sna->kgem.nreloc,
1317                      (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
1318                 kgem_submit(&sna->kgem);
1319                 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1320         }
1321
1322         if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
1323                 DBG(("%s: flushing batch: exec %d >= %d\n",
1324                      __FUNCTION__,
1325                      sna->kgem.nexec,
1326                      (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
1327                 kgem_submit(&sna->kgem);
1328                 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
1329         }
1330
1331         if (sna->render_state.gen3.need_invariant)
1332                 gen3_emit_invariant(sna);
1333 #undef MAX_OBJECTS
1334 }
1335
1336 static void gen3_emit_target(struct sna *sna,
1337                              struct kgem_bo *bo,
1338                              int width,
1339                              int height,
1340                              int format)
1341 {
1342         struct gen3_render_state *state = &sna->render_state.gen3;
1343
1344         assert(!too_large(width, height));
1345
1346         /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
1347         assert(bo->unique_id != 0);
1348         if (bo->unique_id != state->current_dst) {
1349                 uint32_t v;
1350
1351                 DBG(("%s: setting new target id=%d, handle=%d\n",
1352                      __FUNCTION__, bo->unique_id, bo->handle));
1353
1354                 OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
1355                 OUT_BATCH(BUF_3D_ID_COLOR_BACK |
1356                           gen3_buf_tiling(bo->tiling) |
1357                           bo->pitch);
1358                 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
1359                                          bo,
1360                                          I915_GEM_DOMAIN_RENDER << 16 |
1361                                          I915_GEM_DOMAIN_RENDER,
1362                                          0));
1363
1364                 OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
1365                 OUT_BATCH(gen3_get_dst_format(format));
1366
1367                 v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
1368                 if (v != state->last_drawrect_limit) {
1369                         OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
1370                         OUT_BATCH(0); /* XXX dither origin? */
1371                         OUT_BATCH(0);
1372                         OUT_BATCH(v);
1373                         OUT_BATCH(0);
1374                         state->last_drawrect_limit = v;
1375                 }
1376
1377                 state->current_dst = bo->unique_id;
1378         }
1379         kgem_bo_mark_dirty(bo);
1380 }
1381
1382 static void gen3_emit_composite_state(struct sna *sna,
1383                                       const struct sna_composite_op *op)
1384 {
1385         struct gen3_render_state *state = &sna->render_state.gen3;
1386         uint32_t map[4];
1387         uint32_t sampler[4];
1388         struct kgem_bo *bo[2];
1389         unsigned int tex_count, n;
1390         uint32_t ss2;
1391
1392         gen3_get_batch(sna);
1393
1394         if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
1395                 if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
1396                         OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
1397                 else
1398                         OUT_BATCH(_3DSTATE_MODES_5_CMD |
1399                                   PIPELINE_FLUSH_RENDER_CACHE |
1400                                   PIPELINE_FLUSH_TEXTURE_CACHE);
1401                 kgem_clear_dirty(&sna->kgem);
1402         }
1403
1404         gen3_emit_target(sna,
1405                          op->dst.bo,
1406                          op->dst.width,
1407                          op->dst.height,
1408                          op->dst.format);
1409
1410         ss2 = ~0;
1411         tex_count = 0;
1412         switch (op->src.u.gen3.type) {
1413         case SHADER_OPACITY:
1414         case SHADER_NONE:
1415                 assert(0);
1416         case SHADER_ZERO:
1417         case SHADER_BLACK:
1418         case SHADER_WHITE:
1419                 break;
1420         case SHADER_CONSTANT:
1421                 if (op->src.u.gen3.mode != state->last_diffuse) {
1422                         OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
1423                         OUT_BATCH(op->src.u.gen3.mode);
1424                         state->last_diffuse = op->src.u.gen3.mode;
1425                 }
1426                 break;
1427         case SHADER_LINEAR:
1428         case SHADER_RADIAL:
1429         case SHADER_TEXTURE:
1430                 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1431                 ss2 |= S2_TEXCOORD_FMT(tex_count,
1432                                        op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1433                 map[tex_count * 2 + 0] =
1434                         op->src.card_format |
1435                         gen3_ms_tiling(op->src.bo->tiling) |
1436                         (op->src.height - 1) << MS3_HEIGHT_SHIFT |
1437                         (op->src.width - 1) << MS3_WIDTH_SHIFT;
1438                 map[tex_count * 2 + 1] =
1439                         (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1440
1441                 sampler[tex_count * 2 + 0] = op->src.filter;
1442                 sampler[tex_count * 2 + 1] =
1443                         op->src.repeat |
1444                         tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1445                 bo[tex_count] = op->src.bo;
1446                 tex_count++;
1447                 break;
1448         }
1449         switch (op->mask.u.gen3.type) {
1450         case SHADER_NONE:
1451         case SHADER_ZERO:
1452         case SHADER_BLACK:
1453         case SHADER_WHITE:
1454                 break;
1455         case SHADER_CONSTANT:
1456                 if (op->mask.u.gen3.mode != state->last_specular) {
1457                         OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
1458                         OUT_BATCH(op->mask.u.gen3.mode);
1459                         state->last_specular = op->mask.u.gen3.mode;
1460                 }
1461                 break;
1462         case SHADER_LINEAR:
1463         case SHADER_RADIAL:
1464         case SHADER_TEXTURE:
1465                 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1466                 ss2 |= S2_TEXCOORD_FMT(tex_count,
1467                                        op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1468                 map[tex_count * 2 + 0] =
1469                         op->mask.card_format |
1470                         gen3_ms_tiling(op->mask.bo->tiling) |
1471                         (op->mask.height - 1) << MS3_HEIGHT_SHIFT |
1472                         (op->mask.width - 1) << MS3_WIDTH_SHIFT;
1473                 map[tex_count * 2 + 1] =
1474                         (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1475
1476                 sampler[tex_count * 2 + 0] = op->mask.filter;
1477                 sampler[tex_count * 2 + 1] =
1478                         op->mask.repeat |
1479                         tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1480                 bo[tex_count] = op->mask.bo;
1481                 tex_count++;
1482                 break;
1483         case SHADER_OPACITY:
1484                 ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1485                 ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
1486                 break;
1487         }
1488
1489         {
1490                 uint32_t blend_offset = sna->kgem.nbatch;
1491
1492                 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
1493                 OUT_BATCH(ss2);
1494                 OUT_BATCH(gen3_get_blend_cntl(op->op,
1495                                               op->has_component_alpha,
1496                                               op->dst.format));
1497
1498                 if (memcmp(sna->kgem.batch + state->last_blend + 1,
1499                            sna->kgem.batch + blend_offset + 1,
1500                            2 * 4) == 0)
1501                         sna->kgem.nbatch = blend_offset;
1502                 else
1503                         state->last_blend = blend_offset;
1504         }
1505
1506         if (op->u.gen3.num_constants) {
1507                 int count = op->u.gen3.num_constants;
1508                 if (state->last_constants) {
1509                         int last = sna->kgem.batch[state->last_constants+1];
1510                         if (last == (1 << (count >> 2)) - 1 &&
1511                             memcmp(&sna->kgem.batch[state->last_constants+2],
1512                                    op->u.gen3.constants,
1513                                    count * sizeof(uint32_t)) == 0)
1514                                 count = 0;
1515                 }
1516                 if (count) {
1517                         state->last_constants = sna->kgem.nbatch;
1518                         OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
1519                         OUT_BATCH((1 << (count >> 2)) - 1);
1520
1521                         memcpy(sna->kgem.batch + sna->kgem.nbatch,
1522                                op->u.gen3.constants,
1523                                count * sizeof(uint32_t));
1524                         sna->kgem.nbatch += count;
1525                 }
1526         }
1527
1528         if (tex_count != 0) {
1529                 uint32_t rewind;
1530
1531                 n = 0;
1532                 if (tex_count == state->tex_count) {
1533                         for (; n < tex_count; n++) {
1534                                 if (map[2*n+0] != state->tex_map[2*n+0] ||
1535                                     map[2*n+1] != state->tex_map[2*n+1] ||
1536                                     state->tex_handle[n] != bo[n]->handle ||
1537                                     state->tex_delta[n] != bo[n]->delta)
1538                                         break;
1539                         }
1540                 }
1541                 if (n < tex_count) {
1542                         OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
1543                         OUT_BATCH((1 << tex_count) - 1);
1544                         for (n = 0; n < tex_count; n++) {
1545                                 OUT_BATCH(kgem_add_reloc(&sna->kgem,
1546                                                          sna->kgem.nbatch,
1547                                                          bo[n],
1548                                                          I915_GEM_DOMAIN_SAMPLER<< 16,
1549                                                          0));
1550                                 OUT_BATCH(map[2*n + 0]);
1551                                 OUT_BATCH(map[2*n + 1]);
1552
1553                                 state->tex_map[2*n+0] = map[2*n+0];
1554                                 state->tex_map[2*n+1] = map[2*n+1];
1555                                 state->tex_handle[n] = bo[n]->handle;
1556                                 state->tex_delta[n] = bo[n]->delta;
1557                         }
1558                         state->tex_count = n;
1559                 }
1560
1561                 rewind = sna->kgem.nbatch;
1562                 OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
1563                 OUT_BATCH((1 << tex_count) - 1);
1564                 for (n = 0; n < tex_count; n++) {
1565                         OUT_BATCH(sampler[2*n + 0]);
1566                         OUT_BATCH(sampler[2*n + 1]);
1567                         OUT_BATCH(0);
1568                 }
1569                 if (state->last_sampler &&
1570                     memcmp(&sna->kgem.batch[state->last_sampler+1],
1571                            &sna->kgem.batch[rewind + 1],
1572                            (3*tex_count + 1)*sizeof(uint32_t)) == 0)
1573                         sna->kgem.nbatch = rewind;
1574                 else
1575                         state->last_sampler = rewind;
1576         }
1577
1578         gen3_composite_emit_shader(sna, op, op->op);
1579 }
1580
1581 static void gen3_magic_ca_pass(struct sna *sna,
1582                                const struct sna_composite_op *op)
1583 {
1584         if (!op->need_magic_ca_pass)
1585                 return;
1586
1587         DBG(("%s(%d)\n", __FUNCTION__,
1588              sna->render.vertex_index - sna->render.vertex_start));
1589
1590         OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
1591         OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
1592         gen3_composite_emit_shader(sna, op, PictOpAdd);
1593
1594         OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1595                   (sna->render.vertex_index - sna->render.vertex_start));
1596         OUT_BATCH(sna->render.vertex_start);
1597
1598         sna->render_state.gen3.last_blend = 0;
1599 }
1600
1601 static void gen3_vertex_flush(struct sna *sna)
1602 {
1603         assert(sna->render_state.gen3.vertex_offset);
1604
1605         DBG(("%s[%x] = %d\n", __FUNCTION__,
1606              4*sna->render_state.gen3.vertex_offset,
1607              sna->render.vertex_index - sna->render.vertex_start));
1608
1609         sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
1610                 PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1611                 (sna->render.vertex_index - sna->render.vertex_start);
1612         sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
1613                 sna->render.vertex_start;
1614
1615         sna->render_state.gen3.vertex_offset = 0;
1616 }
1617
1618 static int gen3_vertex_finish(struct sna *sna)
1619 {
1620         struct kgem_bo *bo;
1621
1622         DBG(("%s: used=%d/%d, vbo active? %d\n",
1623              __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1624              sna->render.vbo ? sna->render.vbo->handle : 0));
1625         assert(sna->render.vertex_used);
1626         assert(sna->render.vertex_used <= sna->render.vertex_size);
1627
1628         bo = sna->render.vbo;
1629         if (bo) {
1630                 if (sna->render_state.gen3.vertex_offset)
1631                         gen3_vertex_flush(sna);
1632
1633                 DBG(("%s: reloc = %d\n", __FUNCTION__,
1634                      sna->render.vertex_reloc[0]));
1635
1636                 sna->kgem.batch[sna->render.vertex_reloc[0]] =
1637                         kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1638                                        bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
1639
1640                 sna->render.vertex_reloc[0] = 0;
1641                 sna->render.vertex_used = 0;
1642                 sna->render.vertex_index = 0;
1643
1644                 kgem_bo_destroy(&sna->kgem, bo);
1645         }
1646
1647         sna->render.vertices = NULL;
1648         sna->render.vbo = kgem_create_linear(&sna->kgem,
1649                                              256*1024, CREATE_GTT_MAP);
1650         if (sna->render.vbo)
1651                 sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
1652         if (sna->render.vertices == NULL) {
1653                 if (sna->render.vbo)
1654                         kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1655                 sna->render.vbo = NULL;
1656                 return 0;
1657         }
1658         assert(sna->render.vbo->snoop == false);
1659
1660         if (sna->render.vertex_used) {
1661                 memcpy(sna->render.vertices,
1662                        sna->render.vertex_data,
1663                        sizeof(float)*sna->render.vertex_used);
1664         }
1665         sna->render.vertex_size = 64 * 1024 - 1;
1666         return sna->render.vertex_size - sna->render.vertex_used;
1667 }
1668
1669 static void gen3_vertex_close(struct sna *sna)
1670 {
1671         struct kgem_bo *bo, *free_bo = NULL;
1672         unsigned int delta = 0;
1673
1674         assert(sna->render_state.gen3.vertex_offset == 0);
1675
1676         DBG(("%s: used=%d/%d, vbo active? %d\n",
1677              __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1678              sna->render.vbo ? sna->render.vbo->handle : 0));
1679
1680         if (sna->render.vertex_used == 0)
1681                 return;
1682
1683         bo = sna->render.vbo;
1684         if (bo) {
1685                 if (sna->render.vertex_size - sna->render.vertex_used < 64) {
1686                         DBG(("%s: discarding full vbo\n", __FUNCTION__));
1687                         sna->render.vbo = NULL;
1688                         sna->render.vertices = sna->render.vertex_data;
1689                         sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1690                         free_bo = bo;
1691                 } else if (IS_CPU_MAP(bo->map)) {
1692                         DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
1693                         sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
1694                         if (sna->render.vertices == NULL) {
1695                                 DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
1696                                 sna->render.vbo = NULL;
1697                                 sna->render.vertices = sna->render.vertex_data;
1698                                 sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1699                                 free_bo = bo;
1700                         }
1701                 }
1702         } else {
1703                 if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
1704                         DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
1705                              sna->render.vertex_used, sna->kgem.nbatch));
1706                         memcpy(sna->kgem.batch + sna->kgem.nbatch,
1707                                sna->render.vertex_data,
1708                                sna->render.vertex_used * 4);
1709                         delta = sna->kgem.nbatch * 4;
1710                         bo = NULL;
1711                         sna->kgem.nbatch += sna->render.vertex_used;
1712                 } else {
1713                         DBG(("%s: new vbo: %d\n", __FUNCTION__,
1714                              sna->render.vertex_used));
1715                         bo = kgem_create_linear(&sna->kgem,
1716                                                 4*sna->render.vertex_used, 0);
1717                         if (bo) {
1718                                 assert(bo->snoop == false);
1719                                 kgem_bo_write(&sna->kgem, bo,
1720                                               sna->render.vertex_data,
1721                                               4*sna->render.vertex_used);
1722                         }
1723                         free_bo = bo;
1724                 }
1725         }
1726
1727         DBG(("%s: reloc = %d\n", __FUNCTION__,
1728              sna->render.vertex_reloc[0]));
1729
1730         if (sna->render.vertex_reloc[0]) {
1731                 sna->kgem.batch[sna->render.vertex_reloc[0]] =
1732                         kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1733                                        bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
1734                 sna->render.vertex_reloc[0] = 0;
1735         }
1736
1737         if (sna->render.vbo == NULL) {
1738                 DBG(("%s: resetting vbo\n", __FUNCTION__));
1739                 sna->render.vertex_used = 0;
1740                 sna->render.vertex_index = 0;
1741                 assert(sna->render.vertices == sna->render.vertex_data);
1742                 assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
1743         }
1744
1745         if (free_bo)
1746                 kgem_bo_destroy(&sna->kgem, free_bo);
1747 }
1748
1749 static bool gen3_rectangle_begin(struct sna *sna,
1750                                  const struct sna_composite_op *op)
1751 {
1752         struct gen3_render_state *state = &sna->render_state.gen3;
1753         int ndwords, i1_cmd = 0, i1_len = 0;
1754
1755         ndwords = 2;
1756         if (op->need_magic_ca_pass)
1757                 ndwords += 100;
1758         if (sna->render.vertex_reloc[0] == 0)
1759                 i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
1760         if (state->floats_per_vertex != op->floats_per_vertex)
1761                 i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
1762
1763         if (!kgem_check_batch(&sna->kgem, ndwords+1))
1764                 return false;
1765
1766         if (i1_cmd) {
1767                 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
1768                 if (sna->render.vertex_reloc[0] == 0)
1769                         sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
1770                 if (state->floats_per_vertex != op->floats_per_vertex) {
1771                         state->floats_per_vertex = op->floats_per_vertex;
1772                         OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
1773                                   state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
1774                 }
1775         }
1776
1777         if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
1778                 state->vertex_offset = state->last_vertex_offset;
1779         } else {
1780                 state->vertex_offset = sna->kgem.nbatch;
1781                 OUT_BATCH(MI_NOOP); /* to be filled later */
1782                 OUT_BATCH(MI_NOOP);
1783                 sna->render.vertex_start = sna->render.vertex_index;
1784                 state->last_vertex_offset = state->vertex_offset;
1785         }
1786
1787         return true;
1788 }
1789
1790 static int gen3_get_rectangles__flush(struct sna *sna,
1791                                       const struct sna_composite_op *op)
1792 {
1793         if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
1794                 return 0;
1795         if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
1796                 return 0;
1797
1798         if (op->need_magic_ca_pass && sna->render.vbo)
1799                 return 0;
1800
1801         return gen3_vertex_finish(sna);
1802 }
1803
1804 inline static int gen3_get_rectangles(struct sna *sna,
1805                                       const struct sna_composite_op *op,
1806                                       int want)
1807 {
1808         int rem;
1809
1810         DBG(("%s: want=%d, rem=%d\n",
1811              __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
1812
1813         assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
1814
1815 start:
1816         rem = vertex_space(sna);
1817         if (unlikely(op->floats_per_rect > rem)) {
1818                 DBG(("flushing vbo for %s: %d < %d\n",
1819                      __FUNCTION__, rem, op->floats_per_rect));
1820                 rem = gen3_get_rectangles__flush(sna, op);
1821                 if (unlikely(rem == 0))
1822                         goto flush;
1823         }
1824
1825         if (unlikely(sna->render_state.gen3.vertex_offset == 0 &&
1826                      !gen3_rectangle_begin(sna, op)))
1827                 goto flush;
1828
1829         if (want > 1 && want * op->floats_per_rect > rem)
1830                 want = rem / op->floats_per_rect;
1831         sna->render.vertex_index += 3*want;
1832
1833         assert(want);
1834         assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
1835         return want;
1836
1837 flush:
1838         DBG(("%s: flushing batch\n", __FUNCTION__));
1839         if (sna->render_state.gen3.vertex_offset) {
1840                 gen3_vertex_flush(sna);
1841                 gen3_magic_ca_pass(sna, op);
1842         }
1843         _kgem_submit(&sna->kgem);
1844         gen3_emit_composite_state(sna, op);
1845         goto start;
1846 }
1847
1848 fastcall static void
1849 gen3_render_composite_blt(struct sna *sna,
1850                           const struct sna_composite_op *op,
1851                           const struct sna_composite_rectangles *r)
1852 {
1853         DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
1854              r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1855              r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1856              r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1857              r->width, r->height));
1858
1859         gen3_get_rectangles(sna, op, 1);
1860
1861         op->prim_emit(sna, op, r);
1862 }
1863
1864 fastcall static void
1865 gen3_render_composite_box(struct sna *sna,
1866                           const struct sna_composite_op *op,
1867                           const BoxRec *box)
1868 {
1869         struct sna_composite_rectangles r;
1870
1871         DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1872              __FUNCTION__,
1873              op->src.offset[0], op->src.offset[1],
1874              op->mask.offset[0], op->mask.offset[1],
1875              op->dst.x, op->dst.y));
1876
1877         gen3_get_rectangles(sna, op, 1);
1878
1879         r.dst.x  = box->x1;
1880         r.dst.y  = box->y1;
1881         r.width  = box->x2 - box->x1;
1882         r.height = box->y2 - box->y1;
1883         r.src = r.mask = r.dst;
1884
1885         op->prim_emit(sna, op, &r);
1886 }
1887
1888 static void
1889 gen3_render_composite_boxes(struct sna *sna,
1890                             const struct sna_composite_op *op,
1891                             const BoxRec *box, int nbox)
1892 {
1893         DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1894              __FUNCTION__, nbox,
1895              op->src.offset[0], op->src.offset[1],
1896              op->mask.offset[0], op->mask.offset[1],
1897              op->dst.x, op->dst.y));
1898
1899         do {
1900                 int nbox_this_time;
1901
1902                 nbox_this_time = gen3_get_rectangles(sna, op, nbox);
1903                 nbox -= nbox_this_time;
1904
1905                 do {
1906                         struct sna_composite_rectangles r;
1907
1908                         DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1909                              box->x1, box->y1,
1910                              box->x2 - box->x1,
1911                              box->y2 - box->y1));
1912
1913                         r.dst.x  = box->x1; r.dst.y  = box->y1;
1914                         r.width = box->x2 - box->x1;
1915                         r.height = box->y2 - box->y1;
1916                         r.src = r.mask = r.dst;
1917
1918                         op->prim_emit(sna, op, &r);
1919                         box++;
1920                 } while (--nbox_this_time);
1921         } while (nbox);
1922 }
1923
1924 static void
1925 gen3_render_composite_done(struct sna *sna,
1926                            const struct sna_composite_op *op)
1927 {
1928         DBG(("%s()\n", __FUNCTION__));
1929
1930         if (sna->render_state.gen3.vertex_offset) {
1931                 gen3_vertex_flush(sna);
1932                 gen3_magic_ca_pass(sna, op);
1933         }
1934
1935         if (op->mask.bo)
1936                 kgem_bo_destroy(&sna->kgem, op->mask.bo);
1937         if (op->src.bo)
1938                 kgem_bo_destroy(&sna->kgem, op->src.bo);
1939
1940         sna_render_composite_redirect_done(sna, op);
1941 }
1942
1943 static void
1944 discard_vbo(struct sna *sna)
1945 {
1946         kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1947         sna->render.vbo = NULL;
1948         sna->render.vertices = sna->render.vertex_data;
1949         sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1950         sna->render.vertex_used = 0;
1951         sna->render.vertex_index = 0;
1952 }
1953
1954 static void
1955 gen3_render_reset(struct sna *sna)
1956 {
1957         struct gen3_render_state *state = &sna->render_state.gen3;
1958
1959         state->need_invariant = true;
1960         state->current_dst = 0;
1961         state->tex_count = 0;
1962         state->last_drawrect_limit = ~0U;
1963         state->last_target = 0;
1964         state->last_blend = 0;
1965         state->last_constants = 0;
1966         state->last_sampler = 0;
1967         state->last_shader = 0x7fffffff;
1968         state->last_diffuse = 0xcc00ffee;
1969         state->last_specular = 0xcc00ffee;
1970
1971         state->floats_per_vertex = 0;
1972         state->last_floats_per_vertex = 0;
1973         state->last_vertex_offset = 0;
1974         state->vertex_offset = 0;
1975
1976         if (sna->render.vbo != NULL &&
1977             !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
1978                 DBG(("%s: discarding vbo as next access will stall: %d\n",
1979                      __FUNCTION__, sna->render.vbo->presumed_offset));
1980                 discard_vbo(sna);
1981         }
1982 }
1983
1984 static void
1985 gen3_render_retire(struct kgem *kgem)
1986 {
1987         struct sna *sna;
1988
1989         sna = container_of(kgem, struct sna, kgem);
1990         if (sna->render.vertex_reloc[0] == 0 &&
1991             sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
1992                 DBG(("%s: resetting idle vbo\n", __FUNCTION__));
1993                 sna->render.vertex_used = 0;
1994                 sna->render.vertex_index = 0;
1995         }
1996 }
1997
1998 static void
1999 gen3_render_expire(struct kgem *kgem)
2000 {
2001         struct sna *sna;
2002
2003         sna = container_of(kgem, struct sna, kgem);
2004         if (sna->render.vbo && !sna->render.vertex_used) {
2005                 DBG(("%s: discarding vbo\n", __FUNCTION__));
2006                 discard_vbo(sna);
2007         }
2008 }
2009
2010 static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
2011                                               CARD32 format)
2012 {
2013         unsigned int i;
2014
2015         for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2016                 if (gen3_tex_formats[i].fmt == format) {
2017                         channel->card_format = gen3_tex_formats[i].card_fmt;
2018                         channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
2019                         return true;
2020                 }
2021         }
2022         return false;
2023 }
2024
2025 static bool source_is_covered(PicturePtr picture,
2026                               int x, int y,
2027                               int width, int height)
2028 {
2029         int x1, y1, x2, y2;
2030
2031         if (picture->repeat && picture->repeatType != RepeatNone)
2032                 return true;
2033
2034         if (picture->pDrawable == NULL)
2035                 return false;
2036
2037         if (picture->transform) {
2038                 pixman_box16_t sample;
2039
2040                 sample.x1 = x;
2041                 sample.y1 = y;
2042                 sample.x2 = x + width;
2043                 sample.y2 = y + height;
2044
2045                 pixman_transform_bounds(picture->transform, &sample);
2046
2047                 x1 = sample.x1;
2048                 x2 = sample.x2;
2049                 y1 = sample.y1;
2050                 y2 = sample.y2;
2051         } else {
2052                 x1 = x;
2053                 y1 = y;
2054                 x2 = x + width;
2055                 y2 = y + height;
2056         }
2057
2058         return
2059                 x1 >= 0 && y1 >= 0 &&
2060                 x2 <= picture->pDrawable->width &&
2061                 y2 <= picture->pDrawable->height;
2062 }
2063
2064 static bool gen3_composite_channel_set_xformat(PicturePtr picture,
2065                                                struct sna_composite_channel *channel,
2066                                                int x, int y,
2067                                                int width, int height)
2068 {
2069         unsigned int i;
2070
2071         if (PICT_FORMAT_A(picture->format) != 0)
2072                 return false;
2073
2074         if (width == 0 || height == 0)
2075                 return false;
2076
2077         if (!source_is_covered(picture, x, y, width, height))
2078                 return false;
2079
2080         for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
2081                 if (gen3_tex_formats[i].xfmt == picture->format) {
2082                         channel->card_format = gen3_tex_formats[i].card_fmt;
2083                         channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
2084                         channel->alpha_fixup = true;
2085                         return true;
2086                 }
2087         }
2088
2089         return false;
2090 }
2091
2092 static int
2093 gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
2094 {
2095         channel->u.gen3.mode = color;
2096         channel->u.gen3.type = SHADER_CONSTANT;
2097         if (color == 0)
2098                 channel->u.gen3.type = SHADER_ZERO;
2099         else if (color == 0xff000000)
2100                 channel->u.gen3.type = SHADER_BLACK;
2101         else if (color == 0xffffffff)
2102                 channel->u.gen3.type = SHADER_WHITE;
2103
2104         channel->bo = NULL;
2105         channel->is_opaque = (color >> 24) == 0xff;
2106         channel->is_affine = 1;
2107         channel->alpha_fixup = 0;
2108         channel->rb_reversed = 0;
2109
2110         DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
2111              __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
2112
2113         /* for consistency */
2114         channel->repeat = RepeatNormal;
2115         channel->filter = PictFilterNearest;
2116         channel->pict_format = PICT_a8r8g8b8;
2117         channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
2118
2119         return 1;
2120 }
2121
2122 static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
2123 {
2124         if (channel->u.gen3.type == SHADER_TEXTURE)
2125                 channel->repeat = gen3_texture_repeat(channel->repeat);
2126         else
2127                 channel->repeat = gen3_gradient_repeat(channel->repeat);
2128
2129         channel->filter = gen3_filter(channel->filter);
2130         if (channel->card_format == 0)
2131                 gen3_composite_channel_set_format(channel, channel->pict_format);
2132 }
2133
2134 static bool gen3_gradient_setup(struct sna *sna,
2135                                 PicturePtr picture,
2136                                 struct sna_composite_channel *channel,
2137                                 int16_t ox, int16_t oy)
2138 {
2139         int16_t dx, dy;
2140
2141         if (picture->repeat == 0) {
2142                 channel->repeat = RepeatNone;
2143         } else switch (picture->repeatType) {
2144         case RepeatNone:
2145         case RepeatNormal:
2146         case RepeatPad:
2147         case RepeatReflect:
2148                 channel->repeat = picture->repeatType;
2149                 break;
2150         default:
2151                 return false;
2152         }
2153
2154         channel->bo =
2155                 sna_render_get_gradient(sna,
2156                                         (PictGradient *)picture->pSourcePict);
2157         if (channel->bo == NULL)
2158                 return false;
2159
2160         channel->pict_format = PICT_a8r8g8b8;
2161         channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
2162         channel->filter = PictFilterNearest;
2163         channel->is_affine = sna_transform_is_affine(picture->transform);
2164         if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2165                 DBG(("%s: integer translation (%d, %d), removing\n",
2166                      __FUNCTION__, dx, dy));
2167                 ox += dx;
2168                 oy += dy;
2169                 channel->transform = NULL;
2170         } else
2171                 channel->transform = picture->transform;
2172         channel->width  = channel->bo->pitch / 4;
2173         channel->height = 1;
2174         channel->offset[0] = ox;
2175         channel->offset[1] = oy;
2176         channel->scale[0] = channel->scale[1] = 1;
2177         return true;
2178 }
2179
2180 static int
2181 gen3_init_linear(struct sna *sna,
2182                  PicturePtr picture,
2183                  struct sna_composite_op *op,
2184                  struct sna_composite_channel *channel,
2185                  int ox, int oy)
2186 {
2187         PictLinearGradient *linear =
2188                 (PictLinearGradient *)picture->pSourcePict;
2189         float x0, y0, sf;
2190         float dx, dy, offset;
2191         int n;
2192
2193         DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
2194              __FUNCTION__,
2195              xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
2196              xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
2197
2198         if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
2199                 return 0;
2200
2201         dx = xFixedToDouble(linear->p2.x - linear->p1.x);
2202         dy = xFixedToDouble(linear->p2.y - linear->p1.y);
2203         sf = dx*dx + dy*dy;
2204         dx /= sf;
2205         dy /= sf;
2206
2207         x0 = xFixedToDouble(linear->p1.x);
2208         y0 = xFixedToDouble(linear->p1.y);
2209         offset = dx*x0 + dy*y0;
2210
2211         n = op->u.gen3.num_constants;
2212         channel->u.gen3.constants = FS_C0 + n / 4;
2213         op->u.gen3.constants[n++] = dx;
2214         op->u.gen3.constants[n++] = dy;
2215         op->u.gen3.constants[n++] = -offset;
2216         op->u.gen3.constants[n++] = 0;
2217
2218         if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2219                 return -1;
2220
2221         channel->u.gen3.type = SHADER_LINEAR;
2222         op->u.gen3.num_constants = n;
2223
2224         DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
2225              __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
2226         return 1;
2227 }
2228
2229 static int
2230 gen3_init_radial(struct sna *sna,
2231                  PicturePtr picture,
2232                  struct sna_composite_op *op,
2233                  struct sna_composite_channel *channel,
2234                  int ox, int oy)
2235 {
2236         PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
2237         double dx, dy, dr, r1;
2238         int n;
2239
2240         dx = xFixedToDouble(radial->c2.x - radial->c1.x);
2241         dy = xFixedToDouble(radial->c2.y - radial->c1.y);
2242         dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
2243
2244         r1 = xFixedToDouble(radial->c1.radius);
2245
2246         n = op->u.gen3.num_constants;
2247         channel->u.gen3.constants = FS_C0 + n / 4;
2248         if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
2249                 if (radial->c2.radius == radial->c1.radius) {
2250                         channel->u.gen3.type = SHADER_ZERO;
2251                         return 1;
2252                 }
2253
2254                 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
2255                 op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
2256                 op->u.gen3.constants[n++] = 1. / dr;
2257                 op->u.gen3.constants[n++] = -r1 / dr;
2258
2259                 channel->u.gen3.mode = RADIAL_ONE;
2260         } else {
2261                 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
2262                 op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
2263                 op->u.gen3.constants[n++] = r1;
2264                 op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
2265
2266                 op->u.gen3.constants[n++] = -2 * dx;
2267                 op->u.gen3.constants[n++] = -2 * dy;
2268                 op->u.gen3.constants[n++] = -2 * r1 * dr;
2269                 op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
2270
2271                 channel->u.gen3.mode = RADIAL_TWO;
2272         }
2273
2274         if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2275                 return -1;
2276
2277         channel->u.gen3.type = SHADER_RADIAL;
2278         op->u.gen3.num_constants = n;
2279         return 1;
2280 }
2281
2282 static bool
2283 sna_picture_is_clear(PicturePtr picture,
2284                      int x, int y, int w, int h,
2285                      uint32_t *color)
2286 {
2287         struct sna_pixmap *priv;
2288
2289         if (!picture->pDrawable)
2290                 return false;
2291
2292         priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
2293         if (priv == NULL || !priv->clear)
2294                 return false;
2295
2296         if (!source_is_covered(picture, x, y, w, h))
2297                 return false;
2298
2299         *color = priv->clear_color;
2300         return true;
2301 }
2302
/*
 * Prepare @channel so that @picture can be used as an input for the w x h
 * area whose picture origin is (x, y) and destination origin is
 * (dst_x, dst_y).
 *
 * Procedural sources (no drawable) are handed to the solid/linear/radial
 * initialisers; drawable sources are checked for alpha maps, solid/clear
 * contents, repeat, filter, format and size, and routed to the matching
 * sna_render_* helper.
 *
 * @op is only passed through to the gradient initialisers.
 * @precise: when false, a failed procedural setup may be replaced by
 * sna_render_picture_approximate_gradient() before the generic fixup.
 *
 * The return value is that of the helper that handled the picture. The
 * caller in this file, gen3_render_composite(), treats -1 as failure, 0 as
 * a channel that contributes nothing and 1 as a ready channel.
 */
2303 static int
2304 gen3_composite_picture(struct sna *sna,
2305                        PicturePtr picture,
2306                        struct sna_composite_op *op,
2307                        struct sna_composite_channel *channel,
2308                        int16_t x, int16_t y,
2309                        int16_t w, int16_t h,
2310                        int16_t dst_x, int16_t dst_y,
2311                        bool precise)
2312 {
2313         PixmapPtr pixmap;
2314         uint32_t color;
2315         int16_t dx, dy;
2316
2317         DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
2318              __FUNCTION__, x, y, w, h, dst_x, dst_y));
2319
2320         channel->card_format = 0;
2321
             /* No drawable: the picture is described by pSourcePict instead. */
2322         if (picture->pDrawable == NULL) {
2323                 SourcePict *source = picture->pSourcePict;
2324                 int ret = -1;
2325
                     /* Any source type not listed below leaves ret at -1. */
2326                 switch (source->type) {
2327                 case SourcePictTypeSolidFill:
2328                         DBG(("%s: solid fill [%08x], format %x\n",
2329                              __FUNCTION__, source->solidFill.color, picture->format));
2330                         ret = gen3_init_solid(channel, source->solidFill.color);
2331                         break;
2332
2333                 case SourcePictTypeLinear:
2334                         ret = gen3_init_linear(sna, picture, op, channel,
2335                                                x - dst_x, y - dst_y);
2336                         break;
2337
2338                 case SourcePictTypeRadial:
2339                         ret = gen3_init_radial(sna, picture, op, channel,
2340                                                x - dst_x, y - dst_y);
2341                         break;
2342                 }
2343
                     /* Setup failed: try the approximate gradient (only when
                      * precision was not requested), then the generic fixup.
                      */
2344                 if (ret == -1) {
2345                         if (!precise)
2346                                 ret = sna_render_picture_approximate_gradient(sna, picture, channel,
2347                                                                               x, y, w, h, dst_x, dst_y);
2348                         if (ret == -1)
2349                                 ret = sna_render_picture_fixup(sna, picture, channel,
2350                                                                x, y, w, h, dst_x, dst_y);
2351                 }
2352                 return ret;
2353         }
2354
2355         if (picture->alphaMap) {
2356                 DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
2357                 return sna_render_picture_fixup(sna, picture, channel,
2358                                                 x, y, w, h, dst_x, dst_y);
2359         }
2360
2361         if (sna_picture_is_solid(picture, &color)) {
2362                 DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
2363                 return gen3_init_solid(channel, color);
2364         }
2365
             /* The clear colour is converted from the picture's format to
              * a8r8g8b8 before being used as a solid.
              */
2366         if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
2367                 DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
2368                 return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
2369         }
2370
             /* Repeat modes or filters rejected by the gen3 checks go through
              * the generic fixup path.
              */
2371         if (!gen3_check_repeat(picture))
2372                 return sna_render_picture_fixup(sna, picture, channel,
2373                                                 x, y, w, h, dst_x, dst_y);
2374
2375         if (!gen3_check_filter(picture))
2376                 return sna_render_picture_fixup(sna, picture, channel,
2377                                                 x, y, w, h, dst_x, dst_y);
2378
2379         channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
2380         channel->filter = picture->filter;
2381         channel->pict_format = picture->format;
2382
2383         pixmap = get_drawable_pixmap(picture->pDrawable);
2384         get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
2385
             /* Offset the origin by the drawable deltas and drawable position. */
2386         x += dx + picture->pDrawable->x;
2387         y += dy + picture->pDrawable->y;
2388
             /* dx/dy are reused here: an integer-translation transform is
              * folded into (x, y), the channel transform is dropped and
              * nearest filtering is selected.
              */
2389         if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2390                 DBG(("%s: integer translation (%d, %d), removing\n",
2391                      __FUNCTION__, dx, dy));
2392                 x += dx;
2393                 y += dy;
2394                 channel->transform = NULL;
2395                 channel->filter = PictFilterNearest;
2396         } else {
2397                 channel->transform = picture->transform;
2398                 channel->is_affine = sna_transform_is_affine(picture->transform);
2399         }
2400
             /* Neither the format nor the x-format variant could be set on
              * the channel: let the convert helper handle the picture.
              */
2401         if (!gen3_composite_channel_set_format(channel, picture->format) &&
2402             !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
2403                 return sna_render_picture_convert(sna, picture, channel, pixmap,
2404                                                   x, y, w, h, dst_x, dst_y);
2405
2406         if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
2407                 DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
2408                      __FUNCTION__,
2409                      pixmap->drawable.width, pixmap->drawable.height,
2410                      x, y, w, h));
2411                 return sna_render_picture_extract(sna, picture, channel,
2412                                                   x, y, w, h, dst_x, dst_y);
2413         }
2414
2415         return sna_render_pixmap_bo(sna, channel, pixmap,
2416                                     x, y, w, h, dst_x, dst_y);
2417 }
2418
2419 static inline bool
2420 source_use_blt(struct sna *sna, PicturePtr picture)
2421 {
2422         /* If it is a solid, try to use the BLT paths */
2423         if (!picture->pDrawable)
2424                 return picture->pSourcePict->type == SourcePictTypeSolidFill;
2425
2426         if (picture->pDrawable->width  == 1 &&
2427             picture->pDrawable->height == 1 &&
2428             picture->repeat)
2429                 return true;
2430
2431         if (too_large(picture->pDrawable->width, picture->pDrawable->height))
2432                 return true;
2433
2434         return !is_gpu(picture->pDrawable);
2435 }
2436
2437 static bool
2438 try_blt(struct sna *sna,
2439         PicturePtr dst,
2440         PicturePtr src,
2441         int width, int height)
2442 {
2443         if (sna->kgem.mode != KGEM_RENDER) {
2444                 DBG(("%s: already performing BLT\n", __FUNCTION__));
2445                 return true;
2446         }
2447
2448         if (too_large(width, height)) {
2449                 DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
2450                      __FUNCTION__, width, height));
2451                 return true;
2452         }
2453
2454         if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
2455                 DBG(("%s: target too large for 3D pipe (%d, %d)\n",
2456                      __FUNCTION__,
2457                      dst->pDrawable->width, dst->pDrawable->height));
2458                 return true;
2459         }
2460
2461         /* is the source picture only in cpu memory e.g. a shm pixmap? */
2462         return source_use_blt(sna, src);
2463 }
2464
2465 static void
2466 gen3_align_vertex(struct sna *sna,
2467                   const struct sna_composite_op *op)
2468 {
2469         if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
2470                 if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
2471                         gen3_vertex_finish(sna);
2472
2473                 DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
2474                      sna->render_state.gen3.last_floats_per_vertex,
2475                      op->floats_per_vertex,
2476                      sna->render.vertex_index,
2477                      (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
2478                 sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
2479                 sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
2480                 assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
2481                 sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
2482         }
2483 }
2484
/*
 * Fill in op->dst (pixmap, format, size, bo, drawable deltas) and
 * op->damage for rendering to @dst.
 *
 * The area of interest passed to sna_drawable_use_bo() is the rectangle
 * (x, y, w, h) when both w and h are non-zero, otherwise the extents
 * reported by sna_render_picture_extents().
 *
 * Returns false when no bo could be obtained, or when the bo pitch is below
 * the minimum checked here and the pixmap cannot be moved to the GPU, is
 * pinned, or a replacement bo cannot be created. Returns true otherwise.
 */
2485 static bool
2486 gen3_composite_set_target(struct sna *sna,
2487                           struct sna_composite_op *op,
2488                           PicturePtr dst,
2489                           int x, int y, int w, int h)
2490 {
2491         BoxRec box;
2492
2493         op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
2494         op->dst.format = dst->format;
2495         op->dst.width = op->dst.pixmap->drawable.width;
2496         op->dst.height = op->dst.pixmap->drawable.height;
2497
2498         if (w && h) {
2499                 box.x1 = x;
2500                 box.y1 = y;
2501                 box.x2 = x + w;
2502                 box.y2 = y + h;
2503         } else
2504                 sna_render_picture_extents(dst, &box);
2505
2506         op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
2507                                           PREFER_GPU | FORCE_GPU | RENDER_GPU,
2508                                           &box, &op->damage);
2509         if (op->dst.bo == NULL)
2510                 return false;
2511
2512         /* For single-stream mode there should be no minimum alignment
2513          * required, except that the width must be at least 2 elements.
2514          */
2515         if (op->dst.bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
2516                 struct sna_pixmap *priv;
2517
2518                 priv = sna_pixmap_move_to_gpu (op->dst.pixmap,
2519                                                MOVE_READ | MOVE_WRITE);
2520                 if (priv == NULL || priv->pinned)
2521                         return false;
2522
                     /* The GPU bo itself is still too narrow: swap it for a
                      * replacement created with the minimum pitch.
                      */
2523                 if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
2524                         struct kgem_bo *bo;
2525
2526                         bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
2527                                              op->dst.width, op->dst.height,
2528                                              2*op->dst.pixmap->drawable.bitsPerPixel,
2529                                              op->dst.pixmap->drawable.bitsPerPixel);
2530                         if (bo == NULL)
2531                                 return false;
2532
2533                         kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
2534                         priv->gpu_bo = bo;
2535                 }
2536
                     /* Render to the pixmap's GPU bo and track its GPU damage,
                      * unless that damage already covers the whole target.
                      */
2537                 op->dst.bo = priv->gpu_bo;
2538                 op->damage = &priv->gpu_damage;
2539                 if (sna_damage_is_all(op->damage,
2540                                       op->dst.width, op->dst.height))
2541                         op->damage = NULL;
2542         }
2543
2544         get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
2545                             &op->dst.x, &op->dst.y);
2546
2547         DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
2548              __FUNCTION__,
2549              op->dst.pixmap, (int)op->dst.format,
2550              op->dst.width, op->dst.height,
2551              op->dst.bo->pitch,
2552              op->dst.x, op->dst.y,
2553              op->damage ? *op->damage : (void *)-1));
2554
2555         assert(op->dst.bo->proxy == NULL);
2556         return true;
2557 }
2558
/*
 * Multiply two 8-bit values treated as fractions of 255 and return the
 * 8-bit result, i.e. approximately (a * b) / 255.
 *
 * The division is done without a divide: a bias of 0x7f is added to the
 * product, the high byte is folded back in, and the high byte of that sum
 * is returned.
 */
static inline uint8_t
mul_8_8(uint8_t a, uint8_t b)
{
    const uint16_t biased = (uint16_t)(a * (uint16_t)b + 0x7f);
    const unsigned int folded = (unsigned int)(biased >> 8) + biased;

    return (uint8_t)(folded >> 8);
}
2565
/*
 * Scale one 8-bit channel of the packed colour @s by the alpha of @m.
 *
 * The channel is the byte of @s at bit offset @shift; the alpha is the top
 * byte of @m. The scaled byte is returned at the same bit offset, with all
 * other bits zero, so the results for each channel can be OR-ed together.
 *
 * The product is widened to uint32_t before shifting: mul_8_8() returns a
 * uint8_t that would otherwise be promoted to int, and shifting a value
 * >= 0x80 left by 24 overflows a signed int (undefined behaviour).
 */
static inline uint32_t multa(uint32_t s, uint32_t m, int shift)
{
        uint32_t scaled = mul_8_8((s >> shift) & 0xff, m >> 24);

        return scaled << shift;
}
2570
2571 static inline bool is_constant_ps(uint32_t type)
2572 {
2573         switch (type) {
2574         case SHADER_NONE: /* be warned! */
2575         case SHADER_ZERO:
2576         case SHADER_BLACK:
2577         case SHADER_WHITE:
2578         case SHADER_CONSTANT:
2579                 return true;
2580         default:
2581                 return false;
2582         }
2583 }
2584
2585 static bool
2586 has_alphamap(PicturePtr p)
2587 {
2588         return p->alphaMap != NULL;
2589 }
2590
2591 static bool
2592 untransformed(PicturePtr p)
2593 {
2594         return !p->transform || pixman_transform_is_int_translate(p->transform);
2595 }
2596
2597 static bool
2598 need_upload(PicturePtr p)
2599 {
2600         return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
2601 }
2602
2603 static bool
2604 source_is_busy(PixmapPtr pixmap)
2605 {
2606         struct sna_pixmap *priv = sna_pixmap(pixmap);
2607         if (priv == NULL)
2608                 return false;
2609
2610         if (priv->clear)
2611                 return false;
2612
2613         if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
2614                 return true;
2615
2616         if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
2617                 return true;
2618
2619         return priv->gpu_damage && !priv->cpu_damage;
2620 }
2621
2622 static bool
2623 source_fallback(PicturePtr p, PixmapPtr pixmap)
2624 {
2625         if (sna_picture_is_solid(p, NULL))
2626                 return false;
2627
2628         if (!gen3_check_xformat(p) || !gen3_check_repeat(p))
2629                 return true;
2630
2631         if (pixmap && source_is_busy(pixmap))
2632                 return false;
2633
2634         return has_alphamap(p) || !gen3_check_filter(p) || need_upload(p);
2635 }
2636
/*
 * Decide whether a composite of @src (optionally through @mask) onto @dst
 * with operator @op should be declined by this backend.
 *
 * Returns true to request a fallback and false to let the operation
 * proceed; gen3_render_composite() returns false to its caller when this
 * returns true. The checks are ordered, and the first one that matches
 * decides the result.
 */
2637 static bool
2638 gen3_composite_fallback(struct sna *sna,
2639                         uint8_t op,
2640                         PicturePtr src,
2641                         PicturePtr mask,
2642                         PicturePtr dst)
2643 {
2644         struct sna_pixmap *priv;
2645         PixmapPtr src_pixmap;
2646         PixmapPtr mask_pixmap;
2647         PixmapPtr dst_pixmap;
2648         bool src_fallback, mask_fallback;
2649
2650         if (!gen3_check_dst_format(dst->format)) {
2651                 DBG(("%s: unknown destination format: %d\n",
2652                      __FUNCTION__, dst->format));
2653                 return true;
2654         }
2655
2656         dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2657
             /* Pictures without a drawable have no backing pixmap (NULL). */
2658         src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2659         src_fallback = source_fallback(src, src_pixmap);
2660
2661         if (mask) {
2662                 mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2663                 mask_fallback = source_fallback(mask, mask_pixmap);
2664         } else {
2665                 mask_pixmap = NULL;
2666                 mask_fallback = false;
2667         }
2668
2669         /* If we are using the destination as a source and need to
2670          * readback in order to upload the source, do it all
2671          * on the cpu.
2672          */
2673         if (src_pixmap == dst_pixmap && src_fallback) {
2674                 DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
2675                 return true;
2676         }
2677         if (mask_pixmap == dst_pixmap && mask_fallback) {
2678                 DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
2679                 return true;
2680         }
2681
             /* A component-alpha RGB mask is only accepted for PictOpOver or
              * for operators whose source blend factor is BLENDFACT_ZERO.
              */
2682         if (mask &&
2683             mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
2684             op != PictOpOver &&
2685             gen3_blend_op[op].src_blend != BLENDFACT_ZERO)
2686         {
2687                 DBG(("%s: component-alpha mask with op=%d, should fallback\n",
2688                      __FUNCTION__, op));
2689                 return true;
2690         }
2691
2692         /* If anything is on the GPU, push everything out to the GPU */
2693         priv = sna_pixmap(dst_pixmap);
2694         if (priv && priv->gpu_damage && !priv->clear) {
2695                 DBG(("%s: dst is already on the GPU, try to use GPU\n",
2696                      __FUNCTION__));
2697                 return false;
2698         }
2699
2700         if (src_pixmap && !src_fallback) {
2701                 DBG(("%s: src is already on the GPU, try to use GPU\n",
2702                      __FUNCTION__));
2703                 return false;
2704         }
2705         if (mask_pixmap && !mask_fallback) {
2706                 DBG(("%s: mask is already on the GPU, try to use GPU\n",
2707                      __FUNCTION__));
2708                 return false;
2709         }
2710
2711         /* However if the dst is not on the GPU and we need to
2712          * render one of the sources using the CPU, we may
2713          * as well do the entire operation in place on the CPU.
2714          */
2715         if (src_fallback) {
2716                 DBG(("%s: dst is on the CPU and src will fallback\n",
2717                      __FUNCTION__));
2718                 return true;
2719         }
2720
2721         if (mask && mask_fallback) {
2722                 DBG(("%s: dst is on the CPU and mask will fallback\n",
2723                      __FUNCTION__));
2724                 return true;
2725         }
2726
             /* An oversized destination with no private data, or whose CPU
              * damage covers everything, stays on the CPU.
              */
2727         if (too_large(dst_pixmap->drawable.width,
2728                       dst_pixmap->drawable.height) &&
2729             (priv == NULL || DAMAGE_IS_ALL(priv->cpu_damage))) {
2730                 DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2731                 return true;
2732         }
2733
2734         DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
2735              __FUNCTION__));
2736         return false;
2737 }
2738
2739 static int
2740 reuse_source(struct sna *sna,
2741              PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
2742              PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
2743 {
2744         if (src_x != msk_x || src_y != msk_y)
2745                 return false;
2746
2747         if (mask == src) {
2748                 *mc = *sc;
2749                 if (mc->bo)
2750                         kgem_bo_reference(mc->bo);
2751                 return true;
2752         }
2753
2754         if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
2755                 return false;
2756
2757         if (sc->is_solid)
2758                 return false;
2759
2760         DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
2761
2762         if (!sna_transform_equal(src->transform, mask->transform))
2763                 return false;
2764
2765         if (!sna_picture_alphamap_equal(src, mask))
2766                 return false;
2767
2768         if (!gen3_check_repeat(mask))
2769                 return false;
2770
2771         if (!gen3_check_filter(mask))
2772                 return false;
2773
2774         if (!gen3_check_format(mask))
2775                 return false;
2776
2777         DBG(("%s: reusing source channel for mask with a twist\n",
2778              __FUNCTION__));
2779
2780         *mc = *sc;
2781         mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
2782         mc->filter = gen3_filter(mask->filter);
2783         mc->pict_format = mask->format;
2784         gen3_composite_channel_set_format(mc, mask->format);
2785         if (mc->bo)
2786                 kgem_bo_reference(mc->bo);
2787         return true;
2788 }
2789
/*
 * Prepare the composite operation @tmp for drawing @src (optionally through
 * @mask) onto @dst with Render operator @op.
 *
 * The (src_x, src_y), (mask_x, mask_y) and (dst_x, dst_y) origins and the
 * width x height extent are forwarded to the target and channel setup
 * helpers.
 *
 * Returns true when @tmp has been set up, either by this function or by one
 * of the BLT helpers tried along the way; for operations that need tiling
 * the result of sna_tiling_composite() is returned. Returns false when the
 * operator is out of range, gen3_composite_fallback() asks for a fallback,
 * or any setup step fails; buffers acquired up to that point are released
 * through the cleanup labels at the end.
 */
2790 static bool
2791 gen3_render_composite(struct sna *sna,
2792                       uint8_t op,
2793                       PicturePtr src,
2794                       PicturePtr mask,
2795                       PicturePtr dst,
2796                       int16_t src_x,  int16_t src_y,
2797                       int16_t mask_x, int16_t mask_y,
2798                       int16_t dst_x,  int16_t dst_y,
2799                       int16_t width,  int16_t height,
2800                       struct sna_composite_op *tmp)
2801 {
2802         DBG(("%s()\n", __FUNCTION__));
2803
             /* op indexes gen3_blend_op[] below, so it must be in range. */
2804         if (op >= ARRAY_SIZE(gen3_blend_op)) {
2805                 DBG(("%s: fallback due to unhandled blend op: %d\n",
2806                      __FUNCTION__, op));
2807                 return false;
2808         }
2809
2810         /* Try to use the BLT engine unless it implies a
2811          * 3D -> 2D context switch.
2812          */
2813         if (mask == NULL &&
2814             try_blt(sna, dst, src, width, height) &&
2815             sna_blt_composite(sna,
2816                               op, src, dst,
2817                               src_x, src_y,
2818                               dst_x, dst_y,
2819                               width, height,
2820                               tmp, false))
2821                 return true;
2822
2823         if (gen3_composite_fallback(sna, op, src, mask, dst))
2824                 return false;
2825
2826         if (need_tiling(sna, width, height))
2827                 return sna_tiling_composite(op, src, mask, dst,
2828                                             src_x,  src_y,
2829                                             mask_x, mask_y,
2830                                             dst_x,  dst_y,
2831                                             width,  height,
2832                                             tmp);
2833
2834         if (!gen3_composite_set_target(sna, tmp, dst,
2835                                        dst_x, dst_y, width, height)) {
2836                 DBG(("%s: unable to set render target\n",
2837                      __FUNCTION__));
2838                 return false;
2839         }
2840
2841         tmp->op = op;
2842         tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
             /* A target that is too large, or whose bo fails the 3D pitch
              * check, is handed to sna_render_composite_redirect().
              */
2843         if (too_large(tmp->dst.width, tmp->dst.height) ||
2844             !gen3_check_pitch_3d(tmp->dst.bo)) {
2845                 if (!sna_render_composite_redirect(sna, tmp,
2846                                                    dst_x, dst_y, width, height))
2847                         return false;
2848         }
2849
             /* Defaults for the source channel, set before the picture setup
              * runs; e.g. gen3_init_radial() stores SHADER_RADIAL as the type.
              */
2850         tmp->u.gen3.num_constants = 0;
2851         tmp->src.u.gen3.type = SHADER_TEXTURE;
2852         tmp->src.is_affine = true;
2853         DBG(("%s: preparing source\n", __FUNCTION__));
2854         switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
2855                                        src_x, src_y,
2856                                        width, height,
2857                                        dst_x, dst_y,
2858                                        dst->polyMode == PolyModePrecise)) {
2859         case -1:
2860                 goto cleanup_dst;
2861         case 0:
2862                 tmp->src.u.gen3.type = SHADER_ZERO;
2863                 break;
2864         case 1:
                     /* With no mask and a source bo, offer the operation to
                      * the BLT convert helper before continuing here.
                      */
2865                 if (mask == NULL && tmp->src.bo &&
2866                     sna_blt_composite__convert(sna,
2867                                                dst_x, dst_y, width, height,
2868                                                tmp))
2869                         return true;
2870
2871                 gen3_composite_channel_convert(&tmp->src);
2872                 break;
2873         }
2874         DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
2875
2876         tmp->mask.u.gen3.type = SHADER_NONE;
2877         tmp->mask.is_affine = true;
2878         tmp->need_magic_ca_pass = false;
2879         tmp->has_component_alpha = false;
             /* The mask is not prepared at all when the source is SHADER_ZERO. */
2880         if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
2881                 if (!reuse_source(sna,
2882                                   src, &tmp->src, src_x, src_y,
2883                                   mask, &tmp->mask, mask_x, mask_y)) {
2884                         tmp->mask.u.gen3.type = SHADER_TEXTURE;
2885                         DBG(("%s: preparing mask\n", __FUNCTION__));
2886                         switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
2887                                                        mask_x, mask_y,
2888                                                        width,  height,
2889                                                        dst_x,  dst_y,
2890                                                        dst->polyMode == PolyModePrecise)) {
2891                         case -1:
2892                                 goto cleanup_src;
2893                         case 0:
2894                                 tmp->mask.u.gen3.type = SHADER_ZERO;
2895                                 break;
2896                         case 1:
2897                                 gen3_composite_channel_convert(&tmp->mask);
2898                                 break;
2899                         }
2900                 }
2901                 DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
                     /* A zero mask turns the whole input into zero: release the
                      * source bo, mark the source SHADER_ZERO and drop the mask.
                      */
2902                 if (tmp->mask.u.gen3.type == SHADER_ZERO) {
2903                         if (tmp->src.bo) {
2904                                 kgem_bo_destroy(&sna->kgem,
2905                                                 tmp->src.bo);
2906                                 tmp->src.bo = NULL;
2907                         }
2908                         tmp->src.u.gen3.type = SHADER_ZERO;
2909                         tmp->mask.u.gen3.type = SHADER_NONE;
2910                 }
2911
2912                 if (tmp->mask.u.gen3.type != SHADER_NONE) {
2913                         if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2914                                 /* Check if it's component alpha that relies on a source alpha
2915                                  * and on the source value.  We can only get one of those
2916                                  * into the single source value that we get to blend with.
2917                                  */
2918                                 DBG(("%s: component-alpha mask: %d\n",
2919                                      __FUNCTION__, tmp->mask.u.gen3.type));
2920                                 tmp->has_component_alpha = true;
                                     /* A white mask is dropped, and component alpha
                                      * with it.
                                      */
2921                                 if (tmp->mask.u.gen3.type == SHADER_WHITE) {
2922                                         tmp->mask.u.gen3.type = SHADER_NONE;
2923                                         tmp->has_component_alpha = false;
2924                                 } else if (gen3_blend_op[op].src_alpha &&
2925                                            (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
2926                                         if (op != PictOpOver)
2927                                                 goto cleanup_mask;
2928
                                             /* PictOpOver is set up as PictOpOutReverse
                                              * with need_magic_ca_pass set, and the
                                              * current vertex index is recorded as the
                                              * start. NOTE(review): the code consuming
                                              * need_magic_ca_pass is not visible here.
                                              */
2929                                         tmp->need_magic_ca_pass = true;
2930                                         tmp->op = PictOpOutReverse;
2931                                         sna->render.vertex_start = sna->render.vertex_index;
2932                                 }
2933                         } else {
2934                                 if (tmp->mask.is_opaque) {
2935                                         tmp->mask.u.gen3.type = SHADER_NONE;
2936                                 } else if (is_constant_ps(tmp->src.u.gen3.type) &&
2937                                            is_constant_ps(tmp->mask.u.gen3.type)) {
2938                                         uint32_t v;
2939
                                             /* Source and mask are both constant: scale
                                              * each of the four source bytes by the mask's
                                              * top byte (its alpha) and drop the mask.
                                              */
2940                                         v = multa(tmp->src.u.gen3.mode,
2941                                                   tmp->mask.u.gen3.mode,
2942                                                   24);
2943                                         v |= multa(tmp->src.u.gen3.mode,
2944                                                    tmp->mask.u.gen3.mode,
2945                                                    16);
2946                                         v |= multa(tmp->src.u.gen3.mode,
2947                                                    tmp->mask.u.gen3.mode,
2948                                                    8);
2949                                         v |= multa(tmp->src.u.gen3.mode,
2950                                                    tmp->mask.u.gen3.mode,
2951                                                    0);
2952
2953                                         DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
2954                                              __FUNCTION__,
2955                                              tmp->src.u.gen3.mode,
2956                                              tmp->mask.u.gen3.mode,
2957                                              v));
2958
2959                                         tmp->src.u.gen3.type = SHADER_CONSTANT;
2960                                         tmp->src.u.gen3.mode = v;
2961                                         tmp->src.is_opaque = false;
2962
2963                                         tmp->mask.u.gen3.type = SHADER_NONE;
2964                                 }
2965                         }
2966                 }
2967         }
2968         DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
2969              tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
2970              tmp->src.is_affine, tmp->mask.is_affine));
2971
             /* Start from the generic emitter and replace it with a
              * specialised one when the source/mask combination has one.
              */
2972         tmp->prim_emit = gen3_emit_composite_primitive;
2973         if (is_constant_ps(tmp->mask.u.gen3.type)) {
2974                 switch (tmp->src.u.gen3.type) {
2975                 case SHADER_NONE:
2976                 case SHADER_ZERO:
2977                 case SHADER_BLACK:
2978                 case SHADER_WHITE:
2979                 case SHADER_CONSTANT:
2980                         tmp->prim_emit = gen3_emit_composite_primitive_constant;
2981                         break;
2982                 case SHADER_LINEAR:
2983                 case SHADER_RADIAL:
2984                         if (tmp->src.transform == NULL)
2985                                 tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
2986                         else if (tmp->src.is_affine)
2987                                 tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
2988                         break;
2989                 case SHADER_TEXTURE:
2990                         if (tmp->src.transform == NULL) {
2991                                 if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
2992                                         tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
2993                                 else
2994                                         tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
2995                         } else if (tmp->src.is_affine)
2996                                 tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
2997                         break;
2998                 }
2999         } else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
3000                 if (tmp->mask.transform == NULL) {
3001                         if (is_constant_ps(tmp->src.u.gen3.type)) {
3002                                 if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0)
3003                                         tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
3004                                 else
3005                                         tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
3006                         } else if (tmp->src.transform == NULL)
3007                                 tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
3008                         else if (tmp->src.is_affine)
3009                                 tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
3010                 }
3011         }
3012
             /* Vertex size: a base of 2 floats, plus 2 (affine) or 4
              * (non-affine) floats for each non-constant channel.
              */
3013         tmp->floats_per_vertex = 2;
3014         if (!is_constant_ps(tmp->src.u.gen3.type))
3015                 tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
3016         if (!is_constant_ps(tmp->mask.u.gen3.type))
3017                 tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
3018         DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
3019              !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
3020              !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
3021              tmp->floats_per_vertex,
3022              tmp->prim_emit != gen3_emit_composite_primitive));
             /* A rectangle takes three vertices' worth of floats. */
3023         tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
3024
3025         tmp->blt   = gen3_render_composite_blt;
3026         tmp->box   = gen3_render_composite_box;
3027         tmp->boxes = gen3_render_composite_boxes;
3028         tmp->done  = gen3_render_composite_done;
3029
             /* If the buffers are rejected, submit the current batch and
              * check once more before giving up.
              */
3030         if (!kgem_check_bo(&sna->kgem,
3031                            tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
3032                            NULL)) {
3033                 kgem_submit(&sna->kgem);
3034                 if (!kgem_check_bo(&sna->kgem,
3035                                    tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
3036                                    NULL))
3037                         goto cleanup_mask;
3038         }
3039
3040         gen3_emit_composite_state(sna, tmp);
3041         gen3_align_vertex(sna, tmp);
3042         return true;
3043
             /* Failure paths: each label releases one buffer and falls through
              * to the next, so a jump releases that buffer and all below it.
              */
3044 cleanup_mask:
3045         if (tmp->mask.bo)
3046                 kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
3047 cleanup_src:
3048         if (tmp->src.bo)
3049                 kgem_bo_destroy(&sna->kgem, tmp->src.bo);
3050 cleanup_dst:
             /* The destination bo is only destroyed when a redirect is active. */
3051         if (tmp->redirect.real_bo)
3052                 kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
3053         return false;
3054 }
3055
3056 static void
3057 gen3_emit_composite_spans_vertex(struct sna *sna,
3058                                  const struct sna_composite_spans_op *op,
3059                                  int16_t x, int16_t y,
3060                                  float opacity)
3061 {
3062         gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
3063         gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
3064         OUT_VERTEX(opacity);
3065 }
3066
3067 fastcall static void
3068 gen3_emit_composite_spans_primitive_zero(struct sna *sna,
3069                                          const struct sna_composite_spans_op *op,
3070                                          const BoxRec *box,
3071                                          float opacity)
3072 {
3073         float *v = sna->render.vertices + sna->render.vertex_used;
3074         sna->render.vertex_used += 6;
3075
3076         v[0] = op->base.dst.x + box->x2;
3077         v[1] = op->base.dst.y + box->y2;
3078
3079         v[2] = op->base.dst.x + box->x1;
3080         v[3] = v[1];
3081
3082         v[4] = v[2];
3083         v[5] = op->base.dst.x + box->y1;
3084 }
3085
3086 fastcall static void
3087 gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
3088                                                    const struct sna_composite_spans_op *op,
3089                                                    const BoxRec *box,
3090                                                    float opacity)
3091 {
3092         float *v = sna->render.vertices + sna->render.vertex_used;
3093         sna->render.vertex_used += 6;
3094
3095         v[0] = box->x2;
3096         v[3] = v[1] = box->y2;
3097         v[4] = v[2] = box->x1;
3098         v[5] = box->y1;
3099 }
3100
3101 fastcall static void
3102 gen3_emit_composite_spans_primitive_constant(struct sna *sna,
3103                                              const struct sna_composite_spans_op *op,
3104                                              const BoxRec *box,
3105                                              float opacity)
3106 {
3107         float *v = sna->render.vertices + sna->render.vertex_used;
3108         sna->render.vertex_used += 9;
3109
3110         v[0] = op->base.dst.x + box->x2;
3111         v[6] = v[3] = op->base.dst.x + box->x1;
3112         v[4] = v[1] = op->base.dst.y + box->y2;
3113         v[7] = op->base.dst.y + box->y1;
3114         v[8] = v[5] = v[2] = opacity;
3115 }
3116
3117 fastcall static void
3118 gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
3119                                                        const struct sna_composite_spans_op *op,
3120                                                        const BoxRec *box,
3121                                                        float opacity)
3122 {
3123         float *v = sna->render.vertices + sna->render.vertex_used;
3124         sna->render.vertex_used += 9;
3125
3126         v[0] = box->x2;
3127         v[6] = v[3] = box->x1;
3128         v[4] = v[1] = box->y2;
3129         v[7] = box->y1;
3130         v[8] = v[5] = v[2] = opacity;
3131 }
3132
3133 fastcall static void
3134 gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
3135                                                     const struct sna_composite_spans_op *op,
3136                                                     const BoxRec *box,
3137                                                     float opacity)
3138 {
3139         float *v = sna->render.vertices + sna->render.vertex_used;
3140         sna->render.vertex_used += 15;
3141
3142         v[0] = op->base.dst.x + box->x2;
3143         v[1] = op->base.dst.y + box->y2;
3144         v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
3145         v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
3146         v[4] = opacity;
3147
3148         v[5] = op->base.dst.x + box->x1;
3149         v[6] = v[1];
3150         v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
3151         v[8] = v[3];
3152         v[9] = opacity;
3153
3154         v[10] = v[5];
3155         v[11] = op->base.dst.y + box->y1;
3156         v[12] = v[7];
3157         v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
3158         v[14] = opacity;
3159 }
3160
3161 fastcall static void
3162 gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
3163                                                   const struct sna_composite_spans_op *op,
3164                                                   const BoxRec *box,
3165                                                   float opacity)
3166 {
3167         PictTransform *transform = op->base.src.transform;
3168         float x, y, *v;
3169
3170         v = sna->render.vertices + sna->render.vertex_used;
3171         sna->render.vertex_used += 15;
3172
3173         v[0]  = op->base.dst.x + box->x2;
3174         v[6]  = v[1] = op->base.dst.y + box->y2;
3175         v[10] = v[5] = op->base.dst.x + box->x1;
3176         v[11] = op->base.dst.y + box->y1;
3177         v[4]  = opacity;
3178         v[9]  = opacity;
3179         v[14] = opacity;
3180
3181         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
3182                                          (int)op->base.src.offset[1] + box->y2,
3183                                          transform,
3184                                          &x, &y);
3185         v[2] = x * op->base.src.scale[0];
3186         v[3] = y * op->base.src.scale[1];
3187
3188         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3189                                          (int)op->base.src.offset[1] + box->y2,
3190                                          transform,
3191                                          &x, &y);
3192         v[7] = x * op->base.src.scale[0];
3193         v[8] = y * op->base.src.scale[1];
3194
3195         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3196                                          (int)op->base.src.offset[1] + box->y1,
3197                                          transform,
3198                                          &x, &y);
3199         v[12] = x * op->base.src.scale[0];
3200         v[13] = y * op->base.src.scale[1];
3201 }
3202
3203 fastcall static void
3204 gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
3205                                                       const struct sna_composite_spans_op *op,
3206                                                       const BoxRec *box,
3207                                                       float opacity)
3208 {
3209         float *v = sna->render.vertices + sna->render.vertex_used;
3210         sna->render.vertex_used += 15;
3211
3212         v[0] = op->base.dst.x + box->x2;
3213         v[1] = op->base.dst.y + box->y2;
3214         v[2] = op->base.src.offset[0] + box->x2;
3215         v[3] = op->base.src.offset[1] + box->y2;
3216         v[4] = opacity;
3217
3218         v[5] = op->base.dst.x + box->x1;
3219         v[6] = v[1];
3220         v[7] = op->base.src.offset[0] + box->x1;
3221         v[8] = v[3];
3222         v[9] = opacity;
3223
3224         v[10] = v[5];
3225         v[11] = op->base.dst.y + box->y1;
3226         v[12] = v[7];
3227         v[13] = op->base.src.offset[1] + box->y1;
3228         v[14] = opacity;
3229 }
3230
3231 fastcall static void
3232 gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
3233                                                     const struct sna_composite_spans_op *op,
3234                                                     const BoxRec *box,
3235                                                     float opacity)
3236 {
3237         PictTransform *transform = op->base.src.transform;
3238         float *v = sna->render.vertices + sna->render.vertex_used;
3239         sna->render.vertex_used += 15;
3240
3241         v[0] = op->base.dst.x + box->x2;
3242         v[1] = op->base.dst.y + box->y2;
3243         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2,
3244                                          (int)op->base.src.offset[1] + box->y2,
3245                                          transform,
3246                                          &v[2], &v[3]);
3247         v[4] = opacity;
3248
3249         v[5] = op->base.dst.x + box->x1;
3250         v[6] = v[1];
3251         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3252                                          (int)op->base.src.offset[1] + box->y2,
3253                                          transform,
3254                                          &v[7], &v[8]);
3255         v[9] = opacity;
3256
3257         v[10] = v[5];
3258         v[11] = op->base.dst.y + box->y1;
3259         _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1,
3260                                          (int)op->base.src.offset[1] + box->y1,
3261                                          transform,
3262                                          &v[12], &v[13]);
3263         v[14] = opacity;
3264 }
3265
3266 fastcall static void
3267 gen3_emit_composite_spans_primitive(struct sna *sna,
3268                                     const struct sna_composite_spans_op *op,
3269                                     const BoxRec *box,
3270                                     float opacity)
3271 {
3272         gen3_emit_composite_spans_vertex(sna, op,
3273                                          box->x2, box->y2,
3274                                          opacity);
3275         gen3_emit_composite_spans_vertex(sna, op,
3276                                          box->x1, box->y2,
3277                                          opacity);
3278         gen3_emit_composite_spans_vertex(sna, op,
3279                                          box->x1, box->y1,
3280                                          opacity);
3281 }
3282
3283 fastcall static void
3284 gen3_render_composite_spans_constant_box(struct sna *sna,
3285                                          const struct sna_composite_spans_op *op,
3286                                          const BoxRec *box, float opacity)
3287 {
3288         float *v;
3289         DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3290              __FUNCTION__,
3291              op->base.src.offset[0], op->base.src.offset[1],
3292              opacity,
3293              op->base.dst.x, op->base.dst.y,
3294              box->x1, box->y1,
3295              box->x2 - box->x1,
3296              box->y2 - box->y1));
3297
3298         gen3_get_rectangles(sna, &op->base, 1);
3299
3300         v = sna->render.vertices + sna->render.vertex_used;
3301         sna->render.vertex_used += 9;
3302
3303         v[0] = box->x2;
3304         v[6] = v[3] = box->x1;
3305         v[4] = v[1] = box->y2;
3306         v[7] = box->y1;
3307         v[8] = v[5] = v[2] = opacity;
3308 }
3309
3310 fastcall static void
3311 gen3_render_composite_spans_box(struct sna *sna,
3312                                 const struct sna_composite_spans_op *op,
3313                                 const BoxRec *box, float opacity)
3314 {
3315         DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3316              __FUNCTION__,
3317              op->base.src.offset[0], op->base.src.offset[1],
3318              opacity,
3319              op->base.dst.x, op->base.dst.y,
3320              box->x1, box->y1,
3321              box->x2 - box->x1,
3322              box->y2 - box->y1));
3323
3324         gen3_get_rectangles(sna, &op->base, 1);
3325         op->prim_emit(sna, op, box, opacity);
3326 }
3327
3328 static void
3329 gen3_render_composite_spans_boxes(struct sna *sna,
3330                                   const struct sna_composite_spans_op *op,
3331                                   const BoxRec *box, int nbox,
3332                                   float opacity)
3333 {
3334         DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
3335              __FUNCTION__, nbox,
3336              op->base.src.offset[0], op->base.src.offset[1],
3337              opacity,
3338              op->base.dst.x, op->base.dst.y));
3339
3340         do {
3341                 int nbox_this_time;
3342
3343                 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3344                 nbox -= nbox_this_time;
3345
3346                 do {
3347                         DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
3348                              box->x1, box->y1,
3349                              box->x2 - box->x1,
3350                              box->y2 - box->y1));
3351
3352                         op->prim_emit(sna, op, box++, opacity);
3353                 } while (--nbox_this_time);
3354         } while (nbox);
3355 }
3356
3357 fastcall static void
3358 gen3_render_composite_spans_done(struct sna *sna,
3359                                  const struct sna_composite_spans_op *op)
3360 {
3361         if (sna->render_state.gen3.vertex_offset)
3362                 gen3_vertex_flush(sna);
3363
3364         DBG(("%s()\n", __FUNCTION__));
3365
3366         if (op->base.src.bo)
3367                 kgem_bo_destroy(&sna->kgem, op->base.src.bo);
3368
3369         sna_render_composite_redirect_done(sna, &op->base);
3370 }
3371
3372 static bool
3373 gen3_check_composite_spans(struct sna *sna,
3374                            uint8_t op, PicturePtr src, PicturePtr dst,
3375                            int16_t width, int16_t height, unsigned flags)
3376 {
3377         if (op >= ARRAY_SIZE(gen3_blend_op))
3378                 return false;
3379
3380         if (gen3_composite_fallback(sna, op, src, NULL, dst))
3381                 return false;
3382
3383         if (need_tiling(sna, width, height)) {
3384                 if (!is_gpu(dst->pDrawable)) {
3385                         DBG(("%s: fallback, tiled operation not on GPU\n",
3386                              __FUNCTION__));
3387                         return false;
3388                 }
3389         }
3390
3391         return true;
3392 }
3393
3394 static bool
3395 gen3_render_composite_spans(struct sna *sna,
3396                             uint8_t op,
3397                             PicturePtr src,
3398                             PicturePtr dst,
3399                             int16_t src_x,  int16_t src_y,
3400                             int16_t dst_x,  int16_t dst_y,
3401                             int16_t width,  int16_t height,
3402                             unsigned flags,
3403                             struct sna_composite_spans_op *tmp)
3404 {
3405         bool no_offset;
3406
3407         DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
3408              src_x, src_y, dst_x, dst_y, width, height));
3409
3410         assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
3411
3412         if (need_tiling(sna, width, height)) {
3413                 DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
3414                      __FUNCTION__, width, height));
3415                 return sna_tiling_composite_spans(op, src, dst,
3416                                                   src_x, src_y, dst_x, dst_y,
3417                                                   width, height, flags, tmp);
3418         }
3419
3420         if (!gen3_composite_set_target(sna, &tmp->base, dst,
3421                                        dst_x, dst_y, width, height)) {
3422                 DBG(("%s: unable to set render target\n",
3423                      __FUNCTION__));
3424                 return false;
3425         }
3426
3427         tmp->base.op = op;
3428         tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
3429         if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
3430             !gen3_check_pitch_3d(tmp->base.dst.bo)) {
3431                 if (!sna_render_composite_redirect(sna, &tmp->base,
3432                                                    dst_x, dst_y, width, height))
3433                         return false;
3434         }
3435
3436         tmp->base.src.u.gen3.type = SHADER_TEXTURE;
3437         tmp->base.src.is_affine = true;
3438         DBG(("%s: preparing source\n", __FUNCTION__));
3439         switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
3440                                        src_x, src_y,
3441                                        width, height,
3442                                        dst_x, dst_y,
3443                                        dst->polyMode == PolyModePrecise)) {
3444         case -1:
3445                 goto cleanup_dst;
3446         case 0:
3447                 tmp->base.src.u.gen3.type = SHADER_ZERO;
3448                 break;
3449         case 1:
3450                 gen3_composite_channel_convert(&tmp->base.src);
3451                 break;
3452         }
3453         DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
3454
3455         if (tmp->base.src.u.gen3.type != SHADER_ZERO)
3456                 tmp->base.mask.u.gen3.type = SHADER_OPACITY;
3457
3458         no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
3459         tmp->box   = gen3_render_composite_spans_box;
3460         tmp->boxes = gen3_render_composite_spans_boxes;
3461         tmp->done  = gen3_render_composite_spans_done;
3462         tmp->prim_emit = gen3_emit_composite_spans_primitive;
3463         switch (tmp->base.src.u.gen3.type) {
3464         case SHADER_NONE:
3465                 assert(0);
3466         case SHADER_ZERO:
3467                 tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_zero_no_offset : gen3_emit_composite_spans_primitive_zero;
3468                 break;
3469         case SHADER_BLACK:
3470         case SHADER_WHITE:
3471         case SHADER_CONSTANT:
3472                 if (no_offset) {
3473                         tmp->box = gen3_render_composite_spans_constant_box;
3474                         tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
3475                 } else
3476                         tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
3477                 break;
3478         case SHADER_LINEAR:
3479         case SHADER_RADIAL:
3480                 if (tmp->base.src.transform == NULL)
3481                         tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
3482                 else if (tmp->base.src.is_affine)
3483                         tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
3484                 break;
3485         case SHADER_TEXTURE:
3486                 if (tmp->base.src.transform == NULL)
3487                         tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
3488                 else if (tmp->base.src.is_affine)
3489                         tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
3490                 break;
3491         }
3492
3493         tmp->base.mask.bo = NULL;
3494
3495         tmp->base.floats_per_vertex = 2;
3496         if (!is_constant_ps(tmp->base.src.u.gen3.type))
3497                 tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
3498         tmp->base.floats_per_vertex +=
3499                 tmp->base.mask.u.gen3.type == SHADER_OPACITY;
3500         tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
3501
3502         if (!kgem_check_bo(&sna->kgem,
3503                            tmp->base.dst.bo, tmp->base.src.bo,
3504                            NULL)) {
3505                 kgem_submit(&sna->kgem);
3506                 if (!kgem_check_bo(&sna->kgem,
3507                                    tmp->base.dst.bo, tmp->base.src.bo,
3508                                    NULL))
3509                         goto cleanup_src;
3510         }
3511
3512         gen3_emit_composite_state(sna, &tmp->base);
3513         gen3_align_vertex(sna, &tmp->base);
3514         return true;
3515
3516 cleanup_src:
3517         if (tmp->base.src.bo)
3518                 kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
3519 cleanup_dst:
3520         if (tmp->base.redirect.real_bo)
3521                 kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
3522         return false;
3523 }
3524
3525 static void
3526 gen3_emit_video_state(struct sna *sna,
3527                       struct sna_video *video,
3528                       struct sna_video_frame *frame,
3529                       PixmapPtr pixmap,
3530                       struct kgem_bo *dst_bo,
3531                       int width, int height)
3532 {
3533         struct gen3_render_state *state = &sna->render_state.gen3;
3534         uint32_t id, ms3, rewind;
3535
3536         gen3_emit_target(sna, dst_bo, width, height,
3537                          sna_format_for_depth(pixmap->drawable.depth));
3538
3539         /* XXX share with composite? Is it worth the effort? */
3540         if ((state->last_shader & (1<<31)) == 0) {
3541                 OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
3542                           I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
3543                           2);
3544                 OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
3545                 OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
3546                           S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
3547                           S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
3548                           S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
3549                           S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
3550                           S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
3551                           S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
3552                           S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
3553                 OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
3554                           (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
3555                           S6_COLOR_WRITE_ENABLE);
3556
3557                 state->last_blend = 0;
3558                 state->floats_per_vertex = 4;
3559         }
3560
3561         if (!is_planar_fourcc(frame->id)) {
3562                 rewind = sna->kgem.nbatch;
3563                 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
3564                 OUT_BATCH(0x0000001);   /* constant 0 */
3565                 /* constant 0: brightness/contrast */
3566                 OUT_BATCH_F(video->brightness / 128.0);
3567                 OUT_BATCH_F(video->contrast / 255.0);
3568                 OUT_BATCH_F(0.0);
3569                 OUT_BATCH_F(0.0);
3570                 if (state->last_constants &&
3571                     memcmp(&sna->kgem.batch[state->last_constants],
3572                            &sna->kgem.batch[rewind],
3573                            6*sizeof(uint32_t)) == 0)
3574                         sna->kgem.nbatch = rewind;
3575                 else
3576                         state->last_constants = rewind;
3577
3578                 rewind = sna->kgem.nbatch;
3579                 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
3580                 OUT_BATCH(0x00000001);
3581                 OUT_BATCH(SS2_COLORSPACE_CONVERSION |
3582                           (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3583                           (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3584                 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3585                            SS3_TCX_ADDR_MODE_SHIFT) |
3586                           (TEXCOORDMODE_CLAMP_EDGE <<
3587                            SS3_TCY_ADDR_MODE_SHIFT) |
3588                           (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3589                           SS3_NORMALIZED_COORDS);
3590                 OUT_BATCH(0x00000000);
3591                 if (state->last_sampler &&
3592                     memcmp(&sna->kgem.batch[state->last_sampler],
3593                            &sna->kgem.batch[rewind],
3594                            5*sizeof(uint32_t)) == 0)
3595                         sna->kgem.nbatch = rewind;
3596                 else
3597                         state->last_sampler = rewind;
3598
3599                 OUT_BATCH(_3DSTATE_MAP_STATE | 3);
3600                 OUT_BATCH(0x00000001);  /* texture map #1 */
3601                 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3602                                          frame->bo,
3603                                          I915_GEM_DOMAIN_SAMPLER << 16,
3604                                          0));
3605
3606                 ms3 = MAPSURF_422;
3607                 switch (frame->id) {
3608                 case FOURCC_YUY2:
3609                         ms3 |= MT_422_YCRCB_NORMAL;
3610                         break;
3611                 case FOURCC_UYVY:
3612                         ms3 |= MT_422_YCRCB_SWAPY;
3613                         break;
3614                 }
3615                 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
3616                 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
3617                 OUT_BATCH(ms3);
3618                 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
3619
3620                 id = 1<<31 | 1<<1 | !!video->brightness;
3621                 if (state->last_shader != id) {
3622                         state->last_shader = id;
3623                         id = sna->kgem.nbatch++;
3624
3625                         gen3_fs_dcl(FS_S0);
3626                         gen3_fs_dcl(FS_T0);
3627                         gen3_fs_texld(FS_OC, FS_S0, FS_T0);
3628                         if (video->brightness != 0) {
3629                                 gen3_fs_add(FS_OC,
3630                                             gen3_fs_operand_reg(FS_OC),
3631                                             gen3_fs_operand(FS_C0, X, X, X, ZERO));
3632                         }
3633
3634                         sna->kgem.batch[id] =
3635                                 _3DSTATE_PIXEL_SHADER_PROGRAM |
3636                                 (sna->kgem.nbatch - id - 2);
3637                 }
3638         } else {
3639                 /* For the planar formats, we set up three samplers --
3640                  * one for each plane, in a Y8 format.  Because I
3641                  * couldn't get the special PLANAR_TO_PACKED
3642                  * shader setup to work, I did the manual pixel shader:
3643                  *
3644                  * y' = y - .0625
3645                  * u' = u - .5
3646                  * v' = v - .5;
3647                  *
3648                  * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
3649                  * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
3650                  * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
3651                  *
3652                  * register assignment:
3653                  * r0 = (y',u',v',0)
3654                  * r1 = (y,y,y,y)
3655                  * r2 = (u,u,u,u)
3656                  * r3 = (v,v,v,v)
3657                  * OC = (r,g,b,1)
3658                  */
3659                 rewind = sna->kgem.nbatch;
3660                 OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
3661                 OUT_BATCH(0x000001f);   /* constants 0-4 */
3662                 /* constant 0: normalization offsets */
3663                 OUT_BATCH_F(-0.0625);
3664                 OUT_BATCH_F(-0.5);
3665                 OUT_BATCH_F(-0.5);
3666                 OUT_BATCH_F(0.0);
3667                 /* constant 1: r coefficients */
3668                 OUT_BATCH_F(1.1643);
3669                 OUT_BATCH_F(0.0);
3670                 OUT_BATCH_F(1.5958);
3671                 OUT_BATCH_F(0.0);
3672                 /* constant 2: g coefficients */
3673                 OUT_BATCH_F(1.1643);
3674                 OUT_BATCH_F(-0.39173);
3675                 OUT_BATCH_F(-0.81290);
3676                 OUT_BATCH_F(0.0);
3677                 /* constant 3: b coefficients */
3678                 OUT_BATCH_F(1.1643);
3679                 OUT_BATCH_F(2.017);
3680                 OUT_BATCH_F(0.0);
3681                 OUT_BATCH_F(0.0);
3682                 /* constant 4: brightness/contrast */
3683                 OUT_BATCH_F(video->brightness / 128.0);
3684                 OUT_BATCH_F(video->contrast / 255.0);
3685                 OUT_BATCH_F(0.0);
3686                 OUT_BATCH_F(0.0);
3687                 if (state->last_constants &&
3688                     memcmp(&sna->kgem.batch[state->last_constants],
3689                            &sna->kgem.batch[rewind],
3690                            22*sizeof(uint32_t)) == 0)
3691                         sna->kgem.nbatch = rewind;
3692                 else
3693                         state->last_constants = rewind;
3694
3695                 rewind = sna->kgem.nbatch;
3696                 OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
3697                 OUT_BATCH(0x00000007);
3698                 /* sampler 0 */
3699                 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3700                           (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3701                 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3702                            SS3_TCX_ADDR_MODE_SHIFT) |
3703                           (TEXCOORDMODE_CLAMP_EDGE <<
3704                            SS3_TCY_ADDR_MODE_SHIFT) |
3705                           (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3706                           SS3_NORMALIZED_COORDS);
3707                 OUT_BATCH(0x00000000);
3708                 /* sampler 1 */
3709                 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3710                           (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3711                 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3712                            SS3_TCX_ADDR_MODE_SHIFT) |
3713                           (TEXCOORDMODE_CLAMP_EDGE <<
3714                            SS3_TCY_ADDR_MODE_SHIFT) |
3715                           (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3716                           SS3_NORMALIZED_COORDS);
3717                 OUT_BATCH(0x00000000);
3718                 /* sampler 2 */
3719                 OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
3720                           (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
3721                 OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE <<
3722                            SS3_TCX_ADDR_MODE_SHIFT) |
3723                           (TEXCOORDMODE_CLAMP_EDGE <<
3724                            SS3_TCY_ADDR_MODE_SHIFT) |
3725                           (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
3726                           SS3_NORMALIZED_COORDS);
3727                 OUT_BATCH(0x00000000);
3728                 if (state->last_sampler &&
3729                     memcmp(&sna->kgem.batch[state->last_sampler],
3730                            &sna->kgem.batch[rewind],
3731                            11*sizeof(uint32_t)) == 0)
3732                         sna->kgem.nbatch = rewind;
3733                 else
3734                         state->last_sampler = rewind;
3735
3736                 OUT_BATCH(_3DSTATE_MAP_STATE | 9);
3737                 OUT_BATCH(0x00000007);
3738
3739                 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3740                                          frame->bo,
3741                                          I915_GEM_DOMAIN_SAMPLER << 16,
3742                                          0));
3743
3744                 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3745                 ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
3746                 ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
3747                 OUT_BATCH(ms3);
3748                 /* check to see if Y has special pitch than normal
3749                  * double u/v pitch, e.g i915 XvMC hw requires at
3750                  * least 1K alignment, so Y pitch might
3751                  * be same as U/V's.*/
3752                 if (frame->pitch[1])
3753                         OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
3754                 else
3755                         OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
3756
3757                 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3758                                          frame->bo,
3759                                          I915_GEM_DOMAIN_SAMPLER << 16,
3760                                          frame->UBufOffset));
3761
3762                 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3763                 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
3764                 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
3765                 OUT_BATCH(ms3);
3766                 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
3767
3768                 OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
3769                                          frame->bo,
3770                                          I915_GEM_DOMAIN_SAMPLER << 16,
3771                                          frame->VBufOffset));
3772
3773                 ms3 = MAPSURF_8BIT | MT_8BIT_I8;
3774                 ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
3775                 ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
3776                 OUT_BATCH(ms3);
3777                 OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
3778
3779                 id = 1<<31 | 2<<1 | !!video->brightness;
3780                 if (state->last_shader != id) {
3781                         state->last_shader = id;
3782                         id = sna->kgem.nbatch++;
3783
3784                         /* Declare samplers */
3785                         gen3_fs_dcl(FS_S0);     /* Y */
3786                         gen3_fs_dcl(FS_S1);     /* U */
3787                         gen3_fs_dcl(FS_S2);     /* V */
3788                         gen3_fs_dcl(FS_T0);     /* normalized coords */
3789
3790                         /* Load samplers to temporaries. */
3791                         gen3_fs_texld(FS_R1, FS_S0, FS_T0);
3792                         gen3_fs_texld(FS_R2, FS_S1, FS_T0);
3793                         gen3_fs_texld(FS_R3, FS_S2, FS_T0);
3794
3795                         /* Move the sampled YUV data in R[123] to the first
3796                          * 3 channels of R0.
3797                          */
3798                         gen3_fs_mov_masked(FS_R0, MASK_X,
3799                                            gen3_fs_operand_reg(FS_R1));
3800                         gen3_fs_mov_masked(FS_R0, MASK_Y,
3801                                            gen3_fs_operand_reg(FS_R2));
3802                         gen3_fs_mov_masked(FS_R0, MASK_Z,
3803                                            gen3_fs_operand_reg(FS_R3));
3804
3805                         /* Normalize the YUV data */
3806                         gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
3807                                     gen3_fs_operand_reg(FS_C0));
3808                         /* dot-product the YUV data in R0 by the vectors of
3809                          * coefficients for calculating R, G, and B, storing
3810                          * the results in the R, G, or B channels of the output
3811                          * color.  The OC results are implicitly clamped
3812                          * at the end of the program.
3813                          */
3814                         gen3_fs_dp3(FS_OC, MASK_X,
3815                                     gen3_fs_operand_reg(FS_R0),
3816                                     gen3_fs_operand_reg(FS_C1));
3817                         gen3_fs_dp3(FS_OC, MASK_Y,
3818                                     gen3_fs_operand_reg(FS_R0),
3819                                     gen3_fs_operand_reg(FS_C2));
3820                         gen3_fs_dp3(FS_OC, MASK_Z,
3821                                     gen3_fs_operand_reg(FS_R0),
3822                                     gen3_fs_operand_reg(FS_C3));
3823                         /* Set alpha of the output to 1.0, by wiring W to 1
3824                          * and not actually using the source.
3825                          */
3826                         gen3_fs_mov_masked(FS_OC, MASK_W,
3827                                            gen3_fs_operand_one());
3828
3829                         if (video->brightness != 0) {
3830                                 gen3_fs_add(FS_OC,
3831                                             gen3_fs_operand_reg(FS_OC),
3832                                             gen3_fs_operand(FS_C4, X, X, X, ZERO));
3833                         }
3834
3835                         sna->kgem.batch[id] =
3836                                 _3DSTATE_PIXEL_SHADER_PROGRAM |
3837                                 (sna->kgem.nbatch - id - 2);
3838                 }
3839         }
3840
3841 }
3842
3843 static void
3844 gen3_video_get_batch(struct sna *sna)
3845 {
3846         kgem_set_mode(&sna->kgem, KGEM_RENDER);
3847
3848         if (!kgem_check_batch(&sna->kgem, 120) ||
3849             !kgem_check_reloc(&sna->kgem, 4) ||
3850             !kgem_check_exec(&sna->kgem, 2)) {
3851                 _kgem_submit(&sna->kgem);
3852                 _kgem_set_mode(&sna->kgem, KGEM_RENDER);
3853         }
3854
3855         if (sna->render_state.gen3.need_invariant)
3856                 gen3_emit_invariant(sna);
3857 }
3858
/*
 * Clamp the number of rectangles the caller wants to emit inline to what
 * fits in the remaining batch space.
 *
 * Each rectangle takes three vertices of floats_per_vertex entries.  One
 * entry of batch_space() is held back (presumably for the primitive
 * header the caller emits before the vertices -- confirm against callers).
 *
 * Returns `want` if everything fits, otherwise the number of whole
 * rectangles that fit.
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
        const int per_rect = 3 * floats_per_vertex;
        const int avail = batch_space(sna) - 1;

        return per_rect * want > avail ? avail / per_rect : want;
}
3870
3871 static bool
3872 gen3_render_video(struct sna *sna,
3873                   struct sna_video *video,
3874                   struct sna_video_frame *frame,
3875                   RegionPtr dstRegion,
3876                   short src_w, short src_h,
3877                   short drw_w, short drw_h,
3878                   PixmapPtr pixmap)
3879 {
3880         struct sna_pixmap *priv = sna_pixmap(pixmap);
3881         BoxPtr pbox = REGION_RECTS(dstRegion);
3882         int nbox = REGION_NUM_RECTS(dstRegion);
3883         int dxo = dstRegion->extents.x1;
3884         int dyo = dstRegion->extents.y1;
3885         int width = dstRegion->extents.x2 - dxo;
3886         int height = dstRegion->extents.y2 - dyo;
3887         float src_scale_x, src_scale_y;
3888         int pix_xoff, pix_yoff;
3889         struct kgem_bo *dst_bo;
3890         int copy = 0;
3891
3892         DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h));
3893
3894         dst_bo = priv->gpu_bo;
3895         if (dst_bo == NULL)
3896                 return false;
3897
3898         if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
3899             !gen3_check_pitch_3d(dst_bo)) {
3900                 int bpp = pixmap->drawable.bitsPerPixel;
3901
3902                 dst_bo = kgem_create_2d(&sna->kgem,
3903                                         width, height, bpp,
3904                                         kgem_choose_tiling(&sna->kgem,
3905                                                            I915_TILING_X,
3906                                                            width, height, bpp),
3907                                         0);
3908                 if (!dst_bo)
3909                         return false;
3910
3911                 pix_xoff = -dxo;
3912                 pix_yoff = -dyo;
3913                 copy = 1;
3914         } else {
3915                 width = pixmap->drawable.width;
3916                 height = pixmap->drawable.height;
3917
3918                 /* Set up the offset for translating from the given region
3919                  * (in screen coordinates) to the backing pixmap.
3920                  */
3921 #ifdef COMPOSITE
3922                 pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
3923                 pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
3924 #else
3925                 pix_xoff = 0;
3926                 pix_yoff = 0;
3927 #endif
3928         }
3929
3930         src_scale_x = ((float)src_w / frame->width) / drw_w;
3931         src_scale_y = ((float)src_h / frame->height) / drw_h;
3932
3933         DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n",
3934              __FUNCTION__,
3935              dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
3936
3937         gen3_video_get_batch(sna);
3938         gen3_emit_video_state(sna, video, frame, pixmap,
3939                               dst_bo, width, height);
3940         do {
3941                 int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
3942                 if (nbox_this_time == 0) {
3943                         gen3_video_get_batch(sna);
3944                         gen3_emit_video_state(sna, video, frame, pixmap,
3945                                               dst_bo, width, height);
3946                         nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
3947                 }
3948                 nbox -= nbox_this_time;
3949
3950                 OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
3951                 while (nbox_this_time--) {
3952                         int box_x1 = pbox->x1;
3953                         int box_y1 = pbox->y1;
3954                         int box_x2 = pbox->x2;
3955                         int box_y2 = pbox->y2;
3956
3957                         pbox++;
3958
3959                         DBG(("%s: box (%d, %d), (%d, %d)\n",
3960                              __FUNCTION__, box_x1, box_y1, box_x2, box_y2));
3961
3962                         /* bottom right */
3963                         OUT_BATCH_F(box_x2 + pix_xoff);
3964                         OUT_BATCH_F(box_y2 + pix_yoff);
3965                         OUT_BATCH_F((box_x2 - dxo) * src_scale_x);
3966                         OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
3967
3968                         /* bottom left */
3969                         OUT_BATCH_F(box_x1 + pix_xoff);
3970                         OUT_BATCH_F(box_y2 + pix_yoff);
3971                         OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
3972                         OUT_BATCH_F((box_y2 - dyo) * src_scale_y);
3973
3974                         /* top left */
3975                         OUT_BATCH_F(box_x1 + pix_xoff);
3976                         OUT_BATCH_F(box_y1 + pix_yoff);
3977                         OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
3978                         OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
3979                 }
3980         } while (nbox);
3981
3982         if (copy) {
3983 #ifdef COMPOSITE
3984                 pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
3985                 pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
3986 #else
3987                 pix_xoff = 0;
3988                 pix_yoff = 0;
3989 #endif
3990                 sna_blt_copy_boxes(sna, GXcopy,
3991                                    dst_bo, -dxo, -dyo,
3992                                    priv->gpu_bo, pix_xoff, pix_yoff,
3993                                    pixmap->drawable.bitsPerPixel,
3994                                    REGION_RECTS(dstRegion),
3995                                    REGION_NUM_RECTS(dstRegion));
3996
3997                 kgem_bo_destroy(&sna->kgem, dst_bo);
3998         }
3999
4000         if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
4001                 if ((pix_xoff | pix_yoff) == 0) {
4002                         sna_damage_add(&priv->gpu_damage, dstRegion);
4003                         sna_damage_subtract(&priv->cpu_damage, dstRegion);
4004                 } else {
4005                         sna_damage_add_boxes(&priv->gpu_damage,
4006                                              REGION_RECTS(dstRegion),
4007                                              REGION_NUM_RECTS(dstRegion),
4008                                              pix_xoff, pix_yoff);
4009                         sna_damage_subtract_boxes(&priv->cpu_damage,
4010                                                   REGION_RECTS(dstRegion),
4011                                                   REGION_NUM_RECTS(dstRegion),
4012                                                   pix_xoff, pix_yoff);
4013                 }
4014         }
4015         priv->clear = false;
4016
4017         return true;
4018 }
4019
4020 static void
4021 gen3_render_copy_setup_source(struct sna_composite_channel *channel,
4022                               PixmapPtr pixmap,
4023                               struct kgem_bo *bo)
4024 {
4025         channel->u.gen3.type = SHADER_TEXTURE;
4026         channel->filter = gen3_filter(PictFilterNearest);
4027         channel->repeat = gen3_texture_repeat(RepeatNone);
4028         channel->width  = pixmap->drawable.width;
4029         channel->height = pixmap->drawable.height;
4030         channel->scale[0] = 1.f/pixmap->drawable.width;
4031         channel->scale[1] = 1.f/pixmap->drawable.height;
4032         channel->offset[0] = 0;
4033         channel->offset[1] = 0;
4034         gen3_composite_channel_set_format(channel,
4035                                           sna_format_for_depth(pixmap->drawable.depth));
4036         channel->bo = bo;
4037         channel->is_affine = 1;
4038 }
4039
4040 static bool
4041 gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
4042                        PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
4043                        PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
4044                        const BoxRec *box, int n, unsigned flags)
4045 {
4046         struct sna_composite_op tmp;
4047
4048 #if NO_COPY_BOXES
4049         if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4050                 return false;
4051
4052         return sna_blt_copy_boxes(sna, alu,
4053                                   src_bo, src_dx, src_dy,
4054                                   dst_bo, dst_dx, dst_dy,
4055                                   dst->drawable.bitsPerPixel,
4056                                   box, n);
4057 #endif
4058
4059         DBG(("%s (%d, %d)->(%d, %d) x %d\n",
4060              __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
4061
4062         if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
4063             sna_blt_copy_boxes(sna, alu,
4064                                src_bo, src_dx, src_dy,
4065                                dst_bo, dst_dx, dst_dy,
4066                                dst->drawable.bitsPerPixel,
4067                                box, n))
4068                 return true;
4069
4070         if (!(alu == GXcopy || alu == GXclear) ||
4071             src_bo == dst_bo || /* XXX handle overlap using 3D ? */
4072             src_bo->pitch > MAX_3D_PITCH ||
4073             too_large(src->drawable.width, src->drawable.height)) {
4074 fallback_blt:
4075                 if (!kgem_bo_can_blt(&sna->kgem, src_bo) ||
4076                     !kgem_bo_can_blt(&sna->kgem, dst_bo))
4077                         return false;
4078
4079                 return sna_blt_copy_boxes_fallback(sna, alu,
4080                                                    src, src_bo, src_dx, src_dy,
4081                                                    dst, dst_bo, dst_dx, dst_dy,
4082                                                    box, n);
4083         }
4084
4085         if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
4086                 kgem_submit(&sna->kgem);
4087                 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
4088                         goto fallback_blt;
4089         }
4090
4091         memset(&tmp, 0, sizeof(tmp));
4092         tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
4093
4094         tmp.dst.pixmap = dst;
4095         tmp.dst.width = dst->drawable.width;
4096         tmp.dst.height = dst->drawable.height;
4097         tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
4098         tmp.dst.bo = dst_bo;
4099         tmp.dst.x = tmp.dst.y = 0;
4100         tmp.damage = NULL;
4101
4102         sna_render_composite_redirect_init(&tmp);
4103         if (too_large(tmp.dst.width, tmp.dst.height) ||
4104             dst_bo->pitch > MAX_3D_PITCH) {
4105                 BoxRec extents = box[0];
4106                 int i;
4107
4108                 for (i = 1; i < n; i++) {
4109                         if (box[i].x1 < extents.x1)
4110                                 extents.x1 = box[i].x1;
4111                         if (box[i].y1 < extents.y1)
4112                                 extents.y1 = box[i].y1;
4113
4114                         if (box[i].x2 > extents.x2)
4115                                 extents.x2 = box[i].x2;
4116                         if (box[i].y2 > extents.y2)
4117                                 extents.y2 = box[i].y2;
4118                 }
4119                 if (!sna_render_composite_redirect(sna, &tmp,
4120                                                    extents.x1 + dst_dx,
4121                                                    extents.y1 + dst_dy,
4122                                                    extents.x2 - extents.x1,
4123                                                    extents.y2 - extents.y1))
4124                         goto fallback_tiled;
4125         }
4126
4127         gen3_render_copy_setup_source(&tmp.src, src, src_bo);
4128
4129         tmp.floats_per_vertex = 4;
4130         tmp.floats_per_rect = 12;
4131         tmp.mask.bo = NULL;
4132         tmp.mask.u.gen3.type = SHADER_NONE;
4133
4134         dst_dx += tmp.dst.x;
4135         dst_dy += tmp.dst.y;
4136         tmp.dst.x = tmp.dst.y = 0;
4137
4138         gen3_emit_composite_state(sna, &tmp);
4139         gen3_align_vertex(sna, &tmp);
4140
4141         do {
4142                 int n_this_time;
4143
4144                 n_this_time = gen3_get_rectangles(sna, &tmp, n);
4145                 n -= n_this_time;
4146
4147                 do {
4148                         DBG(("  (%d, %d) -> (%d, %d) + (%d, %d)\n",
4149                              box->x1 + src_dx, box->y1 + src_dy,
4150                              box->x1 + dst_dx, box->y1 + dst_dy,
4151                              box->x2 - box->x1, box->y2 - box->y1));
4152                         OUT_VERTEX(box->x2 + dst_dx);
4153                         OUT_VERTEX(box->y2 + dst_dy);
4154                         OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]);
4155                         OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
4156
4157                         OUT_VERTEX(box->x1 + dst_dx);
4158                         OUT_VERTEX(box->y2 + dst_dy);
4159                         OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
4160                         OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]);
4161
4162                         OUT_VERTEX(box->x1 + dst_dx);
4163                         OUT_VERTEX(box->y1 + dst_dy);
4164                         OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]);
4165                         OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]);
4166
4167                         box++;
4168                 } while (--n_this_time);
4169         } while (n);
4170
4171         gen3_vertex_flush(sna);
4172         sna_render_composite_redirect_done(sna, &tmp);
4173         return true;
4174
4175 fallback_tiled:
4176         return sna_tiling_copy_boxes(sna, alu,
4177                                      src, src_bo, src_dx, src_dy,
4178                                      dst, dst_bo, dst_dx, dst_dy,
4179                                      box, n);
4180 }
4181
4182 static void
4183 gen3_render_copy_blt(struct sna *sna,
4184                      const struct sna_copy_op *op,
4185                      int16_t sx, int16_t sy,
4186                      int16_t w, int16_t h,
4187                      int16_t dx, int16_t dy)
4188 {
4189         gen3_get_rectangles(sna, &op->base, 1);
4190
4191         OUT_VERTEX(dx+w);
4192         OUT_VERTEX(dy+h);
4193         OUT_VERTEX((sx+w)*op->base.src.scale[0]);
4194         OUT_VERTEX((sy+h)*op->base.src.scale[1]);
4195
4196         OUT_VERTEX(dx);
4197         OUT_VERTEX(dy+h);
4198         OUT_VERTEX(sx*op->base.src.scale[0]);
4199         OUT_VERTEX((sy+h)*op->base.src.scale[1]);
4200
4201         OUT_VERTEX(dx);
4202         OUT_VERTEX(dy);
4203         OUT_VERTEX(sx*op->base.src.scale[0]);
4204         OUT_VERTEX(sy*op->base.src.scale[1]);
4205 }
4206
4207 static void
4208 gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
4209 {
4210         if (sna->render_state.gen3.vertex_offset)
4211                 gen3_vertex_flush(sna);
4212 }
4213
4214 static bool
4215 gen3_render_copy(struct sna *sna, uint8_t alu,
4216                  PixmapPtr src, struct kgem_bo *src_bo,
4217                  PixmapPtr dst, struct kgem_bo *dst_bo,
4218                  struct sna_copy_op *tmp)
4219 {
4220 #if NO_COPY
4221         if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4222                 return false;
4223
4224         return sna_blt_copy(sna, alu,
4225                             src_bo, dst_bo,
4226                             dst->drawable.bitsPerPixel,
4227                             tmp);
4228 #endif
4229
4230         /* Prefer to use the BLT */
4231         if (sna->kgem.mode != KGEM_RENDER &&
4232             sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
4233             sna_blt_copy(sna, alu,
4234                          src_bo, dst_bo,
4235                          dst->drawable.bitsPerPixel,
4236                          tmp))
4237                 return true;
4238
4239         /* Must use the BLT if we can't RENDER... */
4240         if (!(alu == GXcopy || alu == GXclear) ||
4241             too_large(src->drawable.width, src->drawable.height) ||
4242             too_large(dst->drawable.width, dst->drawable.height) ||
4243             src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch > MAX_3D_PITCH) {
4244 fallback:
4245                 if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
4246                         return false;
4247
4248                 return sna_blt_copy(sna, alu, src_bo, dst_bo,
4249                                     dst->drawable.bitsPerPixel,
4250                                     tmp);
4251         }
4252
4253         tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear;
4254
4255         tmp->base.dst.pixmap = dst;
4256         tmp->base.dst.width = dst->drawable.width;
4257         tmp->base.dst.height = dst->drawable.height;
4258         tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
4259         tmp->base.dst.bo = dst_bo;
4260
4261         gen3_render_copy_setup_source(&tmp->base.src, src, src_bo);
4262
4263         tmp->base.floats_per_vertex = 4;
4264         tmp->base.floats_per_rect = 12;
4265         tmp->base.mask.bo = NULL;
4266         tmp->base.mask.u.gen3.type = SHADER_NONE;
4267
4268         if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
4269                 kgem_submit(&sna->kgem);
4270                 if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
4271                         goto fallback;
4272         }
4273
4274         tmp->blt  = gen3_render_copy_blt;
4275         tmp->done = gen3_render_copy_done;
4276
4277         gen3_emit_composite_state(sna, &tmp->base);
4278         gen3_align_vertex(sna, &tmp->base);
4279         return true;
4280 }
4281
4282 static bool
4283 gen3_render_fill_boxes_try_blt(struct sna *sna,
4284                                CARD8 op, PictFormat format,
4285                                const xRenderColor *color,
4286                                PixmapPtr dst, struct kgem_bo *dst_bo,
4287                                const BoxRec *box, int n)
4288 {
4289         uint8_t alu;
4290         uint32_t pixel;
4291
4292         if (dst_bo->tiling == I915_TILING_Y) {
4293                 DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
4294                 assert(!too_large(dst->drawable.width, dst->drawable.height));
4295                 return false;
4296         }
4297
4298         if (op > PictOpSrc)
4299                 return false;
4300
4301         if (op == PictOpClear) {
4302                 alu = GXclear;
4303                 pixel = 0;
4304         } else if (!sna_get_pixel_from_rgba(&pixel,
4305                                             color->red,
4306                                             color->green,
4307                                             color->blue,
4308                                             color->alpha,
4309                                             format))
4310                 return false;
4311         else
4312                 alu = GXcopy;
4313
4314         return sna_blt_fill_boxes(sna, alu,
4315                                   dst_bo, dst->drawable.bitsPerPixel,
4316                                   pixel, box, n);
4317 }
4318
4319 static inline bool prefer_fill_blt(struct sna *sna)
4320 {
4321 #if PREFER_BLT_FILL
4322         return true;
4323 #else
4324         return sna->kgem.mode != KGEM_RENDER;
4325 #endif
4326 }
4327
4328 static bool
4329 gen3_render_fill_boxes(struct sna *sna,
4330                        CARD8 op,
4331                        PictFormat format,
4332                        const xRenderColor *color,
4333                        PixmapPtr dst, struct kgem_bo *dst_bo,
4334                        const BoxRec *box, int n)
4335 {
4336         struct sna_composite_op tmp;
4337         uint32_t pixel;
4338
4339         if (op >= ARRAY_SIZE(gen3_blend_op)) {
4340                 DBG(("%s: fallback due to unhandled blend op: %d\n",
4341                      __FUNCTION__, op));
4342                 return false;
4343         }
4344
4345 #if NO_FILL_BOXES
4346         return gen3_render_fill_boxes_try_blt(sna, op, format, color,
4347                                               dst, dst_bo,
4348                                               box, n);
4349 #endif
4350
4351         DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x))\n",
4352              __FUNCTION__, op, (int)format,
4353              color->red, color->green, color->blue, color->alpha));
4354
4355         if (too_large(dst->drawable.width, dst->drawable.height) ||
4356             dst_bo->pitch > MAX_3D_PITCH ||
4357             !gen3_check_dst_format(format)) {
4358                 DBG(("%s: try blt, too large or incompatible destination\n",
4359                      __FUNCTION__));
4360                 if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
4361                                                    dst, dst_bo,
4362                                                    box, n))
4363                         return true;
4364
4365                 if (!gen3_check_dst_format(format))
4366                         return false;
4367
4368                 return sna_tiling_fill_boxes(sna, op, format, color,
4369                                              dst, dst_bo, box, n);
4370         }
4371
4372         if (prefer_fill_blt(sna) &&
4373             gen3_render_fill_boxes_try_blt(sna, op, format, color,
4374                                            dst, dst_bo,
4375                                            box, n))
4376                 return true;
4377
4378         if (op == PictOpClear) {
4379                 pixel = 0;
4380         } else {
4381                 if (!sna_get_pixel_from_rgba(&pixel,
4382                                              color->red,
4383                                              color->green,
4384                                              color->blue,
4385                                              color->alpha,
4386                                              PICT_a8r8g8b8)) {
4387                         assert(0);
4388                         return false;
4389                 }
4390         }
4391         DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
4392              __FUNCTION__, op, (int)format, pixel));
4393
4394         tmp.op = op;
4395         tmp.dst.pixmap = dst;
4396         tmp.dst.width = dst->drawable.width;
4397         tmp.dst.height = dst->drawable.height;
4398         tmp.dst.format = format;
4399         tmp.dst.bo = dst_bo;
4400         tmp.floats_per_vertex = 2;
4401         tmp.floats_per_rect = 6;
4402         tmp.rb_reversed = 0;
4403         tmp.has_component_alpha = 0;
4404         tmp.need_magic_ca_pass = false;
4405
4406         gen3_init_solid(&tmp.src, pixel);
4407         tmp.mask.bo = NULL;
4408         tmp.mask.u.gen3.type = SHADER_NONE;
4409         tmp.u.gen3.num_constants = 0;
4410
4411         if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
4412                 kgem_submit(&sna->kgem);
4413                 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
4414         }
4415
4416         gen3_emit_composite_state(sna, &tmp);
4417         gen3_align_vertex(sna, &tmp);
4418
4419         do {
4420                 int n_this_time;
4421
4422                 n_this_time = gen3_get_rectangles(sna, &tmp, n);
4423                 n -= n_this_time;
4424
4425                 do {
4426                         DBG(("  (%d, %d), (%d, %d): %x\n",
4427                              box->x1, box->y1, box->x2, box->y2, pixel));
4428                         OUT_VERTEX(box->x2);
4429                         OUT_VERTEX(box->y2);
4430                         OUT_VERTEX(box->x1);
4431                         OUT_VERTEX(box->y2);
4432                         OUT_VERTEX(box->x1);
4433                         OUT_VERTEX(box->y1);
4434                         box++;
4435                 } while (--n_this_time);
4436         } while (n);
4437
4438         gen3_vertex_flush(sna);
4439         return true;
4440 }
4441
4442 static void
4443 gen3_render_fill_op_blt(struct sna *sna,
4444                         const struct sna_fill_op *op,
4445                         int16_t x, int16_t y, int16_t w, int16_t h)
4446 {
4447         gen3_get_rectangles(sna, &op->base, 1);
4448
4449         OUT_VERTEX(x+w);
4450         OUT_VERTEX(y+h);
4451         OUT_VERTEX(x);
4452         OUT_VERTEX(y+h);
4453         OUT_VERTEX(x);
4454         OUT_VERTEX(y);
4455 }
4456
4457 fastcall static void
4458 gen3_render_fill_op_box(struct sna *sna,
4459                         const struct sna_fill_op *op,
4460                         const BoxRec *box)
4461 {
4462         gen3_get_rectangles(sna, &op->base, 1);
4463
4464         OUT_VERTEX(box->x2);
4465         OUT_VERTEX(box->y2);
4466         OUT_VERTEX(box->x1);
4467         OUT_VERTEX(box->y2);
4468         OUT_VERTEX(box->x1);
4469         OUT_VERTEX(box->y1);
4470 }
4471
4472 fastcall static void
4473 gen3_render_fill_op_boxes(struct sna *sna,
4474                           const struct sna_fill_op *op,
4475                           const BoxRec *box,
4476                           int nbox)
4477 {
4478         DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
4479              box->x1, box->y1, box->x2, box->y2, nbox));
4480
4481         do {
4482                 int nbox_this_time;
4483
4484                 nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
4485                 nbox -= nbox_this_time;
4486
4487                 do {
4488                         OUT_VERTEX(box->x2);
4489                         OUT_VERTEX(box->y2);
4490                         OUT_VERTEX(box->x1);
4491                         OUT_VERTEX(box->y2);
4492                         OUT_VERTEX(box->x1);
4493                         OUT_VERTEX(box->y1);
4494                         box++;
4495                 } while (--nbox_this_time);
4496         } while (nbox);
4497 }
4498
4499 static void
4500 gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
4501 {
4502         if (sna->render_state.gen3.vertex_offset)
4503                 gen3_vertex_flush(sna);
4504 }
4505
4506 static bool
4507 gen3_render_fill(struct sna *sna, uint8_t alu,
4508                  PixmapPtr dst, struct kgem_bo *dst_bo,
4509                  uint32_t color,
4510                  struct sna_fill_op *tmp)
4511 {
4512 #if NO_FILL
4513         return sna_blt_fill(sna, alu,
4514                             dst_bo, dst->drawable.bitsPerPixel,
4515                             color,
4516                             tmp);
4517 #endif
4518
4519         /* Prefer to use the BLT if already engaged */
4520         if (prefer_fill_blt(sna) &&
4521             sna_blt_fill(sna, alu,
4522                          dst_bo, dst->drawable.bitsPerPixel,
4523                          color,
4524                          tmp))
4525                 return true;
4526
4527         /* Must use the BLT if we can't RENDER... */
4528         if (!(alu == GXcopy || alu == GXclear) ||
4529             too_large(dst->drawable.width, dst->drawable.height) ||
4530             dst_bo->pitch > MAX_3D_PITCH)
4531                 return sna_blt_fill(sna, alu,
4532                                     dst_bo, dst->drawable.bitsPerPixel,
4533                                     color,
4534                                     tmp);
4535
4536         if (alu == GXclear)
4537                 color = 0;
4538
4539         tmp->base.op = color == 0 ? PictOpClear : PictOpSrc;
4540         tmp->base.dst.pixmap = dst;
4541         tmp->base.dst.width = dst->drawable.width;
4542         tmp->base.dst.height = dst->drawable.height;
4543         tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth);
4544         tmp->base.dst.bo = dst_bo;
4545         tmp->base.floats_per_vertex = 2;
4546         tmp->base.floats_per_rect = 6;
4547         tmp->base.need_magic_ca_pass = 0;
4548         tmp->base.has_component_alpha = 0;
4549         tmp->base.rb_reversed = 0;
4550
4551         gen3_init_solid(&tmp->base.src,
4552                         sna_rgba_for_color(color, dst->drawable.depth));
4553         tmp->base.mask.bo = NULL;
4554         tmp->base.mask.u.gen3.type = SHADER_NONE;
4555         tmp->base.u.gen3.num_constants = 0;
4556
4557         if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
4558                 kgem_submit(&sna->kgem);
4559                 assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
4560         }
4561
4562         tmp->blt   = gen3_render_fill_op_blt;
4563         tmp->box   = gen3_render_fill_op_box;
4564         tmp->boxes = gen3_render_fill_op_boxes;
4565         tmp->done  = gen3_render_fill_op_done;
4566
4567         gen3_emit_composite_state(sna, &tmp->base);
4568         gen3_align_vertex(sna, &tmp->base);
4569         return true;
4570 }
4571
4572 static bool
4573 gen3_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
4574                              uint32_t color,
4575                              int16_t x1, int16_t y1, int16_t x2, int16_t y2,
4576                              uint8_t alu)
4577 {
4578         BoxRec box;
4579
4580         box.x1 = x1;
4581         box.y1 = y1;
4582         box.x2 = x2;
4583         box.y2 = y2;
4584
4585         return sna_blt_fill_boxes(sna, alu,
4586                                   bo, dst->drawable.bitsPerPixel,
4587                                   color, &box, 1);
4588 }
4589
4590 static bool
4591 gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
4592                      uint32_t color,
4593                      int16_t x1, int16_t y1,
4594                      int16_t x2, int16_t y2,
4595                      uint8_t alu)
4596 {
4597         struct sna_composite_op tmp;
4598
4599 #if NO_FILL_ONE
4600         return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4601                                             x1, y1, x2, y2, alu);
4602 #endif
4603
4604         /* Prefer to use the BLT if already engaged */
4605         if (prefer_fill_blt(sna) &&
4606             gen3_render_fill_one_try_blt(sna, dst, bo, color,
4607                                          x1, y1, x2, y2, alu))
4608                 return true;
4609
4610         /* Must use the BLT if we can't RENDER... */
4611         if (!(alu == GXcopy || alu == GXclear) ||
4612             too_large(dst->drawable.width, dst->drawable.height) ||
4613             bo->pitch > MAX_3D_PITCH)
4614                 return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4615                                                     x1, y1, x2, y2, alu);
4616
4617         if (alu == GXclear)
4618                 color = 0;
4619
4620         tmp.op = color == 0 ? PictOpClear : PictOpSrc;
4621         tmp.dst.pixmap = dst;
4622         tmp.dst.width = dst->drawable.width;
4623         tmp.dst.height = dst->drawable.height;
4624         tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
4625         tmp.dst.bo = bo;
4626         tmp.floats_per_vertex = 2;
4627         tmp.floats_per_rect = 6;
4628         tmp.need_magic_ca_pass = 0;
4629         tmp.has_component_alpha = 0;
4630         tmp.rb_reversed = 0;
4631
4632         gen3_init_solid(&tmp.src,
4633                         sna_rgba_for_color(color, dst->drawable.depth));
4634         tmp.mask.bo = NULL;
4635         tmp.mask.u.gen3.type = SHADER_NONE;
4636         tmp.u.gen3.num_constants = 0;
4637
4638         if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
4639                 kgem_submit(&sna->kgem);
4640                 if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
4641                                                  x1, y1, x2, y2, alu))
4642                         return true;
4643         }
4644
4645         gen3_emit_composite_state(sna, &tmp);
4646         gen3_align_vertex(sna, &tmp);
4647         gen3_get_rectangles(sna, &tmp, 1);
4648         DBG(("  (%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
4649         OUT_VERTEX(x2);
4650         OUT_VERTEX(y2);
4651         OUT_VERTEX(x1);
4652         OUT_VERTEX(y2);
4653         OUT_VERTEX(x1);
4654         OUT_VERTEX(y1);
4655         gen3_vertex_flush(sna);
4656
4657         return true;
4658 }
4659
/* Render flush hook: forwards to gen3_vertex_close(). */
static void
gen3_render_flush(struct sna *sna)
{
        gen3_vertex_close(sna);
}
4664
/* Render teardown hook; currently a no-op. */
static void
gen3_render_fini(struct sna *sna)
{
        (void)sna;
}
4669
4670 bool gen3_render_init(struct sna *sna)
4671 {
4672         struct sna_render *render = &sna->render;
4673
4674 #if !NO_COMPOSITE
4675         render->composite = gen3_render_composite;
4676 #endif
4677 #if !NO_COMPOSITE_SPANS
4678         render->check_composite_spans = gen3_check_composite_spans;
4679         render->composite_spans = gen3_render_composite_spans;
4680 #endif
4681
4682         render->video = gen3_render_video;
4683
4684         render->copy_boxes = gen3_render_copy_boxes;
4685         render->copy = gen3_render_copy;
4686
4687         render->fill_boxes = gen3_render_fill_boxes;
4688         render->fill = gen3_render_fill;
4689         render->fill_one = gen3_render_fill_one;
4690
4691         render->reset = gen3_render_reset;
4692         render->flush = gen3_render_flush;
4693         render->fini = gen3_render_fini;
4694
4695         render->max_3d_size = MAX_3D_SIZE;
4696         render->max_3d_pitch = MAX_3D_PITCH;
4697
4698         sna->kgem.retire = gen3_render_retire;
4699         sna->kgem.expire = gen3_render_expire;
4700         return true;
4701 }