Tizen 2.0 Release
[framework/graphics/cairo.git] / src / drm / cairo-drm-i915-shader.c
1 /* cairo - a vector graphics library with display and print output
2  *
3  * Copyright © 2009 Intel Corporation
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it either under the terms of the GNU Lesser General Public
7  * License version 2.1 as published by the Free Software Foundation
8  * (the "LGPL") or, at your option, under the terms of the Mozilla
9  * Public License Version 1.1 (the "MPL"). If you do not alter this
10  * notice, a recipient may use your version of this file under either
11  * the MPL or the LGPL.
12  *
13  * You should have received a copy of the LGPL along with this library
14  * in the file COPYING-LGPL-2.1; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA
16  * You should have received a copy of the MPL along with this library
17  * in the file COPYING-MPL-1.1
18  *
19  * The contents of this file are subject to the Mozilla Public License
20  * Version 1.1 (the "License"); you may not use this file except in
21  * compliance with the License. You may obtain a copy of the License at
22  * http://www.mozilla.org/MPL/
23  *
24  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
25  * OF ANY KIND, either express or implied. See the LGPL or the MPL for
26  * the specific language governing rights and limitations.
27  *
28  * The Original Code is the cairo graphics library.
29  *
30  * Contributor(s):
31  *      Chris Wilson <chris@chris-wilson.co.uk>
32  */
33
34 #include "cairoint.h"
35
36 #include "cairo-error-private.h"
37 #include "cairo-drm-i915-private.h"
38 #include "cairo-surface-offset-private.h"
39 #include "cairo-surface-subsurface-private.h"
40 #include "cairo-surface-snapshot-private.h"
41
42 #if 0
43 static cairo_status_t
44 i915_packed_pixel_surface_finish (void *abstract_surface)
45 {
46     i915_packed_pixel_surface_t *surface = abstract_surface;
47     i915_device_t *device;
48
49     device = i915_device_acquire (&surface->device->intel.base);
50
51     intel_bo_destroy (&device->intel, surface->bo);
52
53     if (surface->is_current_texture) {
54         if (surface->is_current_texture & CURRENT_SOURCE)
55             device->current_source = NULL;
56         if (surface->is_current_texture & CURRENT_MASK)
57             device->current_mask = NULL;
58         device->current_n_samplers = 0;
59     }
60
61     i915_device_release (device);
62
63     return CAIRO_STATUS_SUCCESS;
64 }
65
66 static const cairo_surface_backend_t i915_packed_pixel_surface_backend = {
67     I915_PACKED_PIXEL_SURFACE_TYPE,
68     i915_packed_pixel_surface_finish,
69 };
70
71 static cairo_surface_t *
72 i915_packed_pixel_surface_create (i915_device_t *device,
73                                    i915_packed_pixel_t pixel,
74                                    const uint8_t *data,
75                                    uint32_t length,
76                                    uint32_t width, uint32_t height)
77 {
78     i915_packed_pixel_surface_t *surface;
79     cairo_content_t content;
80     uint32_t tiling, size;
81     uint32_t stride, half_stride;
82     uint32_t i;
83
84     if (width > 2048 || height > 2048)
85         return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_INVALID_SIZE));
86
87     surface = malloc (sizeof (i915_packed_pixel_surface_t));
88     if (unlikely (surface == NULL))
89         return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY));
90
91     tiling = I915_TILING_NONE; /* XXX */
92     half_stride = stride = i915_tiling_stride (tiling, width/2);
93     if (stride < width)
94         stride *= 2 ;
95     height = i915_tiling_height (tiling, height);
96
97     switch (surface->pixel = pixel) {
98     case YUV_I420:
99         content = CAIRO_CONTENT_COLOR;
100
101         surface->offset[0] = 0;
102         surface->width[0] = width;
103         surface->height[0] = height;
104         surface->stride[0] = stride;
105         surface->map0[0] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
106         surface->map0[0] |= ((height - 1) << MS3_HEIGHT_SHIFT) |
107                             ((width - 1)  << MS3_WIDTH_SHIFT);
108         surface->map1[0] = (stride / 4 - 1) << MS4_PITCH_SHIFT;
109
110         surface->offset[1] = stride * height;
111         surface->width[1] = width / 2;
112         surface->height[1] = height / 2;
113         surface->stride[1] = half_stride;
114         surface->map0[1] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
115         surface->map0[1] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) |
116                             ((width/2 - 1)  << MS3_WIDTH_SHIFT);
117         surface->map1[1] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT;
118
119         if (width < half_stride) {
120             surface->offset[2] = stride * height + half_stride / 2;
121             size = stride * height + half_stride * height / 2;
122         } else {
123             surface->offset[2] = stride * height + half_stride * height / 2;
124             size = stride * height + half_stride * height;
125         }
126         surface->width[2] = width / 2;
127         surface->height[2] = height / 2;
128         surface->stride[2] = half_stride;
129         surface->map0[2] = MAPSURF_8BIT | MT_8BIT_I8 | MS3_tiling (tiling);
130         surface->map0[2] |= ((height/2 - 1) << MS3_HEIGHT_SHIFT) |
131                             ((width/2 - 1)  << MS3_WIDTH_SHIFT);
132         surface->map1[2] = (half_stride / 4 - 1) << MS4_PITCH_SHIFT;
133         break;
134
135     case NONE:
136     case YUV_YV12:
137     case YUV_YUY2:
138     case YUV_UYVY:
139         ASSERT_NOT_REACHED;
140         break;
141     }
142
143     _cairo_surface_init (&surface->base,
144                          &i915_packed_pixel_surface_backend,
145                          content);
146
147     surface->bo = intel_bo_create (&device->intel, size, FALSE);
148     assert (surface->bo->tiling == I915_TILING_NONE);
149     if (unlikely (surface->bo == NULL)) {
150         free (surface);
151         return _cairo_surface_create_in_error (_cairo_error (CAIRO_STATUS_NO_MEMORY));
152     }
153
154     if (tiling == I915_TILING_NONE) {
155         intel_bo_t *bo = surface->bo;
156         uint32_t dst;
157         int uv;
158
159         dst = surface->offset[0];
160         if (width == stride) {
161             size = stride * height;
162             intel_bo_write (&device->intel, bo, dst, size, data);
163             data += size;
164         } else {
165             for (i = 0; i < height; i++) {
166                 intel_bo_write (&device->intel, bo, dst, width, data);
167                 dst += stride;
168                 data += width;
169             }
170         }
171
172         for (uv = 1; uv <= 2; uv++) {
173             dst = surface->offset[uv];
174             if (width / 2 == half_stride) {
175                 size = half_stride * height / 2;
176                 intel_bo_write (&device->intel, bo, dst, size, data);
177                 data += size;
178             } else {
179                 size = width / 2;
180                 for (i = 0; i < height / 2; i++) {
181                     intel_bo_write (&device->intel, bo, dst, size, data);
182                     dst += half_stride;
183                     data += size;
184                 }
185             }
186         }
187     } else {
188         uint8_t *dst, *base;
189
190         base = intel_bo_map (&device->intel, surface->bo);
191
192         dst = base + surface->offset[0];
193         if (width == stride) {
194             size = stride * height;
195             memcpy (dst, data, size);
196             data += size;
197         } else {
198             for (i = 0; i < height; i++) {
199                 memcpy (dst, data, width);
200                 dst += stride;
201                 data += width;
202             }
203         }
204
205         dst = base + surface->offset[1];
206         if (width / 2 == half_stride) {
207             size = half_stride * height / 2;
208             memcpy (dst, data, size);
209             data += size;
210         } else {
211             size = width / 2;
212             for (i = 0; i < height / 2; i++) {
213                 memcpy (dst, data, size);
214                 dst += half_stride;
215                 data += size;
216             }
217         }
218
219         dst = base + surface->offset[2];
220         if (width / 2 == half_stride) {
221             size = half_stride * height / 2;
222             memcpy (dst, data, size);
223             data += size;
224         } else {
225             size = width / 2;
226             for (i = 0; i < height / 2; i++) {
227                 memcpy (dst, data, size);
228                 dst += half_stride;
229                 data += size;
230             }
231         }
232     }
233
234     surface->device = device;
235     surface->is_current_texture = 0;
236
237     return &surface->base;
238 }
239
240 static cairo_int_status_t
241 i915_clone_yuv (i915_surface_t *surface,
242                  cairo_surface_t *source,
243                  int width, int height,
244                  cairo_surface_t **clone_out)
245 {
246     const uint8_t *mime_data = NULL;
247     unsigned int mime_data_length;
248     cairo_surface_t *clone;
249
250     cairo_surface_get_mime_data (source, "video/x-raw-yuv/i420",
251                                  &mime_data, &mime_data_length);
252     if (mime_data == NULL)
253         return CAIRO_INT_STATUS_UNSUPPORTED;
254
255     clone =
256         i915_packed_pixel_surface_create ((i915_device_t *) surface->base.device,
257                                            YUV_I420,
258                                            mime_data, mime_data_length,
259                                            width, height);
260     if (clone == NULL)
261         return CAIRO_INT_STATUS_UNSUPPORTED;
262     if (unlikely (clone->status))
263         return clone->status;
264
265     *clone_out = clone;
266     return CAIRO_STATUS_SUCCESS;
267 }
268 #endif
269
270 /* Max instruction count: 4 */
271 static void
272 i915_shader_linear_color (i915_device_t *device,
273                           enum i915_shader_linear_mode mode,
274                           int in, int c0, int c1, int out)
275 {
276     int tmp = FS_U0;
277
278     switch (mode) {
279     case LINEAR_TEXTURE:
280         ASSERT_NOT_REACHED;
281     case LINEAR_NONE:
282         tmp = in;
283         break;
284
285     case LINEAR_REPEAT:
286         i915_fs_frc (tmp, i915_fs_operand (in, X, X, X, X));
287         break;
288 #if 0
289     case LINEAR_REFLECT:
290         /* XXX needs an extra constant: C2 [0.5, 2.0, x, x] */
291         i915_fs_mul (tmp, in, 0.5);
292         i915_fs_frc (tmp, i915_fs_operand_reg (tmp));
293         i915_fs_mul (tmp, tmp, 2.0);
294         i915_fs_add (tmp, i915_fs_operand_one (),
295                      i915_fs_operand_reg_negate (tmp));
296         i915_fs_cmp (tmp,
297                      i915_fs_operand_reg (tmp),
298                      i915_fs_operand_reg (tmp),
299                      i915_fs_operand_reg_negate (tmp));
300         i915_fs_add (tmp, i915_fs_operand_one (),
301                      i915_fs_operand_reg_negate (tmp));
302 #endif
303     case LINEAR_PAD:
304         i915_fs_max (tmp,
305                      i915_fs_operand_zero (),
306                      i915_fs_operand (in, X, X, X, X));
307         i915_fs_min (tmp,
308                      i915_fs_operand_one (),
309                      i915_fs_operand_reg (tmp));
310         break;
311     }
312
313     /* interpolate */
314     i915_fs_mad (out, 0,
315                  i915_fs_operand (tmp, NEG_X, NEG_X, NEG_X, NEG_X),
316                  i915_fs_operand_reg (c0),
317                  i915_fs_operand_reg (c0));
318     i915_fs_mad (out, 0,
319                  i915_fs_operand (tmp, X, X, X, X),
320                  i915_fs_operand_reg (c1),
321                  i915_fs_operand_reg (out));
322 }
323
324 static void
325 i915_shader_radial_init (struct i915_shader_radial *r,
326                          const cairo_radial_pattern_t *radial)
327 {
328     double dx, dy, dr, r1;
329
330     dx = radial->cd2.center.x - radial->cd1.center.x;
331     dy = radial->cd2.center.y - radial->cd1.center.y;
332     dr = radial->cd2.radius   - radial->cd1.radius;
333
334     r1 = radial->cd1.radius;
335
336     if (radial->cd2.center.x == radial->cd1.center.x &&
337         radial->cd2.center.y == radial->cd1.center.y)
338     {
339         /* XXX dr == 0, meaningless with anything other than PAD */
340         r->constants[0] = radial->cd1.center.x / dr;
341         r->constants[1] = radial->cd1.center.y / dr;
342         r->constants[2] = 1. / dr;
343         r->constants[3] = -r1 / dr;
344
345         r->constants[4] = 0;
346         r->constants[5] = 0;
347         r->constants[6] = 0;
348         r->constants[7] = 0;
349
350         r->base.mode = RADIAL_ONE;
351     } else {
352         r->constants[0] = -radial->cd1.center.x;
353         r->constants[1] = -radial->cd1.center.y;
354         r->constants[2] = r1;
355         r->constants[3] = -4 * (dx*dx + dy*dy - dr*dr);
356
357         r->constants[4] = -2 * dx;
358         r->constants[5] = -2 * dy;
359         r->constants[6] = -2 * r1 * dr;
360         r->constants[7] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
361
362         r->base.mode = RADIAL_TWO;
363     }
364
365     r->base.matrix = radial->base.base.matrix;
366 }
367
368 /* Max instruction count: 10 */
369 static void
370 i915_shader_radial_coord (i915_device_t *device,
371                           enum i915_shader_radial_mode mode,
372                           int in, int g0, int g1, int out)
373 {
374     switch (mode) {
375     case RADIAL_ONE:
376         /*
377            pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
378            r² = pdx*pdx + pdy*pdy
379            t = r²/sqrt(r²) - r1/dr;
380            */
381         i915_fs_mad (FS_U0, MASK_X | MASK_Y,
382                      i915_fs_operand (in, X, Y, ZERO, ZERO),
383                      i915_fs_operand (g0, Z, Z, ZERO, ZERO),
384                      i915_fs_operand (g0, NEG_X, NEG_Y, ZERO, ZERO));
385         i915_fs_dp2add (FS_U0, MASK_X,
386                         i915_fs_operand (FS_U0, X, Y, ZERO, ZERO),
387                         i915_fs_operand (FS_U0, X, Y, ZERO, ZERO),
388                         i915_fs_operand_zero ());
389         i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U0, X, X, X, X));
390         i915_fs_mad (out, MASK_X,
391                      i915_fs_operand (FS_U0, X, ZERO, ZERO, ZERO),
392                      i915_fs_operand (out, X, ZERO, ZERO, ZERO),
393                      i915_fs_operand (g0, W, ZERO, ZERO, ZERO));
394         break;
395
396     case RADIAL_TWO:
397         /*
398            pdx = x - c1x, pdy = y - c1y;
399            A = dx² + dy² - dr²
400            B = -2*(pdx*dx + pdy*dy + r1*dr);
401            C = pdx² + pdy² - r1²;
402            det = B*B - 4*A*C;
403            t = (-B + sqrt (det)) / (2 * A)
404            */
405
406         /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
407         i915_fs_add (FS_U0,
408                      i915_fs_operand (in, X, Y, ZERO, ZERO),
409                      i915_fs_operand (g0, X, Y, Z, ZERO));
410         /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
411         i915_fs_dp3 (FS_U0, MASK_W,
412                      i915_fs_operand (FS_U0, X, Y, ONE, ZERO),
413                      i915_fs_operand (g1, X, Y, Z, ZERO));
414         /* u1.x = pdx² + pdy² - r1²; [C] */
415         i915_fs_dp3 (FS_U1, MASK_X,
416                      i915_fs_operand (FS_U0, X, Y, Z, ZERO),
417                      i915_fs_operand (FS_U0, X, Y, NEG_Z, ZERO));
418         /* u1.x = C, u1.y = B, u1.z=-4*A; */
419         i915_fs_mov_masked (FS_U1, MASK_Y, i915_fs_operand (FS_U0, W, W, W, W));
420         i915_fs_mov_masked (FS_U1, MASK_Z, i915_fs_operand (g0, W, W, W, W));
421         /* u1.x = B² - 4*A*C */
422         i915_fs_dp2add (FS_U1, MASK_X,
423                         i915_fs_operand (FS_U1, X, Y, ZERO, ZERO),
424                         i915_fs_operand (FS_U1, Z, Y, ZERO, ZERO),
425                         i915_fs_operand_zero ());
426         /* out.x = -B + sqrt (B² - 4*A*C),
427          * out.y = -B - sqrt (B² - 4*A*C),
428          */
429         i915_fs_rsq (out, MASK_X, i915_fs_operand (FS_U1, X, X, X, X));
430         i915_fs_mad (out, MASK_X | MASK_Y,
431                      i915_fs_operand (out, X, X, ZERO, ZERO),
432                      i915_fs_operand (FS_U1, X, NEG_X, ZERO, ZERO),
433                      i915_fs_operand (FS_U0, NEG_W, NEG_W, ZERO, ZERO));
434         /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A),
435          * out.y = (-B - sqrt (B² - 4*A*C)) / (2 * A)
436          */
437         i915_fs_mul (out,
438                      i915_fs_operand (out, X, Y, ZERO, ZERO),
439                      i915_fs_operand (g1, W, W, ZERO, ZERO));
440         /* if (A > 0)
441          *   out = (-B + sqrt (B² - 4*A*C)) / (2 * A),
442          * else
443          *   out = (-B - sqrt (B² - 4*A*C)) / (2 * A)
444          */
445         i915_fs_cmp (out,
446                      i915_fs_operand (g1, W, ZERO, ZERO, ZERO),
447                      i915_fs_operand (out, X, ZERO, ZERO, ZERO),
448                      i915_fs_operand (out, Y, ZERO, ZERO, ZERO));
449         break;
450     }
451 }
452
453 /* Max instruction count: 7 */
454 static inline void
455 i915_shader_yuv_color (i915_device_t *device,
456                        int y, int u, int v,
457                        int c0, int c1, int c2,
458                        int out)
459 {
460     i915_fs_mov_masked (FS_U0, MASK_X, i915_fs_operand_reg (y));
461     i915_fs_mov_masked (FS_U0, MASK_Y, i915_fs_operand_reg (u));
462     i915_fs_mov_masked (FS_U0, MASK_Z, i915_fs_operand_reg (v));
463
464     i915_fs_add (FS_U0,
465                  i915_fs_operand_reg (FS_U0),
466                  i915_fs_operand_reg (c0));
467     i915_fs_dp3 (out, MASK_X,
468                  i915_fs_operand_reg (FS_U0),
469                  i915_fs_operand (c1, X, ZERO, Y, ZERO));
470     i915_fs_dp3 (out, MASK_Z,
471                  i915_fs_operand_reg (FS_U0),
472                  i915_fs_operand (c1, Z, W, ZERO, ZERO));
473     i915_fs_dp3 (out, MASK_Y,
474                  i915_fs_operand_reg (FS_U0),
475                  i915_fs_operand_reg (c2));
476 }
477
478 static inline uint32_t
479 i915_shader_channel_key (const union i915_shader_channel *channel)
480 {
481     return (channel->type.fragment & 0x0f) | (channel->base.mode << FS_DETAILS_SHIFT);
482 }
483
484 static uint32_t
485 i915_shader_channel_get_num_tex_coords (const union i915_shader_channel *channel)
486 {
487     switch (channel->type.fragment) {
488     default:
489     case FS_ZERO:
490     case FS_ONE:
491     case FS_CONSTANT:
492     case FS_PURE:
493     case FS_DIFFUSE:
494         return 0;
495
496     case FS_LINEAR:
497     case FS_RADIAL:
498     case FS_TEXTURE:
499     case FS_SPANS:
500     case FS_YUV:
501         return 1;
502     }
503 }
504
505 static uint32_t
506 i915_shader_get_num_tex_coords (const i915_shader_t *shader)
507 {
508     uint32_t num_tex_coords;
509
510     num_tex_coords = 0;
511
512     num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->source);
513     num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->mask);
514     num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->clip);
515     num_tex_coords += i915_shader_channel_get_num_tex_coords (&shader->dst);
516
517     return num_tex_coords;
518 }
519
/*
 * Build an operand from @reg in which each component selected by the 4-bit
 * mask @pure (bit 0 = X, bit 1 = Y, bit 2 = Z, bit 3 = W) reads @channel
 * and every other component reads the zero selector.
 *
 * @channel is token-pasted into <channel>_CHANNEL_VAL, so it must be a bare
 * channel name such as X or W.  @reg and @pure are parenthesised so that
 * expression arguments (e.g. "a | b") keep their meaning; @pure is
 * evaluated four times and must be free of side effects.
 */
#define i915_fs_operand_impure(reg, channel, pure) \
    ((reg) | \
     ((((pure) & (1 << 0)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 1)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 2)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 3)) ? channel##_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT))
526
/*
 * Build an operand on FS_R0 in which each component selected by the 4-bit
 * mask @pure (bit 0 = X, bit 1 = Y, bit 2 = Z, bit 3 = W) reads the one
 * selector and every other component reads the zero selector.
 *
 * @pure is parenthesised so that expression arguments keep their meaning;
 * it is evaluated four times and must be free of side effects.
 */
#define i915_fs_operand_pure(pure) \
    (FS_R0 | \
     ((((pure) & (1 << 0)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << X_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 1)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Y_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 2)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << Z_CHANNEL_SHIFT) | \
     ((((pure) & (1 << 3)) ? ONE_CHANNEL_VAL : ZERO_CHANNEL_VAL) << W_CHANNEL_SHIFT))
533
534 static void
535 i915_set_shader_program (i915_device_t *device,
536                          const i915_shader_t *shader)
537 {
538     uint32_t num_tex_coords;
539     uint32_t num_samplers;
540     uint32_t n;
541     uint32_t texture_offset = 0;
542     uint32_t constant_offset = 0;
543     uint32_t sampler_offset = 0;
544     uint32_t source_reg;
545     uint32_t source_pure;
546     uint32_t mask_reg;
547     uint32_t out_reg;
548     uint32_t dest_reg;
549     FS_LOCALS;
550
551     n = (i915_shader_channel_key (&shader->source) <<  0) |
552         (i915_shader_channel_key (&shader->mask)   <<  8) |
553         (i915_shader_channel_key (&shader->clip)   << 16) |
554         (shader->op << 24) |
555         ((shader->opacity < 1.) << 30) |
556         (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31);
557     if (n == device->current_program)
558         return;
559     device->current_program = n;
560
561     FS_BEGIN ();
562
563     if (shader->source.type.fragment == FS_ZERO) {
564         if (shader->clip.type.fragment == FS_TEXTURE) {
565             /* XXX need_combine */
566             assert (shader->mask.type.fragment == (i915_fragment_shader_t) -1);
567             i915_fs_dcl (FS_T0);
568             i915_fs_texld (FS_U0, FS_S0, FS_T0);
569             if ((shader->content & CAIRO_CONTENT_COLOR) == 0)
570                 i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, W, W, W, W));
571             else
572                 i915_fs_mov (FS_OC, i915_fs_operand (FS_U0, ZERO, ZERO, ZERO, W));
573         } else {
574             i915_fs_mov (FS_OC, i915_fs_operand_zero ());
575         }
576
577         FS_END ();
578         return;
579     }
580
581     num_tex_coords = i915_shader_get_num_tex_coords (shader);
582     for (n = 0; n < num_tex_coords; n++)
583         i915_fs_dcl (FS_T0 + n);
584
585     num_samplers =
586         shader->source.base.n_samplers +
587         shader->mask.base.n_samplers +
588         shader->clip.base.n_samplers +
589         shader->dst.base.n_samplers;
590     for (n = 0; n < num_samplers; n++)
591         i915_fs_dcl (FS_S0 + n);
592
593     source_reg = ~0;
594     source_pure = 0;
595     out_reg = FS_R0;
596     if (! shader->need_combine &&
597         shader->mask.type.fragment == (i915_fragment_shader_t) -1 &&
598         shader->clip.type.fragment != FS_TEXTURE &&
599         shader->content != CAIRO_CONTENT_ALPHA)
600     {
601         out_reg = FS_OC;
602     }
603
604     switch (shader->source.type.fragment) {
605     default:
606     case FS_ZERO:
607     case FS_SPANS:
608         ASSERT_NOT_REACHED;
609
610     case FS_PURE:
611         source_pure = shader->source.solid.pure;
612     case FS_ONE:
613         break;
614
615     case FS_CONSTANT:
616         source_reg = FS_C0;
617         constant_offset += 1;
618         break;
619
620     case FS_DIFFUSE:
621         i915_fs_dcl (FS_T8);
622         source_reg = FS_T8;
623         break;
624
625     case FS_LINEAR:
626         i915_shader_linear_color (device, shader->source.base.mode,
627                                   FS_T0, /* input */
628                                   FS_C0, FS_C1, /* colour ramp */
629                                   FS_U3); /* unpremultiplied output */
630         /* XXX can we defer premultiplication? */
631         i915_fs_mul (out_reg,
632                      i915_fs_operand_reg (FS_U3),
633                      i915_fs_operand (FS_U3, W, W, W, ONE));
634
635         constant_offset += 2;
636         texture_offset += 1;
637         source_reg = out_reg;
638         break;
639
640     case FS_RADIAL:
641         i915_shader_radial_coord (device, shader->source.base.mode,
642                                   FS_T0, /* input */
643                                   FS_C0, FS_C1, /* gradient constants */
644                                   FS_R0); /* coordinate */
645
646         i915_fs_texld (out_reg, FS_S0, FS_R0);
647         constant_offset += 2;
648         texture_offset += 1;
649         sampler_offset += 1;
650         source_reg = out_reg;
651         break;
652
653     case FS_TEXTURE:
654         i915_fs_texld (out_reg, FS_S0, FS_T0);
655         texture_offset += 1;
656         sampler_offset += 1;
657         source_reg = out_reg;
658         break;
659
660     case FS_YUV:
661         /* Load samplers to temporaries. */
662         i915_fs_texld (FS_R0, FS_S0, FS_T0);
663         i915_fs_texld (FS_R1, FS_S1, FS_T0);
664         i915_fs_texld (FS_R2, FS_S2, FS_T0);
665
666         i915_shader_yuv_color (device,
667                                FS_R0, FS_R1, FS_R2, /* y, u, v */
668                                FS_C0, FS_C1, FS_C2, /* coefficients */
669                                out_reg);
670
671         constant_offset += 3;
672         texture_offset += 1;
673         sampler_offset += 3;
674         source_reg = out_reg;
675         break;
676     }
677
678     mask_reg = ~0;
679     switch (shader->mask.type.fragment) {
680     case FS_PURE:
681     case FS_ZERO:
682     case FS_YUV:
683     case FS_DIFFUSE:
684         ASSERT_NOT_REACHED;
685     case FS_ONE:
686     default:
687         break;
688
689     case FS_SPANS:
690         mask_reg = FS_T0 + texture_offset;
691         texture_offset += 1;
692         break;
693
694     case FS_CONSTANT:
695         mask_reg = FS_C0 + constant_offset;
696         constant_offset += 1;
697         break;
698
699     case FS_LINEAR:
700         i915_shader_linear_color (device, shader->mask.base.mode,
701                                   FS_T0 + texture_offset, /* input */
702                                   FS_C0 + constant_offset,
703                                   FS_C0 + constant_offset + 1, /* colour ramp */
704                                   FS_R1); /* unpremultiplied output */
705         constant_offset += 2;
706         texture_offset += 1;
707         mask_reg = FS_R1;
708         break;
709
710     case FS_RADIAL:
711         i915_shader_radial_coord (device, shader->mask.base.mode,
712                                   FS_T0 + texture_offset, /* input */
713                                   FS_C0 + constant_offset,
714                                   FS_C0 + constant_offset + 1, /* gradient constants */
715                                   FS_R1); /* coordinate */
716
717         i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_R1);
718         constant_offset += 2;
719         texture_offset += 1;
720         sampler_offset += 1;
721         mask_reg = FS_R1;
722         break;
723
724     case FS_TEXTURE:
725         i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
726         texture_offset += 1;
727         sampler_offset += 1;
728         mask_reg = FS_R1;
729         break;
730     }
731
732     if (mask_reg != ~0U) {
733         if (! shader->need_combine &&
734             shader->clip.type.fragment != FS_TEXTURE &&
735             (shader->content != CAIRO_CONTENT_ALPHA || source_reg == ~0U))
736         {
737             out_reg = FS_OC;
738         }
739         if (source_reg == ~0U) {
740             if (source_pure) {
741                 if (shader->mask.type.fragment == FS_SPANS) {
742                     if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
743                         if (source_pure & (1 << 3))
744                             i915_fs_mov (out_reg, i915_fs_operand (mask_reg, X, X, X, X));
745                         else
746                             i915_fs_mov (out_reg, i915_fs_operand_zero ());
747                     } else {
748                         i915_fs_mov (out_reg,
749                                      i915_fs_operand_impure (mask_reg, X, source_pure));
750                     }
751                 } else {
752                     /* XXX ComponentAlpha
753                        i915_fs_mov (out_reg,
754                        i915_fs_operand_pure (mask_reg,
755                        shader->source.solid.pure));
756                        */
757                     if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
758                         if (source_pure & (1 << 3))
759                             i915_fs_mov (out_reg, i915_fs_operand (mask_reg, W, W, W, W));
760                         else
761                             i915_fs_mov (out_reg, i915_fs_operand_zero ());
762                     } else {
763                         i915_fs_mov (out_reg,
764                                      i915_fs_operand_impure (mask_reg, W, source_pure));
765                     }
766                 }
767                 source_reg = out_reg;
768             } else if (shader->mask.type.fragment == FS_SPANS) {
769                 i915_fs_mov (out_reg,
770                              i915_fs_operand (mask_reg, X, X, X, X));
771                 source_reg = out_reg;
772             } else {
773                 source_reg = mask_reg;
774             }
775         } else {
776             if (shader->mask.type.fragment == FS_SPANS) {
777                     if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
778                         i915_fs_mul (out_reg,
779                                      i915_fs_operand (source_reg, W, W, W, W),
780                                      i915_fs_operand (mask_reg, X, X, X, X));
781                     } else {
782                         i915_fs_mul (out_reg,
783                                      i915_fs_operand_reg (source_reg),
784                                      i915_fs_operand (mask_reg, X, X, X, X));
785                     }
786             } else {
787                 /* XXX ComponentAlpha
788                 i915_fs_mul (FS_R0,
789                              i915_fs_operand_reg (source_reg),
790                              i915_fs_operand_reg (mask_reg));
791                  */
792                 if (out_reg == FS_OC && shader->content == CAIRO_CONTENT_ALPHA) {
793                     i915_fs_mul (out_reg,
794                                  i915_fs_operand (source_reg, W, W, W, W),
795                                  i915_fs_operand (mask_reg, W, W, W, W));
796                 } else {
797                     i915_fs_mul (out_reg,
798                                  i915_fs_operand_reg (source_reg),
799                                  i915_fs_operand (mask_reg, W, W, W, W));
800                 }
801             }
802
803             source_reg = out_reg;
804         }
805     }
806
807     if (shader->opacity < 1.) {
808         i915_fs_mul (source_reg,
809                      i915_fs_operand_reg (source_reg),
810                      i915_fs_operand_reg (FS_C0 + constant_offset));
811         constant_offset++;
812     }
813
814     /* need to preserve order of src, mask, clip, dst */
815     mask_reg = ~0;
816     if (shader->clip.type.fragment == FS_TEXTURE) {
817         i915_fs_texld (FS_R1, FS_S0 + sampler_offset, FS_T0 + texture_offset);
818         texture_offset += 1;
819         sampler_offset += 1;
820         mask_reg = FS_R1;
821     }
822
823     if (shader->need_combine) {
824         assert (shader->dst.type.fragment == FS_TEXTURE);
825
826         i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
827         texture_offset += 1;
828         sampler_offset += 1;
829         dest_reg = FS_R2;
830
831         switch (shader->op) {
832         case CAIRO_OPERATOR_CLEAR:
833         case CAIRO_OPERATOR_SOURCE:
834             ASSERT_NOT_REACHED;
835
836         case CAIRO_OPERATOR_OVER:
837             if (source_reg == ~0U) {
838                 /* XXX shader->source.type.fragment == FS_PURE */
839                 dest_reg = FS_OC;
840             } else {
841                 i915_fs_add (FS_U0,
842                              i915_fs_operand (source_reg, NEG_W, NEG_W, NEG_W, NEG_W),
843                              i915_fs_operand_one ());
844                 i915_fs_mul (FS_U0,
845                              i915_fs_operand_reg (FS_U0),
846                              dest_reg);
847                 i915_fs_add (FS_R3,
848                              i915_fs_operand_reg (source_reg),
849                              i915_fs_operand_reg (FS_U0));
850                 source_reg = FS_R3;
851             }
852             break;
853
854         case CAIRO_OPERATOR_IN:
855             if (source_reg == ~0U) {
856                 /* XXX shader->source.type.fragment == FS_PURE */
857                 source_reg = dest_reg;
858             } else {
859                 i915_fs_mul (FS_R3,
860                              i915_fs_operand_reg (source_reg),
861                              dest_reg);
862                 source_reg = FS_R3;
863             }
864             break;
865
866         case CAIRO_OPERATOR_OUT:
867             if (source_reg == ~0U) {
868                 /* XXX shader->source.type.fragment == FS_PURE */
869                 i915_fs_mov (FS_R3, i915_fs_operand_zero ());
870                 source_reg = FS_R3;
871             } else {
872                 i915_fs_add (FS_U0,
873                              i915_fs_operand (source_reg, NEG_W, NEG_W, NEG_W, NEG_W),
874                              i915_fs_operand_one ());
875                 i915_fs_mul (FS_R3,
876                              i915_fs_operand_reg (FS_U0),
877                              dest_reg);
878                 source_reg = FS_R3;
879             }
880             break;
881
882         case CAIRO_OPERATOR_ATOP:
883
884         case CAIRO_OPERATOR_DEST:
885         case CAIRO_OPERATOR_DEST_OVER:
886         case CAIRO_OPERATOR_DEST_IN:
887         case CAIRO_OPERATOR_DEST_OUT:
888         case CAIRO_OPERATOR_DEST_ATOP:
889
890         case CAIRO_OPERATOR_XOR:
891         case CAIRO_OPERATOR_ADD:
892         case CAIRO_OPERATOR_SATURATE:
893
894         case CAIRO_OPERATOR_MULTIPLY:
895         case CAIRO_OPERATOR_SCREEN:
896         case CAIRO_OPERATOR_OVERLAY:
897         case CAIRO_OPERATOR_DARKEN:
898         case CAIRO_OPERATOR_LIGHTEN:
899         case CAIRO_OPERATOR_COLOR_DODGE:
900         case CAIRO_OPERATOR_COLOR_BURN:
901         case CAIRO_OPERATOR_HARD_LIGHT:
902         case CAIRO_OPERATOR_SOFT_LIGHT:
903         case CAIRO_OPERATOR_DIFFERENCE:
904         case CAIRO_OPERATOR_EXCLUSION:
905         case CAIRO_OPERATOR_HSL_HUE:
906         case CAIRO_OPERATOR_HSL_SATURATION:
907         case CAIRO_OPERATOR_HSL_COLOR:
908         case CAIRO_OPERATOR_HSL_LUMINOSITY:
909             ASSERT_NOT_REACHED;
910             break;
911         }
912     }
913
914     if (shader->clip.type.fragment == FS_TEXTURE) {
915         assert (mask_reg != ~0U);
916
917         if (! shader->need_combine) {
918             /* (source IN clip) */
919             if (source_reg == ~0U) {
920                 if (source_pure == 0) {
921                     source_reg = mask_reg;
922                 } else {
923                     out_reg = FS_OC;
924                     if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
925                         if (source_pure & (1 << 3))
926                             i915_fs_mov (out_reg, i915_fs_operand (mask_reg, W, W, W, W));
927                         else
928                             i915_fs_mov (out_reg, i915_fs_operand_zero ());
929                     } else {
930                         i915_fs_mov (out_reg,
931                                      i915_fs_operand_impure (mask_reg, W, source_pure));
932                     }
933                     source_reg = out_reg;
934                 }
935             } else if (mask_reg) {
936                 out_reg = FS_OC;
937                 if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
938                     i915_fs_mul (out_reg,
939                                  i915_fs_operand (source_reg, W, W, W, W),
940                                  i915_fs_operand (mask_reg, W, W, W, W));
941                 } else {
942                     i915_fs_mul (out_reg,
943                                  i915_fs_operand_reg (source_reg),
944                                  i915_fs_operand (mask_reg, W, W, W, W));
945                 }
946
947                 source_reg = out_reg;
948             }
949         } else {
950             /* (source OP dest) LERP_clip dest */
951             if (source_reg == ~0U) {
952                 if (source_pure == 0) {
953                     i915_fs_mov (FS_R3,
954                                  i915_fs_operand (mask_reg, W, W, W, W));
955                 } else {
956                     i915_fs_mov (FS_R3,
957                                  i915_fs_operand_impure (mask_reg, W, source_pure));
958                 }
959             } else {
960                 i915_fs_mul (FS_R3,
961                              i915_fs_operand_reg (source_reg),
962                              i915_fs_operand (mask_reg, W, W, W, W));
963             }
964
965             i915_fs_add (mask_reg,
966                          i915_fs_operand_one (),
967                          i915_fs_operand (mask_reg, NEG_W, NEG_W, NEG_W, NEG_W));
968
969             if (dest_reg != FS_OC) {
970                 if (dest_reg == ~0U) {
971                     assert (shader->dst.type.fragment == FS_TEXTURE);
972
973                     i915_fs_texld (FS_R2, FS_S0 + sampler_offset, FS_T0 + texture_offset);
974                     texture_offset += 1;
975                     sampler_offset += 1;
976                     dest_reg = FS_R2;
977                 }
978
979                 i915_fs_mul (FS_U1,
980                              i915_fs_operand_reg (dest_reg),
981                              i915_fs_operand_reg (mask_reg));
982                 mask_reg = FS_U1;
983             }
984
985             source_reg = FS_OC;
986             if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
987                 i915_fs_add (source_reg,
988                              i915_fs_operand (FS_R3, W, W, W, W),
989                              i915_fs_operand (mask_reg, W, W, W, W));
990             } else {
991                 i915_fs_add (source_reg,
992                              i915_fs_operand_reg (FS_R3),
993                              i915_fs_operand_reg (mask_reg));
994             }
995         }
996     }
997
998     if (source_reg != FS_OC) {
999         if (source_reg == ~0U) {
1000             if (source_pure) {
1001                 if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
1002                     if (source_pure & (1 << 3))
1003                         i915_fs_mov (FS_OC, i915_fs_operand_one ());
1004                     else
1005                         i915_fs_mov (FS_OC, i915_fs_operand_zero ());
1006                 } else
1007                     i915_fs_mov (FS_OC, i915_fs_operand_pure (source_pure));
1008             } else {
1009                 i915_fs_mov (FS_OC, i915_fs_operand_one ());
1010             }
1011         } else if ((shader->content & CAIRO_CONTENT_COLOR) == 0) {
1012             i915_fs_mov (FS_OC, i915_fs_operand (source_reg, W, W, W, W));
1013         } else {
1014             i915_fs_mov (FS_OC, i915_fs_operand_reg (source_reg));
1015         }
1016     }
1017
1018     FS_END ();
1019 }
1020
1021 static cairo_bool_t
1022 i915_shader_linear_init (struct i915_shader_linear *l,
1023                          const cairo_linear_pattern_t *linear)
1024 {
1025     double x0, y0, sf;
1026     double dx, dy, offset;
1027
1028     dx = linear->pd2.x - linear->pd1.x;
1029     dy = linear->pd2.y - linear->pd1.y;
1030     sf = dx * dx + dy * dy;
1031     if (sf <= 1e-5)
1032         return FALSE;
1033
1034     dx /= sf;
1035     dy /= sf;
1036
1037     x0 = linear->pd1.x;
1038     y0 = linear->pd1.y;
1039     offset = dx*x0 + dy*y0;
1040
1041     if (_cairo_matrix_is_identity (&linear->base.base.matrix)) {
1042         l->dx = dx;
1043         l->dy = dy;
1044         l->offset = -offset;
1045     } else {
1046         cairo_matrix_t m;
1047
1048         cairo_matrix_init (&m, dx, 0, dy, 0, -offset, 0);
1049         cairo_matrix_multiply (&m, &linear->base.base.matrix, &m);
1050         l->dx = m.xx;
1051         l->dy = m.xy;
1052         l->offset = m.x0;
1053     }
1054
1055     return TRUE;
1056 }
1057
1058 static cairo_bool_t
1059 i915_shader_linear_contains_rectangle (struct i915_shader_linear *l,
1060                                        const cairo_rectangle_int_t *extents)
1061 {
1062     double v;
1063
1064     v = i915_shader_linear_texcoord (l,
1065                                      extents->x,
1066                                      extents->y);
1067     if (v < 0.)
1068         return FALSE;
1069     if (v > 1.)
1070         return FALSE;
1071
1072     v = i915_shader_linear_texcoord (l,
1073                                      extents->x + extents->width,
1074                                      extents->y);
1075     if (v < 0.)
1076         return FALSE;
1077     if (v > 1.)
1078         return FALSE;
1079
1080     v = i915_shader_linear_texcoord (l,
1081                                      extents->x,
1082                                      extents->y + extents->height);
1083     if (v < 0.)
1084         return FALSE;
1085     if (v > 1.)
1086         return FALSE;
1087
1088     v = i915_shader_linear_texcoord (l,
1089                                      extents->x + extents->width,
1090                                      extents->y + extents->height);
1091     if (v < 0.)
1092         return FALSE;
1093     if (v > 1.)
1094         return FALSE;
1095
1096     return TRUE;
1097 }
1098
/*
 * Channel classification helpers.  C is a channel value (callers pass the
 * *_short colour components) and mask tells whether that channel is
 * present in the content being considered:
 *   is_zero: channel present and C <= 0x00ff
 *   is_one:  channel present and C >= 0xff00
 *   is_pure: channel absent, or C falls in either of the ranges above
 * Note that is_pure evaluates C twice.
 */
#define is_zero(C,mask) ((mask) != 0 && (C) <= 0x00ff)
#define is_one(C,mask) ((mask) != 0 && (C) >= 0xff00)
#define is_pure(C,mask) ((mask) == 0 || (C) <= 0x00ff || (C) >= 0xff00)
1102
1103 static cairo_status_t
1104 i915_shader_acquire_solid (i915_shader_t *shader,
1105                            union i915_shader_channel *src,
1106                            const cairo_solid_pattern_t *solid,
1107                            const cairo_rectangle_int_t *extents)
1108 {
1109     cairo_content_t content;
1110
1111     content = CAIRO_CONTENT_COLOR_ALPHA;
1112     src->solid.color = solid->color;
1113     if (content == 0 || solid->color.alpha_short <= 0x00ff)
1114     {
1115         src->base.content = CAIRO_CONTENT_ALPHA;
1116         src->type.fragment = FS_ZERO;
1117     }
1118     else if ((((content & CAIRO_CONTENT_COLOR) == 0)  ||
1119               (solid->color.red_short >= 0xff00 &&
1120                solid->color.green_short >= 0xff00 &&
1121                solid->color.blue_short >= 0xff00)) &&
1122              ((content & CAIRO_CONTENT_ALPHA) == 0 ||
1123               solid->color.alpha_short >= 0xff00))
1124     {
1125         src->base.content = CAIRO_CONTENT_ALPHA;
1126         src->type.fragment = FS_ONE;
1127     }
1128     else if (is_pure (solid->color.red_short, content & CAIRO_CONTENT_COLOR) &&
1129              is_pure (solid->color.green_short, content & CAIRO_CONTENT_COLOR) &&
1130              is_pure (solid->color.blue_short, content & CAIRO_CONTENT_COLOR) &&
1131              is_pure (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA))
1132     {
1133         src->solid.pure = 0;
1134         src->solid.pure |= is_one (solid->color.red_short,   content & CAIRO_CONTENT_COLOR) << 0;
1135         src->solid.pure |= is_one (solid->color.green_short, content & CAIRO_CONTENT_COLOR) << 1;
1136         src->solid.pure |= is_one (solid->color.blue_short,  content & CAIRO_CONTENT_COLOR) << 2;
1137         src->solid.pure |= (! is_zero (solid->color.alpha_short, content & CAIRO_CONTENT_ALPHA)) << 3;
1138
1139         if (src->solid.pure == 0) {
1140             src->base.content = CAIRO_CONTENT_ALPHA;
1141             src->type.fragment = FS_ZERO;
1142         } else if (src->solid.pure == 0x7) {
1143             src->base.content = CAIRO_CONTENT_ALPHA;
1144             src->type.fragment = FS_ONE;
1145         } else {
1146             src->base.content = content;
1147             src->type.fragment = FS_PURE;
1148             src->base.mode = src->solid.pure;
1149         }
1150     }
1151     else
1152     {
1153         src->base.content = content;
1154         src->type.fragment = src == &shader->source ? FS_DIFFUSE : FS_CONSTANT;
1155     }
1156     src->type.vertex = src->type.fragment == FS_ZERO ? VS_ZERO : VS_CONSTANT;
1157     src->type.pattern = PATTERN_CONSTANT;
1158
1159     return CAIRO_STATUS_SUCCESS;
1160 }
1161
1162 static cairo_status_t
1163 i915_shader_acquire_linear (i915_shader_t *shader,
1164                             union i915_shader_channel *src,
1165                             const cairo_linear_pattern_t *linear,
1166                             const cairo_rectangle_int_t *extents)
1167 {
1168     cairo_bool_t mode = LINEAR_TEXTURE;
1169     cairo_status_t status;
1170
1171     if (i915_shader_linear_init (&src->linear, linear) &&
1172         linear->base.n_stops == 2 &&
1173         linear->base.stops[0].offset == 0.0 &&
1174         linear->base.stops[1].offset == 1.0)
1175     {
1176         if (i915_shader_linear_contains_rectangle (&src->linear,
1177                                                    extents))
1178         {
1179             /* XXX can also lerp if contained within offset range */
1180             mode = LINEAR_NONE;
1181         }
1182         else switch (linear->base.base.extend) {
1183         case CAIRO_EXTEND_REPEAT:
1184             mode = LINEAR_REPEAT;
1185             break;
1186         case CAIRO_EXTEND_PAD:
1187             mode = LINEAR_PAD;
1188             break;
1189         case CAIRO_EXTEND_NONE:
1190             break;
1191         case CAIRO_EXTEND_REFLECT:
1192             break;
1193         default:
1194             ASSERT_NOT_REACHED;
1195             break;
1196         }
1197     }
1198
1199     src->type.vertex = VS_LINEAR;
1200     src->type.pattern = PATTERN_LINEAR;
1201     src->base.texfmt = TEXCOORDFMT_1D;
1202     src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
1203     src->base.mode = mode;
1204     if (mode == LINEAR_TEXTURE) {
1205         intel_buffer_t buffer;
1206
1207         status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device,
1208                                         &linear->base, &buffer);
1209         if (unlikely (status))
1210             return status;
1211
1212         src->type.fragment = FS_TEXTURE;
1213         src->base.bo = intel_bo_reference (buffer.bo);
1214         src->base.n_samplers = 1;
1215         src->base.offset[0] = buffer.offset;
1216         src->base.map[0] = buffer.map0;
1217         src->base.map[1] = buffer.map1;
1218         src->base.sampler[0] =
1219             (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
1220             i915_texture_filter (CAIRO_FILTER_BILINEAR);
1221         src->base.sampler[1] =
1222             SS3_NORMALIZED_COORDS |
1223             i915_texture_extend (linear->base.base.extend);
1224     } else {
1225         src->type.fragment = FS_LINEAR;
1226         src->linear.color0.red   = linear->base.stops[0].color.red;
1227         src->linear.color0.green = linear->base.stops[0].color.green;
1228         src->linear.color0.blue  = linear->base.stops[0].color.blue;
1229         src->linear.color0.alpha = linear->base.stops[0].color.alpha;
1230
1231         src->linear.color1.red   = linear->base.stops[1].color.red;
1232         src->linear.color1.green = linear->base.stops[1].color.green;
1233         src->linear.color1.blue  = linear->base.stops[1].color.blue;
1234         src->linear.color1.alpha = linear->base.stops[1].color.alpha;
1235     }
1236
1237     return CAIRO_STATUS_SUCCESS;
1238 }
1239
1240 static cairo_status_t
1241 i915_shader_acquire_radial (i915_shader_t *shader,
1242                             union i915_shader_channel *src,
1243                             const cairo_radial_pattern_t *radial,
1244                             const cairo_rectangle_int_t *extents)
1245 {
1246     intel_buffer_t buffer;
1247     cairo_status_t status;
1248
1249     status = intel_gradient_render ((intel_device_t *) shader->target->intel.drm.base.device,
1250                                     &radial->base, &buffer);
1251     if (unlikely (status))
1252         return status;
1253
1254     i915_shader_radial_init (&src->radial, radial);
1255
1256     src->type.vertex = VS_TEXTURE;
1257     src->type.fragment = FS_RADIAL;
1258     src->type.pattern = PATTERN_RADIAL;
1259     src->base.texfmt = TEXCOORDFMT_2D;
1260
1261     src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
1262     src->base.bo = intel_bo_reference (buffer.bo);
1263     src->base.n_samplers = 1;
1264     src->base.offset[0] = buffer.offset;
1265     src->base.map[0] = buffer.map0;
1266     src->base.map[1] = buffer.map1;
1267     src->base.sampler[0] =
1268         (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
1269         i915_texture_filter (CAIRO_FILTER_BILINEAR);
1270     src->base.sampler[1] =
1271         SS3_NORMALIZED_COORDS |
1272         i915_texture_extend (radial->base.base.extend);
1273
1274     return CAIRO_STATUS_SUCCESS;
1275 }
1276
1277 static cairo_status_t
1278 i915_surface_clone (i915_device_t *device,
1279                     cairo_image_surface_t *image,
1280                     i915_surface_t **clone_out)
1281 {
1282     i915_surface_t *clone;
1283     cairo_status_t status;
1284
1285 #if 0
1286     clone =
1287         i915_surface_create_from_cacheable_image_internal (device, image);
1288     if (unlikely (clone->intel.drm.base.status))
1289         return clone->intel.drm.base.status;
1290 #else
1291     cairo_format_t format;
1292
1293     format = image->format;
1294     if (format == CAIRO_FORMAT_A1)
1295         format = CAIRO_FORMAT_A8;
1296
1297     clone = (i915_surface_t *)
1298         i915_surface_create_internal (&device->intel.base,
1299                                       format,
1300                                       image->width,
1301                                       image->height,
1302                                       I915_TILING_DEFAULT,
1303                                       FALSE);
1304     if (unlikely (clone->intel.drm.base.status))
1305         return clone->intel.drm.base.status;
1306
1307     status = intel_bo_put_image (&device->intel,
1308                                  to_intel_bo (clone->intel.drm.bo),
1309                                  image,
1310                                  0, 0,
1311                                  image->width, image->height,
1312                                  0, 0);
1313
1314     if (unlikely (status))
1315         return status;
1316 #endif
1317
1318     *clone_out = clone;
1319     return CAIRO_STATUS_SUCCESS;
1320 }
1321
1322 static cairo_status_t
1323 i915_surface_clone_subimage (i915_device_t *device,
1324                              cairo_image_surface_t *image,
1325                              const cairo_rectangle_int_t *extents,
1326                              i915_surface_t **clone_out)
1327 {
1328     i915_surface_t *clone;
1329     cairo_status_t status;
1330     cairo_format_t format;
1331
1332     format = image->format;
1333     if (format == CAIRO_FORMAT_A1)
1334         format = CAIRO_FORMAT_A8;
1335
1336     clone = (i915_surface_t *)
1337         i915_surface_create_internal (&device->intel.base,
1338                                       format,
1339                                       extents->width,
1340                                       extents->height,
1341                                       I915_TILING_NONE,
1342                                       FALSE);
1343     if (unlikely (clone->intel.drm.base.status))
1344         return clone->intel.drm.base.status;
1345
1346     status = intel_bo_put_image (&device->intel,
1347                                  to_intel_bo (clone->intel.drm.bo),
1348                                  image,
1349                                  extents->x, extents->y,
1350                                  extents->width, extents->height,
1351                                  0, 0);
1352
1353     if (unlikely (status))
1354         return status;
1355
1356     *clone_out = clone;
1357     return CAIRO_STATUS_SUCCESS;
1358 }
1359
1360 static cairo_status_t
1361 i915_surface_render_pattern (i915_device_t *device,
1362                              const cairo_surface_pattern_t *pattern,
1363                              const cairo_rectangle_int_t *extents,
1364                              i915_surface_t **clone_out)
1365 {
1366     i915_surface_t *clone;
1367     cairo_surface_t *image;
1368     cairo_status_t status;
1369     void *ptr;
1370
1371     clone = (i915_surface_t *)
1372         i915_surface_create_internal (&device->intel.base,
1373                                       _cairo_format_from_content (pattern->surface->content),
1374                                       extents->width,
1375                                       extents->height,
1376                                       I915_TILING_NONE,
1377                                       FALSE);
1378     if (unlikely (clone->intel.drm.base.status))
1379         return clone->intel.drm.base.status;
1380
1381     ptr = intel_bo_map (&device->intel,
1382                         to_intel_bo (clone->intel.drm.bo));
1383     if (unlikely (ptr == NULL)) {
1384         cairo_surface_destroy (&clone->intel.drm.base);
1385         return _cairo_error (CAIRO_STATUS_NO_MEMORY);
1386     }
1387
1388     image = cairo_image_surface_create_for_data (ptr,
1389                                                  clone->intel.drm.format,
1390                                                  clone->intel.drm.width,
1391                                                  clone->intel.drm.height,
1392                                                  clone->intel.drm.stride);
1393     if (unlikely (image->status)) {
1394         cairo_surface_destroy (&clone->intel.drm.base);
1395         return image->status;
1396     }
1397
1398     status = _cairo_surface_offset_paint (image,
1399                                           extents->x, extents->y,
1400                                           CAIRO_OPERATOR_SOURCE,
1401                                           &pattern->base,
1402                                           NULL);
1403     cairo_surface_destroy (image);
1404
1405     if (unlikely (status)) {
1406         cairo_surface_destroy (&clone->intel.drm.base);
1407         return status;
1408     }
1409
1410     *clone_out = clone;
1411     return CAIRO_STATUS_SUCCESS;
1412 }
1413
1414 static cairo_status_t
1415 i915_shader_acquire_solid_surface (i915_shader_t *shader,
1416                                    union i915_shader_channel *src,
1417                                    cairo_surface_t *surface,
1418                                    const cairo_rectangle_int_t *extents)
1419 {
1420     cairo_surface_pattern_t pattern;
1421     cairo_surface_t *pixel;
1422     cairo_image_surface_t *image;
1423     void *image_extra;
1424     cairo_status_t status;
1425     uint32_t argb;
1426
1427     status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
1428     if (unlikely (status))
1429         return status;
1430
1431     /* extract the pixel as argb32 */
1432     pixel = cairo_image_surface_create (CAIRO_FORMAT_ARGB32, 1, 1);
1433     _cairo_pattern_init_for_surface (&pattern, &image->base);
1434     cairo_matrix_init_translate (&pattern.base.matrix, extents->x, extents->y);
1435     pattern.base.filter = CAIRO_FILTER_NEAREST;
1436     status = _cairo_surface_paint (pixel, CAIRO_OPERATOR_SOURCE, &pattern.base, NULL);
1437     _cairo_pattern_fini (&pattern.base);
1438
1439     _cairo_surface_release_source_image (surface, image, image_extra);
1440
1441     if (unlikely (status)) {
1442         cairo_surface_destroy (pixel);
1443         return status;
1444     }
1445
1446     image = (cairo_image_surface_t *) pixel;
1447     argb = *(uint32_t *) image->data;
1448     cairo_surface_destroy (pixel);
1449
1450     if (argb >> 24 == 0) {
1451         _cairo_color_init_rgba (&src->solid.color, 0, 0, 0, 0);
1452     } else {
1453         uint8_t alpha = argb >> 24;
1454
1455         _cairo_color_init_rgba (&src->solid.color,
1456                                 ((((argb >> 16) & 0xff) * 255 + alpha / 2) / alpha) / 255.,
1457                                 ((((argb >>  8) & 0xff) * 255 + alpha / 2) / alpha) / 255.,
1458                                 ((((argb >>  0) & 0xff) * 255 + alpha / 2) / alpha) / 255.,
1459                                 alpha / 255.);
1460     }
1461
1462     src->base.content = CAIRO_CONTENT_COLOR_ALPHA;
1463     src->type.fragment = FS_CONSTANT;
1464     src->type.vertex = VS_CONSTANT;
1465     src->type.pattern = PATTERN_CONSTANT;
1466
1467     return CAIRO_STATUS_SUCCESS;
1468 }
1469
1470 static cairo_filter_t
1471 sampled_area (const cairo_surface_pattern_t *pattern,
1472               const cairo_rectangle_int_t *extents,
1473               cairo_rectangle_int_t *sample)
1474 {
1475     cairo_rectangle_int_t surface_extents;
1476     cairo_filter_t filter;
1477     double x1, x2, y1, y2;
1478     double pad;
1479
1480     x1 = extents->x;
1481     y1 = extents->y;
1482     x2 = extents->x + (int) extents->width;
1483     y2 = extents->y + (int) extents->height;
1484
1485     if (_cairo_matrix_is_translation (&pattern->base.matrix)) {
1486         x1 += pattern->base.matrix.x0; x2 += pattern->base.matrix.x0;
1487         y1 += pattern->base.matrix.y0; y2 += pattern->base.matrix.y0;
1488     } else {
1489         _cairo_matrix_transform_bounding_box (&pattern->base.matrix,
1490                                               &x1, &y1, &x2, &y2,
1491                                               NULL);
1492     }
1493
1494     filter = _cairo_pattern_analyze_filter (&pattern->base, &pad);
1495     sample->x = floor (x1 - pad);
1496     sample->y = floor (y1 - pad);
1497     sample->width  = ceil (x2 + pad) - sample->x;
1498     sample->height = ceil (y2 + pad) - sample->y;
1499
1500     if (_cairo_surface_get_extents (pattern->surface, &surface_extents))
1501         _cairo_rectangle_intersect (sample, &surface_extents);
1502
1503     return filter;
1504 }
1505
1506 static cairo_status_t
1507 i915_shader_acquire_surface (i915_shader_t *shader,
1508                              union i915_shader_channel *src,
1509                              const cairo_surface_pattern_t *pattern,
1510                              const cairo_rectangle_int_t *extents)
1511 {
1512     int surface_width, surface_height;
1513     cairo_surface_t *surface, *drm;
1514     cairo_extend_t extend;
1515     cairo_filter_t filter;
1516     cairo_matrix_t m;
1517     int src_x = 0, src_y = 0;
1518     cairo_surface_t *free_me = NULL;
1519     cairo_status_t status;
1520     cairo_rectangle_int_t sample;
1521
1522     assert (src->type.fragment == (i915_fragment_shader_t) -1);
1523     drm = surface = pattern->surface;
1524
1525     extend = pattern->base.extend;
1526     src->base.matrix = pattern->base.matrix;
1527     filter = sampled_area (pattern, extents, &sample);
1528
1529     if (surface->type == CAIRO_SURFACE_TYPE_DRM) {
1530         if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
1531             drm = ((cairo_surface_subsurface_t *) surface)->target;
1532         } else if (surface->backend->type == CAIRO_INTERNAL_SURFACE_TYPE_SNAPSHOT) {
1533             drm = ((cairo_surface_snapshot_t *) surface)->target;
1534         }
1535     }
1536
1537     if (drm->type == CAIRO_SURFACE_TYPE_DRM) {
1538         i915_surface_t *s = (i915_surface_t *) drm;
1539
1540         if (surface->backend->type == CAIRO_SURFACE_TYPE_SUBSURFACE) {
1541             if (s->intel.drm.base.device == shader->target->intel.drm.base.device &&
1542                 s != shader->target)
1543             {
1544                 cairo_surface_subsurface_t *sub = (cairo_surface_subsurface_t *) surface;
1545                 int x;
1546
1547                 status = i915_surface_fallback_flush (s);
1548                 if (unlikely (status))
1549                     return status;
1550
1551                 /* XXX blt subimage and cache snapshot */
1552
1553                 if (to_intel_bo (s->intel.drm.bo)->batch_write_domain) {
1554                     /* XXX pipelined flush of RENDER/TEXTURE cache */
1555                 }
1556
1557                 src->type.fragment = FS_TEXTURE;
1558                 src->surface.pixel = NONE;
1559                 surface_width  = sub->extents.width;
1560                 surface_height = sub->extents.height;
1561
1562                 src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
1563                 src->base.n_samplers = 1;
1564
1565                 x = sub->extents.x;
1566                 if (s->intel.drm.format != CAIRO_FORMAT_A8)
1567                     x *= 4;
1568
1569                 /* XXX tiling restrictions upon offset? */
1570                 src->base.offset[0] = s->offset + sub->extents.y * s->intel.drm.stride + x;
1571                 src->base.map[0] = s->map0;
1572                 src->base.map[0] &= ~((2047 << MS3_HEIGHT_SHIFT) | (2047 << MS3_WIDTH_SHIFT));
1573                 src->base.map[0] |=
1574                     ((sub->extents.height - 1) << MS3_HEIGHT_SHIFT) |
1575                     ((sub->extents.width - 1)  << MS3_WIDTH_SHIFT);
1576                 src->base.map[1] = (s->intel.drm.stride / 4 - 1) << MS4_PITCH_SHIFT;
1577             }
1578         } else {
1579             /* XXX if s == shader->dst allow if FILTER_NEAREST, EXTEND_NONE? */
1580             if (s->intel.drm.base.device == shader->target->intel.drm.base.device) {
1581                 status = i915_surface_fallback_flush (s);
1582                 if (unlikely (status))
1583                     return status;
1584
1585                 if (s == shader->target || i915_surface_needs_tiling (s)) {
1586                     status = i915_surface_copy_subimage (i915_device (shader->target),
1587                                                          s, &sample, TRUE, &s);
1588                     if (unlikely (status))
1589                         return status;
1590
1591                     free_me = drm = &s->intel.drm.base;
1592                 }
1593
1594                 src->type.fragment = FS_TEXTURE;
1595                 src->surface.pixel = NONE;
1596
1597                 surface_width  = s->intel.drm.width;
1598                 surface_height = s->intel.drm.height;
1599
1600                 src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
1601                 src->base.n_samplers = 1;
1602                 src->base.offset[0] = s->offset;
1603                 src->base.map[0] = s->map0;
1604                 src->base.map[1] = s->map1;
1605             }
1606         }
1607     }
1608
1609     if (src->type.fragment == (i915_fragment_shader_t) -1) {
1610         i915_surface_t *s;
1611
1612         if (extents->width == 1 && extents->height == 1) {
1613             return i915_shader_acquire_solid_surface (shader, src,
1614                                                       surface, extents);
1615         }
1616
1617         s = (i915_surface_t *)
1618             _cairo_surface_has_snapshot (surface,
1619                                          shader->target->intel.drm.base.backend);
1620         if (s == NULL) {
1621             cairo_status_t status;
1622
1623 #if 0
1624             /* XXX hackity hack hack */
1625             status = i915_clone_yuv (surface, src,
1626                                      image->width, image->height,
1627                                      clone_out);
1628 #endif
1629
1630             if (sample.width > 2048 || sample.height > 2048) {
1631                 status = i915_surface_render_pattern (i915_device (shader->target),
1632                                                       pattern, extents,
1633                                                       &s);
1634                 if (unlikely (status))
1635                     return status;
1636
1637                 extend = CAIRO_EXTEND_NONE;
1638                 filter = CAIRO_FILTER_NEAREST;
1639                 cairo_matrix_init_translate (&src->base.matrix,
1640                                              -extents->x, -extents->y);
1641             } else {
1642                 cairo_image_surface_t *image;
1643                 void *image_extra;
1644
1645                 status = _cairo_surface_acquire_source_image (surface, &image, &image_extra);
1646                 if (unlikely (status))
1647                     return status;
1648
1649                 if (image->width  < 2048 &&
1650                     image->height < 2048 &&
1651                     sample.width  >= image->width / 4 &&
1652                     sample.height >= image->height /4)
1653                 {
1654
1655                     status = i915_surface_clone (i915_device (shader->target),
1656                                                  image, &s);
1657
1658                     if (likely (status == CAIRO_STATUS_SUCCESS)) {
1659                         _cairo_surface_attach_snapshot (surface,
1660                                                         &s->intel.drm.base,
1661                                                         intel_surface_detach_snapshot);
1662
1663                         status = intel_snapshot_cache_insert (&i915_device (shader->target)->intel,
1664                                                               &s->intel);
1665                         if (unlikely (status)) {
1666                             cairo_surface_finish (&s->intel.drm.base);
1667                             cairo_surface_destroy (&s->intel.drm.base);
1668                         }
1669                     }
1670                 }
1671                 else
1672                 {
1673                     status = i915_surface_clone_subimage (i915_device (shader->target),
1674                                                           image, &sample, &s);
1675                     src_x = -extents->x;
1676                     src_y = -extents->y;
1677                 }
1678
1679                 _cairo_surface_release_source_image (surface, image, image_extra);
1680                 if (unlikely (status))
1681                     return status;
1682             }
1683
1684             free_me = &s->intel.drm.base;
1685         }
1686
1687         src->type.fragment = FS_TEXTURE;
1688         src->surface.pixel = NONE;
1689
1690         src->base.bo = intel_bo_reference (to_intel_bo (s->intel.drm.bo));
1691         src->base.n_samplers = 1;
1692         src->base.offset[0] = s->offset;
1693         src->base.map[0] = s->map0;
1694         src->base.map[1] = s->map1;
1695
1696         drm = &s->intel.drm.base;
1697
1698         surface_width  = s->intel.drm.width;
1699         surface_height = s->intel.drm.height;
1700     }
1701
1702     /* XXX transform nx1 or 1xn surfaces to 1D */
1703
1704     src->type.pattern = PATTERN_TEXTURE;
1705     if (extend != CAIRO_EXTEND_NONE &&
1706         sample.x >= 0 && sample.y >= 0 &&
1707         sample.x + sample.width  <= surface_width &&
1708         sample.y + sample.height <= surface_height)
1709     {
1710         extend = CAIRO_EXTEND_NONE;
1711     }
1712     if (extend == CAIRO_EXTEND_NONE) {
1713         src->type.vertex = VS_TEXTURE_16;
1714         src->base.texfmt = TEXCOORDFMT_2D_16;
1715     } else {
1716         src->type.vertex = VS_TEXTURE;
1717         src->base.texfmt = TEXCOORDFMT_2D;
1718     }
1719     src->base.content = drm->content;
1720
1721     src->base.sampler[0] =
1722         (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
1723         i915_texture_filter (filter);
1724     src->base.sampler[1] =
1725         SS3_NORMALIZED_COORDS |
1726         i915_texture_extend (extend);
1727
1728     /* tweak the src matrix to map from dst to texture coordinates */
1729     if (src_x | src_y)
1730         cairo_matrix_translate (&src->base.matrix, src_x, src_x);
1731     cairo_matrix_init_scale (&m, 1. / surface_width, 1. / surface_height);
1732     cairo_matrix_multiply (&src->base.matrix, &src->base.matrix, &m);
1733
1734     if (free_me != NULL)
1735         cairo_surface_destroy (free_me);
1736
1737     return CAIRO_STATUS_SUCCESS;
1738 }
1739
1740 cairo_status_t
1741 i915_shader_acquire_pattern (i915_shader_t *shader,
1742                              union i915_shader_channel *src,
1743                              const cairo_pattern_t *pattern,
1744                              const cairo_rectangle_int_t *extents)
1745 {
1746     switch (pattern->type) {
1747     case CAIRO_PATTERN_TYPE_SOLID:
1748         return i915_shader_acquire_solid (shader, src,
1749                                           (cairo_solid_pattern_t *) pattern,
1750                                           extents);
1751
1752     case CAIRO_PATTERN_TYPE_LINEAR:
1753         return i915_shader_acquire_linear (shader, src,
1754                                            (cairo_linear_pattern_t *) pattern,
1755                                            extents);
1756
1757     case CAIRO_PATTERN_TYPE_RADIAL:
1758         return i915_shader_acquire_radial (shader, src,
1759                                            (cairo_radial_pattern_t *) pattern,
1760                                            extents);
1761
1762     case CAIRO_PATTERN_TYPE_SURFACE:
1763         return i915_shader_acquire_surface (shader, src,
1764                                             (cairo_surface_pattern_t *) pattern,
1765                                             extents);
1766
1767     default:
1768         ASSERT_NOT_REACHED;
1769         return CAIRO_STATUS_SUCCESS;
1770     }
1771 }
1772
1773 static uint32_t
1774 i915_get_blend (cairo_operator_t op,
1775                 i915_surface_t *dst)
1776 {
1777 #define SBLEND(X) ((BLENDFACT_##X) << S6_CBUF_SRC_BLEND_FACT_SHIFT)
1778 #define DBLEND(X) ((BLENDFACT_##X) << S6_CBUF_DST_BLEND_FACT_SHIFT)
1779     static const struct blendinfo {
1780         cairo_bool_t dst_alpha;
1781         uint32_t src_blend;
1782         uint32_t dst_blend;
1783         enum {
1784             BOUNDED,
1785             SIMPLE,
1786             XRENDER,
1787         } kind;
1788     } i915_blend_op[] = {
1789         {0, SBLEND (ZERO),          DBLEND (ZERO), BOUNDED}, /* Clear */
1790         {0, SBLEND (ONE),           DBLEND (ZERO), BOUNDED}, /* Src */
1791
1792         {0, SBLEND (ONE),           DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Over */
1793         {1, SBLEND (DST_ALPHA),     DBLEND (ZERO), XRENDER}, /* In */
1794         {1, SBLEND (INV_DST_ALPHA), DBLEND (ZERO), XRENDER}, /* Out */
1795         {1, SBLEND (DST_ALPHA),     DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Atop */
1796
1797         {0, SBLEND (ZERO),          DBLEND (ONE), SIMPLE}, /* Dst */
1798         {1, SBLEND (INV_DST_ALPHA), DBLEND (ONE), SIMPLE}, /* OverReverse */
1799         {0, SBLEND (ZERO),          DBLEND (SRC_ALPHA), XRENDER}, /* InReverse */
1800         {0, SBLEND (ZERO),          DBLEND (INV_SRC_ALPHA), SIMPLE}, /* OutReverse */
1801         {1, SBLEND (INV_DST_ALPHA), DBLEND (SRC_ALPHA), XRENDER}, /* AtopReverse */
1802
1803         {1, SBLEND (INV_DST_ALPHA), DBLEND (INV_SRC_ALPHA), SIMPLE}, /* Xor */
1804         {0, SBLEND (ONE),           DBLEND (ONE), SIMPLE}, /* Add */
1805         //{0, 0, SBLEND (SRC_ALPHA_SATURATE),       DBLEND (ONE), SIMPLE}, /* XXX Saturate */
1806     };
1807     uint32_t sblend, dblend;
1808
1809     if (op >= ARRAY_LENGTH (i915_blend_op))
1810         return 0;
1811
1812     if (i915_blend_op[op].kind == BOUNDED)
1813         return 0;
1814
1815     sblend = i915_blend_op[op].src_blend;
1816     dblend = i915_blend_op[op].dst_blend;
1817
1818     /* If there's no dst alpha channel, adjust the blend op so that we'll treat
1819      * it as always 1.
1820      */
1821     if ((dst->intel.drm.base.content & CAIRO_CONTENT_ALPHA) == 0 &&
1822         i915_blend_op[op].dst_alpha)
1823     {
1824         if (sblend == SBLEND (DST_ALPHA))
1825             sblend = SBLEND (ONE);
1826         else if (sblend == SBLEND (INV_DST_ALPHA))
1827             sblend = SBLEND (ZERO);
1828     }
1829
1830     /* i915 engine reads 8bit color buffer into green channel in cases
1831        like color buffer blending etc., and also writes back green channel.
1832        So with dst_alpha blend we should use color factor. See spec on
1833        "8-bit rendering" */
1834     if (dst->intel.drm.format == CAIRO_FORMAT_A8 && i915_blend_op[op].dst_alpha) {
1835         if (sblend == SBLEND (DST_ALPHA))
1836             sblend = SBLEND (DST_COLR);
1837         else if (sblend == SBLEND (INV_DST_ALPHA))
1838             sblend = SBLEND (INV_DST_COLR);
1839     }
1840
1841     return sblend | dblend;
1842 #undef SBLEND
1843 #undef DBLEND
1844 }
1845
1846 static void
1847 i915_shader_channel_init (union i915_shader_channel *channel)
1848 {
1849     channel->type.vertex = (i915_vertex_shader_t) -1;
1850     channel->type.fragment = (i915_fragment_shader_t) -1;
1851     channel->type.pattern = (i915_shader_channel_t) -1;
1852     channel->base.texfmt = TEXCOORDFMT_NOT_PRESENT;
1853     channel->base.bo = NULL;
1854     channel->base.n_samplers = 0;
1855     channel->base.mode = 0;
1856 }
1857
1858 static void
1859 i915_shader_channel_fini (i915_device_t *device,
1860                            union i915_shader_channel *channel)
1861 {
1862     switch (channel->type.pattern) {
1863     case PATTERN_TEXTURE:
1864     case PATTERN_BASE:
1865     case PATTERN_LINEAR:
1866     case PATTERN_RADIAL:
1867         if (channel->base.bo != NULL)
1868             intel_bo_destroy (&device->intel, channel->base.bo);
1869         break;
1870
1871     default:
1872     case PATTERN_CONSTANT:
1873         break;
1874     }
1875 }
1876
1877 static void
1878 i915_shader_channel_reset (i915_device_t *device,
1879                            union i915_shader_channel *channel)
1880 {
1881     i915_shader_channel_fini (device, channel);
1882     i915_shader_channel_init (channel);
1883 }
1884
1885 void
1886 i915_shader_init (i915_shader_t *shader,
1887                   i915_surface_t *dst,
1888                   cairo_operator_t op,
1889                   double opacity)
1890 {
1891     shader->committed = FALSE;
1892     shader->device = i915_device (dst);
1893     shader->target = dst;
1894     shader->op = op;
1895     shader->opacity = opacity;
1896
1897     shader->blend = i915_get_blend (op, dst);
1898     shader->need_combine = FALSE;
1899
1900     shader->content = dst->intel.drm.base.content;
1901
1902     i915_shader_channel_init (&shader->source);
1903     i915_shader_channel_init (&shader->mask);
1904     i915_shader_channel_init (&shader->clip);
1905     i915_shader_channel_init (&shader->dst);
1906 }
1907
1908 static void
1909 i915_set_shader_samplers (i915_device_t *device,
1910                           const i915_shader_t *shader)
1911 {
1912     uint32_t n_samplers, n_maps, n;
1913     uint32_t samplers[2*4];
1914     uint32_t maps[4*4];
1915     uint32_t mask, s, m;
1916
1917     n_maps =
1918         shader->source.base.n_samplers +
1919         shader->mask.base.n_samplers +
1920         shader->clip.base.n_samplers +
1921         shader->dst.base.n_samplers;
1922     assert (n_maps <= 4);
1923
1924     if (n_maps == 0)
1925         return;
1926
1927     n_samplers =
1928         !! shader->source.base.bo +
1929         !! shader->mask.base.bo +
1930         !! shader->clip.base.bo +
1931         !! shader->dst.base.bo;
1932
1933     mask  = (1 << n_maps) - 1;
1934
1935     /* We check for repeated setting of sample state mainly to catch
1936      * continuation of text strings across multiple show-glyphs.
1937      */
1938     s = m = 0;
1939     if (shader->source.base.bo != NULL) {
1940         samplers[s++] = shader->source.base.sampler[0];
1941         samplers[s++] = shader->source.base.sampler[1];
1942         maps[m++] = shader->source.base.bo->base.handle;
1943         for (n = 0; n < shader->source.base.n_samplers; n++) {
1944             maps[m++] = shader->source.base.offset[n];
1945             maps[m++] = shader->source.base.map[2*n+0];
1946             maps[m++] = shader->source.base.map[2*n+1];
1947         }
1948     }
1949     if (shader->mask.base.bo != NULL) {
1950         samplers[s++] = shader->mask.base.sampler[0];
1951         samplers[s++] = shader->mask.base.sampler[1];
1952         maps[m++] = shader->mask.base.bo->base.handle;
1953         for (n = 0; n < shader->mask.base.n_samplers; n++) {
1954             maps[m++] = shader->mask.base.offset[n];
1955             maps[m++] = shader->mask.base.map[2*n+0];
1956             maps[m++] = shader->mask.base.map[2*n+1];
1957         }
1958     }
1959     if (shader->clip.base.bo != NULL) {
1960         samplers[s++] = shader->clip.base.sampler[0];
1961         samplers[s++] = shader->clip.base.sampler[1];
1962         maps[m++] = shader->clip.base.bo->base.handle;
1963         for (n = 0; n < shader->clip.base.n_samplers; n++) {
1964             maps[m++] = shader->clip.base.offset[n];
1965             maps[m++] = shader->clip.base.map[2*n+0];
1966             maps[m++] = shader->clip.base.map[2*n+1];
1967         }
1968     }
1969     if (shader->dst.base.bo != NULL) {
1970         samplers[s++] = shader->dst.base.sampler[0];
1971         samplers[s++] = shader->dst.base.sampler[1];
1972         maps[m++] = shader->dst.base.bo->base.handle;
1973         for (n = 0; n < shader->dst.base.n_samplers; n++) {
1974             maps[m++] = shader->dst.base.offset[n];
1975             maps[m++] = shader->dst.base.map[2*n+0];
1976             maps[m++] = shader->dst.base.map[2*n+1];
1977         }
1978     }
1979
1980     if (n_maps > device->current_n_maps ||
1981         memcmp (device->current_maps,
1982                 maps,
1983                 m * sizeof (uint32_t)))
1984     {
1985         memcpy (device->current_maps, maps, m * sizeof (uint32_t));
1986         device->current_n_maps = n_maps;
1987
1988         if (device->current_source != NULL)
1989             *device->current_source = 0;
1990         if (device->current_mask != NULL)
1991             *device->current_mask = 0;
1992         if (device->current_clip != NULL)
1993             *device->current_clip = 0;
1994
1995 #if 0
1996         if (shader->source.type.pattern == PATTERN_TEXTURE) {
1997             switch ((int) shader->source.surface.surface->type) {
1998             case CAIRO_SURFACE_TYPE_DRM:
1999                 {
2000                     i915_surface_t *surface =
2001                         (i915_surface_t *) shader->source.surface.surface;
2002                     device->current_source = &surface->is_current_texture;
2003                     surface->is_current_texture |= CURRENT_SOURCE;
2004                     break;
2005                 }
2006
2007             case I915_PACKED_PIXEL_SURFACE_TYPE:
2008                 {
2009                     i915_packed_pixel_surface_t *surface =
2010                         (i915_packed_pixel_surface_t *) shader->source.surface.surface;
2011                     device->current_source = &surface->is_current_texture;
2012                     surface->is_current_texture |= CURRENT_SOURCE;
2013                     break;
2014                 }
2015
2016             default:
2017                 device->current_source = NULL;
2018                 break;
2019             }
2020         } else
2021             device->current_source = NULL;
2022
2023         if (shader->mask.type.pattern == PATTERN_TEXTURE) {
2024             switch ((int) shader->mask.surface.surface->type) {
2025             case CAIRO_SURFACE_TYPE_DRM:
2026                 {
2027                     i915_surface_t *surface =
2028                         (i915_surface_t *) shader->mask.surface.surface;
2029                     device->current_mask = &surface->is_current_texture;
2030                     surface->is_current_texture |= CURRENT_MASK;
2031                     break;
2032                 }
2033
2034             case I915_PACKED_PIXEL_SURFACE_TYPE:
2035                 {
2036                     i915_packed_pixel_surface_t *surface =
2037                         (i915_packed_pixel_surface_t *) shader->mask.surface.surface;
2038                     device->current_mask = &surface->is_current_texture;
2039                     surface->is_current_texture |= CURRENT_MASK;
2040                     break;
2041                 }
2042
2043             default:
2044                 device->current_mask = NULL;
2045                 break;
2046             }
2047         } else
2048             device->current_mask = NULL;
2049 #endif
2050
2051         OUT_DWORD (_3DSTATE_MAP_STATE | (3 * n_maps));
2052         OUT_DWORD (mask);
2053         for (n = 0; n < shader->source.base.n_samplers; n++) {
2054             i915_batch_emit_reloc (device, shader->source.base.bo,
2055                                    shader->source.base.offset[n],
2056                                    I915_GEM_DOMAIN_SAMPLER, 0,
2057                                    FALSE);
2058             OUT_DWORD (shader->source.base.map[2*n+0]);
2059             OUT_DWORD (shader->source.base.map[2*n+1]);
2060         }
2061         for (n = 0; n < shader->mask.base.n_samplers; n++) {
2062             i915_batch_emit_reloc (device, shader->mask.base.bo,
2063                                    shader->mask.base.offset[n],
2064                                    I915_GEM_DOMAIN_SAMPLER, 0,
2065                                    FALSE);
2066             OUT_DWORD (shader->mask.base.map[2*n+0]);
2067             OUT_DWORD (shader->mask.base.map[2*n+1]);
2068         }
2069         for (n = 0; n < shader->clip.base.n_samplers; n++) {
2070             i915_batch_emit_reloc (device, shader->clip.base.bo,
2071                                    shader->clip.base.offset[n],
2072                                    I915_GEM_DOMAIN_SAMPLER, 0,
2073                                    FALSE);
2074             OUT_DWORD (shader->clip.base.map[2*n+0]);
2075             OUT_DWORD (shader->clip.base.map[2*n+1]);
2076         }
2077         for (n = 0; n < shader->dst.base.n_samplers; n++) {
2078             i915_batch_emit_reloc (device, shader->dst.base.bo,
2079                                    shader->dst.base.offset[n],
2080                                    I915_GEM_DOMAIN_SAMPLER, 0,
2081                                    FALSE);
2082             OUT_DWORD (shader->dst.base.map[2*n+0]);
2083             OUT_DWORD (shader->dst.base.map[2*n+1]);
2084         }
2085     }
2086
2087     if (n_samplers > device->current_n_samplers ||
2088         memcmp (device->current_samplers,
2089                 samplers,
2090                 s * sizeof (uint32_t)))
2091     {
2092         device->current_n_samplers = s;
2093         memcpy (device->current_samplers, samplers, s * sizeof (uint32_t));
2094
2095         OUT_DWORD (_3DSTATE_SAMPLER_STATE | (3 * n_maps));
2096         OUT_DWORD (mask);
2097         s = 0;
2098         for (n = 0; n < shader->source.base.n_samplers; n++) {
2099             OUT_DWORD (shader->source.base.sampler[0]);
2100             OUT_DWORD (shader->source.base.sampler[1] |
2101                        (s << SS3_TEXTUREMAP_INDEX_SHIFT));
2102             OUT_DWORD (0x0);
2103             s++;
2104         }
2105         for (n = 0; n < shader->mask.base.n_samplers; n++) {
2106             OUT_DWORD (shader->mask.base.sampler[0]);
2107             OUT_DWORD (shader->mask.base.sampler[1] |
2108                        (s << SS3_TEXTUREMAP_INDEX_SHIFT));
2109             OUT_DWORD (0x0);
2110             s++;
2111         }
2112         for (n = 0; n < shader->clip.base.n_samplers; n++) {
2113             OUT_DWORD (shader->clip.base.sampler[0]);
2114             OUT_DWORD (shader->clip.base.sampler[1] |
2115                        (s << SS3_TEXTUREMAP_INDEX_SHIFT));
2116             OUT_DWORD (0x0);
2117             s++;
2118         }
2119         for (n = 0; n < shader->dst.base.n_samplers; n++) {
2120             OUT_DWORD (shader->dst.base.sampler[0]);
2121             OUT_DWORD (shader->dst.base.sampler[1] |
2122                        (s << SS3_TEXTUREMAP_INDEX_SHIFT));
2123             OUT_DWORD (0x0);
2124             s++;
2125         }
2126     }
2127 }
2128
2129 static uint32_t
2130 i915_shader_get_texcoords (const i915_shader_t *shader)
2131 {
2132     uint32_t texcoords;
2133     uint32_t tu;
2134
2135     texcoords = S2_TEXCOORD_NONE;
2136     tu = 0;
2137     if (shader->source.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
2138         texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
2139         texcoords |= S2_TEXCOORD_FMT (tu, shader->source.base.texfmt);
2140         tu++;
2141     }
2142     if (shader->mask.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
2143         texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
2144         texcoords |= S2_TEXCOORD_FMT (tu, shader->mask.base.texfmt);
2145         tu++;
2146     }
2147     if (shader->clip.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
2148         texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
2149         texcoords |= S2_TEXCOORD_FMT (tu, shader->clip.base.texfmt);
2150         tu++;
2151     }
2152     if (shader->dst.base.texfmt != TEXCOORDFMT_NOT_PRESENT) {
2153         texcoords &= ~S2_TEXCOORD_FMT (tu, S2_TEXCOORD_FMT0_MASK);
2154         texcoords |= S2_TEXCOORD_FMT (tu, shader->dst.base.texfmt);
2155         tu++;
2156     }
2157
2158     return texcoords;
2159 }
2160
2161 static void
2162 i915_set_shader_mode (i915_device_t *device,
2163                       const i915_shader_t *shader)
2164 {
2165     uint32_t texcoords;
2166     uint32_t mask, cnt;
2167
2168     texcoords = i915_shader_get_texcoords (shader);
2169
2170     mask = cnt = 0;
2171
2172     if (device->current_texcoords != texcoords)
2173         mask |= I1_LOAD_S (2), cnt++;
2174
2175     if (device->current_blend != shader->blend)
2176         mask |= I1_LOAD_S (6), cnt++;
2177
2178     if (cnt == 0)
2179         return;
2180
2181     OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | mask | (cnt-1));
2182
2183     if (device->current_texcoords != texcoords) {
2184         OUT_DWORD (texcoords);
2185         device->current_texcoords = texcoords;
2186     }
2187
2188     if (device->current_blend != shader->blend) {
2189         if (shader->blend) {
2190             OUT_DWORD (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
2191                        (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
2192                        shader->blend);
2193         } else {
2194             OUT_DWORD (S6_COLOR_WRITE_ENABLE);
2195         }
2196
2197         device->current_blend = shader->blend;
2198     }
2199 }
2200
2201 static void
2202 i915_set_constants (i915_device_t *device,
2203                     const uint32_t *constants,
2204                     uint32_t n_constants)
2205 {
2206     uint32_t n;
2207
2208     OUT_DWORD (_3DSTATE_PIXEL_SHADER_CONSTANTS | n_constants);
2209     OUT_DWORD ((1 << (n_constants >> 2)) - 1);
2210
2211     for (n = 0; n < n_constants; n++)
2212         OUT_DWORD (constants[n]);
2213
2214     device->current_n_constants = n_constants;
2215     memcpy (device->current_constants, constants, n_constants*4);
2216 }
2217
2218 static uint32_t
2219 pack_constants (const union i915_shader_channel *channel,
2220                 uint32_t *constants)
2221 {
2222     uint32_t count = 0, n;
2223
2224     switch (channel->type.fragment) {
2225     case FS_ZERO:
2226     case FS_ONE:
2227     case FS_PURE:
2228     case FS_DIFFUSE:
2229         break;
2230
2231     case FS_CONSTANT:
2232         constants[count++] = pack_float (channel->solid.color.red);
2233         constants[count++] = pack_float (channel->solid.color.green);
2234         constants[count++] = pack_float (channel->solid.color.blue);
2235         constants[count++] = pack_float (channel->solid.color.alpha);
2236         break;
2237
2238     case FS_LINEAR:
2239         constants[count++] = pack_float (channel->linear.color0.red);
2240         constants[count++] = pack_float (channel->linear.color0.green);
2241         constants[count++] = pack_float (channel->linear.color0.blue);
2242         constants[count++] = pack_float (channel->linear.color0.alpha);
2243
2244         constants[count++] = pack_float (channel->linear.color1.red);
2245         constants[count++] = pack_float (channel->linear.color1.green);
2246         constants[count++] = pack_float (channel->linear.color1.blue);
2247         constants[count++] = pack_float (channel->linear.color1.alpha);
2248         break;
2249
2250     case FS_RADIAL:
2251         for (n = 0; n < ARRAY_LENGTH (channel->radial.constants); n++)
2252             constants[count++] = pack_float (channel->radial.constants[n]);
2253         break;
2254
2255     case FS_TEXTURE:
2256     case FS_YUV:
2257     case FS_SPANS:
2258         break;
2259     }
2260
2261     return count;
2262 }
2263
2264 static void
2265 i915_set_shader_constants (i915_device_t *device,
2266                            const i915_shader_t *shader)
2267 {
2268     uint32_t constants[4*4*3+4];
2269     unsigned n_constants;
2270
2271     n_constants = 0;
2272     if (shader->source.type.fragment == FS_DIFFUSE) {
2273         uint32_t diffuse;
2274
2275         diffuse =
2276             ((shader->source.solid.color.alpha_short >> 8) << 24) |
2277             ((shader->source.solid.color.red_short   >> 8) << 16) |
2278             ((shader->source.solid.color.green_short >> 8) << 8) |
2279             ((shader->source.solid.color.blue_short  >> 8) << 0);
2280
2281         if (diffuse != device->current_diffuse) {
2282             OUT_DWORD (_3DSTATE_DFLT_DIFFUSE_CMD);
2283             OUT_DWORD (diffuse);
2284             device->current_diffuse = diffuse;
2285         }
2286     } else {
2287         n_constants += pack_constants (&shader->source, constants + n_constants);
2288     }
2289     n_constants += pack_constants (&shader->mask, constants + n_constants);
2290
2291     if (shader->opacity < 1.) {
2292         constants[n_constants+0] =
2293             constants[n_constants+1] =
2294             constants[n_constants+2] =
2295             constants[n_constants+3] = pack_float (shader->opacity);
2296         n_constants += 4;
2297     }
2298
2299     if (n_constants != 0 &&
2300         (device->current_n_constants != n_constants ||
2301          memcmp (device->current_constants, constants, n_constants*4)))
2302     {
2303         i915_set_constants (device, constants, n_constants);
2304     }
2305 }
2306
2307 static cairo_bool_t
2308 i915_shader_needs_update (const i915_shader_t *shader,
2309                           const i915_device_t *device)
2310 {
2311     uint32_t count, n;
2312     uint32_t buf[64];
2313
2314     if (device->current_target != shader->target)
2315         return TRUE;
2316
2317     count =
2318         !! shader->source.base.bo +
2319         !! shader->mask.base.bo +
2320         !! shader->clip.base.bo +
2321         !! shader->dst.base.bo;
2322     if (count > device->current_n_samplers)
2323         return TRUE;
2324
2325     count =
2326         shader->source.base.n_samplers +
2327         shader->mask.base.n_samplers +
2328         shader->clip.base.n_samplers +
2329         shader->dst.base.n_samplers;
2330     if (count > device->current_n_maps)
2331         return TRUE;
2332
2333     if (count) {
2334         count = 0;
2335         if (shader->source.base.bo != NULL) {
2336             buf[count++] = shader->source.base.sampler[0];
2337             buf[count++] = shader->source.base.sampler[1];
2338         }
2339         if (shader->mask.base.bo != NULL) {
2340             buf[count++] = shader->mask.base.sampler[0];
2341             buf[count++] = shader->mask.base.sampler[1];
2342         }
2343         if (shader->clip.base.bo != NULL) {
2344             buf[count++] = shader->clip.base.sampler[0];
2345             buf[count++] = shader->clip.base.sampler[1];
2346         }
2347         if (shader->dst.base.bo != NULL) {
2348             buf[count++] = shader->dst.base.sampler[0];
2349             buf[count++] = shader->dst.base.sampler[1];
2350         }
2351         if (memcmp (device->current_samplers, buf, count * sizeof (uint32_t)))
2352             return TRUE;
2353
2354         count = 0;
2355         if (shader->source.base.bo != NULL) {
2356             buf[count++] = shader->source.base.bo->base.handle;
2357             for (n = 0; n < shader->source.base.n_samplers; n++) {
2358                 buf[count++] = shader->source.base.offset[n];
2359                 buf[count++] = shader->source.base.map[2*n+0];
2360                 buf[count++] = shader->source.base.map[2*n+1];
2361             }
2362         }
2363         if (shader->mask.base.bo != NULL) {
2364             buf[count++] = shader->mask.base.bo->base.handle;
2365             for (n = 0; n < shader->mask.base.n_samplers; n++) {
2366                 buf[count++] = shader->mask.base.offset[n];
2367                 buf[count++] = shader->mask.base.map[2*n+0];
2368                 buf[count++] = shader->mask.base.map[2*n+1];
2369             }
2370         }
2371         if (shader->clip.base.bo != NULL) {
2372             buf[count++] = shader->clip.base.bo->base.handle;
2373             for (n = 0; n < shader->clip.base.n_samplers; n++) {
2374                 buf[count++] = shader->clip.base.offset[n];
2375                 buf[count++] = shader->clip.base.map[2*n+0];
2376                 buf[count++] = shader->clip.base.map[2*n+1];
2377             }
2378         }
2379         if (shader->dst.base.bo != NULL) {
2380             buf[count++] = shader->dst.base.bo->base.handle;
2381             for (n = 0; n < shader->dst.base.n_samplers; n++) {
2382                 buf[count++] = shader->dst.base.offset[n];
2383                 buf[count++] = shader->dst.base.map[2*n+0];
2384                 buf[count++] = shader->dst.base.map[2*n+1];
2385             }
2386         }
2387         if (memcmp (device->current_maps, buf, count * sizeof (uint32_t)))
2388             return TRUE;
2389     }
2390
2391     if (i915_shader_get_texcoords (shader) != device->current_texcoords)
2392         return TRUE;
2393     if (device->current_blend != shader->blend)
2394         return TRUE;
2395
2396     count = 0;
2397     if (shader->source.type.fragment == FS_DIFFUSE) {
2398         uint32_t diffuse;
2399
2400         diffuse =
2401             ((shader->source.solid.color.alpha_short >> 8) << 24) |
2402             ((shader->source.solid.color.red_short   >> 8) << 16) |
2403             ((shader->source.solid.color.green_short >> 8) << 8) |
2404             ((shader->source.solid.color.blue_short  >> 8) << 0);
2405
2406         if (diffuse != device->current_diffuse)
2407             return TRUE;
2408     } else {
2409         count += pack_constants (&shader->source, buf + count);
2410     }
2411     count += pack_constants (&shader->mask, buf + count);
2412
2413     if (count &&
2414         (device->current_n_constants != count ||
2415          memcmp (device->current_constants, buf, count*4)))
2416     {
2417         return TRUE;
2418     }
2419
2420     n = (i915_shader_channel_key (&shader->source) <<  0) |
2421         (i915_shader_channel_key (&shader->mask)   <<  8) |
2422         (i915_shader_channel_key (&shader->clip)   << 16) |
2423         (shader->op << 24) |
2424         ((shader->opacity < 1.) << 30) |
2425         (((shader->content & CAIRO_CONTENT_ALPHA) == CAIRO_CONTENT_ALPHA) << 31);
2426     return n != device->current_program;
2427 }
2428
2429 void
2430 i915_set_dst (i915_device_t *device, i915_surface_t *dst)
2431 {
2432     uint32_t size;
2433
2434     if (device->current_target != dst) {
2435         intel_bo_t *bo;
2436
2437         bo = to_intel_bo (dst->intel.drm.bo);
2438         assert (bo != NULL);
2439
2440         OUT_DWORD (_3DSTATE_BUF_INFO_CMD);
2441         OUT_DWORD (BUF_3D_ID_COLOR_BACK |
2442                    BUF_tiling (bo->tiling) |
2443                    BUF_3D_PITCH (dst->intel.drm.stride));
2444         OUT_RELOC (dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
2445
2446         device->current_target = dst;
2447     }
2448
2449     if (dst->colorbuf != device->current_colorbuf) {
2450         OUT_DWORD (_3DSTATE_DST_BUF_VARS_CMD);
2451         OUT_DWORD (dst->colorbuf);
2452         device->current_colorbuf = dst->colorbuf;
2453     }
2454
2455     size = DRAW_YMAX (dst->intel.drm.height) | DRAW_XMAX (dst->intel.drm.width);
2456     if (size != device->current_size) {
2457         OUT_DWORD (_3DSTATE_DRAW_RECT_CMD);
2458         OUT_DWORD (0); /* dither */
2459         OUT_DWORD (0); /* top-left */
2460         OUT_DWORD (size);
2461         OUT_DWORD (0);  /* origin */
2462         device->current_size = size;
2463     }
2464 }
2465
2466 static void
2467 i915_set_shader_target (i915_device_t *device,
2468                         const i915_shader_t *shader)
2469 {
2470     i915_set_dst (device, shader->target);
2471 }
2472
2473 int
2474 i915_shader_num_texcoords (const i915_shader_t *shader)
2475 {
2476     int cnt = 0;
2477
2478     switch (shader->source.base.texfmt) {
2479     default:
2480         ASSERT_NOT_REACHED;
2481     case TEXCOORDFMT_NOT_PRESENT: break;
2482     case TEXCOORDFMT_2D: cnt += 2; break;
2483     case TEXCOORDFMT_3D: cnt += 3; break;
2484     case TEXCOORDFMT_4D: cnt += 4; break;
2485     case TEXCOORDFMT_1D: cnt += 1; break;
2486     case TEXCOORDFMT_2D_16: cnt += 1; break;
2487     }
2488
2489     switch (shader->mask.base.texfmt) {
2490     default:
2491         ASSERT_NOT_REACHED;
2492     case TEXCOORDFMT_NOT_PRESENT: break;
2493     case TEXCOORDFMT_2D: cnt += 2; break;
2494     case TEXCOORDFMT_3D: cnt += 3; break;
2495     case TEXCOORDFMT_4D: cnt += 4; break;
2496     case TEXCOORDFMT_1D: cnt += 1; break;
2497     case TEXCOORDFMT_2D_16: cnt += 1; break;
2498     }
2499
2500     switch (shader->clip.base.texfmt) {
2501     default:
2502         ASSERT_NOT_REACHED;
2503     case TEXCOORDFMT_NOT_PRESENT: break;
2504     case TEXCOORDFMT_2D: cnt += 2; break;
2505     case TEXCOORDFMT_3D: cnt += 3; break;
2506     case TEXCOORDFMT_4D: cnt += 4; break;
2507     case TEXCOORDFMT_1D: cnt += 1; break;
2508     case TEXCOORDFMT_2D_16: cnt += 1; break;
2509     }
2510
2511     switch (shader->dst.base.texfmt) {
2512     default:
2513         ASSERT_NOT_REACHED;
2514     case TEXCOORDFMT_NOT_PRESENT: break;
2515     case TEXCOORDFMT_2D: cnt += 2; break;
2516     case TEXCOORDFMT_3D: cnt += 3; break;
2517     case TEXCOORDFMT_4D: cnt += 4; break;
2518     case TEXCOORDFMT_1D: cnt += 1; break;
2519     case TEXCOORDFMT_2D_16: cnt += 1; break;
2520     }
2521
2522     return cnt;
2523 }
2524
2525 void
2526 i915_shader_fini (i915_shader_t *shader)
2527 {
2528     i915_device_t *device = i915_device (shader->target);
2529
2530     i915_shader_channel_fini (device, &shader->source);
2531     i915_shader_channel_fini (device, &shader->mask);
2532     i915_shader_channel_fini (device, &shader->clip);
2533 }
2534
2535 void
2536 i915_shader_set_clip (i915_shader_t *shader,
2537                       cairo_clip_t *clip)
2538 {
2539     cairo_surface_t *clip_surface;
2540     int clip_x, clip_y;
2541     union i915_shader_channel *channel;
2542     i915_surface_t *s;
2543
2544     clip_surface = _cairo_clip_get_surface (clip, &shader->target->intel.drm.base, &clip_x, &clip_y);
2545     assert (clip_surface->status == CAIRO_STATUS_SUCCESS);
2546     assert (clip_surface->type == CAIRO_SURFACE_TYPE_DRM);
2547
2548     channel = &shader->clip;
2549     channel->type.vertex = VS_TEXTURE_16;
2550     channel->base.texfmt = TEXCOORDFMT_2D_16;
2551     channel->base.content = CAIRO_CONTENT_ALPHA;
2552
2553     channel->type.fragment = FS_TEXTURE;
2554     channel->surface.pixel = NONE;
2555
2556     s = (i915_surface_t *) clip_surface;
2557     channel->base.bo = to_intel_bo (s->intel.drm.bo);
2558     channel->base.n_samplers = 1;
2559     channel->base.offset[0] = s->offset;
2560     channel->base.map[0] = s->map0;
2561     channel->base.map[1] = s->map1;
2562
2563     channel->base.sampler[0] =
2564         (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
2565         i915_texture_filter (CAIRO_FILTER_NEAREST);
2566     channel->base.sampler[1] =
2567         SS3_NORMALIZED_COORDS |
2568         i915_texture_extend (CAIRO_EXTEND_NONE);
2569
2570     cairo_matrix_init_scale (&shader->clip.base.matrix,
2571                              1. / s->intel.drm.width,
2572                              1. / s->intel.drm.height);
2573     cairo_matrix_translate (&shader->clip.base.matrix,
2574                             -clip_x, -clip_y);
2575 }
2576
2577 static cairo_status_t
2578 i915_shader_check_aperture (i915_shader_t *shader,
2579                             i915_device_t *device)
2580 {
2581     cairo_status_t status;
2582     intel_bo_t *bo_array[4];
2583     uint32_t n = 0;
2584
2585     if (shader->target != device->current_target)
2586         bo_array[n++] = to_intel_bo (shader->target->intel.drm.bo);
2587
2588     if (shader->source.base.bo != NULL)
2589         bo_array[n++] = shader->source.base.bo;
2590
2591     if (shader->mask.base.bo != NULL)
2592         bo_array[n++] = shader->mask.base.bo;
2593
2594     if (shader->clip.base.bo != NULL)
2595         bo_array[n++] = shader->clip.base.bo;
2596
2597     if (n == 0 || i915_check_aperture (device, bo_array, n))
2598         return CAIRO_STATUS_SUCCESS;
2599
2600     status = i915_batch_flush (device);
2601     if (unlikely (status))
2602         return status;
2603
2604     assert (i915_check_aperture (device, bo_array, n));
2605     return CAIRO_STATUS_SUCCESS;
2606 }
2607
2608 static void
2609 i915_shader_combine_mask (i915_shader_t *shader, i915_device_t *device)
2610 {
2611     if (shader->mask.type.fragment == (i915_fragment_shader_t) -1 ||
2612         shader->mask.type.fragment == FS_CONSTANT)
2613     {
2614         return;
2615     }
2616
2617     if (shader->mask.type.fragment == FS_PURE) {
2618         if (shader->mask.solid.pure & (1<<3)) {
2619             shader->mask.type.fragment = FS_ONE;
2620         } else {
2621             shader->mask.type.fragment = FS_ZERO;
2622         }
2623     }
2624
2625     if (shader->mask.type.fragment == FS_ONE ||
2626         (shader->mask.base.content & CAIRO_CONTENT_ALPHA) == 0)
2627     {
2628         i915_shader_channel_reset (device, &shader->mask);
2629     }
2630
2631     if (shader->mask.type.fragment == FS_ZERO) {
2632         i915_shader_channel_fini (device, &shader->source);
2633
2634         shader->source.type.fragment = FS_ZERO;
2635         shader->source.type.vertex = VS_ZERO;
2636         shader->source.base.texfmt = TEXCOORDFMT_NOT_PRESENT;
2637         shader->source.base.mode = 0;
2638         shader->source.base.n_samplers = 0;
2639     }
2640
2641     if (shader->source.type.fragment == FS_ZERO) {
2642         i915_shader_channel_reset (device, &shader->mask);
2643         i915_shader_channel_reset (device, &shader->clip);
2644     }
2645 }
2646
2647 static void
2648 i915_shader_setup_dst (i915_shader_t *shader)
2649 {
2650     union i915_shader_channel *channel;
2651     i915_surface_t *s;
2652
2653     /* We need to manual blending if we have a clip surface and an unbounded op,
2654      * or an extended blend mode.
2655      */
2656     if (shader->need_combine ||
2657         (shader->op < CAIRO_OPERATOR_SATURATE &&
2658          (shader->clip.type.fragment == (i915_fragment_shader_t) -1 ||
2659           _cairo_operator_bounded_by_mask (shader->op))))
2660     {
2661         return;
2662     }
2663
2664     shader->need_combine = TRUE;
2665
2666     channel = &shader->dst;
2667     channel->type.vertex = VS_TEXTURE_16;
2668     channel->base.texfmt = TEXCOORDFMT_2D_16;
2669     channel->base.content = shader->content;
2670
2671     channel->type.fragment = FS_TEXTURE;
2672     channel->surface.pixel = NONE;
2673
2674     s = shader->target;
2675     channel->base.bo = to_intel_bo (s->intel.drm.bo);
2676     channel->base.n_samplers = 1;
2677     channel->base.offset[0] = s->offset;
2678     channel->base.map[0] = s->map0;
2679     channel->base.map[1] = s->map1;
2680
2681     channel->base.sampler[0] =
2682         (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT) |
2683         i915_texture_filter (CAIRO_FILTER_NEAREST);
2684     channel->base.sampler[1] =
2685         SS3_NORMALIZED_COORDS |
2686         i915_texture_extend (CAIRO_EXTEND_NONE);
2687
2688     cairo_matrix_init_scale (&shader->dst.base.matrix,
2689                              1. / s->intel.drm.width,
2690                              1. / s->intel.drm.height);
2691 }
2692
2693 static void
2694 i915_shader_combine_source (i915_shader_t *shader,
2695                             i915_device_t *device)
2696 {
2697     if (device->last_source_fragment == shader->source.type.fragment)
2698         return;
2699
2700     if (device->last_source_fragment == FS_DIFFUSE) {
2701         switch (shader->source.type.fragment) {
2702         case FS_ONE:
2703         case FS_PURE:
2704         case FS_CONSTANT:
2705         case FS_DIFFUSE:
2706             shader->source.type.fragment = FS_DIFFUSE;
2707             shader->source.base.mode = 0;
2708             break;
2709         case FS_ZERO:
2710         case FS_LINEAR:
2711         case FS_RADIAL:
2712         case FS_TEXTURE:
2713         case FS_YUV:
2714         case FS_SPANS:
2715         default:
2716             break;
2717         }
2718     }
2719
2720     device->last_source_fragment = shader->source.type.fragment;
2721 }
2722
2723 static inline float *
2724 i915_composite_vertex (float *v,
2725                        const i915_shader_t *shader,
2726                        double x, double y)
2727 {
2728     double s, t;
2729
2730     /* Each vertex is:
2731      *   2 vertex coordinates
2732      *   [0-2] source texture coordinates
2733      *   [0-2] mask texture coordinates
2734      */
2735
2736     *v++ = x; *v++ = y;
2737     switch (shader->source.type.vertex) {
2738     case VS_ZERO:
2739     case VS_CONSTANT:
2740         break;
2741     case VS_LINEAR:
2742         *v++ = i915_shader_linear_texcoord (&shader->source.linear, x, y);
2743         break;
2744     case VS_TEXTURE:
2745         s = x, t = y;
2746         cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t);
2747         *v++ = s; *v++ = t;
2748         break;
2749     case VS_TEXTURE_16:
2750         s = x, t = y;
2751         cairo_matrix_transform_point (&shader->source.base.matrix, &s, &t);
2752         *v++ = texcoord_2d_16 (s, t);
2753         break;
2754     }
2755     switch (shader->mask.type.vertex) {
2756     case VS_ZERO:
2757     case VS_CONSTANT:
2758         break;
2759     case VS_LINEAR:
2760         *v++ = i915_shader_linear_texcoord (&shader->mask.linear, x, y);
2761         break;
2762     case VS_TEXTURE:
2763         s = x, t = y;
2764         cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t);
2765         *v++ = s; *v++ = t;
2766         break;
2767     case VS_TEXTURE_16:
2768         s = x, t = y;
2769         cairo_matrix_transform_point (&shader->mask.base.matrix, &s, &t);
2770         *v++ = texcoord_2d_16 (s, t);
2771         break;
2772     }
2773
2774     return v;
2775 }
2776
2777 static inline void
2778 i915_shader_add_rectangle_general (const i915_shader_t *shader,
2779                                    int x, int y,
2780                                    int w, int h)
2781 {
2782     float *vertices;
2783
2784     vertices = i915_add_rectangle (shader->device);
2785     vertices = i915_composite_vertex (vertices, shader, x + w, y + h);
2786     vertices = i915_composite_vertex (vertices, shader, x, y + h);
2787     vertices = i915_composite_vertex (vertices, shader, x, y);
2788     /* XXX overflow! */
2789 }
2790
2791 void
2792 i915_vbo_flush (i915_device_t *device)
2793 {
2794     assert (device->floats_per_vertex);
2795     assert (device->vertex_count);
2796
2797     if (device->vbo == 0) {
2798         OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
2799                    I1_LOAD_S (0) |
2800                    I1_LOAD_S (1) |
2801                    1);
2802         device->vbo = device->batch.used++;
2803         device->vbo_max_index = device->batch.used;
2804         OUT_DWORD ((device->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
2805                    (device->floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
2806     }
2807
2808     OUT_DWORD (PRIM3D_RECTLIST |
2809                PRIM3D_INDIRECT_SEQUENTIAL |
2810                device->vertex_count);
2811     OUT_DWORD (device->vertex_index);
2812
2813     device->vertex_index += device->vertex_count;
2814     device->vertex_count = 0;
2815 }
2816
2817 cairo_status_t
2818 i915_shader_commit (i915_shader_t *shader,
2819                     i915_device_t *device)
2820 {
2821     unsigned floats_per_vertex;
2822     cairo_status_t status;
2823
2824     assert (CAIRO_MUTEX_IS_LOCKED (device->intel.base.base.mutex));
2825
2826     if (! shader->committed) {
2827         device->shader = shader;
2828
2829         i915_shader_combine_mask (shader, device);
2830         i915_shader_combine_source (shader, device);
2831         i915_shader_setup_dst (shader);
2832
2833         shader->add_rectangle = i915_shader_add_rectangle_general;
2834
2835         if ((status = setjmp (shader->unwind)))
2836             return status;
2837
2838         shader->committed = TRUE;
2839     }
2840
2841     if (i915_shader_needs_update (shader, device)) {
2842         if (i915_batch_space (device) < 256) {
2843             status = i915_batch_flush (device);
2844             if (unlikely (status))
2845                 return status;
2846         }
2847
2848         if (device->vertex_count)
2849             i915_vbo_flush (device);
2850
2851         status = i915_shader_check_aperture (shader, device);
2852         if (unlikely (status))
2853             return status;
2854
2855   update_shader:
2856         i915_set_shader_target (device, shader);
2857         i915_set_shader_mode (device, shader);
2858         i915_set_shader_samplers (device, shader);
2859         i915_set_shader_constants (device, shader);
2860         i915_set_shader_program (device, shader);
2861     }
2862
2863     floats_per_vertex = 2 + i915_shader_num_texcoords (shader);
2864     if (device->floats_per_vertex == floats_per_vertex)
2865         return CAIRO_STATUS_SUCCESS;
2866
2867     if (i915_batch_space (device) < 8) {
2868         status = i915_batch_flush (device);
2869         if (unlikely (status))
2870             return status;
2871
2872         goto update_shader;
2873     }
2874
2875     if (device->vertex_count)
2876         i915_vbo_flush (device);
2877
2878     if (device->vbo) {
2879         device->batch_base[device->vbo_max_index] |= device->vertex_index;
2880         OUT_DWORD (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S (1) | 0);
2881         device->vbo_max_index = device->batch.used;
2882         OUT_DWORD ((floats_per_vertex << S1_VERTEX_WIDTH_SHIFT) |
2883                    (floats_per_vertex << S1_VERTEX_PITCH_SHIFT));
2884     }
2885
2886     device->floats_per_vertex = floats_per_vertex;
2887     device->rectangle_size = floats_per_vertex * 3 * sizeof (float);
2888     device->vertex_index =
2889         (device->vbo_used + 4*floats_per_vertex - 1) / (4 * floats_per_vertex);
2890     device->vbo_offset = 4 * device->vertex_index * floats_per_vertex;
2891
2892     return CAIRO_STATUS_SUCCESS;
2893 }