/* i965_render.c — gen4/gen5/gen6 EXA render (composite) acceleration
 * for the xf86-video-intel driver. */
1 /*
2  * Copyright © 2006,2008 Intel Corporation
3  * Copyright © 2007 Red Hat, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Wang Zhenyu <zhenyu.z.wang@intel.com>
26  *    Eric Anholt <eric@anholt.net>
27  *    Carl Worth <cworth@redhat.com>
28  *    Keith Packard <keithp@keithp.com>
29  *
30  */
31
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35
36 #include <assert.h>
37 #include "xf86.h"
38 #include "intel.h"
39 #include "i830_reg.h"
40 #include "i965_reg.h"
41
42 /* bring in brw structs */
43 #include "brw_defines.h"
44 #include "brw_structs.h"
45
46 // refer vol2, 3d rasterization 3.8.1
47
48 /* defined in brw_defines.h */
/* Map of Render PictOp* compositing operators (indexed directly by op)
 * to the source/destination blend factors that implement them.  The
 * dst_alpha/src_alpha flags record whether the factor pair reads the
 * destination or source alpha channel; i965_get_blend_cntl() uses them
 * to substitute factors when a format lacks that alpha channel. */
static const struct blendinfo {
        Bool dst_alpha;         /* factors read destination alpha */
        Bool src_alpha;         /* factors read source alpha */
        uint32_t src_blend;
        uint32_t dst_blend;
} i965_blend_op[] = {
        /* Clear */
        {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO},
        /* Src */
        {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
        /* Dst */
        {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
        /* Over */
        {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
        /* OverReverse */
        {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
        /* In */
        {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
        /* InReverse */
        {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
        /* Out */
        {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
        /* OutReverse */
        {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
        /* Atop */
        {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
        /* AtopReverse */
        {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
        /* Xor */
        {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
        /* Add */
        {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
};
82
/**
 * Highest-valued BLENDFACTOR used in i965_blend_op, plus one.
 *
 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 *
 * Used to size the [src_blend][dst_blend] CC state array in
 * struct gen4_cc_unit_state.
 */
#define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
91
92 /* FIXME: surface format defined in brw_defines.h, shared Sampling engine
93  * 1.7.2
94  */
95 static const struct formatinfo {
96         int fmt;
97         uint32_t card_fmt;
98 } i965_tex_formats[] = {
99         {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM},
100         {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM},
101         {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM},
102         {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM},
103         {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM},
104         {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM},
105         {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM},
106         {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM},
107 #if XORG_VERSION_CURRENT >= 10699900
108         {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM},
109         {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
110         {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM},
111         {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
112 #endif
113         {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM},
114 };
115
116 static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format,
117                                 uint32_t * sblend, uint32_t * dblend)
118 {
119
120         *sblend = i965_blend_op[op].src_blend;
121         *dblend = i965_blend_op[op].dst_blend;
122
123         /* If there's no dst alpha channel, adjust the blend op so that we'll treat
124          * it as always 1.
125          */
126         if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) {
127                 if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
128                         *sblend = BRW_BLENDFACTOR_ONE;
129                 else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
130                         *sblend = BRW_BLENDFACTOR_ZERO;
131         }
132
133         /* If the source alpha is being used, then we should only be in a case where
134          * the source blend factor is 0, and the source blend value is the mask
135          * channels multiplied by the source picture's alpha.
136          */
137         if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format)
138             && i965_blend_op[op].src_alpha) {
139                 if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) {
140                         *dblend = BRW_BLENDFACTOR_SRC_COLOR;
141                 } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) {
142                         *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR;
143                 }
144         }
145
146 }
147
148 static uint32_t i965_get_dest_format(PicturePtr dest_picture)
149 {
150         switch (dest_picture->format) {
151         case PICT_a8r8g8b8:
152         case PICT_x8r8g8b8:
153                 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
154         case PICT_a8b8g8r8:
155         case PICT_x8b8g8r8:
156                 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
157 #if XORG_VERSION_CURRENT >= 10699900
158         case PICT_a2r10g10b10:
159         case PICT_x2r10g10b10:
160                 return BRW_SURFACEFORMAT_B10G10R10A2_UNORM;
161 #endif
162         case PICT_r5g6b5:
163                 return BRW_SURFACEFORMAT_B5G6R5_UNORM;
164         case PICT_x1r5g5b5:
165         case PICT_a1r5g5b5:
166                 return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
167         case PICT_a8:
168                 return BRW_SURFACEFORMAT_A8_UNORM;
169         case PICT_a4r4g4b4:
170         case PICT_x4r4g4b4:
171                 return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
172         default:
173                 return -1;
174         }
175 }
176
177 Bool
178 i965_check_composite(int op,
179                      PicturePtr source_picture,
180                      PicturePtr mask_picture,
181                      PicturePtr dest_picture,
182                      int width, int height)
183 {
184         ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
185
186         /* Check for unsupported compositing operations. */
187         if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
188                 intel_debug_fallback(scrn,
189                                      "Unsupported Composite op 0x%x\n", op);
190                 return FALSE;
191         }
192
193         if (mask_picture && mask_picture->componentAlpha &&
194             PICT_FORMAT_RGB(mask_picture->format)) {
195                 /* Check if it's component alpha that relies on a source alpha and on
196                  * the source value.  We can only get one of those into the single
197                  * source value that we get to blend with.
198                  */
199                 if (i965_blend_op[op].src_alpha &&
200                     (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
201                         intel_debug_fallback(scrn,
202                                              "Component alpha not supported "
203                                              "with source alpha and source "
204                                              "value blending.\n");
205                         return FALSE;
206                 }
207         }
208
209         if (i965_get_dest_format(dest_picture) == -1) {
210                 intel_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n",
211                                      (int)dest_picture->format);
212                 return FALSE;
213         }
214
215         return TRUE;
216 }
217
218 Bool
219 i965_check_composite_texture(ScreenPtr screen, PicturePtr picture)
220 {
221         if (picture->repeatType > RepeatReflect) {
222                 ScrnInfoPtr scrn = xf86Screens[screen->myNum];
223                 intel_debug_fallback(scrn,
224                                      "extended repeat (%d) not supported\n",
225                                      picture->repeatType);
226                 return FALSE;
227         }
228
229         if (picture->filter != PictFilterNearest &&
230             picture->filter != PictFilterBilinear) {
231                 ScrnInfoPtr scrn = xf86Screens[screen->myNum];
232                 intel_debug_fallback(scrn, "Unsupported filter 0x%x\n",
233                                      picture->filter);
234                 return FALSE;
235         }
236
237         if (picture->pDrawable) {
238                 int w, h, i;
239
240                 w = picture->pDrawable->width;
241                 h = picture->pDrawable->height;
242                 if ((w > 8192) || (h > 8192)) {
243                         ScrnInfoPtr scrn = xf86Screens[screen->myNum];
244                         intel_debug_fallback(scrn,
245                                              "Picture w/h too large (%dx%d)\n",
246                                              w, h);
247                         return FALSE;
248                 }
249
250                 for (i = 0;
251                      i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
252                      i++) {
253                         if (i965_tex_formats[i].fmt == picture->format)
254                                 break;
255                 }
256                 if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]))
257                 {
258                         ScrnInfoPtr scrn = xf86Screens[screen->myNum];
259                         intel_debug_fallback(scrn,
260                                              "Unsupported picture format "
261                                              "0x%x\n",
262                                              (int)picture->format);
263                         return FALSE;
264                 }
265
266                 return TRUE;
267         }
268
269         return FALSE;
270 }
271
272
/* Encode a GRF register count as the hardware's "16-register blocks
 * minus one" field value. */
#define BRW_GRF_BLOCKS(nreg)    ((nreg + 15) / 16 - 1)

/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 * Entry sizes are in 512-bit URB rows; stages with zero entries (CS,
 * GS, CLIP) get no URB space. */
#define URB_CS_ENTRY_SIZE     0
#define URB_CS_ENTRIES        0

#define URB_VS_ENTRY_SIZE     1 // each 512-bit row
#define URB_VS_ENTRIES        8 // we need at least 8 entries

#define URB_GS_ENTRY_SIZE     0
#define URB_GS_ENTRIES        0

#define URB_CLIP_ENTRY_SIZE   0
#define URB_CLIP_ENTRIES      0

#define URB_SF_ENTRY_SIZE     2
#define URB_SF_ENTRIES        1

/*
 * this program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver
 */

#define SF_KERNEL_NUM_GRF  16
#define SF_MAX_THREADS     2

/* SF kernel binaries, assembled offline into the .g4b headers below;
 * the _mask variant additionally handles the mask coordinate set. */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"
};

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"
};
308
/* ps kernels: each WM (pixel shader) program is concatenated from
 * pre-assembled .g4b fragments — compute pixel XY, sample the source
 * (affine or projective coordinates), optionally sample the mask and
 * combine with it (component-alpha, src-alpha, or non-CA), then write
 * the result. */
#define PS_KERNEL_NUM_GRF   32
#define PS_MAX_THREADS      48

static const uint32_t ps_kernel_nomask_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_nomask_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};

static const uint32_t ps_kernel_masknoca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
};
386
/* new programs for Ironlake (gen5): same kernel set as gen4 above,
 * assembled from the .g4b.gen5 fragment builds. */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"
};

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
};
469
/* programs for GEN6 (Sandybridge), assembled from .g6b fragments; note
 * these kernels have no separate exa_wm_xy stage, unlike gen4/gen5. */
static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
};
491
492 static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = {
493 #include "exa_wm_src_projective.g6b"
494 #include "exa_wm_src_sample_argb.g6b"
495 #include "exa_wm_mask_projective.g6b"
496 #include "exa_wm_mask_sample_argb.g6b"
497 #include "exa_wm_ca.g4b.gen5"
498 #include "exa_wm_write.g6b"
499 };
500
/* Remaining gen6 kernels: src-alpha component-alpha and non-CA mask
 * variants, affine and projective. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
};
536
/* Sampler filter modes; used together with sampler_state_extend_t to
 * index the cached sampler/WM state BO arrays in gen4_render_state. */
typedef enum {
        SAMPLER_STATE_FILTER_NEAREST,
        SAMPLER_STATE_FILTER_BILINEAR,
        FILTER_COUNT            /* array-sizing sentinel */
} sampler_state_filter_t;

/* Render repeat modes mapped onto sampler coordinate wrap modes. */
typedef enum {
        SAMPLER_STATE_EXTEND_NONE,
        SAMPLER_STATE_EXTEND_REPEAT,
        SAMPLER_STATE_EXTEND_PAD,
        SAMPLER_STATE_EXTEND_REFLECT,
        EXTEND_COUNT            /* array-sizing sentinel */
} sampler_state_extend_t;

/* Identifies which pre-built WM kernel a composite needs, by mask
 * presence, component-alpha mode, and coordinate type. */
typedef enum {
        WM_KERNEL_NOMASK_AFFINE,
        WM_KERNEL_NOMASK_PROJECTIVE,
        WM_KERNEL_MASKCA_AFFINE,
        WM_KERNEL_MASKCA_PROJECTIVE,
        WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
        WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
        WM_KERNEL_MASKNOCA_AFFINE,
        WM_KERNEL_MASKNOCA_PROJECTIVE,
        KERNEL_COUNT            /* array-sizing sentinel */
} wm_kernel_t;
562
/* Builds a wm_kernel_info table entry at the slot named by its
 * wm_kernel_t enumerator (designated initializer). */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
struct wm_kernel_info {
        const void *data;       /* kernel binary (static array above) */
        unsigned int size;      /* binary size in bytes */
        Bool has_mask;          /* kernel samples a mask picture */
};

static const struct wm_kernel_info wm_kernels_gen4[] = {
        KERNEL(WM_KERNEL_NOMASK_AFFINE,
               ps_kernel_nomask_affine_static, FALSE),
        KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
               ps_kernel_nomask_projective_static, FALSE),
        KERNEL(WM_KERNEL_MASKCA_AFFINE,
               ps_kernel_maskca_affine_static, TRUE),
        KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
               ps_kernel_maskca_projective_static, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
               ps_kernel_maskca_srcalpha_affine_static, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
               ps_kernel_maskca_srcalpha_projective_static, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
               ps_kernel_masknoca_affine_static, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
               ps_kernel_masknoca_projective_static, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen5[] = {
        KERNEL(WM_KERNEL_NOMASK_AFFINE,
               ps_kernel_nomask_affine_static_gen5, FALSE),
        KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
               ps_kernel_nomask_projective_static_gen5, FALSE),
        KERNEL(WM_KERNEL_MASKCA_AFFINE,
               ps_kernel_maskca_affine_static_gen5, TRUE),
        KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
               ps_kernel_maskca_projective_static_gen5, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
               ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
               ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
               ps_kernel_masknoca_affine_static_gen5, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
               ps_kernel_masknoca_projective_static_gen5, TRUE),
};

static const struct wm_kernel_info wm_kernels_gen6[] = {
        KERNEL(WM_KERNEL_NOMASK_AFFINE,
               ps_kernel_nomask_affine_static_gen6, FALSE),
        KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
               ps_kernel_nomask_projective_static_gen6, FALSE),
        KERNEL(WM_KERNEL_MASKCA_AFFINE,
               ps_kernel_maskca_affine_static_gen6, TRUE),
        KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
               ps_kernel_maskca_projective_static_gen6, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
               ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
        KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
               ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
               ps_kernel_masknoca_affine_static_gen6, TRUE),
        KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
               ps_kernel_masknoca_projective_static_gen6, TRUE),
};

#undef KERNEL
629
/* CC unit state padded to a 64-byte pitch, so consecutive entries of
 * the cc_state array below start 64 bytes apart. */
typedef struct _brw_cc_unit_state_padded {
        struct brw_cc_unit_state state;
        char pad[64 - sizeof(struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;

/* Surface state padded to a 32-byte pitch. */
typedef struct brw_surface_state_padded {
        struct brw_surface_state state;
        char pad[32 - sizeof(struct brw_surface_state)];
} brw_surface_state_padded;

struct gen4_cc_unit_state {
        /* Index by [src_blend][dst_blend] */
        brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];
};

/* Parameters of the composite operation cached in
 * gen4_render_state.composite_op. */
typedef struct gen4_composite_op {
        int op;                         /* Render PictOp */
        sampler_state_filter_t src_filter;
        sampler_state_filter_t mask_filter;
        sampler_state_extend_t src_extend;
        sampler_state_extend_t mask_extend;
        Bool is_affine;
        wm_kernel_t wm_kernel;
        int vertex_id;
} gen4_composite_op;
655
/** Private data for gen4 render accel implementation.
 *
 * Caches pre-built hardware state buffer objects, keyed by every
 * kernel/filter/extend combination they were specialized for.
 */
struct gen4_render_state {
        drm_intel_bo *vs_state_bo;
        drm_intel_bo *sf_state_bo;
        drm_intel_bo *sf_mask_state_bo;
        drm_intel_bo *cc_state_bo;
        /* WM state per [kernel][src filter][src extend][mask filter][mask extend] */
        drm_intel_bo *wm_state_bo[KERNEL_COUNT]
            [FILTER_COUNT] [EXTEND_COUNT]
            [FILTER_COUNT] [EXTEND_COUNT];
        drm_intel_bo *wm_kernel_bo[KERNEL_COUNT];

        drm_intel_bo *cc_vp_bo;
        drm_intel_bo *gen6_blend_bo;
        drm_intel_bo *gen6_depth_stencil_bo;
        /* Samplers per [src filter][src extend][mask filter][mask extend] */
        drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT]
            [EXTEND_COUNT]
            [FILTER_COUNT]
            [EXTEND_COUNT];
        gen4_composite_op composite_op;
};

/* Forward declarations; definitions appear later in this file. */
static void gen6_emit_composite_state(struct intel_screen_private *intel);
static void gen6_render_state_init(ScrnInfoPtr scrn);
679
680 /**
681  * Sets up the SF state pointing at an SF kernel.
682  *
683  * The SF kernel does coord interp: for each attribute,
684  * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
685  * back to SF which then hands pixels off to WM.
686  */
static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel,
                                          drm_intel_bo * kernel_bo)
{
        struct brw_sf_unit_state *sf_state;
        drm_intel_bo *sf_state_bo;

        /* Allocate and map a fresh BO; the caller owns the returned
         * reference.  NOTE(review): alloc/map results are not checked,
         * matching the rest of this file. */
        sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state",
                                         sizeof(*sf_state), 4096);
        drm_intel_bo_map(sf_state_bo, TRUE);
        sf_state = sf_state_bo->virtual;

        memset(sf_state, 0, sizeof(*sf_state));
        sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
        /* The kernel start pointer is stored in 64-byte units (>> 6);
         * the reloc's delta carries grf_reg_count in the low bits of the
         * same thread0 dword. */
        sf_state->thread0.kernel_start_pointer =
            intel_emit_reloc(sf_state_bo,
                             offsetof(struct brw_sf_unit_state, thread0),
                             kernel_bo, sf_state->thread0.grf_reg_count << 1,
                             I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
        sf_state->sf1.single_program_flow = 1;
        sf_state->sf1.binding_table_entry_count = 0;
        sf_state->sf1.thread_priority = 0;
        sf_state->sf1.floating_point_mode = 0;  /* Mesa does this */
        sf_state->sf1.illegal_op_exception_enable = 1;
        sf_state->sf1.mask_stack_exception_enable = 1;
        sf_state->sf1.sw_exception_enable = 1;
        sf_state->thread2.per_thread_scratch_space = 0;
        /* scratch space is not used in our kernel */
        sf_state->thread2.scratch_space_base_pointer = 0;
        sf_state->thread3.const_urb_entry_read_length = 0;      /* no const URBs */
        sf_state->thread3.const_urb_entry_read_offset = 0;      /* no const URBs */
        sf_state->thread3.urb_entry_read_length = 1;    /* 1 URB per vertex */
        /* don't smash vertex header, read start from dw8 */
        sf_state->thread3.urb_entry_read_offset = 1;
        sf_state->thread3.dispatch_grf_start_reg = 3;
        sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
        sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
        sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
        sf_state->sf5.viewport_transform = FALSE;       /* skip viewport */
        sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
        sf_state->sf6.scissor = 0;
        sf_state->sf7.trifan_pv = 2;
        /* Pixel center bias: 0x8 == 0.5 in the hardware's fixed-point
         * encoding here — NOTE(review): confirm against the SF PRM. */
        sf_state->sf6.dest_org_vbias = 0x8;
        sf_state->sf6.dest_org_hbias = 0x8;

        drm_intel_bo_unmap(sf_state_bo);

        return sf_state_bo;
}
735
736 static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
737 {
738         struct brw_sampler_legacy_border_color sampler_border_color;
739
740         /* Set up the sampler border color (always transparent black) */
741         memset(&sampler_border_color, 0, sizeof(sampler_border_color));
742         sampler_border_color.color[0] = 0;      /* R */
743         sampler_border_color.color[1] = 0;      /* G */
744         sampler_border_color.color[2] = 0;      /* B */
745         sampler_border_color.color[3] = 0;      /* A */
746
747         return intel_bo_alloc_for_data(intel,
748                                        &sampler_border_color,
749                                        sizeof(sampler_border_color),
750                                        "gen4 render sampler border color");
751 }
752
753 static void
754 sampler_state_init(drm_intel_bo * sampler_state_bo,
755                    struct brw_sampler_state *sampler_state,
756                    sampler_state_filter_t filter,
757                    sampler_state_extend_t extend,
758                    drm_intel_bo * border_color_bo)
759 {
760         uint32_t sampler_state_offset;
761
762         sampler_state_offset = (char *)sampler_state -
763             (char *)sampler_state_bo->virtual;
764
765         /* PS kernel use this sampler */
766         memset(sampler_state, 0, sizeof(*sampler_state));
767
768         sampler_state->ss0.lod_preclamp = 1;    /* GL mode */
769
770         /* We use the legacy mode to get the semantics specified by
771          * the Render extension. */
772         sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;
773
774         switch (filter) {
775         default:
776         case SAMPLER_STATE_FILTER_NEAREST:
777                 sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
778                 sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
779                 break;
780         case SAMPLER_STATE_FILTER_BILINEAR:
781                 sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
782                 sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
783                 break;
784         }
785
786         switch (extend) {
787         default:
788         case SAMPLER_STATE_EXTEND_NONE:
789                 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
790                 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
791                 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
792                 break;
793         case SAMPLER_STATE_EXTEND_REPEAT:
794                 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
795                 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
796                 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
797                 break;
798         case SAMPLER_STATE_EXTEND_PAD:
799                 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
800                 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
801                 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
802                 break;
803         case SAMPLER_STATE_EXTEND_REFLECT:
804                 sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
805                 sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
806                 sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
807                 break;
808         }
809
810         sampler_state->ss2.border_color_pointer =
811             intel_emit_reloc(sampler_state_bo, sampler_state_offset +
812                              offsetof(struct brw_sampler_state, ss2),
813                              border_color_bo, 0,
814                              I915_GEM_DOMAIN_SAMPLER, 0) >> 5;
815
816         sampler_state->ss3.chroma_key_enable = 0;       /* disable chromakey */
817 }
818
819 static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
820                                                sampler_state_filter_t src_filter,
821                                                sampler_state_extend_t src_extend,
822                                                sampler_state_filter_t mask_filter,
823                                                sampler_state_extend_t mask_extend,
824                                                drm_intel_bo * border_color_bo)
825 {
826         drm_intel_bo *sampler_state_bo;
827         struct brw_sampler_state *sampler_state;
828
829         sampler_state_bo =
830             drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state",
831                                sizeof(struct brw_sampler_state) * 2, 4096);
832         drm_intel_bo_map(sampler_state_bo, TRUE);
833         sampler_state = sampler_state_bo->virtual;
834
835         sampler_state_init(sampler_state_bo,
836                            &sampler_state[0],
837                            src_filter, src_extend, border_color_bo);
838         sampler_state_init(sampler_state_bo,
839                            &sampler_state[1],
840                            mask_filter, mask_extend, border_color_bo);
841
842         drm_intel_bo_unmap(sampler_state_bo);
843
844         return sampler_state_bo;
845 }
846
847 static void
848 cc_state_init(drm_intel_bo * cc_state_bo,
849               uint32_t cc_state_offset,
850               int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo)
851 {
852         struct brw_cc_unit_state *cc_state;
853
854         cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +
855                                                 cc_state_offset);
856
857         memset(cc_state, 0, sizeof(*cc_state));
858         cc_state->cc0.stencil_enable = 0;       /* disable stencil */
859         cc_state->cc2.depth_test = 0;   /* disable depth test */
860         cc_state->cc2.logicop_enable = 0;       /* disable logic op */
861         cc_state->cc3.ia_blend_enable = 0;      /* blend alpha same as colors */
862         cc_state->cc3.blend_enable = 1; /* enable color blend */
863         cc_state->cc3.alpha_test = 0;   /* disable alpha test */
864
865         cc_state->cc4.cc_viewport_state_offset =
866             intel_emit_reloc(cc_state_bo, cc_state_offset +
867                              offsetof(struct brw_cc_unit_state, cc4),
868                              cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
869
870         cc_state->cc5.dither_enable = 0;        /* disable dither */
871         cc_state->cc5.logicop_func = 0xc;       /* COPY */
872         cc_state->cc5.statistics_enable = 1;
873         cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
874
875         /* Fill in alpha blend factors same as color, for the future. */
876         cc_state->cc5.ia_src_blend_factor = src_blend;
877         cc_state->cc5.ia_dest_blend_factor = dst_blend;
878
879         cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
880         cc_state->cc6.clamp_post_alpha_blend = 1;
881         cc_state->cc6.clamp_pre_alpha_blend = 1;
882         cc_state->cc6.clamp_range = 0;  /* clamp range [0,1] */
883
884         cc_state->cc6.src_blend_factor = src_blend;
885         cc_state->cc6.dest_blend_factor = dst_blend;
886 }
887
/* Build a WM (pixel shader) unit state bo for a composite operation.
 *
 * has_mask selects between the one-texture and two-texture URB layouts,
 * kernel_bo holds the pixel shader program and sampler_bo the sampler
 * state pair created by gen4_create_sampler_state().  Returns a new bo
 * owned by the caller.
 */
static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel,
                                          Bool has_mask,
                                          drm_intel_bo * kernel_bo,
                                          drm_intel_bo * sampler_bo)
{
	struct brw_wm_unit_state *state;
	drm_intel_bo *wm_state_bo;

	wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state",
					 sizeof(*state), 4096);
	drm_intel_bo_map(wm_state_bo, TRUE);
	state = wm_state_bo->virtual;

	memset(state, 0, sizeof(*state));
	state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
	/* grf_reg_count shares the thread0 dword with the kernel pointer,
	 * so it is folded into the relocation delta (<< 1) to survive the
	 * reloc overwrite; grf_reg_count must be set before this line.
	 */
	state->thread0.kernel_start_pointer =
	    intel_emit_reloc(wm_state_bo,
			     offsetof(struct brw_wm_unit_state, thread0),
			     kernel_bo, state->thread0.grf_reg_count << 1,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;

	state->thread1.single_program_flow = 0;

	/* scratch space is not used in our kernel */
	state->thread2.scratch_space_base_pointer = 0;
	state->thread2.per_thread_scratch_space = 0;

	state->thread3.const_urb_entry_read_length = 0;
	state->thread3.const_urb_entry_read_offset = 0;

	state->thread3.urb_entry_read_offset = 0;
	/* wm kernel use urb from 3, see wm_program in compiler module */
	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */

	if (IS_GEN5(intel))
		state->wm4.sampler_count = 0;	/* hardware requirement */
	else
		state->wm4.sampler_count = 1;	/* 1-4 samplers used */

	/* As above, sampler_count rides along in the reloc delta (<< 2)
	 * because it shares the wm4 dword with the sampler state pointer.
	 */
	state->wm4.sampler_state_pointer =
	    intel_emit_reloc(wm_state_bo,
			     offsetof(struct brw_wm_unit_state, wm4),
			     sampler_bo,
			     state->wm4.sampler_count << 2,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
	state->wm5.max_threads = PS_MAX_THREADS - 1;
	state->wm5.transposed_urb_read = 0;
	state->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	state->wm5.enable_16_pix = 1;
	state->wm5.enable_8_pix = 0;
	state->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	if (has_mask) {
		state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
		state->thread3.urb_entry_read_length = 4;
	} else {
		state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
		state->thread3.urb_entry_read_length = 2;
	}

	/* binding table entry count is only used for prefetching, and it has to
	 * be set 0 for Ironlake
	 */
	if (IS_GEN5(intel))
		state->thread1.binding_table_entry_count = 0;

	drm_intel_bo_unmap(wm_state_bo);

	return wm_state_bo;
}
962
963 static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel)
964 {
965         drm_intel_bo *bo;
966         struct brw_cc_viewport vp;
967
968         vp.min_depth = -1.e35;
969         vp.max_depth = 1.e35;
970
971         bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state",
972                                 sizeof(vp), 4096);
973         drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp);
974
975         return bo;
976 }
977
978 static drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel)
979 {
980         struct brw_vs_unit_state vs_state;
981         memset(&vs_state, 0, sizeof(vs_state));
982
983         /* Set up the vertex shader to be disabled (passthrough) */
984         if (IS_GEN5(intel))
985                 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;  /* hardware requirement */
986         else
987                 vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
988         vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
989         vs_state.vs6.vs_enable = 0;
990         vs_state.vs6.vert_cache_disable = 1;
991
992         return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state),
993                                        "gen4 render VS state");
994 }
995
996 /**
997  * Set up all combinations of cc state: each blendfactor for source and
998  * dest.
999  */
1000 static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel)
1001 {
1002         drm_intel_bo *cc_state_bo, *cc_vp_bo;
1003         int i, j;
1004
1005         cc_vp_bo = gen4_create_cc_viewport(intel);
1006
1007         cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state",
1008                                          sizeof(struct gen4_cc_unit_state),
1009                                          4096);
1010         drm_intel_bo_map(cc_state_bo, TRUE);
1011         for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
1012                 for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
1013                         cc_state_init(cc_state_bo,
1014                                       offsetof(struct gen4_cc_unit_state,
1015                                                cc_state[i][j].state),
1016                                       i, j, cc_vp_bo);
1017                 }
1018         }
1019         drm_intel_bo_unmap(cc_state_bo);
1020
1021         drm_intel_bo_unreference(cc_vp_bo);
1022
1023         return cc_state_bo;
1024 }
1025
1026 static uint32_t i965_get_card_format(PicturePtr picture)
1027 {
1028         int i;
1029
1030         for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
1031              i++) {
1032                 if (i965_tex_formats[i].fmt == picture->format)
1033                         break;
1034         }
1035         assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]));
1036
1037         return i965_tex_formats[i].card_fmt;
1038 }
1039
1040 static sampler_state_filter_t sampler_state_filter_from_picture(int filter)
1041 {
1042         switch (filter) {
1043         case PictFilterNearest:
1044                 return SAMPLER_STATE_FILTER_NEAREST;
1045         case PictFilterBilinear:
1046                 return SAMPLER_STATE_FILTER_BILINEAR;
1047         default:
1048                 return -1;
1049         }
1050 }
1051
1052 static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
1053 {
1054         switch (repeat_type) {
1055         case RepeatNone:
1056                 return SAMPLER_STATE_EXTEND_NONE;
1057         case RepeatNormal:
1058                 return SAMPLER_STATE_EXTEND_REPEAT;
1059         case RepeatPad:
1060                 return SAMPLER_STATE_EXTEND_PAD;
1061         case RepeatReflect:
1062                 return SAMPLER_STATE_EXTEND_REFLECT;
1063         default:
1064                 return -1;
1065         }
1066 }
1067
1068 /**
1069  * Sets up the common fields for a surface state buffer for the given
1070  * picture in the given surface state buffer.
1071  */
1072 static int
1073 i965_set_picture_surface_state(intel_screen_private *intel,
1074                                PicturePtr picture, PixmapPtr pixmap,
1075                                Bool is_dst)
1076 {
1077         struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
1078         struct brw_surface_state *ss;
1079         uint32_t write_domain, read_domains;
1080         int offset;
1081
1082         if (is_dst) {
1083                 write_domain = I915_GEM_DOMAIN_RENDER;
1084                 read_domains = I915_GEM_DOMAIN_RENDER;
1085         } else {
1086                 write_domain = 0;
1087                 read_domains = I915_GEM_DOMAIN_SAMPLER;
1088         }
1089         intel_batch_mark_pixmap_domains(intel, priv,
1090                                         read_domains, write_domain);
1091         if (is_dst) {
1092                 if (priv->dst_bound)
1093                         return priv->dst_bound;
1094         } else {
1095                 if (priv->src_bound)
1096                         return priv->src_bound;
1097         }
1098
1099         ss = (struct brw_surface_state *)
1100                 (intel->surface_data + intel->surface_used);
1101
1102         memset(ss, 0, sizeof(*ss));
1103         ss->ss0.surface_type = BRW_SURFACE_2D;
1104         if (is_dst)
1105                 ss->ss0.surface_format = i965_get_dest_format(picture);
1106         else
1107                 ss->ss0.surface_format = i965_get_card_format(picture);
1108
1109         ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
1110         ss->ss0.color_blend = 1;
1111         ss->ss1.base_addr = priv->bo->offset;
1112
1113         ss->ss2.height = pixmap->drawable.height - 1;
1114         ss->ss2.width = pixmap->drawable.width - 1;
1115         ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
1116         ss->ss3.tile_walk = 0;  /* Tiled X */
1117         ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;
1118
1119         dri_bo_emit_reloc(intel->surface_bo,
1120                           read_domains, write_domain,
1121                           0,
1122                           intel->surface_used +
1123                           offsetof(struct brw_surface_state, ss1),
1124                           priv->bo);
1125
1126         offset = intel->surface_used;
1127         intel->surface_used += sizeof(struct brw_surface_state_padded);
1128
1129         if (is_dst)
1130                 priv->dst_bound = offset;
1131         else
1132                 priv->src_bound = offset;
1133
1134         return offset;
1135 }
1136
/* Emit the 3DSTATE_VERTEX_ELEMENTS packet describing the vertex layout
 * for the current composite op: position (x,y), source coords
 * (u0,v0[,w0]) and, when a mask is present, mask coords (u1,v1[,w1]).
 * Affine ops use 2-component coords with w forced to 1.0; projective
 * ops carry a real w.  The layout is identified by id and cached in
 * composite_op->vertex_id so an unchanged layout emits nothing.
 */
static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	Bool has_mask = intel->render_mask != NULL;
	Bool is_affine = composite_op->is_affine;
	/*
	 * number of extra parameters per vertex
	 */
	int nelem = has_mask ? 2 : 1;
	/*
	 * size of extra parameters:
	 *  3 for homogenous (xyzw)
	 *  2 for cartesian (xy)
	 */
	int selem = is_affine ? 2 : 3;
	uint32_t w_component;
	uint32_t src_format;
	int id;

	/* id encodes (has_mask, is_affine); it doubles as the vertex
	 * buffer index used for each element below.
	 */
	id = has_mask << 1 | is_affine;

	if (composite_op->vertex_id == id)
		return;

	composite_op->vertex_id = id;

	if (is_affine) {
		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
	} else {
		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_SRC;
	}

	if (IS_GEN5(intel)) {
		/*
		 * The reason to add this extra vertex element in the header is that
		 * Ironlake has different vertex header definition and origin method to
		 * set destination element offset doesn't exist anymore, which means
		 * hardware requires a predefined vertex element layout.
		 *
		 * haihao proposed this approach to fill the first vertex element, so
		 * origin layout for Gen4 doesn't need to change, and origin shader
		 * programs behavior is also kept.
		 *
		 * I think this is not bad. - zhenyu
		 */

		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (2 + nelem)) - 1));
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
			  (0 << VE0_OFFSET_SHIFT));

		OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
	} else {
		/* Set up our vertex elements, sourced from the single vertex buffer.
		 * that will be set up later.
		 */
		OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (1 + nelem)) - 1));
	}

	/* x,y */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
	/* u0, v0, w0 */
	OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
		  (src_format << VE0_FORMAT_SHIFT) |
		  ((2 * 4) << VE0_OFFSET_SHIFT));	/* offset vb in bytes */

	if (IS_GEN5(intel))
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	else
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
			  ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	/* u1, v1, w1 */
	if (has_mask) {
		OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
			  (src_format << VE0_FORMAT_SHIFT) |
			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT));	/* vb offset in bytes */

		if (IS_GEN5(intel))
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
		else
			OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
				  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
				  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
				  ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));	/* VUE offset in dwords */
	}
}
1255
/* Emit the full 3D pipeline setup for the current composite operation:
 * one-time invariants, state base addresses, blend selection, binding
 * table and drawing rectangle, pipelined unit-state pointers, URB
 * fences and the vertex element layout.  Must be re-run (via
 * needs_render_state_emit) whenever the operation's state changes.
 */
static void i965_emit_composite_state(struct intel_screen_private *intel)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;
	int op = composite_op->op;
	PicturePtr mask_picture = intel->render_mask_picture;
	PicturePtr dest_picture = intel->render_dest_picture;
	PixmapPtr mask = intel->render_mask;
	PixmapPtr dest = intel->render_dest;
	sampler_state_filter_t src_filter = composite_op->src_filter;
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
	sampler_state_extend_t src_extend = composite_op->src_extend;
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
	uint32_t src_blend, dst_blend;

	intel->needs_render_state_emit = FALSE;

	/* Begin the long sequence of commands needed to set up the 3D
	 * rendering pipe
	 */

	if (intel->needs_3d_invariant) {
		if (IS_GEN5(intel)) {
			/* Ironlake errata workaround: Before disabling the clipper,
			 * you have to MI_FLUSH to get the pipeline idle.
			 */
			OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
		}

		/* Match Mesa driver setup */
		if (INTEL_INFO(intel)->gen >= 45)
			OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
		else
			OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);

		/* Set system instruction pointer */
		OUT_BATCH(BRW_STATE_SIP | 0);
		OUT_BATCH(0);

		intel->needs_3d_invariant = FALSE;
	}

	if (intel->surface_reloc == 0) {
		/* Zero out the two base address registers so all offsets are
		 * absolute.
		 */
		if (IS_GEN5(intel)) {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			/* Remember the dword index of the surface base address
			 * so i965_surface_flush() can patch a relocation to
			 * surface_bo over it later.
			 */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Instruction base address */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* Instruction max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		} else {
			OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* Generate state base address */
			intel->surface_reloc = intel->batch_used;
			intel_batch_emit_dword(intel,
					       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);	/* media base addr, don't care */
			/* general state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
			/* media object state max addr, disabled */
			OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
		}
	}

	/* Select the pre-baked CC state matching this Render op. */
	i965_get_blend_cntl(op, mask_picture, dest_picture->format,
			    &src_blend, &dst_blend);

	/* Binding table pointers */
	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
	OUT_BATCH(0);	/* vs */
	OUT_BATCH(0);	/* gs */
	OUT_BATCH(0);	/* clip */
	OUT_BATCH(0);	/* sf */
	/* Only the PS uses the binding table */
	OUT_BATCH(intel->surface_table);

	/* The drawing rectangle clipping is always on.  Set it to values that
	 * shouldn't do any clipping.
	 */
	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
	OUT_BATCH(0x00000000);	/* ymin, xmin */
	OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
		  DRAW_XMAX(dest->drawable.width - 1));	/* ymax, xmax */
	OUT_BATCH(0x00000000);	/* yorigin, xorigin */

	/* skip the depth buffer */
	/* skip the polygon stipple */
	/* skip the polygon stipple offset */
	/* skip the line stipple */

	/* Set the pointers to the 3d pipeline state */
	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_RELOC(render_state->vs_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	OUT_BATCH(BRW_GS_DISABLE);	/* disable GS, resulting in passthrough */
	OUT_BATCH(BRW_CLIP_DISABLE);	/* disable CLIP, resulting in passthrough */
	if (mask) {
		OUT_RELOC(render_state->sf_mask_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	} else {
		OUT_RELOC(render_state->sf_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
	}

	/* WM state is pre-built per (kernel, filter, extend) combination. */
	OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
		  [src_filter][src_extend]
		  [mask_filter][mask_extend],
		  I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

	OUT_RELOC(render_state->cc_state_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  offsetof(struct gen4_cc_unit_state,
			   cc_state[src_blend][dst_blend]));

	{
		int urb_vs_start, urb_vs_size;
		int urb_gs_start, urb_gs_size;
		int urb_clip_start, urb_clip_size;
		int urb_sf_start, urb_sf_size;
		int urb_cs_start, urb_cs_size;

		/* Lay out the URB partitions back to back: VS, GS, CLIP,
		 * SF, CS.
		 */
		urb_vs_start = 0;
		urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
		urb_gs_start = urb_vs_start + urb_vs_size;
		urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
		urb_clip_start = urb_gs_start + urb_gs_size;
		urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
		urb_sf_start = urb_clip_start + urb_clip_size;
		urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
		urb_cs_start = urb_sf_start + urb_sf_size;
		urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

		/* Erratum (Vol 1a, p32):
		 *   URB_FENCE must not cross a cache-line (64 bytes).
		 */
		if ((intel->batch_used & 15) > (16 - 3)) {
			int cnt = 16 - (intel->batch_used & 15);
			while (cnt--)
				OUT_BATCH(MI_NOOP);
		}

		OUT_BATCH(BRW_URB_FENCE |
			  UF0_CS_REALLOC |
			  UF0_SF_REALLOC |
			  UF0_CLIP_REALLOC |
			  UF0_GS_REALLOC |
			  UF0_VS_REALLOC |
			  1);
		OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
			  ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
			  ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
		OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
			  ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

		/* Constant buffer state */
		OUT_BATCH(BRW_CS_URB_STATE | 0);
		OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
			  (URB_CS_ENTRIES << 0));
	}

	gen4_composite_vertex_elements(intel);
}
1428
1429 /**
1430  * Returns whether the current set of composite state plus vertex buffer is
1431  * expected to fit in the aperture.
1432  */
1433 static Bool i965_composite_check_aperture(intel_screen_private *intel)
1434 {
1435         struct gen4_render_state *render_state = intel->gen4_render_state;
1436         gen4_composite_op *composite_op = &render_state->composite_op;
1437         drm_intel_bo *bo_table[] = {
1438                 intel->batch_bo,
1439                 intel->vertex_bo,
1440                 intel->surface_bo,
1441                 render_state->vs_state_bo,
1442                 render_state->sf_state_bo,
1443                 render_state->sf_mask_state_bo,
1444                 render_state->wm_state_bo[composite_op->wm_kernel]
1445                     [composite_op->src_filter]
1446                     [composite_op->src_extend]
1447                     [composite_op->mask_filter]
1448                     [composite_op->mask_extend],
1449                 render_state->cc_state_bo,
1450         };
1451         drm_intel_bo *gen6_bo_table[] = {
1452                 intel->batch_bo,
1453                 intel->vertex_bo,
1454                 intel->surface_bo,
1455                 render_state->wm_kernel_bo[composite_op->wm_kernel],
1456                 render_state->ps_sampler_state_bo[composite_op->src_filter]
1457                     [composite_op->src_extend]
1458                     [composite_op->mask_filter]
1459                     [composite_op->mask_extend],
1460                 render_state->cc_vp_bo,
1461                 render_state->cc_state_bo,
1462                 render_state->gen6_blend_bo,
1463                 render_state->gen6_depth_stencil_bo,
1464         };
1465
1466         if (INTEL_INFO(intel)->gen >= 60)
1467                 return drm_intel_bufmgr_check_aperture_space(gen6_bo_table,
1468                                                         ARRAY_SIZE(gen6_bo_table)) == 0;
1469         else
1470                 return drm_intel_bufmgr_check_aperture_space(bo_table,
1471                                                         ARRAY_SIZE(bo_table)) == 0;
1472 }
1473
/* Upload the accumulated surface states, patch the batch's pending
 * relocation to the surface bo, and start a fresh surface bo for the
 * next batch.  Also invalidates the per-pixmap binding caches filled
 * by i965_set_picture_surface_state().
 */
static void i965_surface_flush(struct intel_screen_private *intel)
{
	struct intel_pixmap *priv;

	/* Upload the CPU-side staging buffer into the surface bo before
	 * anything references it.
	 */
	drm_intel_bo_subdata(intel->surface_bo,
			     0, intel->surface_used,
			     intel->surface_data);
	intel->surface_used = 0;

	/* surface_reloc was recorded by i965_emit_composite_state();
	 * patch the STATE_BASE_ADDRESS dword (index * 4 bytes) to point
	 * at the surface bo.
	 */
	assert (intel->surface_reloc != 0);
	drm_intel_bo_emit_reloc(intel->batch_bo,
				intel->surface_reloc * 4,
				intel->surface_bo, BASE_ADDRESS_MODIFY,
				I915_GEM_DOMAIN_INSTRUCTION, 0);
	intel->surface_reloc = 0;

	/* The old bo stays referenced by the batch; replace ours with a
	 * fresh one for the next round of surface states.
	 */
	drm_intel_bo_unreference(intel->surface_bo);
	intel->surface_bo =
		drm_intel_bo_alloc(intel->bufmgr, "surface data",
				   sizeof(intel->surface_data), 4096);

	/* Cached binding offsets refer to the retired bo; clear them. */
	list_foreach_entry(priv, struct intel_pixmap, &intel->batch_pixmaps, batch)
		priv->dst_bound = priv->src_bound = 0;
}
1498
1499 static void
1500 i965_emit_composite_primitive_identity_source(intel_screen_private *intel,
1501                                               int srcX, int srcY,
1502                                               int maskX, int maskY,
1503                                               int dstX, int dstY,
1504                                               int w, int h)
1505 {
1506         OUT_VERTEX(dstX + w);
1507         OUT_VERTEX(dstY + h);
1508         OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1509         OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1510
1511         OUT_VERTEX(dstX);
1512         OUT_VERTEX(dstY + h);
1513         OUT_VERTEX(srcX * intel->scale_units[0][0]);
1514         OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1515
1516         OUT_VERTEX(dstX);
1517         OUT_VERTEX(dstY);
1518         OUT_VERTEX(srcX * intel->scale_units[0][0]);
1519         OUT_VERTEX(srcY * intel->scale_units[0][1]);
1520 }
1521
1522 static void
1523 i965_emit_composite_primitive_affine_source(intel_screen_private *intel,
1524                                             int srcX, int srcY,
1525                                             int maskX, int maskY,
1526                                             int dstX, int dstY,
1527                                             int w, int h)
1528 {
1529         float src_x[3], src_y[3];
1530
1531         if (!intel_get_transformed_coordinates(srcX, srcY,
1532                                               intel->transform[0],
1533                                               &src_x[0],
1534                                               &src_y[0]))
1535                 return;
1536
1537         if (!intel_get_transformed_coordinates(srcX, srcY + h,
1538                                               intel->transform[0],
1539                                               &src_x[1],
1540                                               &src_y[1]))
1541                 return;
1542
1543         if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
1544                                               intel->transform[0],
1545                                               &src_x[2],
1546                                               &src_y[2]))
1547                 return;
1548
1549         OUT_VERTEX(dstX + w);
1550         OUT_VERTEX(dstY + h);
1551         OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
1552         OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
1553
1554         OUT_VERTEX(dstX);
1555         OUT_VERTEX(dstY + h);
1556         OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
1557         OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
1558
1559         OUT_VERTEX(dstX);
1560         OUT_VERTEX(dstY);
1561         OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
1562         OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
1563 }
1564
1565 static void
1566 i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
1567                                                    int srcX, int srcY,
1568                                                    int maskX, int maskY,
1569                                                    int dstX, int dstY,
1570                                                    int w, int h)
1571 {
1572         OUT_VERTEX(dstX + w);
1573         OUT_VERTEX(dstY + h);
1574         OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1575         OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1576         OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
1577         OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
1578
1579         OUT_VERTEX(dstX);
1580         OUT_VERTEX(dstY + h);
1581         OUT_VERTEX(srcX * intel->scale_units[0][0]);
1582         OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1583         OUT_VERTEX(maskX * intel->scale_units[1][0]);
1584         OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
1585
1586         OUT_VERTEX(dstX);
1587         OUT_VERTEX(dstY);
1588         OUT_VERTEX(srcX * intel->scale_units[0][0]);
1589         OUT_VERTEX(srcY * intel->scale_units[0][1]);
1590         OUT_VERTEX(maskX * intel->scale_units[1][0]);
1591         OUT_VERTEX(maskY * intel->scale_units[1][1]);
1592 }
1593
/* Generic per-rectangle vertex emission for composite.
 *
 * Handles every combination the specialized emitters above do not:
 * transformed source and/or mask, affine or projective coordinates.
 * For the projective (!is_affine) case a third texture component (w)
 * is emitted per coordinate pair.  If any corner fails to transform,
 * the rectangle is silently skipped.
 */
static void
i965_emit_composite_primitive(intel_screen_private *intel,
                              int srcX, int srcY,
                              int maskX, int maskY,
                              int dstX, int dstY,
                              int w, int h)
{
	float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
	Bool is_affine = intel->gen4_render_state->composite_op.is_affine;

	/* Transform the three rectangle corners (top-left, bottom-left,
	 * bottom-right) through the source transform, 2D for affine,
	 * homogeneous (x, y, w) for projective. */
	if (! intel->render_source_is_solid) {
		if (is_affine) {
			if (!intel_get_transformed_coordinates(srcX, srcY,
							      intel->transform[0],
							      &src_x[0],
							      &src_y[0]))
				return;

			if (!intel_get_transformed_coordinates(srcX, srcY + h,
							      intel->transform[0],
							      &src_x[1],
							      &src_y[1]))
				return;

			if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
							      intel->transform[0],
							      &src_x[2],
							      &src_y[2]))
				return;
		} else {
			if (!intel_get_transformed_coordinates_3d(srcX, srcY,
								 intel->transform[0],
								 &src_x[0],
								 &src_y[0],
								 &src_w[0]))
				return;

			if (!intel_get_transformed_coordinates_3d(srcX, srcY + h,
								 intel->transform[0],
								 &src_x[1],
								 &src_y[1],
								 &src_w[1]))
				return;

			if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h,
								 intel->transform[0],
								 &src_x[2],
								 &src_y[2],
								 &src_w[2]))
				return;
		}
	}

	/* Same three corners through the mask transform, if present. */
	if (intel->render_mask) {
		if (is_affine) {
			if (!intel_get_transformed_coordinates(maskX, maskY,
							      intel->transform[1],
							      &mask_x[0],
							      &mask_y[0]))
				return;

			if (!intel_get_transformed_coordinates(maskX, maskY + h,
							      intel->transform[1],
							      &mask_x[1],
							      &mask_y[1]))
				return;

			if (!intel_get_transformed_coordinates(maskX + w, maskY + h,
							      intel->transform[1],
							      &mask_x[2],
							      &mask_y[2]))
				return;
		} else {
			if (!intel_get_transformed_coordinates_3d(maskX, maskY,
								 intel->transform[1],
								 &mask_x[0],
								 &mask_y[0],
								 &mask_w[0]))
				return;

			if (!intel_get_transformed_coordinates_3d(maskX, maskY + h,
								 intel->transform[1],
								 &mask_x[1],
								 &mask_y[1],
								 &mask_w[1]))
				return;

			if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h,
								 intel->transform[1],
								 &mask_x[2],
								 &mask_y[2],
								 &mask_w[2]))
				return;
		}
	}

	/* NOTE(review): when render_source_is_solid is set, src_x/src_y
	 * (and src_w) are never initialized above but are still written
	 * into the vertex buffer below; presumably the solid-fill WM
	 * kernel ignores the texture coordinates — confirm. */

	/* Emit the three vertices in RECTLIST order: bottom-right,
	 * bottom-left, top-left. */
	OUT_VERTEX(dstX + w);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[2]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[2]);
	}

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY + h);
	OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[1]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[1]);
	}

	OUT_VERTEX(dstX);
	OUT_VERTEX(dstY);
	OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
	OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
	if (!is_affine)
		OUT_VERTEX(src_w[0]);
	if (intel->render_mask) {
		OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]);
		OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]);
		if (!is_affine)
			OUT_VERTEX(mask_w[0]);
	}
}
1729
/**
 * EXA PrepareComposite hook for gen4+ hardware.
 *
 * Validates the requested Render operation (filters, repeat modes,
 * component-alpha restrictions), caches everything the per-rectangle
 * i965_composite() path needs (pictures, pixmaps, transforms, scale
 * factors, WM kernel selection, vertex emitter), and makes sure the
 * operation fits in the GPU aperture and surface-state buffer.
 *
 * Returns TRUE on success; FALSE requests a software fallback.
 */
Bool
i965_prepare_composite(int op, PicturePtr source_picture,
                       PicturePtr mask_picture, PicturePtr dest_picture,
                       PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
{
	ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
	intel_screen_private *intel = intel_get_screen_private(scrn);
	struct gen4_render_state *render_state = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render_state->composite_op;

	/* Map Render filter/repeat enums onto sampler-state indices;
	 * anything unrepresentable triggers a fallback. */
	composite_op->src_filter =
	    sampler_state_filter_from_picture(source_picture->filter);
	if (composite_op->src_filter < 0) {
		intel_debug_fallback(scrn, "Bad src filter 0x%x\n",
				     source_picture->filter);
		return FALSE;
	}
	composite_op->src_extend =
	    sampler_state_extend_from_picture(source_picture->repeatType);
	if (composite_op->src_extend < 0) {
		intel_debug_fallback(scrn, "Bad src repeat 0x%x\n",
				     source_picture->repeatType);
		return FALSE;
	}

	if (mask_picture) {
		if (mask_picture->componentAlpha &&
		    PICT_FORMAT_RGB(mask_picture->format)) {
			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (i965_blend_op[op].src_alpha &&
			    (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
				intel_debug_fallback(scrn,
						     "Component alpha not supported "
						     "with source alpha and source "
						     "value blending.\n");
				return FALSE;
			}
		}

		composite_op->mask_filter =
		    sampler_state_filter_from_picture(mask_picture->filter);
		if (composite_op->mask_filter < 0) {
			intel_debug_fallback(scrn, "Bad mask filter 0x%x\n",
					     mask_picture->filter);
			return FALSE;
		}
		composite_op->mask_extend =
		    sampler_state_extend_from_picture(mask_picture->repeatType);
		if (composite_op->mask_extend < 0) {
			intel_debug_fallback(scrn, "Bad mask repeat 0x%x\n",
					     mask_picture->repeatType);
			return FALSE;
		}
	} else {
		/* No mask: still need valid sampler indices for the
		 * state-bo lookup tables. */
		composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST;
		composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE;
	}

	/* Flush any pending writes prior to relocating the textures. */
	if (intel_pixmap_is_dirty(source) ||
	    (mask && intel_pixmap_is_dirty(mask)))
		intel_batch_emit_flush(scrn);

	/* Stash the operands for the per-rectangle composite calls. */
	composite_op->op = op;
	intel->render_source_picture = source_picture;
	intel->render_mask_picture = mask_picture;
	intel->render_dest_picture = dest_picture;
	intel->render_source = source;
	intel->render_mask = mask;
	intel->render_dest = dest;

	/* Texture coordinates are emitted normalized to [0,1]. */
	intel->scale_units[0][0] = 1. / source->drawable.width;
	intel->scale_units[0][1] = 1. / source->drawable.height;

	intel->transform[0] = source_picture->transform;
	composite_op->is_affine = intel_transform_is_affine(intel->transform[0]);

	if (!mask) {
		intel->transform[1] = NULL;
		intel->scale_units[1][0] = -1;
		intel->scale_units[1][1] = -1;
	} else {
		intel->transform[1] = mask_picture->transform;
		intel->scale_units[1][0] = 1. / mask->drawable.width;
		intel->scale_units[1][1] = 1. / mask->drawable.height;
		/* Projective path is needed if either transform is
		 * non-affine. */
		composite_op->is_affine &=
		    intel_transform_is_affine(intel->transform[1]);
	}

	/* Pick the WM kernel matching mask presence, component alpha
	 * and affine/projective coordinates. */
	if (mask) {
		if (mask_picture->componentAlpha &&
		    PICT_FORMAT_RGB(mask_picture->format)) {
			if (i965_blend_op[op].src_alpha) {
				if (composite_op->is_affine)
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_SRCALPHA_AFFINE;
				else
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE;
			} else {
				if (composite_op->is_affine)
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_AFFINE;
				else
					composite_op->wm_kernel =
					    WM_KERNEL_MASKCA_PROJECTIVE;
			}
		} else {
			if (composite_op->is_affine)
				composite_op->wm_kernel =
				    WM_KERNEL_MASKNOCA_AFFINE;
			else
				composite_op->wm_kernel =
				    WM_KERNEL_MASKNOCA_PROJECTIVE;
		}
	} else {
		if (composite_op->is_affine)
			composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE;
		else
			composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE;
	}

	/* Use a specialized vertex emitter when the transforms allow;
	 * the generic one handles every remaining combination. */
	intel->prim_emit = i965_emit_composite_primitive;
	if (!mask) {
		if (intel->transform[0] == NULL)
			intel->prim_emit = i965_emit_composite_primitive_identity_source;
		else if (composite_op->is_affine)
			intel->prim_emit = i965_emit_composite_primitive_affine_source;
	} else {
		if (intel->transform[0] == NULL && intel->transform[1] == NULL)
			intel->prim_emit = i965_emit_composite_primitive_identity_source_mask;
	}

	/* 2 position floats + 2 (affine) or 3 (projective) texcoord
	 * floats per texture, source plus optional mask. */
	intel->floats_per_vertex =
		2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3);

	/* If the bo set doesn't fit, flush the batch and re-check; if it
	 * still doesn't fit nothing will help, so fall back. */
	if (!i965_composite_check_aperture(intel)) {
		intel_batch_submit(scrn);
		if (!i965_composite_check_aperture(intel)) {
			intel_debug_fallback(scrn,
					     "Couldn't fit render operation "
					     "in aperture\n");
			return FALSE;
		}
	}

	/* Need room for a binding table plus up to three surface-state
	 * entries (dest, source, mask). */
	if (sizeof(intel->surface_data) - intel->surface_used <
	    4 * sizeof(struct brw_surface_state_padded))
		i965_surface_flush(intel);

	intel->needs_render_state_emit = TRUE;

	return TRUE;
}
1887
/* Emit 3DSTATE_VERTEX_BUFFERS pointing at our single vertex bo, unless a
 * buffer with this vertex layout (id encodes floats_per_vertex) has
 * already been selected in the current batch; vertex_id is a bitmask of
 * layouts emitted so far. */
static void i965_select_vertex_buffer(struct intel_screen_private *intel)
{
	int id = intel->gen4_render_state->composite_op.vertex_id;

	if (intel->vertex_id & (1 << id))
		return;

	/* Set up the pointer to our (single) vertex buffer */
	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);

	/* XXX could use multiple vbo to reduce relocations if
	 * frequently switching between vertex sizes, like rgb10text.
	 */
	if (INTEL_INFO(intel)->gen >= 60) {
		OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
			  GEN6_VB0_VERTEXDATA |
			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	} else {
		OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
			  VB0_VERTEXDATA |
			  (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
	}
	OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
	/* Gen5+ takes an explicit end-address dword; older hardware
	 * ignores this slot for VERTEXDATA buffers. */
	if (INTEL_INFO(intel)->gen >= 50)
		OUT_RELOC(intel->vertex_bo,
			  I915_GEM_DOMAIN_VERTEX, 0,
			  sizeof(intel->vertex_ptr) - 1);
	else
		OUT_BATCH(0);
	OUT_BATCH(0);		// ignore for VERTEXDATA, but still there

	intel->vertex_id |= 1 << id;
}
1921
1922 static void i965_bind_surfaces(struct intel_screen_private *intel)
1923 {
1924         uint32_t *binding_table;
1925
1926         assert(intel->surface_used + 4 * sizeof(struct brw_surface_state_padded) <= sizeof(intel->surface_data));
1927
1928         binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
1929         intel->surface_table = intel->surface_used;
1930         intel->surface_used += sizeof(struct brw_surface_state_padded);
1931
1932         binding_table[0] =
1933                 i965_set_picture_surface_state(intel,
1934                                                intel->render_dest_picture,
1935                                                intel->render_dest,
1936                                                TRUE);
1937         binding_table[1] =
1938                 i965_set_picture_surface_state(intel,
1939                                                intel->render_source_picture,
1940                                                intel->render_source,
1941                                                FALSE);
1942         if (intel->render_mask) {
1943                 binding_table[2] =
1944                         i965_set_picture_surface_state(intel,
1945                                                        intel->render_mask_picture,
1946                                                        intel->render_mask,
1947                                                        FALSE);
1948         }
1949 }
1950
/* EXA Composite hook: emit one rectangle of the operation prepared by
 * i965_prepare_composite(). */
void
i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
               int dstX, int dstY, int w, int h)
{
	ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
	intel_screen_private *intel = intel_get_screen_private(scrn);

	/* Reserve batch space so the emission below cannot be split
	 * across a batch submit. */
	intel_batch_start_atomic(scrn, 200);
	if (intel->needs_render_state_emit) {
		i965_bind_surfaces(intel);

		if (INTEL_INFO(intel)->gen >= 60)
			gen6_emit_composite_state(intel);
		else
			i965_emit_composite_state(intel);
	}

	/* If the vertex size changed, realign vertex_used to the new
	 * stride so subsequent vertices start on a vertex boundary. */
	if (intel->floats_per_vertex != intel->last_floats_per_vertex) {
		intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
		intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
		intel->last_floats_per_vertex = intel->floats_per_vertex;
	}
	/* Need room for 3 vertices (one rectangle) in the vertex bo. */
	if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
		i965_vertex_flush(intel);
		intel_next_vertex(intel);
		intel->vertex_index = 0;
	}
	i965_select_vertex_buffer(intel);

	/* Start a new 3DPRIMITIVE if one isn't already open; its vertex
	 * count dword (at vertex_offset) is patched by the vertex flush. */
	if (intel->vertex_offset == 0) {
		OUT_BATCH(BRW_3DPRIMITIVE |
			  BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
			  (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
			  (0 << 9) |
			  4);
		intel->vertex_offset = intel->batch_used;
		OUT_BATCH(0);	/* vertex count, to be filled in later */
		OUT_BATCH(intel->vertex_index);
		OUT_BATCH(1);	/* single instance */
		OUT_BATCH(0);	/* start instance location */
		OUT_BATCH(0);	/* index buffer offset, ignored */
		intel->vertex_count = intel->vertex_index;
	}

	/* Emitter chosen in i965_prepare_composite(); adds 3 vertices. */
	intel->prim_emit(intel,
			 srcX, srcY,
			 maskX, maskY,
			 dstX, dstY,
			 w, h);
	intel->vertex_index += 3;

	if (INTEL_INFO(intel)->gen < 50) {
	    /* XXX OMG! */
	    i965_vertex_flush(intel);
	    OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
	}

	intel_batch_end_atomic(scrn);
}
2010
2011 void i965_batch_commit_notify(intel_screen_private *intel)
2012 {
2013         intel->needs_render_state_emit = TRUE;
2014         intel->needs_3d_invariant = TRUE;
2015         intel->last_floats_per_vertex = 0;
2016         intel->vertex_index = 0;
2017
2018         intel->gen4_render_state->composite_op.vertex_id = -1;
2019
2020         intel->gen6_render_state.num_sf_outputs = 0;
2021         intel->gen6_render_state.samplers = NULL;
2022         intel->gen6_render_state.blend = -1;
2023         intel->gen6_render_state.kernel = NULL;
2024         intel->gen6_render_state.drawrect = -1;
2025
2026         assert(intel->surface_reloc == 0);
2027 }
2028
2029 /**
2030  * Called at EnterVT so we can set up our offsets into the state buffer.
2031  */
2032 void gen4_render_state_init(ScrnInfoPtr scrn)
2033 {
2034         intel_screen_private *intel = intel_get_screen_private(scrn);
2035         struct gen4_render_state *render;
2036         const struct wm_kernel_info *wm_kernels;
2037         int i, j, k, l, m;
2038         drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
2039         drm_intel_bo *border_color_bo;
2040
2041         intel->needs_3d_invariant = TRUE;
2042
2043         intel->surface_bo =
2044                 drm_intel_bo_alloc(intel->bufmgr, "surface data",
2045                                    sizeof(intel->surface_data), 4096);
2046         intel->surface_used = 0;
2047
2048         if (intel->gen4_render_state == NULL)
2049                 intel->gen4_render_state = calloc(sizeof(*render), 1);
2050
2051         if (INTEL_INFO(intel)->gen >= 60)
2052                 return gen6_render_state_init(scrn);
2053
2054         render = intel->gen4_render_state;
2055         render->composite_op.vertex_id = -1;
2056
2057         render->vs_state_bo = gen4_create_vs_unit_state(intel);
2058
2059         /* Set up the two SF states (one for blending with a mask, one without) */
2060         if (IS_GEN5(intel)) {
2061                 sf_kernel_bo = intel_bo_alloc_for_data(intel,
2062                                                        sf_kernel_static_gen5,
2063                                                        sizeof
2064                                                        (sf_kernel_static_gen5),
2065                                                        "sf kernel gen5");
2066                 sf_kernel_mask_bo =
2067                     intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5,
2068                                             sizeof(sf_kernel_mask_static_gen5),
2069                                             "sf mask kernel");
2070         } else {
2071                 sf_kernel_bo = intel_bo_alloc_for_data(intel,
2072                                                        sf_kernel_static,
2073                                                        sizeof(sf_kernel_static),
2074                                                        "sf kernel");
2075                 sf_kernel_mask_bo = intel_bo_alloc_for_data(intel,
2076                                                             sf_kernel_mask_static,
2077                                                             sizeof
2078                                                             (sf_kernel_mask_static),
2079                                                             "sf mask kernel");
2080         }
2081         render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo);
2082         render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo);
2083         drm_intel_bo_unreference(sf_kernel_bo);
2084         drm_intel_bo_unreference(sf_kernel_mask_bo);
2085
2086         wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4;
2087         for (m = 0; m < KERNEL_COUNT; m++) {
2088                 render->wm_kernel_bo[m] =
2089                         intel_bo_alloc_for_data(intel,
2090                                         wm_kernels[m].data,
2091                                         wm_kernels[m].size,
2092                                         "WM kernel");
2093         }
2094
2095         /* Set up the WM states: each filter/extend type for source and mask, per
2096          * kernel.
2097          */
2098         border_color_bo = sampler_border_color_create(intel);
2099         for (i = 0; i < FILTER_COUNT; i++) {
2100                 for (j = 0; j < EXTEND_COUNT; j++) {
2101                         for (k = 0; k < FILTER_COUNT; k++) {
2102                                 for (l = 0; l < EXTEND_COUNT; l++) {
2103                                         drm_intel_bo *sampler_state_bo;
2104
2105                                         sampler_state_bo =
2106                                             gen4_create_sampler_state(intel,
2107                                                                       i, j,
2108                                                                       k, l,
2109                                                                       border_color_bo);
2110
2111                                         for (m = 0; m < KERNEL_COUNT; m++) {
2112                                                 render->wm_state_bo[m][i][j][k][l] =
2113                                                         gen4_create_wm_state
2114                                                         (intel,
2115                                                          wm_kernels[m]. has_mask,
2116                                                          render->wm_kernel_bo[m],
2117                                                          sampler_state_bo);
2118                                         }
2119                                         drm_intel_bo_unreference(sampler_state_bo);
2120                                 }
2121                         }
2122                 }
2123         }
2124         drm_intel_bo_unreference(border_color_bo);
2125
2126         render->cc_state_bo = gen4_create_cc_unit_state(intel);
2127 }
2128
2129 /**
2130  * Called at LeaveVT.
2131  */
2132 void gen4_render_state_cleanup(ScrnInfoPtr scrn)
2133 {
2134         intel_screen_private *intel = intel_get_screen_private(scrn);
2135         struct gen4_render_state *render_state = intel->gen4_render_state;
2136         int i, j, k, l, m;
2137
2138         drm_intel_bo_unreference(intel->surface_bo);
2139         drm_intel_bo_unreference(render_state->vs_state_bo);
2140         drm_intel_bo_unreference(render_state->sf_state_bo);
2141         drm_intel_bo_unreference(render_state->sf_mask_state_bo);
2142
2143         for (i = 0; i < KERNEL_COUNT; i++)
2144                 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]);
2145
2146         for (i = 0; i < FILTER_COUNT; i++)
2147                 for (j = 0; j < EXTEND_COUNT; j++)
2148                         for (k = 0; k < FILTER_COUNT; k++)
2149                                 for (l = 0; l < EXTEND_COUNT; l++)
2150                                         for (m = 0; m < KERNEL_COUNT; m++)
2151                                                 drm_intel_bo_unreference
2152                                                     (render_state->
2153                                                      wm_state_bo[m][i][j][k]
2154                                                      [l]);
2155
2156         for (i = 0; i < FILTER_COUNT; i++)
2157                 for (j = 0; j < EXTEND_COUNT; j++)
2158                         for (k = 0; k < FILTER_COUNT; k++)
2159                                 for (l = 0; l < EXTEND_COUNT; l++)
2160                                         drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]);
2161
2162         drm_intel_bo_unreference(render_state->cc_state_bo);
2163
2164         drm_intel_bo_unreference(render_state->cc_vp_bo);
2165         drm_intel_bo_unreference(render_state->gen6_blend_bo);
2166         drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo);
2167
2168         free(intel->gen4_render_state);
2169         intel->gen4_render_state = NULL;
2170 }
2171
2172 /*
2173  * for GEN6+
2174  */
2175 #define GEN6_BLEND_STATE_PADDED_SIZE    ALIGN(sizeof(struct gen6_blend_state), 64)
2176
2177 static drm_intel_bo *
2178 gen6_composite_create_cc_state(intel_screen_private *intel)
2179 {
2180         struct gen6_color_calc_state *state;
2181         drm_intel_bo *cc_bo;
2182
2183         cc_bo = drm_intel_bo_alloc(intel->bufmgr,
2184                                 "gen6 CC state",
2185                                 sizeof(*state),
2186                                 4096);
2187         drm_intel_bo_map(cc_bo, TRUE);
2188         state = cc_bo->virtual;
2189         memset(state, 0, sizeof(*state));
2190         state->constant_r = 1.0;
2191         state->constant_g = 0.0;
2192         state->constant_b = 1.0;
2193         state->constant_a = 1.0;
2194         drm_intel_bo_unmap(cc_bo);
2195
2196         return cc_bo;
2197 }
2198
2199 static drm_intel_bo *
2200 gen6_composite_create_blend_state(intel_screen_private *intel)
2201 {
2202         drm_intel_bo *blend_bo;
2203         int src, dst;
2204
2205         blend_bo = drm_intel_bo_alloc(intel->bufmgr,
2206                                 "gen6 BLEND state",
2207                                 BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
2208                                 4096);
2209         drm_intel_bo_map(blend_bo, TRUE);
2210         memset(blend_bo->virtual, 0, blend_bo->size);
2211
2212         for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) {
2213                 for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) {
2214                         uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE;
2215                         struct gen6_blend_state *blend;
2216
2217                         blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset);
2218                         blend->blend0.dest_blend_factor = dst;
2219                         blend->blend0.source_blend_factor = src;
2220                         blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
2221                         blend->blend0.blend_enable = 1;
2222
2223                         blend->blend1.post_blend_clamp_enable = 1;
2224                         blend->blend1.pre_blend_clamp_enable = 1;
2225                 }
2226         }
2227
2228         drm_intel_bo_unmap(blend_bo);
2229         return blend_bo;
2230 }
2231
2232 static drm_intel_bo *
2233 gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
2234 {
2235         struct gen6_depth_stencil_state *state;
2236         drm_intel_bo *depth_stencil_bo;
2237
2238         depth_stencil_bo = drm_intel_bo_alloc(intel->bufmgr,
2239                                         "gen6 DEPTH_STENCIL state",
2240                                         sizeof(*state),
2241                                         4096);
2242         drm_intel_bo_map(depth_stencil_bo, TRUE);
2243         state = depth_stencil_bo->virtual;
2244         memset(state, 0, sizeof(*state));
2245         drm_intel_bo_unmap(depth_stencil_bo);
2246
2247         return depth_stencil_bo;
2248 }
2249
/* Emit the once-per-batch invariant state: select the 3D pipeline,
 * configure multisampling for a single sample per pixel, and set the
 * system instruction pointer.  Command length fields are (#dwords - 2).
 */
static void
gen6_composite_invariant_states(intel_screen_private *intel)
{
	OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);

	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
		  GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
	OUT_BATCH(1);

	/* Set system instruction pointer */
	OUT_BATCH(BRW_STATE_SIP | 0);
	OUT_BATCH(0);
}
2267
/* Emit STATE_BASE_ADDRESS.  The surface state base must point at
 * intel->surface_bo, whose final GPU address is not known yet, so the
 * batch position is recorded in intel->surface_reloc for later fixup.
 * All other bases/bounds are left as "modify to default".
 */
static void
gen6_composite_state_base_address(intel_screen_private *intel)
{
	OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
	intel->surface_reloc = intel->batch_used;
	intel_batch_emit_dword(intel,
			       intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
	OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2284
/* Point the colour-calc viewport at cc_vp_bo.  Only the CC modify bit is
 * set, so the two zero dwords (clip and SF viewport pointers,
 * presumably — confirm against the gen6 PRM) are ignored by the GPU.
 */
static void
gen6_composite_viewport_state_pointers(intel_screen_private *intel,
				       drm_intel_bo *cc_vp_bo)
{

	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2297
/* Partition the URB: 24 minimum-size VS entries and nothing for the GS,
 * which is disabled for compositing.
 */
static void
gen6_composite_urb(intel_screen_private *intel)
{
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		  (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
2307
/* Select the blend state for this operation by its byte offset within
 * gen6_blend_bo; skipped when the offset matches the cached value.
 * The depth-stencil and colour-calc pointers only need to be written
 * once (blend == -1 marks the first emission after invalidation); bit 0
 * of each pointer appears to be the "modify enable" — confirm in PRM.
 */
static void
gen6_composite_cc_state_pointers(intel_screen_private *intel,
				 uint32_t blend_offset)
{
	struct gen4_render_state *render_state = intel->gen4_render_state;

	if (intel->gen6_render_state.blend == blend_offset)
		return;

	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
	OUT_RELOC(render_state->gen6_blend_bo,
		  I915_GEM_DOMAIN_INSTRUCTION, 0,
		  blend_offset | 1);
	if (intel->gen6_render_state.blend == -1) {
		OUT_RELOC(render_state->gen6_depth_stencil_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  1);
		OUT_RELOC(render_state->cc_state_bo,
			  I915_GEM_DOMAIN_INSTRUCTION, 0,
			  1);
	} else {
		OUT_BATCH(0);
		OUT_BATCH(0);
	}

	intel->gen6_render_state.blend = blend_offset;
}
2335
/* Bind the PS sampler state table, skipping the emission when the same
 * bo is already bound (cached in gen6_render_state.samplers).
 */
static void
gen6_composite_sampler_state_pointers(intel_screen_private *intel,
				      drm_intel_bo *bo)
{
	if (intel->gen6_render_state.samplers == bo)
		return;

	intel->gen6_render_state.samplers = bo;

	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0); /* VS */
	OUT_BATCH(0); /* GS */
	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2352
/* Disable the vertex shader: no constant buffer and no kernel, so the VS
 * stage passes vertex data through untouched.
 */
static void
gen6_composite_vs_state(intel_screen_private *intel)
{
	/* disable VS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	OUT_BATCH(0); /* without VS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
2370
/* Disable the geometry shader: no constant buffer and no kernel, so the
 * GS stage is pass-through.
 */
static void
gen6_composite_gs_state(intel_screen_private *intel)
{
	/* disable GS constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	OUT_BATCH(0); /* without GS kernel */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
}
2389
/* Disable the pixel-shader (WM) push-constant buffer; the composite
 * kernels take all their inputs from surfaces and the payload.
 */
static void
gen6_composite_wm_constants(intel_screen_private *intel)
{
	/* disable WM constant buffer */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
2400
/* Disable the clip stage; geometry passes through unclipped (the
 * drawing rectangle bounds rendering instead).
 */
static void
gen6_composite_clip_state(intel_screen_private *intel)
{
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	OUT_BATCH(0);
	OUT_BATCH(0); /* pass-through */
	OUT_BATCH(0);
}
2409
/* Program the SF (strips-and-fans) unit.  The number of outputs passed
 * to the PS depends on whether a mask texture coordinate is needed;
 * emission is skipped when that count is unchanged since the last call.
 */
static void
gen6_composite_sf_state(intel_screen_private *intel,
			Bool has_mask)
{
	/* one texcoord output for source-only, two when a mask is present */
	int num_sf_outputs = has_mask ? 2 : 1;

	if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs)
		return;

	intel->gen6_render_state.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
		  (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW9 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW14 */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0); /* DW19 */
}
2444
/* Program the WM (pixel shader) unit with the given composite kernel,
 * skipping the emission when that kernel bo is already bound.
 *
 * NOTE(review): the cache key is only the kernel bo, while the emitted
 * surface and SF-output counts depend on has_mask — this is only safe if
 * has_mask is fully determined by the kernel choice; confirm at callers.
 */
static void
gen6_composite_wm_state(intel_screen_private *intel,
			Bool has_mask,
			drm_intel_bo *bo)
{
	/* binding table: dest + src, plus the mask surface when present */
	int num_surfaces = has_mask ? 3 : 2;
	int num_sf_outputs = has_mask ? 2 : 1;

	if (intel->gen6_render_state.kernel == bo)
		return;

	intel->gen6_render_state.kernel = bo;

	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
	OUT_RELOC(bo,
		I915_GEM_DOMAIN_INSTRUCTION, 0,
		0);
	OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
		  (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
	OUT_BATCH(0);
	OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
	OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE |
		  GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
	OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
2474
/* Point the PS at the binding table accumulated in intel->surface_table;
 * the VS and GS have no surfaces.
 */
static void
gen6_composite_binding_table_pointers(intel_screen_private *intel)
{
	/* Binding table pointers */
	OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
		  (4 - 2));
	OUT_BATCH(0);           /* vs */
	OUT_BATCH(0);           /* gs */
	/* Only the PS uses the binding table */
	OUT_BATCH(intel->surface_table);
}
2487
/* Declare a null depth buffer (no depth surface is bound for 2D
 * compositing) and zero the depth-clear parameters.
 */
static void
gen6_composite_depth_buffer_state(intel_screen_private *intel)
{
	OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
		  (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);

	OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
2503
/* Set the drawing rectangle to cover the entire destination pixmap.
 * The value is cached but the emission is deliberately never skipped
 * (the `0 &&` below): see the XXX about the implicit flush.
 */
static void
gen6_composite_drawing_rectangle(intel_screen_private *intel,
				 PixmapPtr dest)
{
	/* packed ymax/xmax, inclusive bottom-right corner */
	uint32_t dw =
		DRAW_YMAX(dest->drawable.height - 1) |
		DRAW_XMAX(dest->drawable.width - 1);

	/* XXX cacomposite depends upon the implicit non-pipelined flush */
	if (0 && intel->gen6_render_state.drawrect == dw)
		return;
	intel->gen6_render_state.drawrect = dw;

	OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0x00000000);  /* ymin, xmin */
	OUT_BATCH(dw);  /* ymax, xmax */
	OUT_BATCH(0x00000000);  /* yorigin, xorigin */
}
2522
/* Program the vertex-element (vertex fetch) layout for the current
 * combination of (has_mask, is_affine), which together form the cache id.
 * Non-affine coordinates carry a third (w) component taken from the
 * vertex buffer; affine ones synthesize w = 1.0.
 */
static void
gen6_composite_vertex_element_state(intel_screen_private *intel,
				    Bool has_mask,
				    Bool is_affine)
{
	/*
	 * vertex data in vertex buffer
	 *    position: (x, y)
	 *    texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
	 *    texture coordinate 1 if (has_mask is TRUE): same as above
	 */
	gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op;
	int nelem = has_mask ? 2 : 1;	/* number of texcoord elements */
	int selem = is_affine ? 2 : 3;	/* floats per texcoord in the vb */
	uint32_t w_component;
	uint32_t src_format;
	int id;

	/* id doubles as the vertex buffer index and the cache key */
	id = has_mask << 1 | is_affine;

	if (composite_op->vertex_id == id)
		return;

	composite_op->vertex_id = id;

	if (is_affine) {
		src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_1_FLT;
	} else {
		src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		w_component = BRW_VFCOMPONENT_STORE_SRC;
	}

	/* The VUE layout
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
	 *    dword 4-7: position (x, y, 1.0, 1.0),
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
	 *
	 * dword 4-15 are fetched from vertex buffer
	 */
	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
		((2 * (2 + nelem)) + 1 - 2));

	/* VUE header padding element: store four zeros */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT));
	OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

	/* x,y */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
		  (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

	/* u0, v0, w0 */
	OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
		  (src_format << VE0_FORMAT_SHIFT) |
		  ((2 * 4) << VE0_OFFSET_SHIFT));	/* offset vb in bytes */
	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
		  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
		  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

	/* u1, v1, w1 */
	if (has_mask) {
		OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
			  GEN6_VE0_VALID |
			  (src_format << VE0_FORMAT_SHIFT) |
			  (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
		OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
			  (w_component << VE1_VFCOMPONENT_2_SHIFT) |
			  (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
	}
}
2605
/* Emit all GEN6 state required for the current composite operation:
 * first the once-per-batch invariants (guarded by needs_3d_invariant),
 * then the per-operation blend, sampler, SF, WM, binding-table,
 * drawing-rectangle and vertex-element state.  The individual helpers
 * cache their last-emitted values to skip redundant emission.
 */
static void
gen6_emit_composite_state(struct intel_screen_private *intel)
{
	struct gen4_render_state *render = intel->gen4_render_state;
	gen4_composite_op *composite_op = &render->composite_op;
	sampler_state_filter_t src_filter = composite_op->src_filter;
	sampler_state_filter_t mask_filter = composite_op->mask_filter;
	sampler_state_extend_t src_extend = composite_op->src_extend;
	sampler_state_extend_t mask_extend = composite_op->mask_extend;
	Bool is_affine = composite_op->is_affine;
	Bool has_mask = intel->render_mask != NULL;
	uint32_t src, dst;
	drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];

	intel->needs_render_state_emit = FALSE;
	if (intel->needs_3d_invariant) {
		gen6_composite_invariant_states(intel);
		gen6_composite_viewport_state_pointers(intel,
						       render->cc_vp_bo);
		gen6_composite_urb(intel);

		gen6_composite_vs_state(intel);
		gen6_composite_gs_state(intel);
		gen6_composite_clip_state(intel);
		gen6_composite_wm_constants(intel);
		gen6_composite_depth_buffer_state(intel);

		intel->needs_3d_invariant = FALSE;
	}

	/* map the Render op + formats to hardware blend factors */
	i965_get_blend_cntl(composite_op->op,
			    intel->render_mask_picture,
			    intel->render_dest_picture->format,
			    &src, &dst);

	/* surface_reloc == 0 means STATE_BASE_ADDRESS not yet in this batch */
	if (intel->surface_reloc == 0)
		gen6_composite_state_base_address(intel);

	gen6_composite_cc_state_pointers(intel,
					(src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE);
	gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo);
	gen6_composite_sf_state(intel, has_mask);
	gen6_composite_wm_state(intel,
				has_mask,
				render->wm_kernel_bo[composite_op->wm_kernel]);
	gen6_composite_binding_table_pointers(intel);

	gen6_composite_drawing_rectangle(intel, intel->render_dest);
	gen6_composite_vertex_element_state(intel, has_mask, is_affine);
}
2656
2657 static void
2658 gen6_render_state_init(ScrnInfoPtr scrn)
2659 {
2660         intel_screen_private *intel = intel_get_screen_private(scrn);
2661         struct gen4_render_state *render;
2662         int i, j, k, l, m;
2663         drm_intel_bo *border_color_bo;
2664
2665         render= intel->gen4_render_state;
2666         render->composite_op.vertex_id = -1;
2667
2668         intel->gen6_render_state.num_sf_outputs = 0;
2669         intel->gen6_render_state.samplers = NULL;
2670         intel->gen6_render_state.blend = -1;
2671         intel->gen6_render_state.kernel = NULL;
2672         intel->gen6_render_state.drawrect = -1;
2673
2674         for (m = 0; m < KERNEL_COUNT; m++) {
2675                 render->wm_kernel_bo[m] =
2676                         intel_bo_alloc_for_data(intel,
2677                                         wm_kernels_gen6[m].data,
2678                                         wm_kernels_gen6[m].size,
2679                                         "WM kernel gen6");
2680         }
2681
2682         border_color_bo = sampler_border_color_create(intel);
2683
2684         for (i = 0; i < FILTER_COUNT; i++) {
2685                 for (j = 0; j < EXTEND_COUNT; j++) {
2686                         for (k = 0; k < FILTER_COUNT; k++) {
2687                                 for (l = 0; l < EXTEND_COUNT; l++) {
2688                                         render->ps_sampler_state_bo[i][j][k][l] =
2689                                                 gen4_create_sampler_state(intel,
2690                                                                 i, j,
2691                                                                 k, l,
2692                                                                 border_color_bo);
2693                                 }
2694                         }
2695                 }
2696         }
2697
2698         drm_intel_bo_unreference(border_color_bo);
2699         render->cc_vp_bo = gen4_create_cc_viewport(intel);
2700         render->cc_state_bo = gen6_composite_create_cc_state(intel);
2701         render->gen6_blend_bo = gen6_composite_create_blend_state(intel);
2702         render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel);
2703 }
2704
2705 void i965_vertex_flush(struct intel_screen_private *intel)
2706 {
2707         if (intel->vertex_offset) {
2708                 intel->batch_ptr[intel->vertex_offset] =
2709                         intel->vertex_index - intel->vertex_count;
2710                 intel->vertex_offset = 0;
2711         }
2712 }
2713
2714 void i965_batch_flush(struct intel_screen_private *intel)
2715 {
2716         if (intel->surface_used)
2717                 i965_surface_flush(intel);
2718 }