2 * Copyright © 2006,2008 Intel Corporation
3 * Copyright © 2007 Red Hat, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 * Wang Zhenyu <zhenyu.z.wang@intel.com>
26 * Eric Anholt <eric@anholt.net>
27 * Carl Worth <cworth@redhat.com>
28 * Keith Packard <keithp@keithp.com>
42 /* bring in brw structs */
43 #include "brw_defines.h"
44 #include "brw_structs.h"
/* Refer to the Intel 965 PRM, Vol. 2, "3D Rasterization", section 3.8.1. */
48 /* defined in brw_defines.h */
/* Blend-factor table for the Render compositing operators, indexed by the
 * PictOp code (used as i965_blend_op[op] below).  The two leading flags
 * record whether the factors reference destination alpha / source alpha,
 * so i965_get_blend_cntl() can substitute factors when those channels are
 * unavailable.  NOTE(review): the field declarations and the array-name
 * line fall on elided lines of this fragment. */
static const struct blendinfo {
	/* Clear */
	{0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO},
	/* Src */
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
	/* Dst */
	{0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
	/* Over */
	{0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* OverReverse */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
	/* In */
	{1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
	/* InReverse */
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
	/* Out */
	{1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
	/* OutReverse */
	{0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* Atop */
	{1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* AtopReverse */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
	/* Xor */
	{1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
	/* Add */
	{0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
84 * Highest-valued BLENDFACTOR used in i965_blend_op.
86 * This leaves out BRW_BLENDFACTOR_INV_DST_COLOR,
87 * BRW_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
88 * BRW_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
90 #define BRW_BLENDFACTOR_COUNT (BRW_BLENDFACTOR_INV_DST_ALPHA + 1)
/* FIXME: surface formats are defined in brw_defines.h and shared with the
 * sampling engine. */
95 static const struct formatinfo {
98 } i965_tex_formats[] = {
99 {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM},
100 {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM},
101 {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM},
102 {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM},
103 {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM},
104 {PICT_r8g8b8, BRW_SURFACEFORMAT_R8G8B8_UNORM},
105 {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM},
106 {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM},
107 #if XORG_VERSION_CURRENT >= 10699900
108 {PICT_a2r10g10b10, BRW_SURFACEFORMAT_B10G10R10A2_UNORM},
109 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
110 {PICT_a2b10g10r10, BRW_SURFACEFORMAT_R10G10B10A2_UNORM},
111 {PICT_x2r10g10b10, BRW_SURFACEFORMAT_B10G10R10X2_UNORM},
113 {PICT_a4r4g4b4, BRW_SURFACEFORMAT_B4G4R4A4_UNORM},
/* Translate a Render composite op into hardware blend factors.
 *
 * op:          PictOp index into i965_blend_op[]
 * mask:        mask picture, may be NULL; component-alpha masks change the
 *              destination factor (see below)
 * dst_format:  destination picture format, checked for an alpha channel
 * sblend/dblend: out parameters receiving BRW_BLENDFACTOR_* codes
 *
 * NOTE(review): fragment — the function braces and if-block closers fall on
 * elided lines. */
static void i965_get_blend_cntl(int op, PicturePtr mask, uint32_t dst_format,
				uint32_t * sblend, uint32_t * dblend)
	*sblend = i965_blend_op[op].src_blend;
	*dblend = i965_blend_op[op].dst_blend;

	/* If there's no dst alpha channel, adjust the blend op so that we'll
	 * treat it as always 1. */
	if (PICT_FORMAT_A(dst_format) == 0 && i965_blend_op[op].dst_alpha) {
		/* Missing destination alpha reads as 1.0, so DST_ALPHA
		 * becomes ONE and its inverse becomes ZERO. */
		if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
			*sblend = BRW_BLENDFACTOR_ONE;
		else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
			*sblend = BRW_BLENDFACTOR_ZERO;

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is
	 * the mask channels multiplied by the source picture's alpha. */
	if (mask && mask->componentAlpha && PICT_FORMAT_RGB(mask->format)
	    && i965_blend_op[op].src_alpha) {
		/* Per-channel (component-alpha) masks deliver src.alpha
		 * pre-multiplied into the color channels, so switch the
		 * ALPHA factors to their COLOR counterparts. */
		if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) {
			*dblend = BRW_BLENDFACTOR_SRC_COLOR;
		} else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) {
			*dblend = BRW_BLENDFACTOR_INV_SRC_COLOR;
/* Return the BRW_SURFACEFORMAT_* code to use when binding dest_picture as a
 * render target.  NOTE(review): fragment — the `case PICT_*:` labels, the
 * default branch, and the closing braces fall on elided lines; each visible
 * return corresponds to one or more elided case labels. */
static uint32_t i965_get_dest_format(PicturePtr dest_picture)
	switch (dest_picture->format) {
		return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
		return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
#if XORG_VERSION_CURRENT >= 10699900
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return BRW_SURFACEFORMAT_B10G10R10A2_UNORM;
		return BRW_SURFACEFORMAT_B5G6R5_UNORM;
		return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
		return BRW_SURFACEFORMAT_A8_UNORM;
		return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
/* EXA CheckComposite hook: decide whether the given composite operation can
 * be accelerated, logging a fallback reason and (on elided lines) returning
 * FALSE when it cannot.  NOTE(review): fragment — the return-type line,
 * `return FALSE;`/`return TRUE;` statements and block closers are elided. */
i965_check_composite(int op,
		     PicturePtr source_picture,
		     PicturePtr mask_picture,
		     PicturePtr dest_picture,
		     int width, int height)
	ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];

	/* Check for unsupported compositing operations. */
	if (op >= sizeof(i965_blend_op) / sizeof(i965_blend_op[0])) {
		intel_debug_fallback(scrn,
				     "Unsupported Composite op 0x%x\n", op);

	if (mask_picture && mask_picture->componentAlpha &&
	    PICT_FORMAT_RGB(mask_picture->format)) {
		/* Check if it's component alpha that relies on a source alpha
		 * and on the source value. We can only get one of those into
		 * the single source value that we get to blend with. */
		if (i965_blend_op[op].src_alpha &&
		    (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
			intel_debug_fallback(scrn,
					     "Component alpha not supported "
					     "with source alpha and source "
					     "value blending.\n");

	/* Destination must map onto a renderable surface format. */
	if (i965_get_dest_format(dest_picture) == -1) {
		intel_debug_fallback(scrn, "Usupported Color buffer format 0x%x\n",
				     (int)dest_picture->format);
/* EXA CheckCompositeTexture hook: validate that a source/mask picture's
 * repeat mode, filter, size (max 8192x8192) and format are supported by the
 * sampler.  NOTE(review): fragment — the return-type line, `return FALSE;`
 * statements and several loop/block lines are elided. */
i965_check_composite_texture(ScreenPtr screen, PicturePtr picture)
	/* Only None/Normal/Pad/Reflect repeats are handled. */
	if (picture->repeatType > RepeatReflect) {
		ScrnInfoPtr scrn = xf86Screens[screen->myNum];
		intel_debug_fallback(scrn,
				     "extended repeat (%d) not supported\n",
				     picture->repeatType);

	if (picture->filter != PictFilterNearest &&
	    picture->filter != PictFilterBilinear) {
		ScrnInfoPtr scrn = xf86Screens[screen->myNum];
		intel_debug_fallback(scrn, "Unsupported filter 0x%x\n",

	if (picture->pDrawable) {
		w = picture->pDrawable->width;
		h = picture->pDrawable->height;
		/* Sampler surface size limit. */
		if ((w > 8192) || (h > 8192)) {
			ScrnInfoPtr scrn = xf86Screens[screen->myNum];
			intel_debug_fallback(scrn,
					     "Picture w/h too large (%dx%d)\n",

		/* Linear scan of the supported-format table. */
		     i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
			if (i965_tex_formats[i].fmt == picture->format)

		/* Fell off the end of the table: unsupported format. */
		if (i == sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]))
			ScrnInfoPtr scrn = xf86Screens[screen->myNum];
			intel_debug_fallback(scrn,
					     "Unsupported picture format "
					     (int)picture->format);
/* Number of 16-register GRF blocks covering `nreg` registers, rounded up,
 * minus one — the form stored in thread0.grf_reg_count below.  BUG FIX:
 * the macro argument was unparenthesized, which misparses low-precedence
 * argument expressions (e.g. a conditional expression). */
#define BRW_GRF_BLOCKS(nreg) ((((nreg) + 15) / 16) - 1)
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 */
#define URB_CS_ENTRY_SIZE 0
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
#define URB_VS_ENTRIES 8 // we need at least 8 entries

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 1

/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */
#define SF_KERNEL_NUM_GRF 16
#define SF_MAX_THREADS 2
/* Pre-assembled GEN4 shader binaries included as array data: each .g4b file
 * expands to rows of four uint32_t words.  Pixel-shader kernels are composed
 * by concatenating stages (coordinate setup, source/mask sampling, channel
 * combining, framebuffer write).  NOTE(review): fragment — the closing `};`
 * of each array falls on elided lines. */
static const uint32_t sf_kernel_static[][4] = {
#include "exa_sf.g4b"

static const uint32_t sf_kernel_mask_static[][4] = {
#include "exa_sf_mask.g4b"

#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 48

/* Source only, affine transform. */
static const uint32_t ps_kernel_nomask_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"

/* Source only, projective transform. */
static const uint32_t ps_kernel_nomask_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_write.g4b"

/* Component-alpha mask, affine. */
static const uint32_t ps_kernel_maskca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"

/* Component-alpha mask, projective. */
static const uint32_t ps_kernel_maskca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca.g4b"
#include "exa_wm_write.g4b"

/* Component-alpha mask with source-alpha blending (samples only src.a). */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"

static const uint32_t ps_kernel_maskca_srcalpha_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_a.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_argb.g4b"
#include "exa_wm_ca_srcalpha.g4b"
#include "exa_wm_write.g4b"

/* Non-component-alpha mask (samples only mask.a). */
static const uint32_t ps_kernel_masknoca_affine_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_affine.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"

static const uint32_t ps_kernel_masknoca_projective_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_projective.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_mask_projective.g4b"
#include "exa_wm_mask_sample_a.g4b"
#include "exa_wm_noca.g4b"
#include "exa_wm_write.g4b"
/* New programs for Ironlake (GEN5) — same stage composition as the GEN4
 * kernels above, but assembled from the .g4b.gen5 binaries.
 * NOTE(review): fragment — each array's closing `};` is on elided lines. */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "exa_sf.g4b.gen5"

static const uint32_t sf_kernel_mask_static_gen5[][4] = {
#include "exa_sf_mask.g4b.gen5"

static const uint32_t ps_kernel_nomask_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_nomask_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_maskca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_maskca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_a.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_argb.g4b.gen5"
#include "exa_wm_ca_srcalpha.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_masknoca_affine_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_affine.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_affine.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"

static const uint32_t ps_kernel_masknoca_projective_static_gen5[][4] = {
#include "exa_wm_xy.g4b.gen5"
#include "exa_wm_src_projective.g4b.gen5"
#include "exa_wm_src_sample_argb.g4b.gen5"
#include "exa_wm_mask_projective.g4b.gen5"
#include "exa_wm_mask_sample_a.g4b.gen5"
#include "exa_wm_noca.g4b.gen5"
#include "exa_wm_write.g4b.gen5"
/* Programs for GEN6 (Sandybridge) — built from .g6b binaries; GEN6 needs no
 * separate exa_wm_xy stage.  NOTE(review): fragment — closing `};` lines
 * are elided. */
static const uint32_t ps_kernel_nomask_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"

static const uint32_t ps_kernel_nomask_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"

static const uint32_t ps_kernel_maskca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca.g6b"
#include "exa_wm_write.g6b"
492 static const uint32_t ps_kernel_maskca_projective_static_gen6[][4] = {
493 #include "exa_wm_src_projective.g6b"
494 #include "exa_wm_src_sample_argb.g6b"
495 #include "exa_wm_mask_projective.g6b"
496 #include "exa_wm_mask_sample_argb.g6b"
497 #include "exa_wm_ca.g4b.gen5"
498 #include "exa_wm_write.g6b"
/* Remaining GEN6 pixel-shader kernels (src-alpha CA and non-CA mask
 * variants).  NOTE(review): fragment — closing `};` lines are elided. */
static const uint32_t ps_kernel_maskca_srcalpha_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"

static const uint32_t ps_kernel_maskca_srcalpha_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_a.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_argb.g6b"
#include "exa_wm_ca_srcalpha.g6b"
#include "exa_wm_write.g6b"

static const uint32_t ps_kernel_masknoca_affine_static_gen6[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_affine.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"

static const uint32_t ps_kernel_masknoca_projective_static_gen6[][4] = {
#include "exa_wm_src_projective.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_mask_projective.g6b"
#include "exa_wm_mask_sample_a.g6b"
#include "exa_wm_noca.g6b"
#include "exa_wm_write.g6b"
/* Sampler filter selector (indexes ps_sampler_state_bo / wm_state_bo).
 * NOTE(review): fragment — the `typedef enum {` openers and the trailing
 * FILTER_COUNT/EXTEND_COUNT/KERNEL_COUNT entries (used for array sizing in
 * struct gen4_render_state) fall on elided lines. */
	SAMPLER_STATE_FILTER_NEAREST,
	SAMPLER_STATE_FILTER_BILINEAR,
} sampler_state_filter_t;

/* Sampler wrap mode, one per Render repeat type. */
	SAMPLER_STATE_EXTEND_NONE,
	SAMPLER_STATE_EXTEND_REPEAT,
	SAMPLER_STATE_EXTEND_PAD,
	SAMPLER_STATE_EXTEND_REFLECT,
} sampler_state_extend_t;

/* Pixel-shader kernel selector: one entry per (mask kind, transform kind)
 * combination, matching the wm_kernels_gen* tables below. */
	WM_KERNEL_NOMASK_AFFINE,
	WM_KERNEL_NOMASK_PROJECTIVE,
	WM_KERNEL_MASKCA_AFFINE,
	WM_KERNEL_MASKCA_PROJECTIVE,
	WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	WM_KERNEL_MASKNOCA_AFFINE,
	WM_KERNEL_MASKNOCA_PROJECTIVE,
/* Designated-initializer helper: fills one wm_kernel_info slot with a
 * pointer to the kernel data, its size, and whether it samples a mask. */
#define KERNEL(kernel_enum, kernel, masked) \
    [kernel_enum] = {&kernel, sizeof(kernel), masked}
/* Descriptor for one pixel-shader kernel.  NOTE(review): the field
 * declarations fall on elided lines. */
struct wm_kernel_info {

/* Kernel tables indexed by wm_kernel_t, one per hardware generation. */
static const struct wm_kernel_info wm_kernels_gen4[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static, TRUE),

static const struct wm_kernel_info wm_kernels_gen5[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen5, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen5, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen5, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen5, TRUE),

static const struct wm_kernel_info wm_kernels_gen6[] = {
	KERNEL(WM_KERNEL_NOMASK_AFFINE,
	       ps_kernel_nomask_affine_static_gen6, FALSE),
	KERNEL(WM_KERNEL_NOMASK_PROJECTIVE,
	       ps_kernel_nomask_projective_static_gen6, FALSE),
	KERNEL(WM_KERNEL_MASKCA_AFFINE,
	       ps_kernel_maskca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_PROJECTIVE,
	       ps_kernel_maskca_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_AFFINE,
	       ps_kernel_maskca_srcalpha_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE,
	       ps_kernel_maskca_srcalpha_projective_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_AFFINE,
	       ps_kernel_masknoca_affine_static_gen6, TRUE),
	KERNEL(WM_KERNEL_MASKNOCA_PROJECTIVE,
	       ps_kernel_masknoca_projective_static_gen6, TRUE),
/* brw_cc_unit_state padded out to a 64-byte stride so each entry in the
 * cc_state[][] array below stays aligned for the hardware. */
typedef struct _brw_cc_unit_state_padded {
	struct brw_cc_unit_state state;
	char pad[64 - sizeof(struct brw_cc_unit_state)];
} brw_cc_unit_state_padded;

/* brw_surface_state padded to a 32-byte stride for the surface-state
 * buffer (see intel->surface_used accounting below). */
typedef struct brw_surface_state_padded {
	struct brw_surface_state state;
	char pad[32 - sizeof(struct brw_surface_state)];
} brw_surface_state_padded;

/* One pre-built CC unit state per (src, dst) blend-factor pair. */
struct gen4_cc_unit_state {
	/* Index by [src_blend][dst_blend] */
	brw_cc_unit_state_padded cc_state[BRW_BLENDFACTOR_COUNT][BRW_BLENDFACTOR_COUNT];

/* Parameters of the composite currently being emitted (filters, wrap
 * modes, kernel choice).  NOTE(review): fragment — some fields and the
 * closing brace fall on elided lines. */
typedef struct gen4_composite_op {
	sampler_state_filter_t src_filter;
	sampler_state_filter_t mask_filter;
	sampler_state_extend_t src_extend;
	sampler_state_extend_t mask_extend;
	wm_kernel_t wm_kernel;

/** Private data for gen4 render accel implementation. */
struct gen4_render_state {
	drm_intel_bo *vs_state_bo;
	drm_intel_bo *sf_state_bo;
	drm_intel_bo *sf_mask_state_bo;
	drm_intel_bo *cc_state_bo;
	/* WM unit state for every kernel x src/mask filter x wrap combo. */
	drm_intel_bo *wm_state_bo[KERNEL_COUNT]
	    [FILTER_COUNT] [EXTEND_COUNT]
	    [FILTER_COUNT] [EXTEND_COUNT];
	drm_intel_bo *wm_kernel_bo[KERNEL_COUNT];

	drm_intel_bo *cc_vp_bo;
	drm_intel_bo *gen6_blend_bo;
	drm_intel_bo *gen6_depth_stencil_bo;
	drm_intel_bo *ps_sampler_state_bo[FILTER_COUNT]

	gen4_composite_op composite_op;
677 static void gen6_emit_composite_state(struct intel_screen_private *intel);
678 static void gen6_render_state_init(ScrnInfoPtr scrn);
/*
 * Sets up the SF state pointing at an SF kernel.
 *
 * The SF kernel does coord interp: for each attribute,
 * calculate dA/dx and dA/dy. Hand these interpolation coefficients
 * back to SF which then hands pixels off to WM.
 *
 * Returns a new bo holding the brw_sf_unit_state (the `return` falls on an
 * elided line of this fragment, as do the function braces).
 */
static drm_intel_bo *gen4_create_sf_state(intel_screen_private *intel,
					  drm_intel_bo * kernel_bo)
	struct brw_sf_unit_state *sf_state;
	drm_intel_bo *sf_state_bo;

	sf_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 SF state",
					 sizeof(*sf_state), 4096);
	/* NOTE(review): map return value is unchecked — confirm allocation
	 * failure is impossible/fatal here. */
	drm_intel_bo_map(sf_state_bo, TRUE);
	sf_state = sf_state_bo->virtual;

	memset(sf_state, 0, sizeof(*sf_state));
	sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	/* Relocation: kernel start pointer is a 64-byte-aligned GPU offset
	 * (hence the >> 6). */
	sf_state->thread0.kernel_start_pointer =
	    intel_emit_reloc(sf_state_bo,
			     offsetof(struct brw_sf_unit_state, thread0),
			     kernel_bo, sf_state->thread0.grf_reg_count << 1,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;
	sf_state->sf1.single_program_flow = 1;
	sf_state->sf1.binding_table_entry_count = 0;
	sf_state->sf1.thread_priority = 0;
	sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
	sf_state->sf1.illegal_op_exception_enable = 1;
	sf_state->sf1.mask_stack_exception_enable = 1;
	sf_state->sf1.sw_exception_enable = 1;
	sf_state->thread2.per_thread_scratch_space = 0;
	/* scratch space is not used in our kernel */
	sf_state->thread2.scratch_space_base_pointer = 0;
	sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
	sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
	sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
	/* don't smash vertex header, read start from dw8 */
	sf_state->thread3.urb_entry_read_offset = 1;
	sf_state->thread3.dispatch_grf_start_reg = 3;
	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
	sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
	sf_state->sf6.scissor = 0;
	sf_state->sf7.trifan_pv = 2;
	/* Pixel-center bias of 0.5 in 4.4 fixed point. */
	sf_state->sf6.dest_org_vbias = 0x8;
	sf_state->sf6.dest_org_hbias = 0x8;

	drm_intel_bo_unmap(sf_state_bo);
/* Allocate a bo holding the sampler border color — always transparent
 * black, used by the CLAMP_BORDER wrap mode for RepeatNone pictures. */
static drm_intel_bo *sampler_border_color_create(intel_screen_private *intel)
	struct brw_sampler_legacy_border_color sampler_border_color;

	/* Set up the sampler border color (always transparent black) */
	memset(&sampler_border_color, 0, sizeof(sampler_border_color));
	sampler_border_color.color[0] = 0;	/* R */
	sampler_border_color.color[1] = 0;	/* G */
	sampler_border_color.color[2] = 0;	/* B */
	sampler_border_color.color[3] = 0;	/* A */

	return intel_bo_alloc_for_data(intel,
				       &sampler_border_color,
				       sizeof(sampler_border_color),
				       "gen4 render sampler border color");
/* Fill in one brw_sampler_state inside sampler_state_bo (which must be
 * mapped by the caller) for the given filter and wrap mode, pointing its
 * border color at border_color_bo.  NOTE(review): fragment — the return
 * type, `switch (...)` openers, `break;` statements and closing braces
 * fall on elided lines. */
sampler_state_init(drm_intel_bo * sampler_state_bo,
		   struct brw_sampler_state *sampler_state,
		   sampler_state_filter_t filter,
		   sampler_state_extend_t extend,
		   drm_intel_bo * border_color_bo)
	uint32_t sampler_state_offset;

	/* Offset of this sampler within the (mapped) bo, needed for the
	 * border-color relocation below. */
	sampler_state_offset = (char *)sampler_state -
	    (char *)sampler_state_bo->virtual;

	/* PS kernel use this sampler */
	memset(sampler_state, 0, sizeof(*sampler_state));

	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = BRW_BORDER_COLOR_MODE_LEGACY;

	/* Map Render filters onto hardware map filters. */
	case SAMPLER_STATE_FILTER_NEAREST:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
	case SAMPLER_STATE_FILTER_BILINEAR:
		sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
		sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;

	/* Map Render repeat modes onto hardware wrap modes (r/s/t axes). */
	case SAMPLER_STATE_EXTEND_NONE:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
	case SAMPLER_STATE_EXTEND_REPEAT:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
	case SAMPLER_STATE_EXTEND_PAD:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
	case SAMPLER_STATE_EXTEND_REFLECT:
		sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_MIRROR;
		sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_MIRROR;

	/* Border-color pointer is a 32-byte-aligned offset (>> 5). */
	sampler_state->ss2.border_color_pointer =
	    intel_emit_reloc(sampler_state_bo, sampler_state_offset +
			     offsetof(struct brw_sampler_state, ss2),
			     I915_GEM_DOMAIN_SAMPLER, 0) >> 5;

	sampler_state->ss3.chroma_key_enable = 0;	/* disable chromakey */
/* Allocate a bo holding two brw_sampler_state entries — sampler 0 for the
 * source picture, sampler 1 for the mask — configured with the given
 * filter/extend pairs.  Returns the new bo (unmapped).  NOTE(review):
 * fragment — function braces and some argument lines are elided. */
static drm_intel_bo *gen4_create_sampler_state(intel_screen_private *intel,
					       sampler_state_filter_t src_filter,
					       sampler_state_extend_t src_extend,
					       sampler_state_filter_t mask_filter,
					       sampler_state_extend_t mask_extend,
					       drm_intel_bo * border_color_bo)
	drm_intel_bo *sampler_state_bo;
	struct brw_sampler_state *sampler_state;

	    drm_intel_bo_alloc(intel->bufmgr, "gen4 sampler state",
			       sizeof(struct brw_sampler_state) * 2, 4096);
	drm_intel_bo_map(sampler_state_bo, TRUE);
	sampler_state = sampler_state_bo->virtual;

	/* Sampler 0: source. */
	sampler_state_init(sampler_state_bo,
			   src_filter, src_extend, border_color_bo);
	/* Sampler 1: mask. */
	sampler_state_init(sampler_state_bo,
			   mask_filter, mask_extend, border_color_bo);

	drm_intel_bo_unmap(sampler_state_bo);

	return sampler_state_bo;
/* Initialize one brw_cc_unit_state at cc_state_offset inside the (mapped)
 * cc_state_bo: blending enabled with the given src/dst factors, everything
 * else (stencil, depth, logic op, alpha test, dither) disabled.
 * NOTE(review): fragment — the return type, the closing argument of the
 * cc_state pointer computation, and the function braces are elided. */
cc_state_init(drm_intel_bo * cc_state_bo,
	      uint32_t cc_state_offset,
	      int src_blend, int dst_blend, drm_intel_bo * cc_vp_bo)
	struct brw_cc_unit_state *cc_state;

	cc_state = (struct brw_cc_unit_state *)((char *)cc_state_bo->virtual +

	memset(cc_state, 0, sizeof(*cc_state));
	cc_state->cc0.stencil_enable = 0;	/* disable stencil */
	cc_state->cc2.depth_test = 0;	/* disable depth test */
	cc_state->cc2.logicop_enable = 0;	/* disable logic op */
	cc_state->cc3.ia_blend_enable = 0;	/* blend alpha same as colors */
	cc_state->cc3.blend_enable = 1;	/* enable color blend */
	cc_state->cc3.alpha_test = 0;	/* disable alpha test */

	/* Viewport-state pointer is a 32-byte-aligned offset (>> 5). */
	cc_state->cc4.cc_viewport_state_offset =
	    intel_emit_reloc(cc_state_bo, cc_state_offset +
			     offsetof(struct brw_cc_unit_state, cc4),
			     cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;

	cc_state->cc5.dither_enable = 0;	/* disable dither */
	cc_state->cc5.logicop_func = 0xc;	/* COPY */
	cc_state->cc5.statistics_enable = 1;
	cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;

	/* Fill in alpha blend factors same as color, for the future. */
	cc_state->cc5.ia_src_blend_factor = src_blend;
	cc_state->cc5.ia_dest_blend_factor = dst_blend;

	cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
	cc_state->cc6.clamp_post_alpha_blend = 1;
	cc_state->cc6.clamp_pre_alpha_blend = 1;
	cc_state->cc6.clamp_range = 0;	/* clamp range [0,1] */

	cc_state->cc6.src_blend_factor = src_blend;
	cc_state->cc6.dest_blend_factor = dst_blend;
/* Build a brw_wm_unit_state bo pointing at the given PS kernel and sampler
 * state.  NOTE(review): fragment — a parameter line (apparently a has-mask
 * flag, judging by the 2-vs-3 binding-table alternatives below), the
 * function braces, the `return`, and several `if`/`else` lines are elided;
 * consecutive contradictory assignments below are alternatives of elided
 * conditionals. */
static drm_intel_bo *gen4_create_wm_state(intel_screen_private *intel,
					  drm_intel_bo * kernel_bo,
					  drm_intel_bo * sampler_bo)
	struct brw_wm_unit_state *state;
	drm_intel_bo *wm_state_bo;

	wm_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 WM state",
					 sizeof(*state), 4096);
	drm_intel_bo_map(wm_state_bo, TRUE);
	state = wm_state_bo->virtual;

	memset(state, 0, sizeof(*state));
	state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
	/* Kernel start pointer: 64-byte-aligned GPU offset (>> 6). */
	state->thread0.kernel_start_pointer =
	    intel_emit_reloc(wm_state_bo,
			     offsetof(struct brw_wm_unit_state, thread0),
			     kernel_bo, state->thread0.grf_reg_count << 1,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6;

	state->thread1.single_program_flow = 0;

	/* scratch space is not used in our kernel */
	state->thread2.scratch_space_base_pointer = 0;
	state->thread2.per_thread_scratch_space = 0;

	state->thread3.const_urb_entry_read_length = 0;
	state->thread3.const_urb_entry_read_offset = 0;

	state->thread3.urb_entry_read_offset = 0;
	/* wm kernel use urb from 3, see wm_program in compiler module */
	state->thread3.dispatch_grf_start_reg = 3;	/* must match kernel */

	/* NOTE(review): the next two assignments are branches of an elided
	 * conditional (sampler_count encoding differs by generation). */
	state->wm4.sampler_count = 0;	/* hardware requirement */
	state->wm4.sampler_count = 1;	/* 1-4 samplers used */

	/* Sampler-state pointer: 32-byte-aligned offset (>> 5). */
	state->wm4.sampler_state_pointer =
	    intel_emit_reloc(wm_state_bo,
			     offsetof(struct brw_wm_unit_state, wm4),
			     state->wm4.sampler_count << 2,
			     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
	state->wm5.max_threads = PS_MAX_THREADS - 1;
	state->wm5.transposed_urb_read = 0;
	state->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change
	 * kernel start point. */
	state->wm5.enable_16_pix = 1;
	state->wm5.enable_8_pix = 0;
	state->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries.
	 * NOTE(review): the two pairs below are the masked/unmasked branches
	 * of an elided conditional. */
	state->thread1.binding_table_entry_count = 3;	/* 2 tex and fb */
	state->thread3.urb_entry_read_length = 4;
	state->thread1.binding_table_entry_count = 2;	/* 1 tex and fb */
	state->thread3.urb_entry_read_length = 2;

	/* binding table entry count is only used for prefetching, and it has
	 * to be set 0 for Ironlake (guard condition on an elided line). */
	state->thread1.binding_table_entry_count = 0;

	drm_intel_bo_unmap(wm_state_bo);
/* Allocate a bo holding a single brw_cc_viewport with an effectively
 * unbounded depth range (depth testing is disabled for 2D composites).
 * NOTE(review): fragment — the alloc's size/alignment arguments, the
 * `return`, and the function braces are elided. */
static drm_intel_bo *gen4_create_cc_viewport(intel_screen_private *intel)
	struct brw_cc_viewport vp;

	vp.min_depth = -1.e35;
	vp.max_depth = 1.e35;

	bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 render unit state",
	drm_intel_bo_subdata(bo, 0, sizeof(vp), &vp);
/* Build a brw_vs_unit_state bo with the vertex shader disabled
 * (passthrough).  NOTE(review): fragment — the two nr_urb_entries
 * assignments are branches of an elided generation conditional. */
static drm_intel_bo *gen4_create_vs_unit_state(intel_screen_private *intel)
	struct brw_vs_unit_state vs_state;
	memset(&vs_state, 0, sizeof(vs_state));

	/* Set up the vertex shader to be disabled (passthrough) */
	vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;	/* hardware requirement */
	vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	vs_state.vs6.vs_enable = 0;
	vs_state.vs6.vert_cache_disable = 1;

	return intel_bo_alloc_for_data(intel, &vs_state, sizeof(vs_state),
				       "gen4 render VS state");
997 * Set up all combinations of cc state: each blendfactor for source and
1000 static drm_intel_bo *gen4_create_cc_unit_state(intel_screen_private *intel)
1002 drm_intel_bo *cc_state_bo, *cc_vp_bo;
1005 cc_vp_bo = gen4_create_cc_viewport(intel);
1007 cc_state_bo = drm_intel_bo_alloc(intel->bufmgr, "gen4 CC state",
1008 sizeof(struct gen4_cc_unit_state),
1010 drm_intel_bo_map(cc_state_bo, TRUE);
1011 for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
1012 for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
1013 cc_state_init(cc_state_bo,
1014 offsetof(struct gen4_cc_unit_state,
1015 cc_state[i][j].state),
1019 drm_intel_bo_unmap(cc_state_bo);
1021 drm_intel_bo_unreference(cc_vp_bo);
/* Translate a picture's Render format into the BRW_SURFACEFORMAT_* code
 * from i965_tex_formats[].  The format must already have been validated
 * by i965_check_composite_texture() — the assert enforces that. */
static uint32_t i965_get_card_format(PicturePtr picture)
	for (i = 0; i < sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]);
		if (i965_tex_formats[i].fmt == picture->format)

	assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0]));

	return i965_tex_formats[i].card_fmt;
/* Map a Render PictFilter* value onto a sampler_state_filter_t.
 * NOTE(review): fragment — the `switch` opener, `default`, and closing
 * braces fall on elided lines. */
static sampler_state_filter_t sampler_state_filter_from_picture(int filter)
	case PictFilterNearest:
		return SAMPLER_STATE_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_STATE_FILTER_BILINEAR;

/* Map a Render Repeat* value onto a sampler_state_extend_t.
 * NOTE(review): fragment — the `case Repeat*:` labels, `default`, and
 * closing braces fall on elided lines; each return corresponds to one
 * repeat type in None/Normal/Pad/Reflect order. */
static sampler_state_extend_t sampler_state_extend_from_picture(int repeat_type)
	switch (repeat_type) {
		return SAMPLER_STATE_EXTEND_NONE;
		return SAMPLER_STATE_EXTEND_REPEAT;
		return SAMPLER_STATE_EXTEND_PAD;
		return SAMPLER_STATE_EXTEND_REFLECT;
1069 * Sets up the common fields for a surface state buffer for the given
1070 * picture in the given surface state buffer.
1073 i965_set_picture_surface_state(intel_screen_private *intel,
1074 PicturePtr picture, PixmapPtr pixmap,
/* Write a SURFACE_STATE entry for @picture/@pixmap into the CPU-side
 * surface_data staging buffer and record a relocation for its base address.
 * Returns the byte offset of the entry within surface_bo; the offset is
 * cached per-pixmap (dst_bound/src_bound) so repeated bindings are free. */
1077 struct intel_pixmap *priv = intel_get_pixmap_private(pixmap);
1078 struct brw_surface_state *ss;
1079 uint32_t write_domain, read_domains;
/* Destinations are written by the render cache; sources are only sampled. */
1083 write_domain = I915_GEM_DOMAIN_RENDER;
1084 read_domains = I915_GEM_DOMAIN_RENDER;
1087 read_domains = I915_GEM_DOMAIN_SAMPLER;
1089 intel_batch_mark_pixmap_domains(intel, priv,
1090 read_domains, write_domain);
/* Fast path: reuse an already-bound surface state for this pixmap. */
1092 if (priv->dst_bound)
1093 return priv->dst_bound;
1095 if (priv->src_bound)
1096 return priv->src_bound;
1099 ss = (struct brw_surface_state *)
1100 (intel->surface_data + intel->surface_used);
1102 memset(ss, 0, sizeof(*ss));
1103 ss->ss0.surface_type = BRW_SURFACE_2D;
/* Destination uses the renderable format, source the sampleable one —
 * presumably selected by an is_dst branch not visible here; confirm. */
1105 ss->ss0.surface_format = i965_get_dest_format(picture);
1107 ss->ss0.surface_format = i965_get_card_format(picture);
1109 ss->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
1110 ss->ss0.color_blend = 1;
1111 ss->ss1.base_addr = priv->bo->offset;
/* Hardware encodes width/height/pitch as value-minus-one. */
1113 ss->ss2.height = pixmap->drawable.height - 1;
1114 ss->ss2.width = pixmap->drawable.width - 1;
1115 ss->ss3.pitch = intel_pixmap_pitch(pixmap) - 1;
1116 ss->ss3.tile_walk = 0; /* Tiled X */
1117 ss->ss3.tiled_surface = intel_pixmap_tiled(pixmap) ? 1 : 0;
/* Relocate ss1.base_addr so the kernel patches in the bo's final address. */
1119 dri_bo_emit_reloc(intel->surface_bo,
1120 read_domains, write_domain,
1122 intel->surface_used +
1123 offsetof(struct brw_surface_state, ss1),
1126 offset = intel->surface_used;
1127 intel->surface_used += sizeof(struct brw_surface_state_padded);
/* Cache the binding for this pixmap until the next surface flush. */
1130 priv->dst_bound = offset;
1132 priv->src_bound = offset;
1137 static void gen4_composite_vertex_elements(struct intel_screen_private *intel)
/* Emit 3DSTATE_VERTEX_ELEMENTS describing the layout of the composite
 * vertex buffer: destination x/y, then source (and optionally mask)
 * texture coordinates, 2 floats for affine or 3 for projective.  The
 * layout is cached via composite_op->vertex_id so identical configurations
 * skip re-emission. */
1139 struct gen4_render_state *render_state = intel->gen4_render_state;
1140 gen4_composite_op *composite_op = &render_state->composite_op;
1141 Bool has_mask = intel->render_mask != NULL;
1142 Bool is_affine = composite_op->is_affine;
1144 * number of extra parameters per vertex
1146 int nelem = has_mask ? 2 : 1;
1148 * size of extra parameters:
1149 * 3 for homogenous (xyzw)
1150 * 2 for cartesian (xy)
1152 int selem = is_affine ? 2 : 3;
1153 uint32_t w_component;
1154 uint32_t src_format;
/* vertex_id packs (has_mask, is_affine) into 2 bits — unique per layout. */
1157 id = has_mask << 1 | is_affine;
1159 if (composite_op->vertex_id == id)
1162 composite_op->vertex_id = id;
/* Affine coords are xy pairs with w forced to 1.0; projective coords are
 * xyw triples with w taken from the buffer.  (The if/else around these
 * assignments is not visible in this excerpt.) */
1165 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
1166 w_component = BRW_VFCOMPONENT_STORE_1_FLT;
1168 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
1169 w_component = BRW_VFCOMPONENT_STORE_SRC;
1172 if (IS_GEN5(intel)) {
1174 * The reason to add this extra vertex element in the header is that
1175 * Ironlake has different vertex header definition and origin method to
1176 * set destination element offset doesn't exist anymore, which means
1177 * hardware requires a predefined vertex element layout.
1179 * haihao proposed this approach to fill the first vertex element, so
1180 * origin layout for Gen4 doesn't need to change, and origin shader
1181 * programs behavior is also kept.
1183 * I think this is not bad. - zhenyu
/* Gen5: one extra leading element (all components STORE_0) to satisfy the
 * fixed vertex header layout described above. */
1186 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
1187 ((2 * (2 + nelem)) - 1));
1188 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
1189 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1190 (0 << VE0_OFFSET_SHIFT));
1192 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
1193 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
1194 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
1195 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
1197 /* Set up our vertex elements, sourced from the single vertex buffer.
1198 * that will be set up later.
1200 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
1201 ((2 * (1 + nelem)) - 1));
/* Element 0: destination x/y at vb offset 0. */
1205 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
1206 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1207 (0 << VE0_OFFSET_SHIFT));
/* Gen4 and Gen5 variants of the x/y element's component dword — Gen5 lacks
 * the destination-element-offset field (gen branch not visible here). */
1210 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1211 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1212 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1213 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1215 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1216 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1217 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1218 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1219 (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
/* Element 1: source texcoords, starting after the two dest floats. */
1221 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
1222 (src_format << VE0_FORMAT_SHIFT) |
1223 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
1226 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1227 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1228 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
1229 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1231 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1232 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1233 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
1234 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1235 ((4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
/* Element 2 (mask present only): mask texcoords after the source coords. */
1238 OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
1239 (src_format << VE0_FORMAT_SHIFT) |
1240 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
1243 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1244 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1245 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
1246 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1248 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1249 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1250 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
1251 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1252 ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
1256 static void i965_emit_composite_state(struct intel_screen_private *intel)
/* Emit the full Gen4/Gen5 3D pipeline setup for the pending composite
 * operation: invariant state, base addresses, binding table, drawing
 * rectangle, pipelined state pointers (VS/SF/WM/CC), URB fences, constant
 * buffer state and vertex elements.  Called once per batch when
 * needs_render_state_emit is set. */
1258 struct gen4_render_state *render_state = intel->gen4_render_state;
1259 gen4_composite_op *composite_op = &render_state->composite_op;
1260 int op = composite_op->op;
1261 PicturePtr mask_picture = intel->render_mask_picture;
1262 PicturePtr dest_picture = intel->render_dest_picture;
1263 PixmapPtr mask = intel->render_mask;
1264 PixmapPtr dest = intel->render_dest;
1265 sampler_state_filter_t src_filter = composite_op->src_filter;
1266 sampler_state_filter_t mask_filter = composite_op->mask_filter;
1267 sampler_state_extend_t src_extend = composite_op->src_extend;
1268 sampler_state_extend_t mask_extend = composite_op->mask_extend;
1269 uint32_t src_blend, dst_blend;
1271 intel->needs_render_state_emit = FALSE;
1273 /* Begin the long sequence of commands needed to set up the 3D
1277 if (intel->needs_3d_invariant) {
1278 if (IS_GEN5(intel)) {
1279 /* Ironlake errata workaround: Before disabling the clipper,
1280 * you have to MI_FLUSH to get the pipeline idle.
1282 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
1285 /* Match Mesa driver setup */
1286 if (INTEL_INFO(intel)->gen >= 45)
1287 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1289 OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1291 /* Set system instruction pointer */
1292 OUT_BATCH(BRW_STATE_SIP | 0);
1295 intel->needs_3d_invariant = FALSE;
/* Emit STATE_BASE_ADDRESS once per batch; surface_reloc remembers where
 * the surface-state base address dword sits so i965_surface_flush can
 * attach the relocation after the surface bo is finalized. */
1298 if (intel->surface_reloc == 0) {
1299 /* Zero out the two base address registers so all offsets are
1302 if (IS_GEN5(intel)) {
1303 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6);
1304 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
1305 intel->surface_reloc = intel->batch_used;
1306 intel_batch_emit_dword(intel,
1307 intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
1308 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
1309 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */
1310 /* general state max addr, disabled */
1311 OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
1312 /* media object state max addr, disabled */
1313 OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
1314 /* Instruction max addr, disabled */
1315 OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
1317 OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
1318 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
1319 intel->surface_reloc = intel->batch_used;
1320 intel_batch_emit_dword(intel,
1321 intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
1322 OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
1323 /* general state max addr, disabled */
1324 OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
1325 /* media object state max addr, disabled */
1326 OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
/* Resolve the Porter-Duff op + formats into hardware blend factors. */
1330 i965_get_blend_cntl(op, mask_picture, dest_picture->format,
1331 &src_blend, &dst_blend);
1333 /* Binding table pointers */
1334 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
1335 OUT_BATCH(0); /* vs */
1336 OUT_BATCH(0); /* gs */
1337 OUT_BATCH(0); /* clip */
1338 OUT_BATCH(0); /* sf */
1339 /* Only the PS uses the binding table */
1340 OUT_BATCH(intel->surface_table);
1342 /* The drawing rectangle clipping is always on. Set it to values that
1343 * shouldn't do any clipping.
1345 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2);
1346 OUT_BATCH(0x00000000); /* ymin, xmin */
1347 OUT_BATCH(DRAW_YMAX(dest->drawable.height - 1) |
1348 DRAW_XMAX(dest->drawable.width - 1)); /* ymax, xmax */
1349 OUT_BATCH(0x00000000); /* yorigin, xorigin */
1351 /* skip the depth buffer */
1352 /* skip the polygon stipple */
1353 /* skip the polygon stipple offset */
1354 /* skip the line stipple */
1356 /* Set the pointers to the 3d pipeline state */
1357 OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
1358 OUT_RELOC(render_state->vs_state_bo,
1359 I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1360 OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
1361 OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
/* SF state: the masked variant when a mask picture is bound, otherwise the
 * plain one (the selecting if/else is not visible in this excerpt). */
1363 OUT_RELOC(render_state->sf_mask_state_bo,
1364 I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1366 OUT_RELOC(render_state->sf_state_bo,
1367 I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* WM state indexed by kernel and src/mask filter+extend; CC state indexed
 * by the blend factor pair computed above. */
1370 OUT_RELOC(render_state->wm_state_bo[composite_op->wm_kernel]
1371 [src_filter][src_extend]
1372 [mask_filter][mask_extend],
1373 I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1375 OUT_RELOC(render_state->cc_state_bo,
1376 I915_GEM_DOMAIN_INSTRUCTION, 0,
1377 offsetof(struct gen4_cc_unit_state,
1378 cc_state[src_blend][dst_blend]));
/* Partition the URB: VS, GS, CLIP, SF and CS regions laid out back to
 * back, each ENTRIES * ENTRY_SIZE rows. */
1381 int urb_vs_start, urb_vs_size;
1382 int urb_gs_start, urb_gs_size;
1383 int urb_clip_start, urb_clip_size;
1384 int urb_sf_start, urb_sf_size;
1385 int urb_cs_start, urb_cs_size;
1388 urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1389 urb_gs_start = urb_vs_start + urb_vs_size;
1390 urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1391 urb_clip_start = urb_gs_start + urb_gs_size;
1392 urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1393 urb_sf_start = urb_clip_start + urb_clip_size;
1394 urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1395 urb_cs_start = urb_sf_start + urb_sf_size;
1396 urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1398 /* Erratum (Vol 1a, p32):
1399 * URB_FENCE must not cross a cache-line (64 bytes).
1401 if ((intel->batch_used & 15) > (16 - 3)) {
1402 int cnt = 16 - (intel->batch_used & 15);
1407 OUT_BATCH(BRW_URB_FENCE |
1414 OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1415 ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1416 ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1417 OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1418 ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1420 /* Constant buffer state */
1421 OUT_BATCH(BRW_CS_URB_STATE | 0);
1422 OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
1423 (URB_CS_ENTRIES << 0));
1426 gen4_composite_vertex_elements(intel);
1430 * Returns whether the current set of composite state plus vertex buffer is
1431 * expected to fit in the aperture.
1433 static Bool i965_composite_check_aperture(intel_screen_private *intel)
/* Ask libdrm whether all bos referenced by the pending composite (state
 * bos plus vertex buffer) will fit in the GTT aperture together with the
 * current batch.  Uses a different bo table for gen6+ since that path
 * references different state objects.  Returns TRUE if they fit. */
1435 struct gen4_render_state *render_state = intel->gen4_render_state;
1436 gen4_composite_op *composite_op = &render_state->composite_op;
1437 drm_intel_bo *bo_table[] = {
1441 render_state->vs_state_bo,
1442 render_state->sf_state_bo,
1443 render_state->sf_mask_state_bo,
1444 render_state->wm_state_bo[composite_op->wm_kernel]
1445 [composite_op->src_filter]
1446 [composite_op->src_extend]
1447 [composite_op->mask_filter]
1448 [composite_op->mask_extend],
1449 render_state->cc_state_bo,
1451 drm_intel_bo *gen6_bo_table[] = {
1455 render_state->wm_kernel_bo[composite_op->wm_kernel],
1456 render_state->ps_sampler_state_bo[composite_op->src_filter]
1457 [composite_op->src_extend]
1458 [composite_op->mask_filter]
1459 [composite_op->mask_extend],
1460 render_state->cc_vp_bo,
1461 render_state->cc_state_bo,
1462 render_state->gen6_blend_bo,
1463 render_state->gen6_depth_stencil_bo,
/* check_aperture_space returns 0 when everything fits. */
1466 if (INTEL_INFO(intel)->gen >= 60)
1467 return drm_intel_bufmgr_check_aperture_space(gen6_bo_table,
1468 ARRAY_SIZE(gen6_bo_table)) == 0;
1470 return drm_intel_bufmgr_check_aperture_space(bo_table,
1471 ARRAY_SIZE(bo_table)) == 0;
1474 static void i965_surface_flush(struct intel_screen_private *intel)
/* Upload the staged surface-state/binding-table data into surface_bo,
 * patch the STATE_BASE_ADDRESS relocation recorded earlier, then swap in
 * a fresh surface bo and invalidate all per-pixmap binding caches. */
1476 struct intel_pixmap *priv;
1478 drm_intel_bo_subdata(intel->surface_bo,
1479 0, intel->surface_used,
1480 intel->surface_data);
1481 intel->surface_used = 0;
/* surface_reloc was set by the STATE_BASE_ADDRESS emit; attach the real
 * relocation for the surface-state base address dword now. */
1483 assert (intel->surface_reloc != 0);
1484 drm_intel_bo_emit_reloc(intel->batch_bo,
1485 intel->surface_reloc * 4,
1486 intel->surface_bo, BASE_ADDRESS_MODIFY,
1487 I915_GEM_DOMAIN_INSTRUCTION, 0);
1488 intel->surface_reloc = 0;
/* Retire the filled bo and allocate a new one for the next round. */
1490 drm_intel_bo_unreference(intel->surface_bo);
1492 drm_intel_bo_alloc(intel->bufmgr, "surface data",
1493 sizeof(intel->surface_data), 4096);
/* Cached bindings referenced the old bo's offsets — drop them all. */
1495 list_foreach_entry(priv, struct intel_pixmap, &intel->batch_pixmaps, batch)
1496 priv->dst_bound = priv->src_bound = 0;
1500 i965_emit_composite_primitive_identity_source(intel_screen_private *intel,
/* Fast-path vertex emission for an untransformed source and no mask: a
 * RECTLIST of three vertices (bottom-right, bottom-left, top-left), each
 * dest x/y plus source texcoords scaled to [0,1] by scale_units. */
1502 int maskX, int maskY,
/* Vertex 0: bottom-right corner. */
1506 OUT_VERTEX(dstX + w);
1507 OUT_VERTEX(dstY + h);
1508 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1509 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
/* Vertex 1: bottom-left corner. */
1512 OUT_VERTEX(dstY + h);
1513 OUT_VERTEX(srcX * intel->scale_units[0][0]);
1514 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
/* Vertex 2: top-left corner. */
1518 OUT_VERTEX(srcX * intel->scale_units[0][0]);
1519 OUT_VERTEX(srcY * intel->scale_units[0][1]);
1523 i965_emit_composite_primitive_affine_source(intel_screen_private *intel,
/* Vertex emission for an affine-transformed source, no mask: transform the
 * three rectangle corners through transform[0] on the CPU, then emit dest
 * x/y plus the transformed, normalized source coordinates. */
1525 int maskX, int maskY,
1529 float src_x[3], src_y[3];
/* Corner 0: top-left; bail silently if the transform fails. */
1531 if (!intel_get_transformed_coordinates(srcX, srcY,
1532 intel->transform[0],
/* Corner 1: bottom-left. */
1537 if (!intel_get_transformed_coordinates(srcX, srcY + h,
1538 intel->transform[0],
/* Corner 2: bottom-right. */
1543 if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
1544 intel->transform[0],
/* Emit vertices in the same bottom-right, bottom-left, top-left order as
 * the identity path. */
1549 OUT_VERTEX(dstX + w);
1550 OUT_VERTEX(dstY + h);
1551 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
1552 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
1555 OUT_VERTEX(dstY + h);
1556 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
1557 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
1561 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
1562 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
1566 i965_emit_composite_primitive_identity_source_mask(intel_screen_private *intel,
/* Fast-path vertex emission when both source and mask are untransformed:
 * each of the three vertices carries dest x/y, normalized source coords
 * (scale_units[0]) and normalized mask coords (scale_units[1]). */
1568 int maskX, int maskY,
/* Vertex 0: bottom-right corner. */
1572 OUT_VERTEX(dstX + w);
1573 OUT_VERTEX(dstY + h);
1574 OUT_VERTEX((srcX + w) * intel->scale_units[0][0]);
1575 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1576 OUT_VERTEX((maskX + w) * intel->scale_units[1][0]);
1577 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
/* Vertex 1: bottom-left corner. */
1580 OUT_VERTEX(dstY + h);
1581 OUT_VERTEX(srcX * intel->scale_units[0][0]);
1582 OUT_VERTEX((srcY + h) * intel->scale_units[0][1]);
1583 OUT_VERTEX(maskX * intel->scale_units[1][0]);
1584 OUT_VERTEX((maskY + h) * intel->scale_units[1][1]);
/* Vertex 2: top-left corner. */
1588 OUT_VERTEX(srcX * intel->scale_units[0][0]);
1589 OUT_VERTEX(srcY * intel->scale_units[0][1]);
1590 OUT_VERTEX(maskX * intel->scale_units[1][0]);
1591 OUT_VERTEX(maskY * intel->scale_units[1][1]);
1595 i965_emit_composite_primitive(intel_screen_private *intel,
/* General-case vertex emission: handles transformed source and/or mask,
 * affine (2D) or projective (3D, with w) coordinates.  Transforms the
 * three rectangle corners on the CPU, then emits a RECTLIST vertex triple
 * with dest x/y, source coords (+w if projective) and optionally mask
 * coords (+w). */
1597 int maskX, int maskY,
1601 float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3];
1602 Bool is_affine = intel->gen4_render_state->composite_op.is_affine;
/* Solid sources need no texcoords; otherwise transform the corners,
 * 2D for affine or 3D (producing w) for projective. */
1604 if (! intel->render_source_is_solid) {
1606 if (!intel_get_transformed_coordinates(srcX, srcY,
1607 intel->transform[0],
1612 if (!intel_get_transformed_coordinates(srcX, srcY + h,
1613 intel->transform[0],
1618 if (!intel_get_transformed_coordinates(srcX + w, srcY + h,
1619 intel->transform[0],
1624 if (!intel_get_transformed_coordinates_3d(srcX, srcY,
1625 intel->transform[0],
1631 if (!intel_get_transformed_coordinates_3d(srcX, srcY + h,
1632 intel->transform[0],
1638 if (!intel_get_transformed_coordinates_3d(srcX + w, srcY + h,
1639 intel->transform[0],
/* Same corner transforms for the mask, when present. */
1647 if (intel->render_mask) {
1649 if (!intel_get_transformed_coordinates(maskX, maskY,
1650 intel->transform[1],
1655 if (!intel_get_transformed_coordinates(maskX, maskY + h,
1656 intel->transform[1],
1661 if (!intel_get_transformed_coordinates(maskX + w, maskY + h,
1662 intel->transform[1],
1667 if (!intel_get_transformed_coordinates_3d(maskX, maskY,
1668 intel->transform[1],
1674 if (!intel_get_transformed_coordinates_3d(maskX, maskY + h,
1675 intel->transform[1],
1681 if (!intel_get_transformed_coordinates_3d(maskX + w, maskY + h,
1682 intel->transform[1],
/* Vertex 0: bottom-right; src_w/mask_w only emitted for projective. */
1690 OUT_VERTEX(dstX + w);
1691 OUT_VERTEX(dstY + h);
1692 OUT_VERTEX(src_x[2] * intel->scale_units[0][0]);
1693 OUT_VERTEX(src_y[2] * intel->scale_units[0][1]);
1695 OUT_VERTEX(src_w[2]);
1696 if (intel->render_mask) {
1697 OUT_VERTEX(mask_x[2] * intel->scale_units[1][0]);
1698 OUT_VERTEX(mask_y[2] * intel->scale_units[1][1]);
1700 OUT_VERTEX(mask_w[2]);
/* Vertex 1: bottom-left. */
1704 OUT_VERTEX(dstY + h);
1705 OUT_VERTEX(src_x[1] * intel->scale_units[0][0]);
1706 OUT_VERTEX(src_y[1] * intel->scale_units[0][1]);
1708 OUT_VERTEX(src_w[1]);
1709 if (intel->render_mask) {
1710 OUT_VERTEX(mask_x[1] * intel->scale_units[1][0]);
1711 OUT_VERTEX(mask_y[1] * intel->scale_units[1][1]);
1713 OUT_VERTEX(mask_w[1]);
/* Vertex 2: top-left. */
1718 OUT_VERTEX(src_x[0] * intel->scale_units[0][0]);
1719 OUT_VERTEX(src_y[0] * intel->scale_units[0][1]);
1721 OUT_VERTEX(src_w[0]);
1722 if (intel->render_mask) {
1723 OUT_VERTEX(mask_x[0] * intel->scale_units[1][0]);
1724 OUT_VERTEX(mask_y[0] * intel->scale_units[1][1]);
1726 OUT_VERTEX(mask_w[0]);
1731 i965_prepare_composite(int op, PicturePtr source_picture,
1732 PicturePtr mask_picture, PicturePtr dest_picture,
1733 PixmapPtr source, PixmapPtr mask, PixmapPtr dest)
/* EXA/UXA prepare hook: validate the composite request (filters, repeats,
 * component alpha), cache everything the per-rect emit path needs
 * (pictures, pixmaps, transforms, scale factors, WM kernel choice,
 * prim_emit fast path) and make sure batch/aperture/surface space is
 * available.  Returns TRUE on success, FALSE to fall back to software. */
1735 ScrnInfoPtr scrn = xf86Screens[dest_picture->pDrawable->pScreen->myNum];
1736 intel_screen_private *intel = intel_get_screen_private(scrn);
1737 struct gen4_render_state *render_state = intel->gen4_render_state;
1738 gen4_composite_op *composite_op = &render_state->composite_op;
/* Reject source filter/repeat modes the sampler cannot express. */
1740 composite_op->src_filter =
1741 sampler_state_filter_from_picture(source_picture->filter);
1742 if (composite_op->src_filter < 0) {
1743 intel_debug_fallback(scrn, "Bad src filter 0x%x\n",
1744 source_picture->filter);
1747 composite_op->src_extend =
1748 sampler_state_extend_from_picture(source_picture->repeatType);
1749 if (composite_op->src_extend < 0) {
1750 intel_debug_fallback(scrn, "Bad src repeat 0x%x\n",
1751 source_picture->repeatType);
1756 if (mask_picture->componentAlpha &&
1757 PICT_FORMAT_RGB(mask_picture->format)) {
1758 /* Check if it's component alpha that relies on a source alpha and on
1759 * the source value. We can only get one of those into the single
1760 * source value that we get to blend with.
1762 if (i965_blend_op[op].src_alpha &&
1763 (i965_blend_op[op].src_blend != BRW_BLENDFACTOR_ZERO)) {
1764 intel_debug_fallback(scrn,
1765 "Component alpha not supported "
1766 "with source alpha and source "
1767 "value blending.\n");
/* Validate the mask sampler settings when a mask picture is bound;
 * otherwise use harmless defaults so the state lookup tables index
 * cleanly (the if/else header is elided in this excerpt). */
1772 composite_op->mask_filter =
1773 sampler_state_filter_from_picture(mask_picture->filter);
1774 if (composite_op->mask_filter < 0) {
1775 intel_debug_fallback(scrn, "Bad mask filter 0x%x\n",
1776 mask_picture->filter);
1779 composite_op->mask_extend =
1780 sampler_state_extend_from_picture(mask_picture->repeatType);
1781 if (composite_op->mask_extend < 0) {
1782 intel_debug_fallback(scrn, "Bad mask repeat 0x%x\n",
1783 mask_picture->repeatType);
1787 composite_op->mask_filter = SAMPLER_STATE_FILTER_NEAREST;
1788 composite_op->mask_extend = SAMPLER_STATE_EXTEND_NONE;
1791 /* Flush any pending writes prior to relocating the textures. */
1792 if (intel_pixmap_is_dirty(source) ||
1793 (mask && intel_pixmap_is_dirty(mask)))
1794 intel_batch_emit_flush(scrn);
/* Stash the operands for the per-rectangle emit callbacks. */
1796 composite_op->op = op;
1797 intel->render_source_picture = source_picture;
1798 intel->render_mask_picture = mask_picture;
1799 intel->render_dest_picture = dest_picture;
1800 intel->render_source = source;
1801 intel->render_mask = mask;
1802 intel->render_dest = dest;
/* scale_units convert pixel coords to the sampler's [0,1] texture space. */
1804 intel->scale_units[0][0] = 1. / source->drawable.width;
1805 intel->scale_units[0][1] = 1. / source->drawable.height;
1807 intel->transform[0] = source_picture->transform;
1808 composite_op->is_affine = intel_transform_is_affine(intel->transform[0]);
1811 intel->transform[1] = NULL;
1812 intel->scale_units[1][0] = -1;
1813 intel->scale_units[1][1] = -1;
1815 intel->transform[1] = mask_picture->transform;
1816 intel->scale_units[1][0] = 1. / mask->drawable.width;
1817 intel->scale_units[1][1] = 1. / mask->drawable.height;
/* Projective if EITHER transform is non-affine. */
1818 composite_op->is_affine &=
1819 intel_transform_is_affine(intel->transform[1]);
/* Pick the WM kernel: mask/no-mask, component-alpha (with or without
 * source alpha) and affine/projective variants. */
1823 if (mask_picture->componentAlpha &&
1824 PICT_FORMAT_RGB(mask_picture->format)) {
1825 if (i965_blend_op[op].src_alpha) {
1826 if (composite_op->is_affine)
1827 composite_op->wm_kernel =
1828 WM_KERNEL_MASKCA_SRCALPHA_AFFINE;
1830 composite_op->wm_kernel =
1831 WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE;
1833 if (composite_op->is_affine)
1834 composite_op->wm_kernel =
1835 WM_KERNEL_MASKCA_AFFINE;
1837 composite_op->wm_kernel =
1838 WM_KERNEL_MASKCA_PROJECTIVE;
1841 if (composite_op->is_affine)
1842 composite_op->wm_kernel =
1843 WM_KERNEL_MASKNOCA_AFFINE;
1845 composite_op->wm_kernel =
1846 WM_KERNEL_MASKNOCA_PROJECTIVE;
1849 if (composite_op->is_affine)
1850 composite_op->wm_kernel = WM_KERNEL_NOMASK_AFFINE;
1852 composite_op->wm_kernel = WM_KERNEL_NOMASK_PROJECTIVE;
/* Default to the general emitter, then swap in a faster specialized one
 * when the transforms allow it. */
1855 intel->prim_emit = i965_emit_composite_primitive;
1857 if (intel->transform[0] == NULL)
1858 intel->prim_emit = i965_emit_composite_primitive_identity_source;
1859 else if (composite_op->is_affine)
1860 intel->prim_emit = i965_emit_composite_primitive_affine_source;
1862 if (intel->transform[0] == NULL && intel->transform[1] == NULL)
1863 intel->prim_emit = i965_emit_composite_primitive_identity_source_mask;
/* 2 dest floats + per-texcoord 2 (affine) or 3 (projective) floats,
 * doubled when a mask is present. */
1866 intel->floats_per_vertex =
1867 2 + (mask ? 2 : 1) * (composite_op->is_affine ? 2: 3);
/* If the bos won't fit, flush the batch and retry once; fall back to
 * software if a fresh batch still can't fit the operation. */
1869 if (!i965_composite_check_aperture(intel)) {
1870 intel_batch_submit(scrn);
1871 if (!i965_composite_check_aperture(intel)) {
1872 intel_debug_fallback(scrn,
1873 "Couldn't fit render operation "
/* Ensure room for up to 4 surface-state entries (dest, src, mask, table). */
1879 if (sizeof(intel->surface_data) - intel->surface_used <
1880 4 * sizeof(struct brw_surface_state_padded))
1881 i965_surface_flush(intel);
1883 intel->needs_render_state_emit = TRUE;
1888 static void i965_select_vertex_buffer(struct intel_screen_private *intel)
/* Emit 3DSTATE_VERTEX_BUFFERS pointing at the current vertex bo for the
 * active vertex layout (vertex_id).  A bitmask in intel->vertex_id skips
 * re-emission for layouts already bound in this batch. */
1890 int id = intel->gen4_render_state->composite_op.vertex_id;
1892 if (intel->vertex_id & (1 << id))
1895 /* Set up the pointer to our (single) vertex buffer */
1896 OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
1898 /* XXX could use multiple vbo to reduce relocations if
1899 * frequently switching between vertex sizes, like rgb10text.
1901 if (INTEL_INFO(intel)->gen >= 60) {
1902 OUT_BATCH((id << GEN6_VB0_BUFFER_INDEX_SHIFT) |
1903 GEN6_VB0_VERTEXDATA |
1904 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
1906 OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) |
1908 (4*intel->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
1910 OUT_RELOC(intel->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
/* Gen5+ wants an explicit end-address relocation; older gens take a
 * vertex count instead (the else arm is elided in this excerpt). */
1911 if (INTEL_INFO(intel)->gen >= 50)
1912 OUT_RELOC(intel->vertex_bo,
1913 I915_GEM_DOMAIN_VERTEX, 0,
1914 sizeof(intel->vertex_ptr) - 1);
1917 OUT_BATCH(0); // ignore for VERTEXDATA, but still there
/* Remember this layout is bound for the rest of the batch. */
1919 intel->vertex_id |= 1 << id;
1922 static void i965_bind_surfaces(struct intel_screen_private *intel)
/* Build the binding table for the pending composite: reserve one padded
 * slot for the table itself, then fill it with the surface-state offsets
 * of dest, source and (optionally) mask.  prepare_composite guaranteed
 * the staging buffer has room for all four entries. */
1924 uint32_t *binding_table;
1926 assert(intel->surface_used + 4 * sizeof(struct brw_surface_state_padded) <= sizeof(intel->surface_data));
1928 binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
1929 intel->surface_table = intel->surface_used;
1930 intel->surface_used += sizeof(struct brw_surface_state_padded);
/* Slot 0: destination (render target); slot 1: source; slot 2: mask. */
1933 i965_set_picture_surface_state(intel,
1934 intel->render_dest_picture,
1938 i965_set_picture_surface_state(intel,
1939 intel->render_source_picture,
1940 intel->render_source,
1942 if (intel->render_mask) {
1944 i965_set_picture_surface_state(intel,
1945 intel->render_mask_picture,
1952 i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
1953 int dstX, int dstY, int w, int h)
/* EXA/UXA per-rectangle composite hook: (re)emit pipeline state if needed,
 * keep the vertex buffer aligned and non-overflowing, open a 3DPRIMITIVE
 * RECTLIST if none is pending, then append one 3-vertex rectangle via the
 * prim_emit callback selected in prepare_composite. */
1955 ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
1956 intel_screen_private *intel = intel_get_screen_private(scrn);
/* Atomic section: the state + primitive must not be split across batches. */
1958 intel_batch_start_atomic(scrn, 200);
1959 if (intel->needs_render_state_emit) {
1960 i965_bind_surfaces(intel);
1962 if (INTEL_INFO(intel)->gen >= 60)
1963 gen6_emit_composite_state(intel);
1965 i965_emit_composite_state(intel);
/* Vertex stride changed: realign vertex_index to the new stride. */
1968 if (intel->floats_per_vertex != intel->last_floats_per_vertex) {
1969 intel->vertex_index = (intel->vertex_used + intel->floats_per_vertex - 1) / intel->floats_per_vertex;
1970 intel->vertex_used = intel->vertex_index * intel->floats_per_vertex;
1971 intel->last_floats_per_vertex = intel->floats_per_vertex;
/* Not enough room for 3 more vertices: flush and start a fresh vbo. */
1973 if (intel_vertex_space(intel) < 3*4*intel->floats_per_vertex) {
1974 i965_vertex_flush(intel);
1975 intel_next_vertex(intel);
1976 intel->vertex_index = 0;
1978 i965_select_vertex_buffer(intel);
/* No primitive open: emit the 3DPRIMITIVE header, leaving the vertex
 * count dword to be patched by i965_vertex_flush. */
1980 if (intel->vertex_offset == 0) {
1981 OUT_BATCH(BRW_3DPRIMITIVE |
1982 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
1983 (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
1986 intel->vertex_offset = intel->batch_used;
1987 OUT_BATCH(0); /* vertex count, to be filled in later */
1988 OUT_BATCH(intel->vertex_index);
1989 OUT_BATCH(1); /* single instance */
1990 OUT_BATCH(0); /* start instance location */
1991 OUT_BATCH(0); /* index buffer offset, ignored */
1992 intel->vertex_count = intel->vertex_index;
1995 intel->prim_emit(intel,
2000 intel->vertex_index += 3;
/* Pre-Ironlake needs an explicit flush after each rect (errata-style
 * workaround); the guarding condition is partly elided here. */
2002 if (INTEL_INFO(intel)->gen < 50) {
2004 i965_vertex_flush(intel);
2005 OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
2008 intel_batch_end_atomic(scrn);
2011 void i965_batch_commit_notify(intel_screen_private *intel)
/* Batch-submission callback: all emitted GPU state is gone with the old
 * batch, so mark everything dirty for re-emission and reset the cached
 * vertex/render bookkeeping. */
2013 intel->needs_render_state_emit = TRUE;
2014 intel->needs_3d_invariant = TRUE;
2015 intel->last_floats_per_vertex = 0;
2016 intel->vertex_index = 0;
/* -1 forces gen4_composite_vertex_elements to re-emit the layout. */
2018 intel->gen4_render_state->composite_op.vertex_id = -1;
/* Invalidate all gen6 cached-state comparisons. */
2020 intel->gen6_render_state.num_sf_outputs = 0;
2021 intel->gen6_render_state.samplers = NULL;
2022 intel->gen6_render_state.blend = -1;
2023 intel->gen6_render_state.kernel = NULL;
2024 intel->gen6_render_state.drawrect = -1;
/* A pending surface relocation would have been lost with the batch. */
2026 assert(intel->surface_reloc == 0);
2030 * Called at EnterVT so we can set up our offsets into the state buffer.
2032 void gen4_render_state_init(ScrnInfoPtr scrn)
/* EnterVT hook: allocate the surface staging bo and the gen4_render_state,
 * then precompute every static state bo the composite path will index at
 * runtime — VS state, two SF states (mask / no mask), the WM kernels, one
 * WM state per (kernel, src filter/extend, mask filter/extend) tuple, and
 * the CC state table.  Gen6+ delegates to gen6_render_state_init. */
2034 intel_screen_private *intel = intel_get_screen_private(scrn);
2035 struct gen4_render_state *render;
2036 const struct wm_kernel_info *wm_kernels;
2038 drm_intel_bo *sf_kernel_bo, *sf_kernel_mask_bo;
2039 drm_intel_bo *border_color_bo;
2041 intel->needs_3d_invariant = TRUE;
2044 drm_intel_bo_alloc(intel->bufmgr, "surface data",
2045 sizeof(intel->surface_data), 4096);
2046 intel->surface_used = 0;
2048 if (intel->gen4_render_state == NULL)
2049 intel->gen4_render_state = calloc(sizeof(*render), 1);
2051 if (INTEL_INFO(intel)->gen >= 60)
2052 return gen6_render_state_init(scrn);
2054 render = intel->gen4_render_state;
2055 render->composite_op.vertex_id = -1;
2057 render->vs_state_bo = gen4_create_vs_unit_state(intel);
2059 /* Set up the two SF states (one for blending with a mask, one without) */
2060 if (IS_GEN5(intel)) {
2061 sf_kernel_bo = intel_bo_alloc_for_data(intel,
2062 sf_kernel_static_gen5,
2064 (sf_kernel_static_gen5),
2067 intel_bo_alloc_for_data(intel, sf_kernel_mask_static_gen5,
2068 sizeof(sf_kernel_mask_static_gen5),
2071 sf_kernel_bo = intel_bo_alloc_for_data(intel,
2073 sizeof(sf_kernel_static),
2075 sf_kernel_mask_bo = intel_bo_alloc_for_data(intel,
2076 sf_kernel_mask_static,
2078 (sf_kernel_mask_static),
2081 render->sf_state_bo = gen4_create_sf_state(intel, sf_kernel_bo);
2082 render->sf_mask_state_bo = gen4_create_sf_state(intel, sf_kernel_mask_bo);
/* SF state bos now hold the kernel references; drop the locals. */
2083 drm_intel_bo_unreference(sf_kernel_bo);
2084 drm_intel_bo_unreference(sf_kernel_mask_bo);
/* Upload every WM (pixel shader) kernel for this generation. */
2086 wm_kernels = IS_GEN5(intel) ? wm_kernels_gen5 : wm_kernels_gen4;
2087 for (m = 0; m < KERNEL_COUNT; m++) {
2088 render->wm_kernel_bo[m] =
2089 intel_bo_alloc_for_data(intel,
2095 /* Set up the WM states: each filter/extend type for source and mask, per
2098 border_color_bo = sampler_border_color_create(intel);
2099 for (i = 0; i < FILTER_COUNT; i++) {
2100 for (j = 0; j < EXTEND_COUNT; j++) {
2101 for (k = 0; k < FILTER_COUNT; k++) {
2102 for (l = 0; l < EXTEND_COUNT; l++) {
2103 drm_intel_bo *sampler_state_bo;
2106 gen4_create_sampler_state(intel,
2111 for (m = 0; m < KERNEL_COUNT; m++) {
2112 render->wm_state_bo[m][i][j][k][l] =
2113 gen4_create_wm_state
2115 wm_kernels[m]. has_mask,
2116 render->wm_kernel_bo[m],
2119 drm_intel_bo_unreference(sampler_state_bo);
2124 drm_intel_bo_unreference(border_color_bo);
2126 render->cc_state_bo = gen4_create_cc_unit_state(intel);
2130 * Called at LeaveVT.
2132 void gen4_render_state_cleanup(ScrnInfoPtr scrn)
/* LeaveVT hook: release every bo created by gen4_render_state_init (and
 * the gen6 equivalents, which are NULL-safe to unreference), then free
 * the render state struct itself. */
2134 intel_screen_private *intel = intel_get_screen_private(scrn);
2135 struct gen4_render_state *render_state = intel->gen4_render_state;
2138 drm_intel_bo_unreference(intel->surface_bo);
2139 drm_intel_bo_unreference(render_state->vs_state_bo);
2140 drm_intel_bo_unreference(render_state->sf_state_bo);
2141 drm_intel_bo_unreference(render_state->sf_mask_state_bo);
2143 for (i = 0; i < KERNEL_COUNT; i++)
2144 drm_intel_bo_unreference(render_state->wm_kernel_bo[i]);
/* Mirror the 5-deep allocation loop from init. */
2146 for (i = 0; i < FILTER_COUNT; i++)
2147 for (j = 0; j < EXTEND_COUNT; j++)
2148 for (k = 0; k < FILTER_COUNT; k++)
2149 for (l = 0; l < EXTEND_COUNT; l++)
2150 for (m = 0; m < KERNEL_COUNT; m++)
2151 drm_intel_bo_unreference
2153 wm_state_bo[m][i][j][k]
2156 for (i = 0; i < FILTER_COUNT; i++)
2157 for (j = 0; j < EXTEND_COUNT; j++)
2158 for (k = 0; k < FILTER_COUNT; k++)
2159 for (l = 0; l < EXTEND_COUNT; l++)
2160 drm_intel_bo_unreference(render_state->ps_sampler_state_bo[i][j][k][l]);
2162 drm_intel_bo_unreference(render_state->cc_state_bo);
2164 drm_intel_bo_unreference(render_state->cc_vp_bo);
2165 drm_intel_bo_unreference(render_state->gen6_blend_bo);
2166 drm_intel_bo_unreference(render_state->gen6_depth_stencil_bo);
2168 free(intel->gen4_render_state);
2169 intel->gen4_render_state = NULL;
/* Each gen6 BLEND_STATE entry is padded to a 64-byte boundary so entries
 * in the blend-state array can be addressed by aligned offsets. */
2175 #define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)
2177 static drm_intel_bo *
2178 gen6_composite_create_cc_state(intel_screen_private *intel)
/* Allocate a gen6 COLOR_CALC_STATE bo with a fixed blend-constant color
 * (magenta, alpha 1.0) — presumably a placeholder since the composite
 * path does not use the constant color; confirm against full source. */
2180 struct gen6_color_calc_state *state;
2181 drm_intel_bo *cc_bo;
2183 cc_bo = drm_intel_bo_alloc(intel->bufmgr,
2187 drm_intel_bo_map(cc_bo, TRUE);
2188 state = cc_bo->virtual;
2189 memset(state, 0, sizeof(*state));
2190 state->constant_r = 1.0;
2191 state->constant_g = 0.0;
2192 state->constant_b = 1.0;
2193 state->constant_a = 1.0;
2194 drm_intel_bo_unmap(cc_bo);
2199 static drm_intel_bo *
2200 gen6_composite_create_blend_state(intel_screen_private *intel)
/* Build a bo containing one padded gen6 BLEND_STATE for every (source,
 * dest) blendfactor pair; the emit path later selects an entry by the
 * same (src * COUNT + dst) * PADDED_SIZE offset formula used here. */
2202 drm_intel_bo *blend_bo;
2205 blend_bo = drm_intel_bo_alloc(intel->bufmgr,
2207 BRW_BLENDFACTOR_COUNT * BRW_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
2209 drm_intel_bo_map(blend_bo, TRUE);
2210 memset(blend_bo->virtual, 0, blend_bo->size);
2212 for (src = 0; src < BRW_BLENDFACTOR_COUNT; src++) {
2213 for (dst = 0; dst < BRW_BLENDFACTOR_COUNT; dst++) {
2214 uint32_t blend_state_offset = (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE;
2215 struct gen6_blend_state *blend;
2217 blend = (struct gen6_blend_state *)((char *)blend_bo->virtual + blend_state_offset);
/* The blendfactor enums double as the loop indices, so dst/src can be
 * stored directly into the hardware fields. */
2218 blend->blend0.dest_blend_factor = dst;
2219 blend->blend0.source_blend_factor = src;
2220 blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
2221 blend->blend0.blend_enable = 1;
2223 blend->blend1.post_blend_clamp_enable = 1;
2224 blend->blend1.pre_blend_clamp_enable = 1;
2228 drm_intel_bo_unmap(blend_bo);
2232 static drm_intel_bo *
2233 gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
/* Allocate an all-zero gen6 DEPTH_STENCIL_STATE object: with every
 * field cleared, depth test, depth write and stencil are all disabled,
 * which is what 2D compositing needs. */
2235 struct gen6_depth_stencil_state *state;
2236 drm_intel_bo *depth_stencil_bo;
2238 depth_stencil_bo = drm_intel_bo_alloc(intel->bufmgr,
2239 "gen6 DEPTH_STENCIL state",
2242 drm_intel_bo_map(depth_stencil_bo, TRUE);
2243 state = depth_stencil_bo->virtual;
2244 memset(state, 0, sizeof(*state));
2245 drm_intel_bo_unmap(depth_stencil_bo);
2247 return depth_stencil_bo;
/* Emit the once-per-batch invariant pipeline state for gen6:
 * select the 3D pipeline, configure single-sample rendering, and set
 * the system instruction pointer.  Only called when
 * intel->needs_3d_invariant is set. */
2251 gen6_composite_invariant_states(intel_screen_private *intel)
2253 OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2255 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
2256 OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2257 GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2260 OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2263 /* Set system instruction pointer */
2264 OUT_BATCH(BRW_STATE_SIP | 0);
/* Emit STATE_BASE_ADDRESS (10 dwords on gen6).  The surface state base
 * is pointed at intel->surface_bo; its batch offset is remembered in
 * intel->surface_reloc so the relocation can be fixed up when the
 * surface BO is finalized.  Every other base/bound is emitted as a
 * bare MODIFY bit (i.e. base 0 / unbounded). */
2269 gen6_composite_state_base_address(intel_screen_private *intel)
2271 OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
2272 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
/* Record where the surface-state base dword lands in the batch;
 * emitted raw (not OUT_RELOC) so it can be relocated later. */
2273 intel->surface_reloc = intel->batch_used;
2274 intel_batch_emit_dword(intel,
2275 intel->surface_bo->offset | BASE_ADDRESS_MODIFY);
2276 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2277 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */
2278 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */
2279 OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */
2280 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2281 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2282 OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
/* Point the pipeline at the color-calculator viewport state only; the
 * clip and SF viewports are not used for 2D composite rendering. */
2286 gen6_composite_viewport_state_pointers(intel_screen_private *intel,
2287 drm_intel_bo *cc_vp_bo)
2290 OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
2291 GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
2295 OUT_RELOC(cc_vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* Partition the URB: give the VS the minimum 24 entries of one row
 * each, and nothing to the GS (no GS thread runs). */
2299 gen6_composite_urb(intel_screen_private *intel)
2301 OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
2302 OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
2303 (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
2304 OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
2305 (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
/* Emit 3DSTATE_CC_STATE_POINTERS selecting the BLEND_STATE entry at
 * blend_offset within the pre-built blend table.  The last emitted
 * offset is cached in intel->gen6_render_state.blend so redundant
 * packets are skipped.  NOTE(review): the depth-stencil and CC
 * relocations below appear guarded by (blend == -1), i.e. first
 * emission only — elided lines presumably hold the matching else
 * branch; confirm against the full source. */
2309 gen6_composite_cc_state_pointers(intel_screen_private *intel,
2310 uint32_t blend_offset)
2312 struct gen4_render_state *render_state = intel->gen4_render_state;
/* Already pointing at this blend entry — nothing to do. */
2314 if (intel->gen6_render_state.blend == blend_offset)
2317 OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
2318 OUT_RELOC(render_state->gen6_blend_bo,
2319 I915_GEM_DOMAIN_INSTRUCTION, 0,
2321 if (intel->gen6_render_state.blend == -1) {
2322 OUT_RELOC(render_state->gen6_depth_stencil_bo,
2323 I915_GEM_DOMAIN_INSTRUCTION, 0,
2325 OUT_RELOC(render_state->cc_state_bo,
2326 I915_GEM_DOMAIN_INSTRUCTION, 0,
2333 intel->gen6_render_state.blend = blend_offset;
/* Point the PS at its SAMPLER_STATE BO; VS and GS get none.  The BO is
 * cached in intel->gen6_render_state.samplers so an identical packet
 * is not re-emitted. */
2337 gen6_composite_sampler_state_pointers(intel_screen_private *intel,
2340 if (intel->gen6_render_state.samplers == bo)
2343 intel->gen6_render_state.samplers = bo;
2345 OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
2346 GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
2348 OUT_BATCH(0); /* VS */
2349 OUT_BATCH(0); /* GS */
2350 OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* Disable the vertex shader stage: no constant buffer, no VS kernel;
 * vertices pass through untouched. */
2354 gen6_composite_vs_state(intel_screen_private *intel)
2356 /* disable VS constant buffer */
2357 OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2363 OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
2364 OUT_BATCH(0); /* without VS kernel */
2368 OUT_BATCH(0); /* pass-through */
/* Disable the geometry shader stage: no constant buffer, no GS kernel;
 * primitives pass through untouched. */
2372 gen6_composite_gs_state(intel_screen_private *intel)
2374 /* disable GS constant buffer */
2375 OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2381 OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
2382 OUT_BATCH(0); /* without GS kernel */
2387 OUT_BATCH(0); /* pass-through */
/* Disable the pixel-shader (WM) push-constant buffer; the composite
 * kernels take all their inputs from surfaces and vertex data. */
2391 gen6_composite_wm_constants(intel_screen_private *intel)
2393 /* disable WM constant buffer */
2394 OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
/* Disable clipping: the clip stage is configured as pass-through since
 * composite geometry is already clipped to the drawing rectangle. */
2402 gen6_composite_clip_state(intel_screen_private *intel)
2404 OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
2406 OUT_BATCH(0); /* pass-through */
/* Emit 3DSTATE_SF (20 dwords).  The number of SF outputs is 2 when a
 * mask is present (two sets of texture coordinates) and 1 otherwise;
 * the value is cached so the packet is only re-emitted on change. */
2411 gen6_composite_sf_state(intel_screen_private *intel,
2414 int num_sf_outputs = has_mask ? 2 : 1;
2416 if (intel->gen6_render_state.num_sf_outputs == num_sf_outputs)
2419 intel->gen6_render_state.num_sf_outputs = num_sf_outputs;
2421 OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
2422 OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2423 (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2424 (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
/* No culling; provoking vertex 2 for trifans. */
2426 OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
2427 OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2432 OUT_BATCH(0); /* DW9 */
2437 OUT_BATCH(0); /* DW14 */
2442 OUT_BATCH(0); /* DW19 */
/* Emit 3DSTATE_WM binding the pixel-shader kernel BO.  Surface count
 * is 3 (dest, src, mask) with a mask, else 2; SF outputs match
 * gen6_composite_sf_state.  The kernel BO is cached so the packet is
 * skipped when unchanged. */
2446 gen6_composite_wm_state(intel_screen_private *intel,
2450 int num_surfaces = has_mask ? 3 : 2;
2451 int num_sf_outputs = has_mask ? 2 : 1;
2453 if (intel->gen6_render_state.kernel == bo)
2456 intel->gen6_render_state.kernel = bo;
2458 OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
2460 I915_GEM_DOMAIN_INSTRUCTION, 0,
2462 OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2463 (num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
/* Payload starts at GRF 6; 40 threads max; SIMD16 dispatch with
 * perspective pixel barycentrics. */
2465 OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2466 OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2467 GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2468 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2469 OUT_BATCH((num_sf_outputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2470 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
/* Emit the binding-table pointers packet.  Only the PS entry is set
 * (to the current surface table offset); VS and GS have no surfaces. */
2476 gen6_composite_binding_table_pointers(intel_screen_private *intel)
2478 /* Binding table pointers */
2479 OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS |
2480 GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
2482 OUT_BATCH(0); /* vs */
2483 OUT_BATCH(0); /* gs */
2484 /* Only the PS uses the binding table */
2485 OUT_BATCH(intel->surface_table);
/* Declare a null depth buffer (SURFACE_NULL) — compositing does no
 * depth testing — and emit empty clear params. */
2489 gen6_composite_depth_buffer_state(intel_screen_private *intel)
2491 OUT_BATCH(BRW_3DSTATE_DEPTH_BUFFER | (7 - 2));
2492 OUT_BATCH((BRW_SURFACE_NULL << BRW_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT) |
2493 (BRW_DEPTHFORMAT_D32_FLOAT << BRW_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT));
2500 OUT_BATCH(BRW_3DSTATE_CLEAR_PARAMS | (2 - 2));
/* Emit 3DSTATE_DRAWING_RECTANGLE covering the whole destination
 * drawable.  The redundancy check is deliberately disabled ("if (0)"):
 * per the XXX note, cairo compositing depends on the implicit
 * non-pipelined flush this packet causes, so it must always be
 * emitted even when the rectangle is unchanged. */
2505 gen6_composite_drawing_rectangle(intel_screen_private *intel,
/* Pack (ymax, xmax) into one dword; ymin/xmin are 0. */
2509 DRAW_YMAX(dest->drawable.height - 1) |
2510 DRAW_XMAX(dest->drawable.width - 1);
2512 /* XXX cacomposite depends upon the implicit non-pipelined flush */
2513 if (0 && intel->gen6_render_state.drawrect == dw)
2515 intel->gen6_render_state.drawrect = dw;
2517 OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
2518 OUT_BATCH(0x00000000); /* ymin, xmin */
2519 OUT_BATCH(dw); /* ymax, xmax */
2520 OUT_BATCH(0x00000000); /* yorigin, xorigin */
/* Emit 3DSTATE_VERTEX_ELEMENTS describing the composite vertex layout.
 * Elements: a constant pad element (STORE_0 in all channels), the 2D
 * position, texcoord 0, and — with a mask — texcoord 1.  Affine
 * transforms use 2 floats per texcoord (w forced to 1.0); projective
 * use 3 (w taken from the vertex buffer).  The layout id
 * (has_mask << 1 | is_affine) is cached in composite_op->vertex_id so
 * an unchanged layout is not re-emitted. */
2524 gen6_composite_vertex_element_state(intel_screen_private *intel,
2529 * vertex data in vertex buffer
2531 * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0)
2532 * texture coordinate 1 if (has_mask is TRUE): same as above
2534 gen4_composite_op *composite_op = &intel->gen4_render_state->composite_op;
2535 int nelem = has_mask ? 2 : 1;
2536 int selem = is_affine ? 2 : 3;
2537 uint32_t w_component;
2538 uint32_t src_format;
2541 id = has_mask << 1 | is_affine;
2543 if (composite_op->vertex_id == id)
2546 composite_op->vertex_id = id;
/* Affine: 2-float texcoords, synthesize w = 1.0.
 * Projective: 3-float texcoords, w read from the buffer. */
2549 src_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
2550 w_component = BRW_VFCOMPONENT_STORE_1_FLT;
2552 src_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
2553 w_component = BRW_VFCOMPONENT_STORE_SRC;
2557 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
2558 * dword 4-7: position (x, y, 1.0, 1.0),
2559 * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
2560 * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
2562 * dword 4-15 are fetched from vertex buffer
2564 OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS |
2565 ((2 * (2 + nelem)) + 1 - 2));
/* Pad element: every channel sourced as constant 0. */
2567 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
2568 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2569 (0 << VE0_OFFSET_SHIFT));
2570 OUT_BATCH((BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
2571 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
2572 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
2573 (BRW_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
/* Position (x, y) at buffer offset 0; z and w forced to 1.0. */
2576 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
2577 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2578 (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */
2579 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2580 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2581 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2582 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
/* Texcoord 0 follows the 2-float position (offset 8 bytes). */
2585 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | GEN6_VE0_VALID |
2586 (src_format << VE0_FORMAT_SHIFT) |
2587 ((2 * 4) << VE0_OFFSET_SHIFT)); /* offset vb in bytes */
2588 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2589 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2590 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
2591 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
/* Texcoord 1 (mask) follows texcoord 0: offset (2 + selem) floats. */
2595 OUT_BATCH((id << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2597 (src_format << VE0_FORMAT_SHIFT) |
2598 (((2 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */
2599 OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2600 (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2601 (w_component << VE1_VFCOMPONENT_2_SHIFT) |
2602 (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
/* Top-level gen6 state emission for one composite operation.  Emits
 * the once-per-batch invariants on first use, then the per-operation
 * state: blend entry selection, samplers, SF/WM configuration, binding
 * table, drawing rectangle and vertex-element layout.  The helpers it
 * calls each skip redundant packets via the gen6_render_state cache. */
2607 gen6_emit_composite_state(struct intel_screen_private *intel)
2609 struct gen4_render_state *render = intel->gen4_render_state;
2610 gen4_composite_op *composite_op = &render->composite_op;
2611 sampler_state_filter_t src_filter = composite_op->src_filter;
2612 sampler_state_filter_t mask_filter = composite_op->mask_filter;
2613 sampler_state_extend_t src_extend = composite_op->src_extend;
2614 sampler_state_extend_t mask_extend = composite_op->mask_extend;
2615 Bool is_affine = composite_op->is_affine;
2616 Bool has_mask = intel->render_mask != NULL;
/* Sampler state was pre-built for every filter/extend combination in
 * gen6_render_state_init; just index it. */
2618 drm_intel_bo *ps_sampler_state_bo = render->ps_sampler_state_bo[src_filter][src_extend][mask_filter][mask_extend];
2620 intel->needs_render_state_emit = FALSE;
/* One-time (per batch) pipeline setup. */
2621 if (intel->needs_3d_invariant) {
2622 gen6_composite_invariant_states(intel);
2623 gen6_composite_viewport_state_pointers(intel,
2625 gen6_composite_urb(intel);
2627 gen6_composite_vs_state(intel);
2628 gen6_composite_gs_state(intel);
2629 gen6_composite_clip_state(intel);
2630 gen6_composite_wm_constants(intel);
2631 gen6_composite_depth_buffer_state(intel);
2633 intel->needs_3d_invariant = FALSE;
/* Translate the Render op + formats into (src, dst) blend factors;
 * elided lines presumably declare src/dst and pass their addresses. */
2636 i965_get_blend_cntl(composite_op->op,
2637 intel->render_mask_picture,
2638 intel->render_dest_picture->format,
/* surface_reloc == 0 means STATE_BASE_ADDRESS has not been emitted
 * for this batch yet. */
2641 if (intel->surface_reloc == 0)
2642 gen6_composite_state_base_address(intel);
/* Select the pre-built BLEND_STATE entry by offset (see
 * gen6_composite_create_blend_state for the table layout). */
2644 gen6_composite_cc_state_pointers(intel,
2645 (src * BRW_BLENDFACTOR_COUNT + dst) * GEN6_BLEND_STATE_PADDED_SIZE);
2646 gen6_composite_sampler_state_pointers(intel, ps_sampler_state_bo);
2647 gen6_composite_sf_state(intel, has_mask);
2648 gen6_composite_wm_state(intel,
2650 render->wm_kernel_bo[composite_op->wm_kernel]);
2651 gen6_composite_binding_table_pointers(intel);
2653 gen6_composite_drawing_rectangle(intel, intel->render_dest);
2654 gen6_composite_vertex_element_state(intel, has_mask, is_affine);
/* One-time gen6 render-state setup: reset the cached-packet state,
 * upload every WM kernel, pre-build sampler state for all
 * filter/extend combinations, and create the CC viewport, CC, blend
 * and depth-stencil state objects. */
2658 gen6_render_state_init(ScrnInfoPtr scrn)
2660 intel_screen_private *intel = intel_get_screen_private(scrn);
2661 struct gen4_render_state *render;
2663 drm_intel_bo *border_color_bo;
2665 render= intel->gen4_render_state;
2666 render->composite_op.vertex_id = -1;
/* Invalidate the redundant-packet caches so the first emission of
 * each packet always happens. */
2668 intel->gen6_render_state.num_sf_outputs = 0;
2669 intel->gen6_render_state.samplers = NULL;
2670 intel->gen6_render_state.blend = -1;
2671 intel->gen6_render_state.kernel = NULL;
2672 intel->gen6_render_state.drawrect = -1;
/* Upload each gen6 pixel-shader kernel into its own BO. */
2674 for (m = 0; m < KERNEL_COUNT; m++) {
2675 render->wm_kernel_bo[m] =
2676 intel_bo_alloc_for_data(intel,
2677 wm_kernels_gen6[m].data,
2678 wm_kernels_gen6[m].size,
/* Shared border-color object for all sampler states; unreferenced
 * below once every sampler state holds its own reference. */
2682 border_color_bo = sampler_border_color_create(intel);
/* Pre-build SAMPLER_STATE for every (src filter, src extend,
 * mask filter, mask extend) combination. */
2684 for (i = 0; i < FILTER_COUNT; i++) {
2685 for (j = 0; j < EXTEND_COUNT; j++) {
2686 for (k = 0; k < FILTER_COUNT; k++) {
2687 for (l = 0; l < EXTEND_COUNT; l++) {
2688 render->ps_sampler_state_bo[i][j][k][l] =
2689 gen4_create_sampler_state(intel,
2698 drm_intel_bo_unreference(border_color_bo);
2699 render->cc_vp_bo = gen4_create_cc_viewport(intel);
2700 render->cc_state_bo = gen6_composite_create_cc_state(intel);
2701 render->gen6_blend_bo = gen6_composite_create_blend_state(intel);
2702 render->gen6_depth_stencil_bo = gen6_composite_create_depth_stencil_state(intel);
/* Patch the pending vertex-count placeholder in the batch.  When a
 * primitive packet was started, intel->vertex_offset records the batch
 * dword reserved for the vertex count; fill it with the number of
 * vertices emitted since then and clear the marker. */
2705 void i965_vertex_flush(struct intel_screen_private *intel)
2707 if (intel->vertex_offset) {
2708 intel->batch_ptr[intel->vertex_offset] =
2709 intel->vertex_index - intel->vertex_count;
2710 intel->vertex_offset = 0;
2714 void i965_batch_flush(struct intel_screen_private *intel)
2716 if (intel->surface_used)
2717 i965_surface_flush(intel);