VEBOX/bdw: set downsample method
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39
40 #include <va/va_drmcommon.h>
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
/*
 * Pre-compiled GEN4 (965) shader binaries, generated from the .g4a
 * sources under shaders/render/ at build time.  SF = strips-and-fans
 * setup kernel, PS = pixel shader.
 */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

/* Number of 16-register GRF blocks, minus one, as encoded in thread state */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* PS kernel for video rendering: planar YUV sampling, color balance,
 * YUV->RGB conversion, then framebuffer write */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* PS kernel for subpicture blending: samples an ARGB surface directly */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
79
/* On IRONLAKE (GEN5): same kernel set, rebuilt for the GEN5 ISA */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
102
/* programs for Sandybridge (GEN6).  The SF stage no longer needs a
 * kernel on GEN6+, hence the empty table. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};
121
/* programs for Ivybridge (GEN7); SF kernel unused, kept for table symmetry */
static const uint32_t sf_kernel_static_gen7[][4] = 
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
140
/* Programs for Haswell: only the sampling and color-balance stages differ
 * from Ivybridge; the remaining stages reuse the plain g7b binaries */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};
149
/*TODO: Modify the shader for GEN8.
 * Now it only uses the shader for gen7/haswell
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] = 
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};
170
171
/* One padded surface-state slot must fit the largest per-gen layout */
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/* Byte offset of surface state slot #index inside the combined
 * surface-state/binding-table bo.  The argument is parenthesized so an
 * expression argument (e.g. SURFACE_STATE_OFFSET(i + 1)) expands
 * correctly — the original macro multiplied only the first operand. */
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
/* Binding table lives immediately after all surface-state slots */
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
177
/* Reinterpret the bit pattern of an IEEE-754 float as a uint32_t
 * (used to stash float vertex data into dword command fields). */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
188
/* Indices into the per-generation render_kernels_* tables below */
enum 
{
    SF_KERNEL = 0,      /* strips-and-fans setup kernel */
    PS_KERNEL,          /* pixel shader for video rendering */
    PS_SUBPIC_KERNEL    /* pixel shader for subpicture blending */
};
195
/*
 * Per-generation kernel tables: { name, kernel id, binary, size, bo }.
 * The trailing NULL is presumably the kernel's dri_bo, filled in when
 * the kernels are uploaded (the *_unit functions below read .bo->offset)
 * — confirm against the render-state init code.
 */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

/* Haswell swaps in its own PS binary; SF and subpic PS reuse gen7 */
static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};
345
/* URB (Unified Return Buffer) partitioning: number of entries and
 * per-entry allocation size handed to each fixed-function stage.
 * GS and CLIP are unused by the video render path. */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
360
/*
 * 3x4 YCbCr -> RGB conversion matrices for the supported colorimetry
 * standards.  Each row is { Y coeff, Cb coeff, Cr coeff, bias }; the
 * 1.164 scale and -0.06275/-0.50196 biases correspond to video-range
 * (16-235) input.  Presumably uploaded as shader constants for the
 * yuv_rgb kernel stage — confirm against the constant-buffer setup.
 */
static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};
378
/*
 * Fill in the VS fixed-function unit state.  The vertex shader is
 * disabled for video rendering; only the URB allocation fields and the
 * vertex-cache disable matter.
 */
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);  /* 1 = map for write */
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    /* NOTE(review): Ironlake appears to encode URB entries in units of
     * four here — confirm against the GEN5 PRM */
    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;            /* VS stage bypassed */
    vs_state->vs6.vert_cache_disable = 1;
    
    dri_bo_unmap(render_state->vs.state);
}
402
/*
 * Fill in the SF (strips-and-fans) fixed-function unit state: points
 * thread0 at the SF kernel, configures thread/URB limits, and disables
 * viewport transform, culling and scissoring.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);  /* 1 = map for write */
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* kernel start pointer is in 64-byte units; patched by the reloc below */
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* 0x8 = half-pixel destination origin bias */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Patch thread0 with the kernel bo's final GPU offset; the
     * grf_reg_count << 1 delta preserves the low bits of the dword */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
459
460 static void 
461 i965_render_sampler(VADriverContextP ctx)
462 {
463     struct i965_driver_data *i965 = i965_driver_data(ctx);
464     struct i965_render_state *render_state = &i965->render_state;
465     struct i965_sampler_state *sampler_state;
466     int i;
467     
468     assert(render_state->wm.sampler_count > 0);
469     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
470
471     dri_bo_map(render_state->wm.sampler, 1);
472     assert(render_state->wm.sampler->virtual);
473     sampler_state = render_state->wm.sampler->virtual;
474     for (i = 0; i < render_state->wm.sampler_count; i++) {
475         memset(sampler_state, 0, sizeof(*sampler_state));
476         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
477         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
478         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
479         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
480         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
481         sampler_state++;
482     }
483
484     dri_bo_unmap(render_state->wm.sampler);
485 }
/*
 * Fill in the WM (windower/pixel-shader) unit state for subpicture
 * blending.  Identical to i965_render_wm_unit() except that thread0
 * points at the PS_SUBPIC_KERNEL (ARGB sampling) binary.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);  /* 1 = map for write */
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel pointer in 64-byte units; patched by the reloc below */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        /* sampler_count field counts groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 / wm4 with the final GPU offsets of the kernel and
     * sampler bos; the deltas preserve the adjacent bitfields */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
548
549
/*
 * Fill in the WM (windower/pixel-shader) unit state for video
 * rendering: SIMD16 dispatch of the planar-YUV PS kernel, with the
 * sampler table programmed by i965_render_sampler().
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);  /* 1 = map for write */
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel pointer in 64-byte units; patched by the reloc below */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        /* sampler_count field counts groups of four samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 / wm4 with the final GPU offsets of the kernel and
     * sampler bos; the deltas preserve the adjacent bitfields */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
612
/*
 * Program the color-calculator viewport with an effectively unbounded
 * depth range, so depth clamping never affects the video quad.
 */
static void 
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);  /* 1 = map for write */
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));
    
    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}
630
/*
 * Fill in the color-calculator unit state for subpicture blending:
 * alpha blending enabled (src_alpha / inv_src_alpha) so the subpicture
 * is composited over the video, with depth/stencil/alpha test off.
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);  /* 1 = map for write */
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* viewport pointer is in 32-byte units; patched by the reloc below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;


    /* Patch cc4 with the viewport bo's final GPU offset */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
682
683
/*
 * Fill in the color-calculator unit state for plain video rendering:
 * blending disabled (pixels are written as-is), logic op enabled,
 * depth/stencil/alpha test all off.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);  /* 1 = map for write */
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* viewport pointer is in 32-byte units; patched by the reloc below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* Patch cc4 with the viewport bo's final GPU offset */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
721
722 static void
723 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
724 {
725     switch (tiling) {
726     case I915_TILING_NONE:
727         ss->ss3.tiled_surface = 0;
728         ss->ss3.tile_walk = 0;
729         break;
730     case I915_TILING_X:
731         ss->ss3.tiled_surface = 1;
732         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
733         break;
734     case I915_TILING_Y:
735         ss->ss3.tiled_surface = 1;
736         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
737         break;
738     }
739 }
740
/*
 * Fill in a GEN4/5 SURFACE_STATE for a 2D surface backed by `bo` at
 * `offset`.  For interlaced rendering (TOP/BOTTOM field flags) the
 * surface is programmed with doubled line stride and half height so
 * only one field is addressed.  Note: ss1.base_addr holds bo->offset,
 * which is only the presumed GPU address — the caller must emit a
 * matching relocation for it.
 */
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;  /* start at the second line */
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;      /* skip every other line */
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    /* hardware encodes dimensions as value - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}
782
783 static void
784 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
785 {
786    switch (tiling) {
787    case I915_TILING_NONE:
788       ss->ss0.tiled_surface = 0;
789       ss->ss0.tile_walk = 0;
790       break;
791    case I915_TILING_X:
792       ss->ss0.tiled_surface = 1;
793       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
794       break;
795    case I915_TILING_Y:
796       ss->ss0.tiled_surface = 1;
797       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
798       break;
799    }
800 }
801
802 static void
803 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
804 {
805    switch (tiling) {
806    case I915_TILING_NONE:
807       ss->ss0.tiled_surface = 0;
808       ss->ss0.tile_walk = 0;
809       break;
810    case I915_TILING_X:
811       ss->ss0.tiled_surface = 1;
812       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
813       break;
814    case I915_TILING_Y:
815       ss->ss0.tiled_surface = 1;
816       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
817       break;
818    }
819 }
820
/* Set "Shader Channel Select" to the identity mapping (R->R, G->G,
 * B->B, A->A) in the GEN7/Haswell surface state.  Non-static: also
 * used by other render paths.  (The "chanel" spelling comes from the
 * struct field names.) */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
830
/* Set "Shader Channel Select" for GEN8+ to the identity mapping
 * (R->R, G->G, B->B, A->A).  Non-static: also used by other render
 * paths. */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
840
/*
 * Fill in a GEN7 SURFACE_STATE for a 2D surface backed by `bo` at
 * `offset`.  Field flags program doubled line stride and half height
 * for interlaced access.  ss1.base_addr holds bo->offset, the presumed
 * GPU address — the caller must emit a matching relocation.
 */
static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;  /* start at the second line */
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;      /* skip every other line */
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    /* hardware encodes dimensions as value - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}
881
882
/*
 * Fill in a GEN8 SURFACE_STATE for a 2D surface backed by `bo` at
 * `offset`.  Differs from the GEN7 variant in that the base address
 * moved to ss8 (64-bit addressing) and the alignment fields must be
 * set.  Field flags program doubled line stride and half height for
 * interlaced access.  The caller must emit a relocation for ss8.
 */
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;  /* start at the second line */
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;      /* skip every other line */
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    /* hardware encodes dimensions as value - 1 */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set 1(align 4 mode) per B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}
927
/*
 * Set up one source (sampler) surface: write the per-generation
 * SURFACE_STATE at SURFACE_STATE_OFFSET(index), emit the relocation for
 * its base address, and point binding-table entry `index` at it.
 *
 * The surface-state layout differs per generation, so dispatch on the
 * device id; the relocation target also differs (ss8 holds the 64-bit
 * base address on Gen8, ss1 the 32-bit one on earlier gens).
 */
static void
i965_render_src_surface_state(
    VADriverContextP ctx, 
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    /* Map writable (second arg == 1) and locate this index's state. */
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else  if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        /* Shader channel select is only programmed on Haswell. */
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    /* Hook binding-table entry `index` up to the state just written. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
991
992 static void
993 i965_render_src_surfaces_state(
994     VADriverContextP ctx,
995     struct object_surface *obj_surface,
996     unsigned int     flags
997 )
998 {
999     int region_pitch;
1000     int rw, rh;
1001     dri_bo *region;
1002
1003     region_pitch = obj_surface->width;
1004     rw = obj_surface->orig_width;
1005     rh = obj_surface->orig_height;
1006     region = obj_surface->bo;
1007
1008     i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
1009     i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
1010
1011     if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
1012         i965_render_src_surface_state(ctx, 3, region,
1013                                       region_pitch * obj_surface->y_cb_offset,
1014                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1015                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
1016         i965_render_src_surface_state(ctx, 4, region,
1017                                       region_pitch * obj_surface->y_cb_offset,
1018                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1019                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
1020     } else {
1021         i965_render_src_surface_state(ctx, 3, region,
1022                                       region_pitch * obj_surface->y_cb_offset,
1023                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1024                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
1025         i965_render_src_surface_state(ctx, 4, region,
1026                                       region_pitch * obj_surface->y_cb_offset,
1027                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1028                                       I965_SURFACEFORMAT_R8_UNORM, flags);
1029         i965_render_src_surface_state(ctx, 5, region,
1030                                       region_pitch * obj_surface->y_cr_offset,
1031                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1032                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
1033         i965_render_src_surface_state(ctx, 6, region,
1034                                       region_pitch * obj_surface->y_cr_offset,
1035                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
1036                                       I965_SURFACEFORMAT_R8_UNORM, flags);
1037     }
1038 }
1039
1040 static void
1041 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
1042                                       struct object_surface *obj_surface)
1043 {
1044     dri_bo *subpic_region;
1045     unsigned int index = obj_surface->subpic_render_idx;
1046     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1047     struct object_image *obj_image = obj_subpic->obj_image;
1048
1049     assert(obj_surface);
1050     assert(obj_surface->bo);
1051     subpic_region = obj_image->bo;
1052     /*subpicture surface*/
1053     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
1054     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
1055 }
1056
/*
 * Set up the render-target surface: the X drawable region the composited
 * video is written to.  Picks a 16bpp (B5G6R5) or 32bpp (B8G8R8A8)
 * format from the region's bytes-per-pixel, writes the per-generation
 * SURFACE_STATE, emits the base-address relocation (render domain, both
 * read and write), and fills binding-table entry `index`.
 */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    /* Choose the surface format from the drawable's pixel size. */
    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        /* Shader channel select is only programmed on Haswell. */
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    /* Hook binding-table entry `index` up to the state just written. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
1116
1117 static void
1118 i965_fill_vertex_buffer(
1119     VADriverContextP ctx,
1120     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
1121     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
1122 )
1123 {
1124     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1125     float vb[12];
1126
1127     enum { X1, Y1, X2, Y2 };
1128
1129     static const unsigned int g_rotation_indices[][6] = {
1130         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
1131         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
1132         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
1133         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
1134     };
1135
1136     const unsigned int * const rotation_indices =
1137         g_rotation_indices[i965->rotation_attrib->value];
1138
1139     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
1140     vb[1]  = tex_coords[rotation_indices[1]];
1141     vb[2]  = vid_coords[X2];
1142     vb[3]  = vid_coords[Y2];
1143
1144     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
1145     vb[5]  = tex_coords[rotation_indices[3]];
1146     vb[6]  = vid_coords[X1];
1147     vb[7]  = vid_coords[Y2];
1148
1149     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
1150     vb[9]  = tex_coords[rotation_indices[5]];
1151     vb[10] = vid_coords[X1];
1152     vb[11] = vid_coords[Y1];
1153
1154     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
1155 }
1156
1157 static void 
1158 i965_subpic_render_upload_vertex(VADriverContextP ctx,
1159                                  struct object_surface *obj_surface,
1160                                  const VARectangle *output_rect)
1161 {    
1162     unsigned int index = obj_surface->subpic_render_idx;
1163     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
1164     float tex_coords[4], vid_coords[4];
1165     VARectangle dst_rect;
1166
1167     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
1168         dst_rect = obj_subpic->dst_rect;
1169     else {
1170         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
1171         const float sy  = (float)output_rect->height / obj_surface->orig_height;
1172         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
1173         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
1174         dst_rect.width  = sx * obj_subpic->dst_rect.width;
1175         dst_rect.height = sy * obj_subpic->dst_rect.height;
1176     }
1177
1178     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
1179     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
1180     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
1181     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
1182
1183     vid_coords[0] = dst_rect.x;
1184     vid_coords[1] = dst_rect.y;
1185     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
1186     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
1187
1188     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1189 }
1190
1191 static void 
1192 i965_render_upload_vertex(
1193     VADriverContextP   ctx,
1194     struct object_surface *obj_surface,
1195     const VARectangle *src_rect,
1196     const VARectangle *dst_rect
1197 )
1198 {
1199     struct i965_driver_data *i965 = i965_driver_data(ctx);
1200     struct i965_render_state *render_state = &i965->render_state;
1201     struct intel_region *dest_region = render_state->draw_region;
1202     float tex_coords[4], vid_coords[4];
1203     int width, height;
1204
1205     width  = obj_surface->orig_width;
1206     height = obj_surface->orig_height;
1207
1208     tex_coords[0] = (float)src_rect->x / width;
1209     tex_coords[1] = (float)src_rect->y / height;
1210     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
1211     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
1212
1213     vid_coords[0] = dest_region->x + dst_rect->x;
1214     vid_coords[1] = dest_region->y + dst_rect->y;
1215     vid_coords[2] = vid_coords[0] + dst_rect->width;
1216     vid_coords[3] = vid_coords[1] + dst_rect->height;
1217
1218     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1219 }
1220
1221 #define PI  3.1415926
1222
/*
 * Fill the CURBE (constant) buffer consumed by the render PS kernel.
 *
 * Layout as seen by the shader:
 *   ushort[0]    surface layout selector: 2 = Y800 (luma only),
 *                1 = NV12 (interleaved UV), 0 = planar YUV
 *   ushort[1]    1 = skip the color-balance transform (all procamp
 *                attributes at defaults), 0 = apply it
 *   float[4..7]  contrast, brightness, and the cos/sin hue terms
 *                pre-scaled by contrast * saturation
 *   float[8..]   YUV->RGB matrix for the colorspace requested in flags
 */
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    /* Normalize the procamp attribute values for the shader. */
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance coefficients start 8 bytes (4 ushorts) in. */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    /* Select the YUV->RGB matrix; BT.601 is the default. */
    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}
1279
1280 static void
1281 i965_subpic_render_upload_constants(VADriverContextP ctx,
1282                                     struct object_surface *obj_surface)
1283 {
1284     struct i965_driver_data *i965 = i965_driver_data(ctx);
1285     struct i965_render_state *render_state = &i965->render_state;
1286     float *constant_buffer;
1287     float global_alpha = 1.0;
1288     unsigned int index = obj_surface->subpic_render_idx;
1289     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1290     
1291     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1292         global_alpha = obj_subpic->global_alpha;
1293     }
1294
1295     dri_bo_map(render_state->curbe.bo, 1);
1296
1297     assert(render_state->curbe.bo->virtual);
1298     constant_buffer = render_state->curbe.bo->virtual;
1299     *constant_buffer = global_alpha;
1300
1301     dri_bo_unmap(render_state->curbe.bo);
1302 }
1303  
/*
 * Build all indirect state for rendering a video surface: fixed-function
 * unit descriptors, source/destination surfaces, samplers, viewport, and
 * finally the vertex data and PS constants.  The batch commands emitted
 * later reference the BOs written here.
 */
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0); /* render target in slot 0 */
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}
1324
/*
 * Same as i965_surface_render_state_setup() but for blending a
 * subpicture: uses the subpicture variants of the source surfaces,
 * WM unit, CC unit, and constants (global alpha).
 */
static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0); /* render target in slot 0 */
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx); /* CC unit with blending enabled */
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
1344
1345
/* Select the 3D pipeline (as opposed to media) for subsequent commands. */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
 
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}
1356
/* Clear the System Instruction Pointer (no exception handler used). */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1368
/*
 * Program STATE_BASE_ADDRESS.  Only the surface-state base is pointed at
 * a real BO (the combined surface-state + binding-table buffer); all
 * other bases are left at zero with the MODIFY bit set so offsets are
 * interpreted from address 0.  Ironlake's command is two dwords longer
 * than the original 965 variant, hence the split.
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
1398
/*
 * Point the pixel-shader stage at our binding table; the other stages
 * (VS/GS/CLIP/SF) don't access surfaces here and get a zero pointer.
 */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET); /* ps/wm */
    ADVANCE_BATCH(batch);
}
1414
/*
 * Program the constant (blend) color registers with RGBA
 * (1.0, 0.0, 1.0, 1.0).
 */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0)); /* R */
    OUT_BATCH(batch, float_to_uint(0.0)); /* G */
    OUT_BATCH(batch, float_to_uint(1.0)); /* B */
    OUT_BATCH(batch, float_to_uint(1.0)); /* A */
    ADVANCE_BATCH(batch);
}
1429
/*
 * Hook the fixed-function stages up to their state descriptors built in
 * *_render_state_setup(); GS and CLIP are disabled (zero pointers).
 */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
1447
/*
 * Partition the URB among the fixed-function stages in the order
 * VS | GS | CLIP | SF | CS.  URB_FENCE takes cumulative end offsets,
 * so each fence value is the running sum of the allocations before it.
 */
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    /* Lay the sections out back-to-back from offset 0. */
    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, 
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch, 
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}
1488
/*
 * Configure the constant-buffer (CS) section of the URB: entry size and
 * entry count for the CURBE data.
 */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
1502
/*
 * Point CONSTANT_BUFFER at the CURBE BO.  Bit 8 marks the buffer valid;
 * the reloc delta (URB_CS_ENTRY_SIZE - 1) lands in the low bits of the
 * address dword, encoding the buffer length.
 */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);    
}
1517
/*
 * Set the drawing (clip) rectangle to cover the whole destination
 * drawable: min corner (0,0) in dword 1, max corner packed as
 * (height-1) << 16 | (width-1) in dword 2, zero origin in dword 3.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);         
    ADVANCE_BATCH(batch);
}
1533
/*
 * Define the two vertex elements consumed by the pipeline:
 *   element 0 (offset 0): screen position (X, Y), padded to {X, Y, 1, 1}
 *   element 1 (offset 8): texcoord (S0, T0), padded to {S0, T0, 1, 1}
 * Both are R32G32_FLOAT from vertex buffer 0.  The pre-Ironlake variant
 * additionally programs explicit destination element offsets in VE1,
 * which Ironlake's command no longer carries.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
1588
1589 static void
1590 i965_render_upload_image_palette(
1591     VADriverContextP ctx,
1592     struct object_image *obj_image,
1593     unsigned int     alpha
1594 )
1595 {
1596     struct i965_driver_data *i965 = i965_driver_data(ctx);
1597     struct intel_batchbuffer *batch = i965->batch;
1598     unsigned int i;
1599
1600     assert(obj_image);
1601
1602     if (!obj_image)
1603         return;
1604
1605     if (obj_image->image.num_palette_entries == 0)
1606         return;
1607
1608     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1609     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1610     /*fill palette*/
1611     //int32_t out[16]; //0-23:color 23-31:alpha
1612     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1613         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1614     ADVANCE_BATCH(batch);
1615 }
1616
/*
 * Emit the vertex-buffer state and kick the draw: a RECTLIST primitive
 * of 3 sequentially-fetched vertices (one rectangle), using the VB
 * filled by i965_fill_vertex_buffer().
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* 4 floats per vertex */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* Ironlake takes an end-address reloc (12 floats in); earlier gens
     * take a max-index dword instead.  NOTE(review): confirm the pre-ILK
     * encoding against the PRM. */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1652
/*
 * Clear the destination drawable with a solid-color (black) BLT fill.
 * br13 carries ROP 0xf0 (pattern copy), the color depth, and the pitch;
 * for tiled targets the pitch is given in dwords and the tiled bit is
 * set on the command.  Gen6+ must issue blits on the BLT ring.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* ROP: PATCOPY */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    /* Tiled destinations take the pitch in dwords. */
    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id) ||
        IS_GEN8(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color: black */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1704
/*
 * GEN8 variant of i965_clear_dest_region(): clears the destination
 * drawable to black.  The GEN8 XY_COLOR_BLT command is 7 dwords long
 * (one more than earlier generations) to accommodate the wider
 * destination address emitted via the relocation.
 */
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster operation 0xf0 = PATCOPY */
    pitch = dest_region->pitch;

    /* Select the color format from the drawable depth (32bpp or 16bpp). */
    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* tiled destinations are programmed with the pitch in dwords */
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));       /* top-left corner */
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));                  /* bottom-right corner */
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0); /* fill color: black */
    OUT_BATCH(batch, 0x0); /* pad to the 7-dword GEN8 command length */
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
1750
/*
 * Emit the complete legacy (pre-GEN6) 3D pipeline programming for
 * surface rendering.  The destination is cleared first, then all
 * fixed-function state and the draw call are emitted in one atomic
 * batch section.  The emission order follows the hardware's required
 * state-setup sequence and must not be changed.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1774
/*
 * Emit the legacy (pre-GEN6) 3D pipeline programming for subpicture
 * rendering.  Identical to i965_surface_render_pipeline_setup() except
 * that the destination is NOT cleared first (the subpicture is blended
 * on top of the already-rendered surface).
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}
1797
1798
1799 static void 
1800 i965_render_initialize(VADriverContextP ctx)
1801 {
1802     struct i965_driver_data *i965 = i965_driver_data(ctx);
1803     struct i965_render_state *render_state = &i965->render_state;
1804     dri_bo *bo;
1805
1806     /* VERTEX BUFFER */
1807     dri_bo_unreference(render_state->vb.vertex_buffer);
1808     bo = dri_bo_alloc(i965->intel.bufmgr,
1809                       "vertex buffer",
1810                       4096,
1811                       4096);
1812     assert(bo);
1813     render_state->vb.vertex_buffer = bo;
1814
1815     /* VS */
1816     dri_bo_unreference(render_state->vs.state);
1817     bo = dri_bo_alloc(i965->intel.bufmgr,
1818                       "vs state",
1819                       sizeof(struct i965_vs_unit_state),
1820                       64);
1821     assert(bo);
1822     render_state->vs.state = bo;
1823
1824     /* GS */
1825     /* CLIP */
1826     /* SF */
1827     dri_bo_unreference(render_state->sf.state);
1828     bo = dri_bo_alloc(i965->intel.bufmgr,
1829                       "sf state",
1830                       sizeof(struct i965_sf_unit_state),
1831                       64);
1832     assert(bo);
1833     render_state->sf.state = bo;
1834
1835     /* WM */
1836     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1837     bo = dri_bo_alloc(i965->intel.bufmgr,
1838                       "surface state & binding table",
1839                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1840                       4096);
1841     assert(bo);
1842     render_state->wm.surface_state_binding_table_bo = bo;
1843
1844     dri_bo_unreference(render_state->wm.sampler);
1845     bo = dri_bo_alloc(i965->intel.bufmgr,
1846                       "sampler state",
1847                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1848                       64);
1849     assert(bo);
1850     render_state->wm.sampler = bo;
1851     render_state->wm.sampler_count = 0;
1852
1853     dri_bo_unreference(render_state->wm.state);
1854     bo = dri_bo_alloc(i965->intel.bufmgr,
1855                       "wm state",
1856                       sizeof(struct i965_wm_unit_state),
1857                       64);
1858     assert(bo);
1859     render_state->wm.state = bo;
1860
1861     /* COLOR CALCULATOR */
1862     dri_bo_unreference(render_state->cc.state);
1863     bo = dri_bo_alloc(i965->intel.bufmgr,
1864                       "color calc state",
1865                       sizeof(struct i965_cc_unit_state),
1866                       64);
1867     assert(bo);
1868     render_state->cc.state = bo;
1869
1870     dri_bo_unreference(render_state->cc.viewport);
1871     bo = dri_bo_alloc(i965->intel.bufmgr,
1872                       "cc viewport",
1873                       sizeof(struct i965_cc_viewport),
1874                       64);
1875     assert(bo);
1876     render_state->cc.viewport = bo;
1877 }
1878
/*
 * Legacy (pre-GEN6) entry point: render obj_surface's src_rect onto the
 * current draw region's dst_rect and submit the batch to the kernel.
 *
 * @param flags  rendering flags (e.g. field/frame selection) passed
 *               through to the state setup.
 */
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}
1896
/*
 * Legacy (pre-GEN6) entry point: blend the subpicture currently
 * selected by obj_surface->subpic_render_idx onto the draw region,
 * upload its palette, and submit the batch.
 */
static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    /* 0xff: render the subpicture fully opaque */
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
1918
1919 /*
1920  * for GEN6+
1921  */
1922 static void 
1923 gen6_render_initialize(VADriverContextP ctx)
1924 {
1925     struct i965_driver_data *i965 = i965_driver_data(ctx);
1926     struct i965_render_state *render_state = &i965->render_state;
1927     dri_bo *bo;
1928
1929     /* VERTEX BUFFER */
1930     dri_bo_unreference(render_state->vb.vertex_buffer);
1931     bo = dri_bo_alloc(i965->intel.bufmgr,
1932                       "vertex buffer",
1933                       4096,
1934                       4096);
1935     assert(bo);
1936     render_state->vb.vertex_buffer = bo;
1937
1938     /* WM */
1939     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1940     bo = dri_bo_alloc(i965->intel.bufmgr,
1941                       "surface state & binding table",
1942                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1943                       4096);
1944     assert(bo);
1945     render_state->wm.surface_state_binding_table_bo = bo;
1946
1947     dri_bo_unreference(render_state->wm.sampler);
1948     bo = dri_bo_alloc(i965->intel.bufmgr,
1949                       "sampler state",
1950                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1951                       4096);
1952     assert(bo);
1953     render_state->wm.sampler = bo;
1954     render_state->wm.sampler_count = 0;
1955
1956     /* COLOR CALCULATOR */
1957     dri_bo_unreference(render_state->cc.state);
1958     bo = dri_bo_alloc(i965->intel.bufmgr,
1959                       "color calc state",
1960                       sizeof(struct gen6_color_calc_state),
1961                       4096);
1962     assert(bo);
1963     render_state->cc.state = bo;
1964
1965     /* CC VIEWPORT */
1966     dri_bo_unreference(render_state->cc.viewport);
1967     bo = dri_bo_alloc(i965->intel.bufmgr,
1968                       "cc viewport",
1969                       sizeof(struct i965_cc_viewport),
1970                       4096);
1971     assert(bo);
1972     render_state->cc.viewport = bo;
1973
1974     /* BLEND STATE */
1975     dri_bo_unreference(render_state->cc.blend);
1976     bo = dri_bo_alloc(i965->intel.bufmgr,
1977                       "blend state",
1978                       sizeof(struct gen6_blend_state),
1979                       4096);
1980     assert(bo);
1981     render_state->cc.blend = bo;
1982
1983     /* DEPTH & STENCIL STATE */
1984     dri_bo_unreference(render_state->cc.depth_stencil);
1985     bo = dri_bo_alloc(i965->intel.bufmgr,
1986                       "depth & stencil state",
1987                       sizeof(struct gen6_depth_stencil_state),
1988                       4096);
1989     assert(bo);
1990     render_state->cc.depth_stencil = bo;
1991 }
1992
1993 static void
1994 gen6_render_color_calc_state(VADriverContextP ctx)
1995 {
1996     struct i965_driver_data *i965 = i965_driver_data(ctx);
1997     struct i965_render_state *render_state = &i965->render_state;
1998     struct gen6_color_calc_state *color_calc_state;
1999     
2000     dri_bo_map(render_state->cc.state, 1);
2001     assert(render_state->cc.state->virtual);
2002     color_calc_state = render_state->cc.state->virtual;
2003     memset(color_calc_state, 0, sizeof(*color_calc_state));
2004     color_calc_state->constant_r = 1.0;
2005     color_calc_state->constant_g = 0.0;
2006     color_calc_state->constant_b = 1.0;
2007     color_calc_state->constant_a = 1.0;
2008     dri_bo_unmap(render_state->cc.state);
2009 }
2010
2011 static void
2012 gen6_render_blend_state(VADriverContextP ctx)
2013 {
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     struct i965_render_state *render_state = &i965->render_state;
2016     struct gen6_blend_state *blend_state;
2017     
2018     dri_bo_map(render_state->cc.blend, 1);
2019     assert(render_state->cc.blend->virtual);
2020     blend_state = render_state->cc.blend->virtual;
2021     memset(blend_state, 0, sizeof(*blend_state));
2022     blend_state->blend1.logic_op_enable = 1;
2023     blend_state->blend1.logic_op_func = 0xc;
2024     dri_bo_unmap(render_state->cc.blend);
2025 }
2026
2027 static void
2028 gen6_render_depth_stencil_state(VADriverContextP ctx)
2029 {
2030     struct i965_driver_data *i965 = i965_driver_data(ctx);
2031     struct i965_render_state *render_state = &i965->render_state;
2032     struct gen6_depth_stencil_state *depth_stencil_state;
2033     
2034     dri_bo_map(render_state->cc.depth_stencil, 1);
2035     assert(render_state->cc.depth_stencil->virtual);
2036     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2037     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2038     dri_bo_unmap(render_state->cc.depth_stencil);
2039 }
2040
/*
 * Prepare all indirect GPU state for a GEN6 surface rendering pass:
 * destination and source surface states, samplers, CC viewport, color
 * calc / blend / depth-stencil state, shader constants and the vertex
 * buffer.  Only fills buffer objects; nothing is emitted to the batch
 * until gen6_render_emit_states().
 */
static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2060
/*
 * Emit GEN6 state that never changes between draws: select the 3D
 * pipeline, disable multisampling (1 sample/pixel, sample mask 1) and
 * clear the system instruction pointer.
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}
2081
/*
 * Emit STATE_BASE_ADDRESS for GEN6.  Only the surface state base is set
 * to a real buffer (the combined surface-state/binding-table bo); all
 * other bases and bounds are left at zero with the modify bit set.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2100
/*
 * Point the GEN6 pipeline at the CC viewport state buffer.  Only the
 * CC viewport is modified; clip and SF viewports are untouched (zero).
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0); /* clip viewport (unmodified) */
    OUT_BATCH(batch, 0); /* SF viewport (unmodified) */
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2115
/*
 * Partition the GEN6 URB: give the VS the hardware minimum of 24
 * entries (size 1) and allocate nothing to the GS, which is disabled
 * for this pipeline.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
2128
/*
 * Point the GEN6 pipeline at the blend, depth/stencil and color-calc
 * state buffers.  The low bit (offset 1) in each relocation is the
 * "pointer valid/modify" flag of 3DSTATE_CC_STATE_POINTERS.
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
2141
/*
 * Point the pixel shader stage at the sampler state buffer.  VS and GS
 * sampler pointers are left at zero (only the PS samples textures).
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
2156
/*
 * Emit the binding table pointers.  Only the PS slot is programmed
 * (BINDING_TABLE_OFFSET into the surface-state bo); VS and GS do not
 * access surfaces in this pipeline.
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}
2172
/*
 * Program a NULL depth buffer (no depth surface bound) and zero clear
 * params — video rendering needs no depth or stencil testing.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}
2191
/*
 * Emit the drawing rectangle.  The command layout is unchanged from
 * earlier generations, so this simply reuses the i965 helper.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
2197
/*
 * Disable the vertex shader stage: no constant buffer and no VS kernel,
 * so vertices pass through to the next stage unmodified.
 */
static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
2218
/*
 * Disable the geometry shader stage: no constant buffer and no GS
 * kernel, so primitives pass through unmodified.
 */
static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
2240
/*
 * Disable the clip stage (pass-through): the RECTLIST we draw always
 * fits the drawing rectangle, so no clipping is needed.
 */
static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
2252
/*
 * Program the GEN6 strips & fans stage: one attribute output (the
 * texture coordinate), one URB entry read, no culling.  All other
 * fields (line/point state, attribute overrides) are zero.
 */
static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}
2282
/*
 * Program the GEN6 windower/pixel-shader stage: bind the constant
 * (CURBE) buffer and the selected PS kernel, enable SIMD16 dispatch
 * with the maximum thread count, and request perspective pixel
 * barycentric interpolation.
 *
 * @param kernel  index into render_state->render_kernels (PS_KERNEL or
 *                PS_SUBPIC_KERNEL).
 */
static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    /* The low bits of the relocation carry the constant buffer length. */
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              (URB_CS_ENTRY_SIZE-1));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
2317
/*
 * Describe the two vertex elements read from the single vertex buffer:
 * position (X, Y) at offset 0 and texture coordinate (S0, T0) at
 * offset 8, each expanded to a 4-component vector with 1.0 in Z/W.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
2345
/*
 * Emit the GEN6 vertex-buffer state and the 3DPRIMITIVE command that
 * draws a single RECTLIST (3 vertices, one instance) from the uploaded
 * vertex buffer.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* 4 floats per vertex */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    /* end address: 3 vertices * 4 floats * 4 bytes */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0); /* instance data step rate (unused) */

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
2376
/*
 * Emit the complete GEN6 3D pipeline programming and the draw call in
 * one atomic batch section.  The emission order follows the hardware's
 * required state-setup sequence and must not be changed.
 *
 * @param kernel  PS kernel index forwarded to gen6_emit_wm_state().
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2403
/*
 * GEN6 entry point: render obj_surface's src_rect onto the current draw
 * region's dst_rect and submit the batch to the kernel.
 */
static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2422
2423 static void
2424 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2425 {
2426     struct i965_driver_data *i965 = i965_driver_data(ctx);
2427     struct i965_render_state *render_state = &i965->render_state;
2428     struct gen6_blend_state *blend_state;
2429
2430     dri_bo_unmap(render_state->cc.state);    
2431     dri_bo_map(render_state->cc.blend, 1);
2432     assert(render_state->cc.blend->virtual);
2433     blend_state = render_state->cc.blend->virtual;
2434     memset(blend_state, 0, sizeof(*blend_state));
2435     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2436     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2437     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2438     blend_state->blend0.blend_enable = 1;
2439     blend_state->blend1.post_blend_clamp_enable = 1;
2440     blend_state->blend1.pre_blend_clamp_enable = 1;
2441     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2442     dri_bo_unmap(render_state->cc.blend);
2443 }
2444
/*
 * Prepare all indirect GPU state for a GEN6 subpicture rendering pass.
 * Same flow as gen6_render_setup_states() but with the subpicture
 * source surfaces, alpha-blend state and subpicture constants/vertices.
 */
static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
2463
/*
 * GEN6 entry point: blend the subpicture currently selected by
 * obj_surface->subpic_render_idx onto the draw region, upload its
 * palette, and submit the batch.
 */
static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    /* 0xff: render the subpicture fully opaque */
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
2484
2485 /*
2486  * for GEN7
2487  */
2488 static void 
2489 gen7_render_initialize(VADriverContextP ctx)
2490 {
2491     struct i965_driver_data *i965 = i965_driver_data(ctx);
2492     struct i965_render_state *render_state = &i965->render_state;
2493     dri_bo *bo;
2494
2495     /* VERTEX BUFFER */
2496     dri_bo_unreference(render_state->vb.vertex_buffer);
2497     bo = dri_bo_alloc(i965->intel.bufmgr,
2498                       "vertex buffer",
2499                       4096,
2500                       4096);
2501     assert(bo);
2502     render_state->vb.vertex_buffer = bo;
2503
2504     /* WM */
2505     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2506     bo = dri_bo_alloc(i965->intel.bufmgr,
2507                       "surface state & binding table",
2508                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2509                       4096);
2510     assert(bo);
2511     render_state->wm.surface_state_binding_table_bo = bo;
2512
2513     dri_bo_unreference(render_state->wm.sampler);
2514     bo = dri_bo_alloc(i965->intel.bufmgr,
2515                       "sampler state",
2516                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2517                       4096);
2518     assert(bo);
2519     render_state->wm.sampler = bo;
2520     render_state->wm.sampler_count = 0;
2521
2522     /* COLOR CALCULATOR */
2523     dri_bo_unreference(render_state->cc.state);
2524     bo = dri_bo_alloc(i965->intel.bufmgr,
2525                       "color calc state",
2526                       sizeof(struct gen6_color_calc_state),
2527                       4096);
2528     assert(bo);
2529     render_state->cc.state = bo;
2530
2531     /* CC VIEWPORT */
2532     dri_bo_unreference(render_state->cc.viewport);
2533     bo = dri_bo_alloc(i965->intel.bufmgr,
2534                       "cc viewport",
2535                       sizeof(struct i965_cc_viewport),
2536                       4096);
2537     assert(bo);
2538     render_state->cc.viewport = bo;
2539
2540     /* BLEND STATE */
2541     dri_bo_unreference(render_state->cc.blend);
2542     bo = dri_bo_alloc(i965->intel.bufmgr,
2543                       "blend state",
2544                       sizeof(struct gen6_blend_state),
2545                       4096);
2546     assert(bo);
2547     render_state->cc.blend = bo;
2548
2549     /* DEPTH & STENCIL STATE */
2550     dri_bo_unreference(render_state->cc.depth_stencil);
2551     bo = dri_bo_alloc(i965->intel.bufmgr,
2552                       "depth & stencil state",
2553                       sizeof(struct gen6_depth_stencil_state),
2554                       4096);
2555     assert(bo);
2556     render_state->cc.depth_stencil = bo;
2557 }
2558
2559 /*
2560  * for GEN8
2561  */
2562 #define ALIGNMENT       64
2563
2564 static void 
2565 gen8_render_initialize(VADriverContextP ctx)
2566 {
2567     struct i965_driver_data *i965 = i965_driver_data(ctx);
2568     struct i965_render_state *render_state = &i965->render_state;
2569     dri_bo *bo;
2570     int size;
2571     unsigned int end_offset;
2572
2573     /* VERTEX BUFFER */
2574     dri_bo_unreference(render_state->vb.vertex_buffer);
2575     bo = dri_bo_alloc(i965->intel.bufmgr,
2576                       "vertex buffer",
2577                       4096,
2578                       4096);
2579     assert(bo);
2580     render_state->vb.vertex_buffer = bo;
2581
2582     /* WM */
2583     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2584     bo = dri_bo_alloc(i965->intel.bufmgr,
2585                       "surface state & binding table",
2586                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2587                       4096);
2588     assert(bo);
2589     render_state->wm.surface_state_binding_table_bo = bo;
2590
2591     render_state->curbe_size = 256;
2592
2593     render_state->wm.sampler_count = 0;
2594
2595     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
2596
2597     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
2598
2599     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
2600
2601     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
2602                         16 * sizeof(struct gen8_blend_state_rt);
2603
2604     render_state->sf_clip_size = 1024;
2605
2606     render_state->scissor_size = 1024;
2607
2608     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
2609         ALIGN(render_state->sampler_size, ALIGNMENT) +
2610         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
2611         ALIGN(render_state->cc_state_size, ALIGNMENT) +
2612         ALIGN(render_state->blend_state_size, ALIGNMENT) +
2613         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
2614         ALIGN(render_state->scissor_size, ALIGNMENT);
2615
2616     dri_bo_unreference(render_state->dynamic_state.bo);
2617     bo = dri_bo_alloc(i965->intel.bufmgr,
2618                       "dynamic_state",
2619                       size,
2620                       4096);
2621
2622     render_state->dynamic_state.bo = bo;
2623
2624     end_offset = 0;
2625     render_state->dynamic_state.end_offset = 0;
2626
2627     /* Constant buffer offset */
2628     render_state->curbe_offset = end_offset;
2629     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
2630
2631     /* Sampler_state  */
2632     render_state->sampler_offset = end_offset;
2633     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
2634
2635     /* CC_VIEWPORT_state  */
2636     render_state->cc_viewport_offset = end_offset;
2637     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
2638
2639     /* CC_STATE_state  */
2640     render_state->cc_state_offset = end_offset;
2641     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
2642
2643     /* Blend_state  */
2644     render_state->blend_state_offset = end_offset;
2645     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
2646
2647     /* SF_CLIP_state  */
2648     render_state->sf_clip_offset = end_offset;
2649     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
2650
2651     /* SCISSOR_state  */
2652     render_state->scissor_offset = end_offset;
2653     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
2654
2655     /* update the end offset of dynamic_state */
2656     render_state->dynamic_state.end_offset = end_offset;
2657
2658 }
2659
2660 static void
2661 gen7_render_color_calc_state(VADriverContextP ctx)
2662 {
2663     struct i965_driver_data *i965 = i965_driver_data(ctx);
2664     struct i965_render_state *render_state = &i965->render_state;
2665     struct gen6_color_calc_state *color_calc_state;
2666     
2667     dri_bo_map(render_state->cc.state, 1);
2668     assert(render_state->cc.state->virtual);
2669     color_calc_state = render_state->cc.state->virtual;
2670     memset(color_calc_state, 0, sizeof(*color_calc_state));
2671     color_calc_state->constant_r = 1.0;
2672     color_calc_state->constant_g = 0.0;
2673     color_calc_state->constant_b = 1.0;
2674     color_calc_state->constant_a = 1.0;
2675     dri_bo_unmap(render_state->cc.state);
2676 }
2677
2678 static void
2679 gen7_render_blend_state(VADriverContextP ctx)
2680 {
2681     struct i965_driver_data *i965 = i965_driver_data(ctx);
2682     struct i965_render_state *render_state = &i965->render_state;
2683     struct gen6_blend_state *blend_state;
2684     
2685     dri_bo_map(render_state->cc.blend, 1);
2686     assert(render_state->cc.blend->virtual);
2687     blend_state = render_state->cc.blend->virtual;
2688     memset(blend_state, 0, sizeof(*blend_state));
2689     blend_state->blend1.logic_op_enable = 1;
2690     blend_state->blend1.logic_op_func = 0xc;
2691     blend_state->blend1.pre_blend_clamp_enable = 1;
2692     dri_bo_unmap(render_state->cc.blend);
2693 }
2694
2695 static void
2696 gen7_render_depth_stencil_state(VADriverContextP ctx)
2697 {
2698     struct i965_driver_data *i965 = i965_driver_data(ctx);
2699     struct i965_render_state *render_state = &i965->render_state;
2700     struct gen6_depth_stencil_state *depth_stencil_state;
2701     
2702     dri_bo_map(render_state->cc.depth_stencil, 1);
2703     assert(render_state->cc.depth_stencil->virtual);
2704     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2705     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2706     dri_bo_unmap(render_state->cc.depth_stencil);
2707 }
2708
2709 static void 
2710 gen7_render_sampler(VADriverContextP ctx)
2711 {
2712     struct i965_driver_data *i965 = i965_driver_data(ctx);
2713     struct i965_render_state *render_state = &i965->render_state;
2714     struct gen7_sampler_state *sampler_state;
2715     int i;
2716     
2717     assert(render_state->wm.sampler_count > 0);
2718     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2719
2720     dri_bo_map(render_state->wm.sampler, 1);
2721     assert(render_state->wm.sampler->virtual);
2722     sampler_state = render_state->wm.sampler->virtual;
2723     for (i = 0; i < render_state->wm.sampler_count; i++) {
2724         memset(sampler_state, 0, sizeof(*sampler_state));
2725         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2726         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2727         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2728         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2729         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2730         sampler_state++;
2731     }
2732
2733     dri_bo_unmap(render_state->wm.sampler);
2734 }
2735
2736 static void 
2737 gen8_render_sampler(VADriverContextP ctx)
2738 {
2739     struct i965_driver_data *i965 = i965_driver_data(ctx);
2740     struct i965_render_state *render_state = &i965->render_state;
2741     struct gen8_sampler_state *sampler_state;
2742     int i;
2743     unsigned char *cc_ptr;
2744     
2745     assert(render_state->wm.sampler_count > 0);
2746     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2747
2748     dri_bo_map(render_state->dynamic_state.bo, 1);
2749     assert(render_state->dynamic_state.bo->virtual);
2750
2751     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2752                         render_state->sampler_offset;
2753
2754     sampler_state = (struct gen8_sampler_state *) cc_ptr;
2755
2756     for (i = 0; i < render_state->wm.sampler_count; i++) {
2757         memset(sampler_state, 0, sizeof(*sampler_state));
2758         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2759         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2760         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2761         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2762         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2763         sampler_state++;
2764     }
2765
2766     dri_bo_unmap(render_state->dynamic_state.bo);
2767 }
2768
2769
/*
 * Prepare all indirect states for a Gen7 video rendering pass:
 * destination/source surface states, samplers, CC viewport,
 * color-calc/blend/depth-stencil states, the shader constant buffer,
 * and the vertex data derived from src_rect/dst_rect.
 */
static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2789
2790 static void
2791 gen8_render_blend_state(VADriverContextP ctx)
2792 {
2793     struct i965_driver_data *i965 = i965_driver_data(ctx);
2794     struct i965_render_state *render_state = &i965->render_state;
2795     struct gen8_global_blend_state *global_blend_state;
2796     struct gen8_blend_state_rt *blend_state;
2797     unsigned char *cc_ptr;
2798     
2799     dri_bo_map(render_state->dynamic_state.bo, 1);
2800     assert(render_state->dynamic_state.bo->virtual);
2801
2802     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2803                         render_state->blend_state_offset;
2804
2805     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
2806
2807     memset(global_blend_state, 0, render_state->blend_state_size);
2808     /* Global blend state + blend_state for Render Target */
2809     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
2810     blend_state->blend1.logic_op_enable = 1;
2811     blend_state->blend1.logic_op_func = 0xc;
2812     blend_state->blend1.pre_blend_clamp_enable = 1;
2813
2814     dri_bo_unmap(render_state->dynamic_state.bo);
2815 }
2816
2817
2818 static void 
2819 gen8_render_cc_viewport(VADriverContextP ctx)
2820 {
2821     struct i965_driver_data *i965 = i965_driver_data(ctx);
2822     struct i965_render_state *render_state = &i965->render_state;
2823     struct i965_cc_viewport *cc_viewport;
2824     unsigned char *cc_ptr;
2825
2826     dri_bo_map(render_state->dynamic_state.bo, 1);
2827     assert(render_state->dynamic_state.bo->virtual);
2828
2829     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2830                         render_state->cc_viewport_offset;
2831
2832     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
2833
2834     memset(cc_viewport, 0, sizeof(*cc_viewport));
2835     
2836     cc_viewport->min_depth = -1.e35;
2837     cc_viewport->max_depth = 1.e35;
2838
2839     dri_bo_unmap(render_state->dynamic_state.bo);
2840 }
2841
2842 static void
2843 gen8_render_color_calc_state(VADriverContextP ctx)
2844 {
2845     struct i965_driver_data *i965 = i965_driver_data(ctx);
2846     struct i965_render_state *render_state = &i965->render_state;
2847     struct gen6_color_calc_state *color_calc_state;
2848     unsigned char *cc_ptr;
2849
2850     dri_bo_map(render_state->dynamic_state.bo, 1);
2851     assert(render_state->dynamic_state.bo->virtual);
2852
2853     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
2854                         render_state->cc_state_offset;
2855
2856     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
2857
2858     memset(color_calc_state, 0, sizeof(*color_calc_state));
2859     color_calc_state->constant_r = 1.0;
2860     color_calc_state->constant_g = 0.0;
2861     color_calc_state->constant_b = 1.0;
2862     color_calc_state->constant_a = 1.0;
2863     dri_bo_unmap(render_state->dynamic_state.bo);
2864 }
2865
/*
 * Fill the Gen8 pixel-shader constant buffer (CURBE area of the
 * dynamic-state BO) for obj_surface.
 *
 * Layout written here (byte offsets from curbe_offset):
 *   [0]  u16: source format selector — 2 = Y800, 1 = NV12, 0 = other
 *   [2]  u16: 1 = skip the color-balance transform, 0 = apply it
 *   [16] 4 floats: contrast, brightness,
 *        cos(hue)*contrast*saturation, sin(hue)*contrast*saturation
 *   [32] YUV->RGB matrix (BT.601 / BT.709 / SMPTE 240M per flags)
 * NOTE(review): offsets are inferred from the pointer arithmetic below;
 * confirm against the PS kernel that consumes this buffer.
 */
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    /* Normalize the attribute values to the ranges the shader expects */
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    /* Word 0: tell the shader which source layout it is sampling */
    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    /* Word 1: bypass color balance when every attribute is at default */
    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    /* Color-balance coefficients at float offset 4 (byte 16) */
    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    /* YUV->RGB matrix at float offset 8 (byte 32), chosen by the
     * VA_SRC_COLOR_MASK bits of flags; BT.601 is the default. */
    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->dynamic_state.bo);
}
2927
/*
 * Prepare all indirect states for a Gen8 video rendering pass.
 * Mirrors gen7_render_setup_states() but writes sampler/viewport/CC/
 * blend states and constants into the shared dynamic-state BO, and
 * needs no separate depth-stencil state.
 */
static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}
2946
/*
 * Emit the pipeline-invariant setup for Gen7: select the 3D pipeline,
 * program single-sample multisampling, a sample mask of 0x1, and a null
 * system instruction pointer (SIP).
 * (Function name keeps the historical "invarient" spelling used by
 * callers elsewhere in the driver.)
 */
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2976
/*
 * Emit STATE_BASE_ADDRESS (10 dwords) for Gen7. Only the surface-state
 * base is relocated (to the surface-state/binding-table BO); all other
 * bases and bounds are left at zero with the modify bit set.
 *
 * NOTE(review): unlike the Gen8 variant below, these dwords are not
 * bracketed by BEGIN_BATCH/ADVANCE_BATCH — presumably the caller has
 * already reserved batch space; confirm before reusing this pattern.
 */
static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2995
/*
 * Emit STATE_BASE_ADDRESS (16 dwords, 48-bit addresses) for Gen8.
 * Relocations point the surface-state base at the surface-state/
 * binding-table BO, the dynamic-state base at the dynamic-state BO and
 * the instruction base at the shader-kernel BO; all upper bounds are
 * set to 0xFFFF0000 with the modify bit.
 */
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
        OUT_BATCH(batch, 0);
        OUT_BATCH(batch, 0);
        /*DW4 */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
        OUT_BATCH(batch, 0);

        /*DW6*/
    /* Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW8*/
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, 0);

        /*DW10 */
    /* Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

        /*DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}
3034
/*
 * Point the Gen7 pipeline at the CC viewport state BO and clear the
 * SF/CLIP viewport pointer (no SF/CLIP viewport is used).
 */
static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3055
3056 /*
3057  * URB layout on GEN7 
3058  * ----------------------------------------
3059  * | PS Push Constants (8KB) | VS entries |
3060  * ----------------------------------------
3061  */
/*
 * Program the Gen7 URB partitioning per the layout comment above:
 * 8KB of PS push constants, then VS entries (32, or 64 on Haswell);
 * GS/HS/DS get zero-size allocations since those stages are bypassed.
 */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 32;

    if (IS_HASWELL(i965->intel.device_id))
        num_urb_entries = 64;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch, 
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
3106
/*
 * Point the Gen7 pipeline at the color-calc, blend and depth/stencil
 * state BOs.
 * NOTE(review): the reloc delta of 1 appears to set the low "pointer
 * valid" bit of each state pointer — confirm against the Gen7 PRM.
 */
static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0, 
              1);
    ADVANCE_BATCH(batch);
}
3138
/*
 * Point the Gen8 pipeline at the color-calc and blend states. On Gen8
 * these are offsets into the dynamic-state BO (set as the dynamic-state
 * base address) rather than relocations.
 * NOTE(review): the "+ 1" appears to set the low "pointer valid" bit —
 * confirm against the Gen8 PRM.
 */
static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);

}
3157
/*
 * Point the Gen7 pixel-shader stage at the sampler state BO.
 */
static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
3173
/*
 * Point the Gen7 pixel-shader stage at the binding table, which lives
 * at BINDING_TABLE_OFFSET within the surface-state base address.
 */
static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
3185
/*
 * Emit a null depth buffer for Gen7 (surface type NULL, D32_FLOAT
 * format, no address) and zeroed clear params — the video render path
 * does not use depth.
 */
static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3210
/* Gen7 uses the common drawing-rectangle emission unchanged. */
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
3216
/*
 * Disable the Gen7 vertex shader: null VS constant buffer and a VS in
 * pass-through mode (no kernel).
 * NOTE(review): emitted without BEGIN_BATCH/ADVANCE_BATCH, like
 * gen7_emit_state_base_address — presumably the caller reserves batch
 * space; confirm.
 */
static void 
gen7_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
3239
/*
 * Disable every Gen7 pipeline stage the video render path does not use:
 * geometry shader, hull shader, tessellation engine, domain shader and
 * stream output. Each stage gets a null constant buffer, a null kernel
 * (pass-through) and, where applicable, a cleared binding table pointer.
 */
static void 
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);      
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3338
3339 static void 
3340 gen7_emit_clip_state(VADriverContextP ctx)
3341 {
3342     struct i965_driver_data *i965 = i965_driver_data(ctx);
3343     struct intel_batchbuffer *batch = i965->batch;
3344
3345     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3346     OUT_BATCH(batch, 0);
3347     OUT_BATCH(batch, 0); /* pass-through */
3348     OUT_BATCH(batch, 0);
3349 }
3350
/*
 * Emit Gen7 setup/SBE state: pass one attribute (read length 1, offset 0)
 * from the VUE through to the pixel shader, and program the SF unit with
 * culling disabled for the RECTLIST primitive.
 */
static void 
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* 3DSTATE_SBE: 1 output attribute, URB entry read length 1, offset 0 */
    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_SF: no culling; provoking vertex 2 for trifans */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3387
/*
 * Emit Gen7 WM/PS state for the given render kernel:
 * 3DSTATE_WM (dispatch enable, perspective barycentric), the PS push
 * constant buffer (CURBE bo), and 3DSTATE_PS pointing at the kernel bo.
 * Haswell uses a different MAX_THREADS bit position and requires the
 * sample mask field, handled via max_threads_shift/num_samples below.
 */
static void 
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    /* HSW moved the max-threads field and added a sample mask in 3DSTATE_PS */
    if (IS_HASWELL(i965->intel.device_id)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS push constants sourced from the CURBE buffer object */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* 3DSTATE_PS: kernel pointer, sampler/binding-table counts, threads,
     * SIMD16 dispatch starting at GRF 6 */
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch, 
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 
              ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}
3444
3445 static void
3446 gen7_emit_vertex_element_state(VADriverContextP ctx)
3447 {
3448     struct i965_driver_data *i965 = i965_driver_data(ctx);
3449     struct intel_batchbuffer *batch = i965->batch;
3450
3451     /* Set up our vertex elements, sourced from the single vertex buffer. */
3452     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
3453     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
3454     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3455               GEN6_VE0_VALID |
3456               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3457               (0 << VE0_OFFSET_SHIFT));
3458     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3459               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3460               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3461               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3462     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
3463     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3464               GEN6_VE0_VALID |
3465               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3466               (8 << VE0_OFFSET_SHIFT));
3467     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
3468               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3469               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3470               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3471 }
3472
/*
 * Emit the Gen7 vertex buffer (12 dwords = 3 vertices of {x,y,u,v},
 * pitch 16 bytes) and issue a RECTLIST 3DPRIMITIVE drawing one rectangle
 * (3 vertices, 1 instance, sequential vertex fetch).
 */
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    /* start address, then end address (12 dwords of vertex data) */
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3504
/*
 * Build the complete Gen7 render batch for one blit with the given
 * pixel-shader kernel. The whole command sequence is emitted atomically
 * (0x1000 bytes reserved) so it cannot be split across batch buffers.
 * The emission order follows the 3D pipeline stages and must not change.
 */
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
3531
/*
 * Emit the Gen8 vertex buffer (48-bit address via OUT_RELOC plus a zero
 * upper dword, buffer size 12 dwords, pitch 16 bytes), select RECTLIST
 * via 3DSTATE_VF_TOPOLOGY, and issue the 3DPRIMITIVE (3 vertices,
 * 1 instance, sequential fetch).
 */
static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0); /* upper 32 bits of the buffer address */
    OUT_BATCH(batch, 12 * 4); /* buffer size in bytes */
    ADVANCE_BATCH(batch);

    /* Topology in 3D primitive is overridden by the VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3570
3571 static void
3572 gen8_emit_vertex_element_state(VADriverContextP ctx)
3573 {
3574     struct i965_driver_data *i965 = i965_driver_data(ctx);
3575     struct intel_batchbuffer *batch = i965->batch;
3576
3577     /*
3578      * The VUE layout
3579      * dword 0-3: pad (0, 0, 0. 0)
3580      * dword 4-7: position (x, y, 1.0, 1.0),
3581      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
3582      */
3583
3584     /* Set up our vertex elements, sourced from the single vertex buffer. */
3585     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
3586
3587     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
3588      * We don't really know or care what they do.
3589      */
3590
3591     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3592               GEN8_VE0_VALID |
3593               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3594               (0 << VE0_OFFSET_SHIFT));
3595     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
3596               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
3597               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
3598               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
3599
3600     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
3601     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3602               GEN8_VE0_VALID |
3603               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3604               (8 << VE0_OFFSET_SHIFT));
3605     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3606               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3607               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3608               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3609
3610     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
3611     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3612               GEN8_VE0_VALID |
3613               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3614               (0 << VE0_OFFSET_SHIFT));
3615     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3616               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3617               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3618               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3619 }
3620
/*
 * Disable the Gen8 vertex shader stage: zeroed VS constant buffer,
 * 3DSTATE_VS with no kernel (pass-through), and zeroed VS binding-table
 * and sampler-state pointers.
 */
static void 
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
        
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

}
3671
3672 /*
3673  * URB layout on GEN8 
3674  * ----------------------------------------
3675  * | PS Push Constants (8KB) | VS entries |
3676  * ----------------------------------------
3677  */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum urb entries is 64 */

    /* No push constants for VS/DS/HS/GS — all allocations zeroed */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    /* Size is 8Kbs and base address is 0Kb */
    OUT_BATCH(batch,
                (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
                (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    /* VS URB entries start after the 8KB of PS push constants */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch, 
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   /* GS/HS/DS get no URB entries; only starting addresses are set */
   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
3745
/*
 * Disable (bypass) every Gen8 geometry stage the render path does not
 * use: GS, HS, TE, DS and stream output. Each stage gets a zeroed
 * constant buffer, a zeroed state command, and zeroed binding-table and
 * sampler-state pointers where applicable.
 */
static void 
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);     
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /*DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /*DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /*DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag.*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3891
/*
 * Emit Gen8 pipeline-invariant state: select the 3D pipeline, program
 * single-sample multisample state and sample pattern, set the sample
 * mask to 0x1, and zero the system instruction pointer.
 * (Note: "invarient" in the name is a historical misspelling kept for
 * consistency with the gen7 counterpart.)
 */
static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
3934
3935 static void 
3936 gen8_emit_clip_state(VADriverContextP ctx)
3937 {
3938     struct i965_driver_data *i965 = i965_driver_data(ctx);
3939     struct intel_batchbuffer *batch = i965->batch;
3940
3941     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3942     OUT_BATCH(batch, 0);
3943     OUT_BATCH(batch, 0); /* pass-through */
3944     OUT_BATCH(batch, 0);
3945 }
3946
/*
 * Emit Gen8 rasterizer/setup state: RASTER with culling disabled, SBE
 * passing one forced attribute (read length 1, read offset 1) to the
 * pixel shader, a zeroed SBE_SWIZ table, and SF with provoking vertex 2
 * for trifans.
 */
static void 
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);


    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}
3996
/*
 * Emit Gen8 WM/PS state for the given render kernel.
 * PS_KERNEL writes the render target directly; PS_SUBPIC_KERNEL enables
 * src-alpha / inv-src-alpha color blending for subpicture composition.
 * Push constants come from the pre-uploaded CURBE area
 * (render_state->curbe_offset) and the shader from the kernel's offset
 * in the dynamic state buffer (kernel_offset).
 */
static void 
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    max_threads = render_state->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    
    /* Per-kernel blend state: plain RT write vs. alpha blending */
    if (kernel == PS_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    /* PS push constants: buffer 0 points at the CURBE offset */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /*DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /*DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flag .Binding table cnt/sample cnt */
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scratch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads. */
    OUT_BATCH(batch, 
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
4089
/*
 * Emit Gen8 depth/stencil buffer state with all buffers null/disabled:
 * a NULL-surface depth buffer, zeroed hierarchical depth and stencil
 * buffers, and zeroed clear params. The render path does no depth testing.
 */
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the Hier Depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
    
    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
    
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4135
/* Emit zeroed Gen8 WM_DEPTH_STENCIL state: depth test and stencil disabled. */
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4148
/* Emit zeroed Gen8 WM_HZ_OP state: no HiZ operation is performed. */
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4163
/*
 * Point the Gen8 CC viewport at its pre-built offset in the dynamic state
 * buffer (render_state->cc_viewport_offset); the SF/CLIP viewport pointer
 * is left at 0 (unused by this render path).
 */
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
4181
/*
 * Point the Gen8 PS sampler state at its pre-built offset in the dynamic
 * state buffer (render_state->sampler_offset).
 */
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}
4194
4195
/*
 * Emit the complete Gen8 3D pipeline programming sequence for one draw
 * with the given PS kernel index.  The whole sequence is emitted inside
 * an atomic batch section so it is never split across batch buffers.
 * The order of these calls follows the hardware's required state
 * programming sequence — do not reorder.
 */
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
4223
/*
 * Gen7 top-level surface render: (re)initialize render state, program
 * all surface/sampler/shader state for this src->dst blit, clear the
 * destination region, emit the pipeline + draw, and flush the batch.
 */
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,    
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
4242
/*
 * Gen8 top-level surface render; mirrors gen7_render_put_surface but
 * uses the Gen8 state setup/emission helpers and the Gen8 variant of
 * the destination clear.
 */
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,    
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
4261
/*
 * Write the Gen7 blend state used for subpicture composition:
 * "source over" alpha blending (src * src_alpha + dst * (1 - src_alpha))
 * with pre- and post-blend clamping to the [0, 1] range.
 */
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* NOTE(review): cc.state does not appear to be mapped at this point
     * in the setup sequence, so this unmap looks like a leftover no-op —
     * confirm against the color-calc state helper before removing. */
    dri_bo_unmap(render_state->cc.state);    
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
4283
/*
 * Write the Gen8 blend state for subpicture composition into the
 * combined dynamic-state buffer at blend_state_offset.  The layout is
 * the global blend state immediately followed by one per-render-target
 * blend state; both color and alpha channels use "source over"
 * blending with clamping to [0, 1].
 */
static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;
    
    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    /* Zero the whole region (global + per-RT states) before filling it. */
    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}
4317
/*
 * Program all Gen7 state needed to composite a subpicture onto the
 * destination: surfaces, sampler, viewport, color-calc/blend/depth
 * state, the global-alpha constant, and the vertex data.
 * Note: src_rect is currently unused; the subpicture's own src/dst
 * rectangles are taken from the subpic object during vertex upload.
 */
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
4336
4337 static void
4338 gen8_subpic_render_upload_constants(VADriverContextP ctx,
4339                                     struct object_surface *obj_surface)
4340 {
4341     struct i965_driver_data *i965 = i965_driver_data(ctx);
4342     struct i965_render_state *render_state = &i965->render_state;
4343     float *constant_buffer;
4344     float global_alpha = 1.0;
4345     unsigned int index = obj_surface->subpic_render_idx;
4346     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4347     unsigned char *cc_ptr;
4348
4349     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
4350         global_alpha = obj_subpic->global_alpha;
4351     }
4352
4353
4354     dri_bo_map(render_state->dynamic_state.bo, 1);
4355     assert(render_state->dynamic_state.bo->virtual);
4356
4357     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4358                                 render_state->curbe_offset;
4359
4360     constant_buffer = (float *) cc_ptr;
4361     *constant_buffer = global_alpha;
4362
4363     dri_bo_unmap(render_state->dynamic_state.bo);
4364 }
4365
/*
 * Gen8 counterpart of gen7_subpicture_render_setup_states: program all
 * state needed to composite a subpicture (surfaces, sampler, viewport,
 * color-calc/blend state, global-alpha constant, vertex data).
 * Note: src_rect is currently unused here as well.
 */
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}
4383
/*
 * Gen7 top-level subpicture render: set up subpicture state, emit the
 * pipeline with the subpicture PS kernel, upload the image palette
 * (for paletted formats) and flush the batch.
 */
static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
4404
/*
 * Gen8 top-level subpicture render; mirrors gen7_render_put_subpicture
 * using the Gen8 state helpers.
 */
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}
4425
4426 /*
4427  * global functions
4428  */
4429 VAStatus 
4430 i965_DestroySurfaces(VADriverContextP ctx,
4431                      VASurfaceID *surface_list,
4432                      int num_surfaces);
/*
 * Public entry point for rendering a surface to the destination region.
 *
 * The surface is first run through video post-processing; if PP
 * produced a new surface, render from that one instead.  When PP has
 * already performed the scaling, the source rectangle is replaced by
 * the destination rectangle so the render path does a 1:1 copy.  The
 * actual rendering is dispatched to the generation-specific backend,
 * and any temporary PP surface is destroyed afterwards.
 */
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);
        
        /* Only switch to the PP output if it is backed by a real buffer. */
        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        /* PP already scaled into dst_rect's dimensions: copy 1:1. */
        if (has_done_scaling)
            src_rect = dst_rect;
    }

    if (IS_GEN8(i965->intel.device_id))
        gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    /* Release the temporary surface created by post-processing. */
    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}
4475
4476 void
4477 intel_render_put_subpicture(
4478     VADriverContextP   ctx,
4479     struct object_surface *obj_surface,
4480     const VARectangle *src_rect,
4481     const VARectangle *dst_rect
4482 )
4483 {
4484     struct i965_driver_data *i965 = i965_driver_data(ctx);
4485
4486     if (IS_GEN8(i965->intel.device_id))
4487         gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4488     else if (IS_GEN7(i965->intel.device_id))
4489         gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4490     else if (IS_GEN6(i965->intel.device_id))
4491         gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4492     else
4493         i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4494 }
4495
4496 static bool 
4497 gen8_render_init(VADriverContextP ctx)
4498 {
4499     struct i965_driver_data *i965 = i965_driver_data(ctx);
4500     struct i965_render_state *render_state = &i965->render_state;
4501     int i, kernel_size;
4502     unsigned int kernel_offset, end_offset;
4503     unsigned char *kernel_ptr;
4504     struct i965_kernel *kernel;
4505
4506
4507     if (IS_GEN8(i965->intel.device_id)) {
4508         memcpy(render_state->render_kernels, render_kernels_gen8,
4509                         sizeof(render_state->render_kernels));
4510     }
4511
4512     kernel_size = 4096;
4513
4514     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4515         kernel = &render_state->render_kernels[i];
4516
4517         if (!kernel->size)
4518             continue;
4519
4520         kernel_size += kernel->size;
4521     }
4522
4523     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
4524                                   "kernel shader",
4525                                   kernel_size,
4526                                   0x1000);
4527     if (render_state->instruction_state.bo == NULL) {
4528         WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
4529         return false;
4530     }
4531
4532     assert(render_state->instruction_state.bo);
4533
4534     render_state->instruction_state.bo_size = kernel_size;
4535     render_state->instruction_state.end_offset = 0;
4536     end_offset = 0;
4537
4538     dri_bo_map(render_state->instruction_state.bo, 1);
4539     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
4540     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4541         kernel = &render_state->render_kernels[i];
4542         kernel_offset = end_offset;
4543         kernel->kernel_offset = kernel_offset;
4544
4545         if (!kernel->size)
4546             continue;
4547
4548         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
4549
4550         end_offset += ALIGN(kernel->size, ALIGNMENT);
4551     }
4552
4553     render_state->instruction_state.end_offset = end_offset;
4554
4555     dri_bo_unmap(render_state->instruction_state.bo);
4556
4557
4558     if (IS_GEN8(i965->intel.device_id)) {
4559         render_state->max_wm_threads = 64;
4560     } else {
4561         /* should never get here !!! */
4562         assert(0);
4563     }
4564
4565     return true;
4566 }
4567
4568
/*
 * Initialize the render state for the current hardware generation:
 * pick the matching kernel table, upload each kernel into its own bo,
 * allocate the constant (CURBE) buffer and record the per-SKU maximum
 * number of WM hardware threads.  Gen8 uses its own combined-buffer
 * scheme and delegates entirely to gen8_render_init().
 */
bool 
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN8(i965->intel.device_id)) {
        return gen8_render_init(ctx);
    } else  if (IS_GEN7(i965->intel.device_id)) 
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    /* Upload each kernel binary into its own buffer object. */
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
                                  kernel->name, 
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 64);
    assert(render_state->curbe.bo);

    /* Maximum WM (pixel shader) hardware threads per GT SKU; used when
     * programming the WM state.  Values are hardware limits per SKU. */
    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 12 * 5 */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}
4639
4640 static void 
4641 gen8_render_terminate(VADriverContextP ctx)
4642 {
4643     int i;
4644     struct i965_driver_data *i965 = i965_driver_data(ctx);
4645     struct i965_render_state *render_state = &i965->render_state;
4646
4647     dri_bo_unreference(render_state->vb.vertex_buffer);
4648     render_state->vb.vertex_buffer = NULL;
4649
4650     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
4651     render_state->wm.surface_state_binding_table_bo = NULL;
4652    
4653     if (render_state->instruction_state.bo) {
4654         dri_bo_unreference(render_state->instruction_state.bo);
4655         render_state->instruction_state.bo = NULL;
4656     }
4657
4658     if (render_state->dynamic_state.bo) {
4659         dri_bo_unreference(render_state->dynamic_state.bo);
4660         render_state->dynamic_state.bo = NULL;
4661     }
4662
4663     if (render_state->indirect_state.bo) {
4664         dri_bo_unreference(render_state->indirect_state.bo);
4665         render_state->indirect_state.bo = NULL;
4666     }
4667
4668     if (render_state->draw_region) {
4669         dri_bo_unreference(render_state->draw_region->bo);
4670         free(render_state->draw_region);
4671         render_state->draw_region = NULL;
4672     }
4673 }
4674
4675 void 
4676 i965_render_terminate(VADriverContextP ctx)
4677 {
4678     int i;
4679     struct i965_driver_data *i965 = i965_driver_data(ctx);
4680     struct i965_render_state *render_state = &i965->render_state;
4681
4682     if (IS_GEN8(i965->intel.device_id)) {
4683         gen8_render_terminate(ctx);
4684         return;
4685     } 
4686
4687     dri_bo_unreference(render_state->curbe.bo);
4688     render_state->curbe.bo = NULL;
4689
4690     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4691         struct i965_kernel *kernel = &render_state->render_kernels[i];
4692         
4693         dri_bo_unreference(kernel->bo);
4694         kernel->bo = NULL;
4695     }
4696
4697     dri_bo_unreference(render_state->vb.vertex_buffer);
4698     render_state->vb.vertex_buffer = NULL;
4699     dri_bo_unreference(render_state->vs.state);
4700     render_state->vs.state = NULL;
4701     dri_bo_unreference(render_state->sf.state);
4702     render_state->sf.state = NULL;
4703     dri_bo_unreference(render_state->wm.sampler);
4704     render_state->wm.sampler = NULL;
4705     dri_bo_unreference(render_state->wm.state);
4706     render_state->wm.state = NULL;
4707     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
4708     dri_bo_unreference(render_state->cc.viewport);
4709     render_state->cc.viewport = NULL;
4710     dri_bo_unreference(render_state->cc.state);
4711     render_state->cc.state = NULL;
4712     dri_bo_unreference(render_state->cc.blend);
4713     render_state->cc.blend = NULL;
4714     dri_bo_unreference(render_state->cc.depth_stencil);
4715     render_state->cc.depth_stencil = NULL;
4716
4717     if (render_state->draw_region) {
4718         dri_bo_unreference(render_state->draw_region->bo);
4719         free(render_state->draw_region);
4720         render_state->draw_region = NULL;
4721     }
4722 }
4723