/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* TODO: Write dedicated shaders for Gen8.
 * For now the gen7/Haswell shaders are reused.
 */
/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] =
{
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN8, \
                                MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

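/*
 * Layout of wm.surface_state_binding_table_bo implied by the macros
 * above (inferred from how this file fills the buffer, not from the
 * B-Spec): MAX_RENDER_SURFACES surface-state blocks, each padded to the
 * largest per-gen surface-state size, followed by a binding table whose
 * entry i holds the byte offset SURFACE_STATE_OFFSET(i).
 */

/*
 * float_to_uint() below type-puns through a union: the portable C way
 * to obtain the IEEE-754 bit pattern of a float for emission into the
 * batch buffer (see i965_render_constant_color()), avoiding the
 * strict-aliasing hazards of a pointer cast.
 */
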
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

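/*
 * Sanity check of the URB partition these constants produce in
 * i965_render_urb_layout() (sizes in URB rows): VS occupies rows [0,8)
 * (8 entries x 1 row), GS and CLIP are disabled and get nothing, SF
 * occupies rows [8,10) (1 entry x 2 rows), and CS occupies rows
 * [10,26) (4 entries x 4 rows) for the constants uploaded in
 * i965_render_upload_constants().
 */
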
static float yuv_to_rgb_bt601[3][4] = {
    {1.164,     0,      1.596,      -0.06275,},
    {1.164,     -0.392, -0.813,     -0.50196,},
    {1.164,     2.017,  0,          -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
    {1.164,     0,      1.793,      -0.06275,},
    {1.164,     -0.213, -0.533,     -0.50196,},
    {1.164,     2.112,  0,          -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
    {1.164,     0,      1.794,      -0.06275,},
    {1.164,     -0.258, -0.5425,    -0.50196,},
    {1.164,     2.078,  0,          -0.50196,},
};

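/*
 * How to read these tables (a sketch; the exact register layout is
 * defined by the exa_wm_yuv_rgb shaders): each row gives the Y/Cb/Cr
 * weights for one RGB channel of the video-range ("studio swing")
 * conversion, e.g. for BT.601  R = 1.164*(Y - 16/255) + 1.596*(Cr - 128/255).
 * The fourth column carries the normalized input offsets:
 * -0.06275 = -16/255 for luma and -0.50196 = -128/255 for chroma.
 */
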
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = render_state->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set to 1 (align-4 mode), per the B-Spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

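/*
 * Field-rendering note for the *_set_surface_state() helpers above (an
 * inference from the flag handling, not a B-Spec quotation): with
 * I965_PP_FLAG_TOP/BOTTOM_FIELD set, vert_line_stride makes the sampler
 * advance two lines per step and vert_line_stride_ofs selects the odd
 * lines, which is why the surface is programmed with half its real
 * height.
 */
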
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          region);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags); /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

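/*
 * Binding-table layout produced above, with slot 0 reserved for the
 * render target (see i965_render_dest_surface_state()): slots 1-2 hold
 * the Y plane, slots 3-4 the Cb/CbCr plane, and slots 5-6 the Cr plane
 * for fully planar formats. Each plane is bound to two consecutive
 * slots, mirroring what the precompiled exa_wm_src_sample_* kernels
 * expect; i965_render_src_surface_state() bumps sampler_count once per
 * slot to match.
 */
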
static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        gen8_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                          dest_region->bo);
    } else if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

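/*
 * vb[] above holds three vertices, each laid out as {tex_s, tex_t,
 * vid_x, vid_y} and listed bottom-right, bottom-left, top-left; the
 * quad is drawn as a three-vertex rectangle-list primitive in which
 * the hardware infers the fourth corner. Rotation only permutes which
 * texture corner each screen corner samples from.
 */
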
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

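/*
 * CURBE layout written by i965_render_upload_constants() (offsets
 * inferred from the pointer arithmetic above): 16-bit words 0-1 hold
 * the plane-format selector (0 = planar Y/U/V, 1 = NV12, 2 = Y800) and
 * the "skip color balance" flag; floats 4-7 hold the contrast,
 * brightness, and the two hue/saturation terms; floats 8-19 hold the
 * 3x4 YUV->RGB matrix copied from the tables above.
 */
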
static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |       /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

1502 static void
1503 i965_render_constant_buffer(VADriverContextP ctx)
1504 {
1505     struct i965_driver_data *i965 = i965_driver_data(ctx);
1506     struct intel_batchbuffer *batch = i965->batch;
1507     struct i965_render_state *render_state = &i965->render_state;
1508
1509     BEGIN_BATCH(batch, 2);
1510     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1511     OUT_RELOC(batch, render_state->curbe.bo,
1512               I915_GEM_DOMAIN_INSTRUCTION, 0,
1513               URB_CS_ENTRY_SIZE - 1);
1514     ADVANCE_BATCH(batch);    
1515 }
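
/*
 * Editor's note: in CMD_CONSTANT_BUFFER the (1 << 8) appears to be the
 * buffer-valid bit, and the buffer length (URB_CS_ENTRY_SIZE - 1, in
 * 512-bit units) travels in the low bits of the relocated address itself,
 * which is why it shows up above as the relocation delta rather than as a
 * separate dword.
 */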

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
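
/*
 * Editor's note: both branches describe the same 16-byte vertex, two
 * R32G32_FLOAT elements fetched from vertex buffer 0:
 *
 *   struct { float x, y;      -- element 0, offset 0
 *            float s0, t0; }; -- element 1, offset 8
 *
 * with Z and W synthesized as 1.0 via STORE_1_FLT.  The only difference is
 * that the pre-Ironlake path must also spell out the destination element
 * offsets (0 and 4 dwords) in VE1, a field Ironlake dropped.
 */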

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0-23 carry the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
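
/*
 * Editor's note: each dword of the palette load packs one entry as
 * (alpha << 24) | color; e.g. alpha = 0xff with color = 0x102030 yields
 * 0xff102030, so a single CMD_SAMPLER_PALETTE_LOAD uploads the whole
 * indexed-color table that the subpicture sampler consumes.
 */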

static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
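
/*
 * Editor's note: the 3DPRIMITIVE above draws a RECTLIST from three
 * sequential vertices -- the hardware derives the rectangle's fourth
 * corner itself -- so the vertex buffer holds exactly three 16-byte
 * {x, y, s, t} records, matching the pitch programmed in dword 1.
 * (Dword 3 of CMD_VERTEX_BUFFERS differs by generation: Ironlake takes an
 * end-of-buffer relocation, while older parts take what looks like a max
 * vertex index, hence the branch.)
 */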

static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id) ||
        IS_GEN8(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
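
/*
 * Editor's note: in BR13, 0xf0 << 16 is the PATCOPY raster op (solid
 * fill), BR13_8888/BR13_565 pick the destination pixel format, and the low
 * bits carry the pitch -- programmed in dwords rather than bytes for tiled
 * surfaces, which is what pitch /= 4 is for.  On GEN6+ the blitter lives
 * on its own ring, hence the BEGIN_BLT_BATCH variant.
 */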

static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

static void
i965_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}

static void
i965_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);

    i965_render_initialize(ctx);
    i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
    i965_subpic_render_pipeline_setup(ctx);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN6+
 */
static void
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

static void
gen6_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen6_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    dri_bo_unmap(render_state->cc.blend);
}
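
/*
 * Editor's note: logic_op_func 0xc is the COPY operation in the hardware's
 * 4-bit logic-op encoding (result = source), so this "blend" state simply
 * writes the pixel-shader output straight to the render target; real alpha
 * blending is only enabled in the subpicture variant further below.
 */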

static void
gen6_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}

static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
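
/*
 * Editor's note: with the VS in pass-through mode every URB entry only
 * needs a single row, so the size field is programmed as (1 - 1) and just
 * the hardware-minimum 24 VS entries are allocated; the GS gets zero
 * entries of zero size because this path never dispatches GS threads.
 */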

static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}

static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}

static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}

static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}

static void
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}

static void
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch,
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              (URB_CS_ENTRY_SIZE - 1));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((render_state->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
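
/*
 * Editor's note: the PS gets its push constants from the same curbe.bo
 * used by the gen4 path (length URB_CS_ENTRY_SIZE - 1 in the relocation's
 * low bits), and `kernel` selects the shader: PS_KERNEL for plain video
 * rendering, PS_SUBPIC_KERNEL for subpictures, dispatched SIMD16.  (The
 * SHITF spelling follows the macro's actual definition in the driver
 * headers.)
 */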

static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
gen6_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
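
/*
 * Editor's note: these factors implement classic "source over"
 * compositing, dst' = src.a * src + (1 - src.a) * dst, with pre- and
 * post-blend clamping to [0, 1], so the ARGB subpicture is alpha-blended
 * on top of the already-rendered video frame instead of overwriting it.
 */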

static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen6_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen6_render_initialize(ctx);
    gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

/*
 * for GEN7
 */
static void
gen7_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    size = sizeof(struct gen8_global_blend_state) + 2 * sizeof(struct gen8_blend_state_rt);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      size,
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}

/*
 * for GEN8
 */
static void
gen8_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;
    unsigned int end_offset;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    render_state->curbe_size = 256;
    render_state->wm.sampler_count = 0;
    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
                                     16 * sizeof(struct gen8_blend_state_rt);
    render_state->sf_clip_size = 1024;
    render_state->scissor_size = 1024;

    size = 4096 + render_state->curbe_size + render_state->sampler_size +
           render_state->cc_state_size + render_state->cc_viewport_size +
           render_state->blend_state_size + render_state->sf_clip_size +
           render_state->scissor_size;

    dri_bo_unreference(render_state->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic_state",
                      size,
                      4096);
    assert(bo);
    render_state->dynamic_state.bo = bo;

    end_offset = 0;
    render_state->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    render_state->curbe_offset = ALIGN(end_offset, 64);
    end_offset += render_state->curbe_size;

    /* Sampler state */
    render_state->sampler_offset = ALIGN(end_offset, 64);
    end_offset += render_state->sampler_size;

    /* CC_VIEWPORT state */
    render_state->cc_viewport_offset = ALIGN(end_offset, 64);
    end_offset += render_state->cc_viewport_size;

    /* CC_STATE state */
    render_state->cc_state_offset = ALIGN(end_offset, 64);
    end_offset += render_state->cc_state_size;

    /* Blend state */
    render_state->blend_state_offset = ALIGN(end_offset, 64);
    end_offset += render_state->blend_state_size;

    /* SF_CLIP state */
    render_state->sf_clip_offset = ALIGN(end_offset, 64);
    end_offset += render_state->sf_clip_size;

    /* SCISSOR state */
    render_state->scissor_offset = ALIGN(end_offset, 64);
    end_offset += render_state->scissor_size;

    /* update the end offset of dynamic_state */
    render_state->dynamic_state.end_offset = ALIGN(end_offset, 64);
}
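
/*
 * Editor's note: unlike the gen6/gen7 paths, which keep one bo per kind of
 * state, gen8 carves a single dynamic_state bo into 64-byte-aligned
 * slices.  A rough sketch of the resulting layout, using the sizes
 * programmed above (later offsets depend on the sampler/state struct
 * sizes, so only the first two are fixed):
 *
 *   0x0000  curbe        (256 bytes)
 *   0x0100  samplers     (MAX_SAMPLERS * sizeof(struct gen8_sampler_state))
 *   ......  cc viewport, cc state, blend, sf_clip (1KB), scissor (1KB)
 *
 * so one STATE_BASE_ADDRESS relocation later covers every dynamic-state
 * pointer at once.
 */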

static void
gen7_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    color_calc_state = render_state->cc.state->virtual;
    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->cc.state);
}

static void
gen7_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    dri_bo_unmap(render_state->cc.blend);
}

static void
gen7_render_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_depth_stencil_state *depth_stencil_state;

    dri_bo_map(render_state->cc.depth_stencil, 1);
    assert(render_state->cc.depth_stencil->virtual);
    depth_stencil_state = render_state->cc.depth_stencil->virtual;
    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
    dri_bo_unmap(render_state->cc.depth_stencil);
}

static void
gen7_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen7_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
gen8_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_sampler_state *sampler_state;
    int i;
    unsigned char *cc_ptr;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->sampler_offset;

    sampler_state = (struct gen8_sampler_state *) cc_ptr;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state *) cc_ptr;

    memset(global_blend_state, 0, sizeof(*global_blend_state));
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_viewport_offset;

    cc_viewport = (struct i965_cc_viewport *) cc_ptr;

    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}
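
/*
 * Editor's note: the single word written here steers the pixel shader's
 * sampling path -- 2 for Y800 (luma only), 1 for NV12 (interleaved UV),
 * 0 for the remaining planar layouts -- so one kernel binary can service
 * all three chroma arrangements.
 */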

static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2941
2942 static void
2943 gen7_emit_state_base_address(VADriverContextP ctx)
2944 {
2945     struct i965_driver_data *i965 = i965_driver_data(ctx);
2946     struct intel_batchbuffer *batch = i965->batch;
2947     struct i965_render_state *render_state = &i965->render_state;
2948
2949     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2950     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2951     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2952     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2953     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2954     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2955     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2956     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2957     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2958     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2959 }
2960
2961 static void
2962 gen8_emit_state_base_address(VADriverContextP ctx)
2963 {
2964     struct i965_driver_data *i965 = i965_driver_data(ctx);
2965     struct intel_batchbuffer *batch = i965->batch;
2966     struct i965_render_state *render_state = &i965->render_state;
2967
2968     BEGIN_BATCH(batch, 16);
2969     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
2970     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2971     OUT_BATCH(batch, 0);
2972     OUT_BATCH(batch, 0);
2973     /* DW4 */
2974     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2975     OUT_BATCH(batch, 0);
2976
2977     /* DW6 */
2978     /* Dynamic state base address */
2979     OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
2980               0, BASE_ADDRESS_MODIFY);
2981     OUT_BATCH(batch, 0);
2982
2983     /* DW8 */
2984     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2985     OUT_BATCH(batch, 0);
2986
2987     /* DW10 */
2988     /* Instruction base address */
2989     OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
2990     OUT_BATCH(batch, 0);
2991
2992     /* DW12 */
2993     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
2994     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2995     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2996     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2997     ADVANCE_BATCH(batch);
2998 }
2999
3000 static void
3001 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
3002 {
3003     struct i965_driver_data *i965 = i965_driver_data(ctx);
3004     struct intel_batchbuffer *batch = i965->batch;
3005     struct i965_render_state *render_state = &i965->render_state;
3006
3007     BEGIN_BATCH(batch, 2);
3008     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
3009     OUT_RELOC(batch,
3010               render_state->cc.viewport,
3011               I915_GEM_DOMAIN_INSTRUCTION, 0,
3012               0);
3013     ADVANCE_BATCH(batch);
3014
3015     BEGIN_BATCH(batch, 2);
3016     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
3017     OUT_BATCH(batch, 0);
3018     ADVANCE_BATCH(batch);
3019 }
3020
3021 /*
3022  * URB layout on GEN7 
3023  * ----------------------------------------
3024  * | PS Push Constants (8KB) | VS entries |
3025  * ----------------------------------------
3026  */
3027 static void
3028 gen7_emit_urb(VADriverContextP ctx)
3029 {
3030     struct i965_driver_data *i965 = i965_driver_data(ctx);
3031     struct intel_batchbuffer *batch = i965->batch;
3032     unsigned int num_urb_entries = 32;
3033
3034     if (IS_HASWELL(i965->intel.device_id))
3035         num_urb_entries = 64;
3036
3037     BEGIN_BATCH(batch, 2);
3038     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3039     OUT_BATCH(batch, 8); /* in 1KBs */
3040     ADVANCE_BATCH(batch);
3041
3042     BEGIN_BATCH(batch, 2);
3043     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3044     OUT_BATCH(batch, 
3045               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3046               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3047               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3048     ADVANCE_BATCH(batch);
3049
3050     BEGIN_BATCH(batch, 2);
3051     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3052     OUT_BATCH(batch,
3053               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3054               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3055     ADVANCE_BATCH(batch);
3056
3057     BEGIN_BATCH(batch, 2);
3058     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3059     OUT_BATCH(batch,
3060               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3061               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3062     ADVANCE_BATCH(batch);
3063
3064     BEGIN_BATCH(batch, 2);
3065     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3066     OUT_BATCH(batch,
3067               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3068               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3069     ADVANCE_BATCH(batch);
3070 }
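/*
 * Editor's note: a sketch of the arithmetic above, assuming the usual
 * GEN7 encodings (an assumption, not stated in this file): the PS
 * push-constant allocation reserves 8KB at the bottom of the URB, and
 * 3DSTATE_URB_VS places the VS entries right behind it.  The starting
 * address field is in 8KB units, so 1 means "at 8KB", and the entry
 * size field holds (size_in_64_byte_units - 1), so (2 - 1) programs
 * 128-byte entries.  This matches the layout diagram above.
 */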
3071
3072 static void
3073 gen7_emit_cc_state_pointers(VADriverContextP ctx)
3074 {
3075     struct i965_driver_data *i965 = i965_driver_data(ctx);
3076     struct intel_batchbuffer *batch = i965->batch;
3077     struct i965_render_state *render_state = &i965->render_state;
3078
3079     BEGIN_BATCH(batch, 2);
3080     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
3081     OUT_RELOC(batch,
3082               render_state->cc.state,
3083               I915_GEM_DOMAIN_INSTRUCTION, 0,
3084               1);
3085     ADVANCE_BATCH(batch);
3086
3087     BEGIN_BATCH(batch, 2);
3088     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
3089     OUT_RELOC(batch,
3090               render_state->cc.blend,
3091               I915_GEM_DOMAIN_INSTRUCTION, 0,
3092               1);
3093     ADVANCE_BATCH(batch);
3094
3095     BEGIN_BATCH(batch, 2);
3096     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
3097     OUT_RELOC(batch,
3098               render_state->cc.depth_stencil,
3099               I915_GEM_DOMAIN_INSTRUCTION, 0, 
3100               1);
3101     ADVANCE_BATCH(batch);
3102 }
3103
3104 static void
3105 gen8_emit_cc_state_pointers(VADriverContextP ctx)
3106 {
3107     struct i965_driver_data *i965 = i965_driver_data(ctx);
3108     struct intel_batchbuffer *batch = i965->batch;
3109     struct i965_render_state *render_state = &i965->render_state;
3110
3111     BEGIN_BATCH(batch, 2);
3112     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
3113     OUT_BATCH(batch, (render_state->cc_state_offset + 1));
3114     ADVANCE_BATCH(batch);
3115
3116     BEGIN_BATCH(batch, 2);
3117     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
3118     OUT_BATCH(batch, (render_state->blend_state_offset + 1));
3119     ADVANCE_BATCH(batch);
3120
3121 }
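/*
 * Editor's note (an assumption based on common GEN7+ usage, not stated
 * in this file): in both functions above the low bit of the state
 * pointer, i.e. the OUT_RELOC delta of 1 on GEN7 and the "+ 1" on
 * GEN8, is the "pointer valid" flag of the 3DSTATE_*_STATE_POINTERS
 * commands; the state itself is 64-byte aligned, so the low bits of
 * the offset are otherwise zero.
 */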
3122
3123 static void
3124 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
3125 {
3126     struct i965_driver_data *i965 = i965_driver_data(ctx);
3127     struct intel_batchbuffer *batch = i965->batch;
3128     struct i965_render_state *render_state = &i965->render_state;
3129
3130     BEGIN_BATCH(batch, 2);
3131     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
3132     OUT_RELOC(batch,
3133               render_state->wm.sampler,
3134               I915_GEM_DOMAIN_INSTRUCTION, 0,
3135               0);
3136     ADVANCE_BATCH(batch);
3137 }
3138
3139 static void
3140 gen7_emit_binding_table(VADriverContextP ctx)
3141 {
3142     struct i965_driver_data *i965 = i965_driver_data(ctx);
3143     struct intel_batchbuffer *batch = i965->batch;
3144
3145     BEGIN_BATCH(batch, 2);
3146     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
3147     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
3148     ADVANCE_BATCH(batch);
3149 }
3150
3151 static void
3152 gen7_emit_depth_buffer_state(VADriverContextP ctx)
3153 {
3154     struct i965_driver_data *i965 = i965_driver_data(ctx);
3155     struct intel_batchbuffer *batch = i965->batch;
3156
3157     BEGIN_BATCH(batch, 7);
3158     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
3159     OUT_BATCH(batch,
3160               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
3161               (I965_SURFACE_NULL << 29));
3162     OUT_BATCH(batch, 0);
3163     OUT_BATCH(batch, 0);
3164     OUT_BATCH(batch, 0);
3165     OUT_BATCH(batch, 0);
3166     OUT_BATCH(batch, 0);
3167     ADVANCE_BATCH(batch);
3168
3169     BEGIN_BATCH(batch, 3);
3170     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
3171     OUT_BATCH(batch, 0);
3172     OUT_BATCH(batch, 0);
3173     ADVANCE_BATCH(batch);
3174 }
3175
3176 static void
3177 gen7_emit_drawing_rectangle(VADriverContextP ctx)
3178 {
3179     i965_render_drawing_rectangle(ctx);
3180 }
3181
3182 static void 
3183 gen7_emit_vs_state(VADriverContextP ctx)
3184 {
3185     struct i965_driver_data *i965 = i965_driver_data(ctx);
3186     struct intel_batchbuffer *batch = i965->batch;
3187
3188     /* disable VS constant buffer */
3189     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
3190     OUT_BATCH(batch, 0);
3191     OUT_BATCH(batch, 0);
3192     OUT_BATCH(batch, 0);
3193     OUT_BATCH(batch, 0);
3194     OUT_BATCH(batch, 0);
3195     OUT_BATCH(batch, 0);
3196         
3197     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
3198     OUT_BATCH(batch, 0); /* without VS kernel */
3199     OUT_BATCH(batch, 0);
3200     OUT_BATCH(batch, 0);
3201     OUT_BATCH(batch, 0);
3202     OUT_BATCH(batch, 0); /* pass-through */
3203 }
3204
3205 static void 
3206 gen7_emit_bypass_state(VADriverContextP ctx)
3207 {
3208     struct i965_driver_data *i965 = i965_driver_data(ctx);
3209     struct intel_batchbuffer *batch = i965->batch;
3210
3211     /* bypass GS */
3212     BEGIN_BATCH(batch, 7);
3213     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
3214     OUT_BATCH(batch, 0);
3215     OUT_BATCH(batch, 0);
3216     OUT_BATCH(batch, 0);
3217     OUT_BATCH(batch, 0);
3218     OUT_BATCH(batch, 0);
3219     OUT_BATCH(batch, 0);
3220     ADVANCE_BATCH(batch);
3221
3222     BEGIN_BATCH(batch, 7);      
3223     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
3224     OUT_BATCH(batch, 0); /* without GS kernel */
3225     OUT_BATCH(batch, 0);
3226     OUT_BATCH(batch, 0);
3227     OUT_BATCH(batch, 0);
3228     OUT_BATCH(batch, 0);
3229     OUT_BATCH(batch, 0); /* pass-through */
3230     ADVANCE_BATCH(batch);
3231
3232     BEGIN_BATCH(batch, 2);
3233     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3234     OUT_BATCH(batch, 0);
3235     ADVANCE_BATCH(batch);
3236
3237     /* disable HS */
3238     BEGIN_BATCH(batch, 7);
3239     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
3240     OUT_BATCH(batch, 0);
3241     OUT_BATCH(batch, 0);
3242     OUT_BATCH(batch, 0);
3243     OUT_BATCH(batch, 0);
3244     OUT_BATCH(batch, 0);
3245     OUT_BATCH(batch, 0);
3246     ADVANCE_BATCH(batch);
3247
3248     BEGIN_BATCH(batch, 7);
3249     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
3250     OUT_BATCH(batch, 0);
3251     OUT_BATCH(batch, 0);
3252     OUT_BATCH(batch, 0);
3253     OUT_BATCH(batch, 0);
3254     OUT_BATCH(batch, 0);
3255     OUT_BATCH(batch, 0);
3256     ADVANCE_BATCH(batch);
3257
3258     BEGIN_BATCH(batch, 2);
3259     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3260     OUT_BATCH(batch, 0);
3261     ADVANCE_BATCH(batch);
3262
3263     /* Disable TE */
3264     BEGIN_BATCH(batch, 4);
3265     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3266     OUT_BATCH(batch, 0);
3267     OUT_BATCH(batch, 0);
3268     OUT_BATCH(batch, 0);
3269     ADVANCE_BATCH(batch);
3270
3271     /* Disable DS */
3272     BEGIN_BATCH(batch, 7);
3273     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
3274     OUT_BATCH(batch, 0);
3275     OUT_BATCH(batch, 0);
3276     OUT_BATCH(batch, 0);
3277     OUT_BATCH(batch, 0);
3278     OUT_BATCH(batch, 0);
3279     OUT_BATCH(batch, 0);
3280     ADVANCE_BATCH(batch);
3281
3282     BEGIN_BATCH(batch, 6);
3283     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
3284     OUT_BATCH(batch, 0);
3285     OUT_BATCH(batch, 0);
3286     OUT_BATCH(batch, 0);
3287     OUT_BATCH(batch, 0);
3288     OUT_BATCH(batch, 0);
3289     ADVANCE_BATCH(batch);
3290
3291     BEGIN_BATCH(batch, 2);
3292     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3293     OUT_BATCH(batch, 0);
3294     ADVANCE_BATCH(batch);
3295
3296     /* Disable STREAMOUT */
3297     BEGIN_BATCH(batch, 3);
3298     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
3299     OUT_BATCH(batch, 0);
3300     OUT_BATCH(batch, 0);
3301     ADVANCE_BATCH(batch);
3302 }
3303
3304 static void 
3305 gen7_emit_clip_state(VADriverContextP ctx)
3306 {
3307     struct i965_driver_data *i965 = i965_driver_data(ctx);
3308     struct intel_batchbuffer *batch = i965->batch;
3309
3310     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3311     OUT_BATCH(batch, 0);
3312     OUT_BATCH(batch, 0); /* pass-through */
3313     OUT_BATCH(batch, 0);
3314 }
3315
3316 static void 
3317 gen7_emit_sf_state(VADriverContextP ctx)
3318 {
3319     struct i965_driver_data *i965 = i965_driver_data(ctx);
3320     struct intel_batchbuffer *batch = i965->batch;
3321
3322     BEGIN_BATCH(batch, 14);
3323     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
3324     OUT_BATCH(batch,
3325               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3326               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3327               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3328     OUT_BATCH(batch, 0);
3329     OUT_BATCH(batch, 0);
3330     OUT_BATCH(batch, 0); /* DW4 */
3331     OUT_BATCH(batch, 0);
3332     OUT_BATCH(batch, 0);
3333     OUT_BATCH(batch, 0);
3334     OUT_BATCH(batch, 0);
3335     OUT_BATCH(batch, 0); /* DW9 */
3336     OUT_BATCH(batch, 0);
3337     OUT_BATCH(batch, 0);
3338     OUT_BATCH(batch, 0);
3339     OUT_BATCH(batch, 0);
3340     ADVANCE_BATCH(batch);
3341
3342     BEGIN_BATCH(batch, 7);
3343     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
3344     OUT_BATCH(batch, 0);
3345     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
3346     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3347     OUT_BATCH(batch, 0);
3348     OUT_BATCH(batch, 0);
3349     OUT_BATCH(batch, 0);
3350     ADVANCE_BATCH(batch);
3351 }
3352
3353 static void 
3354 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
3355 {
3356     struct i965_driver_data *i965 = i965_driver_data(ctx);
3357     struct intel_batchbuffer *batch = i965->batch;
3358     struct i965_render_state *render_state = &i965->render_state;
3359     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
3360     unsigned int num_samples = 0;
3361
3362     if (IS_HASWELL(i965->intel.device_id)) {
3363         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
3364         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
3365     }
3366
3367     BEGIN_BATCH(batch, 3);
3368     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
3369     OUT_BATCH(batch,
3370               GEN7_WM_DISPATCH_ENABLE |
3371               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
3372     OUT_BATCH(batch, 0);
3373     ADVANCE_BATCH(batch);
3374
3375     BEGIN_BATCH(batch, 7);
3376     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
3377     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
3378     OUT_BATCH(batch, 0);
3379     OUT_RELOC(batch, 
3380               render_state->curbe.bo,
3381               I915_GEM_DOMAIN_INSTRUCTION, 0,
3382               0);
3383     OUT_BATCH(batch, 0);
3384     OUT_BATCH(batch, 0);
3385     OUT_BATCH(batch, 0);
3386     ADVANCE_BATCH(batch);
3387
3388     BEGIN_BATCH(batch, 8);
3389     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
3390     OUT_RELOC(batch, 
3391               render_state->render_kernels[kernel].bo,
3392               I915_GEM_DOMAIN_INSTRUCTION, 0,
3393               0);
3394     OUT_BATCH(batch, 
3395               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
3396               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
3397     OUT_BATCH(batch, 0); /* scratch space base offset */
3398     OUT_BATCH(batch, 
3399               ((render_state->max_wm_threads - 1) << max_threads_shift) | num_samples |
3400               GEN7_PS_PUSH_CONSTANT_ENABLE |
3401               GEN7_PS_ATTRIBUTE_ENABLE |
3402               GEN7_PS_16_DISPATCH_ENABLE);
3403     OUT_BATCH(batch, 
3404               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
3405     OUT_BATCH(batch, 0); /* kernel 1 pointer */
3406     OUT_BATCH(batch, 0); /* kernel 2 pointer */
3407     ADVANCE_BATCH(batch);
3408 }
3409
3410 static void
3411 gen7_emit_vertex_element_state(VADriverContextP ctx)
3412 {
3413     struct i965_driver_data *i965 = i965_driver_data(ctx);
3414     struct intel_batchbuffer *batch = i965->batch;
3415
3416     /* Set up our vertex elements, sourced from the single vertex buffer. */
3417     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
3418     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
3419     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3420               GEN6_VE0_VALID |
3421               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3422               (0 << VE0_OFFSET_SHIFT));
3423     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3424               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3425               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3426               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3427     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
3428     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3429               GEN6_VE0_VALID |
3430               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3431               (8 << VE0_OFFSET_SHIFT));
3432     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
3433               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3434               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3435               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3436 }
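/*
 * Editor's sketch of one vertex record as labeled by the two element
 * comments above (the struct and its name are illustrative only):
 */
#if 0
struct gen7_render_vertex {
    float x, y;     /* element 0, buffer offset 0 */
    float s0, t0;   /* element 1, buffer offset 8 */
};                  /* 16 bytes per vertex, matching the (4 * 4) pitch
                     * programmed in gen7_emit_vertices() below */
#endif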
3437
3438 static void
3439 gen7_emit_vertices(VADriverContextP ctx)
3440 {
3441     struct i965_driver_data *i965 = i965_driver_data(ctx);
3442     struct intel_batchbuffer *batch = i965->batch;
3443     struct i965_render_state *render_state = &i965->render_state;
3444
3445     BEGIN_BATCH(batch, 5);
3446     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3447     OUT_BATCH(batch, 
3448               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
3449               GEN6_VB0_VERTEXDATA |
3450               GEN7_VB0_ADDRESS_MODIFYENABLE |
3451               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3452     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
3453     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
3454     OUT_BATCH(batch, 0);
3455     ADVANCE_BATCH(batch);
3456
3457     BEGIN_BATCH(batch, 7);
3458     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3459     OUT_BATCH(batch,
3460               _3DPRIM_RECTLIST |
3461               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3462     OUT_BATCH(batch, 3); /* vertex count per instance */
3463     OUT_BATCH(batch, 0); /* start vertex offset */
3464     OUT_BATCH(batch, 1); /* single instance */
3465     OUT_BATCH(batch, 0); /* start instance location */
3466     OUT_BATCH(batch, 0);
3467     ADVANCE_BATCH(batch);
3468 }
3469
3470 static void
3471 gen7_render_emit_states(VADriverContextP ctx, int kernel)
3472 {
3473     struct i965_driver_data *i965 = i965_driver_data(ctx);
3474     struct intel_batchbuffer *batch = i965->batch;
3475
3476     intel_batchbuffer_start_atomic(batch, 0x1000);
3477     intel_batchbuffer_emit_mi_flush(batch);
3478     gen7_emit_invarient_states(ctx);
3479     gen7_emit_state_base_address(ctx);
3480     gen7_emit_viewport_state_pointers(ctx);
3481     gen7_emit_urb(ctx);
3482     gen7_emit_cc_state_pointers(ctx);
3483     gen7_emit_sampler_state_pointers(ctx);
3484     gen7_emit_bypass_state(ctx);
3485     gen7_emit_vs_state(ctx);
3486     gen7_emit_clip_state(ctx);
3487     gen7_emit_sf_state(ctx);
3488     gen7_emit_wm_state(ctx, kernel);
3489     gen7_emit_binding_table(ctx);
3490     gen7_emit_depth_buffer_state(ctx);
3491     gen7_emit_drawing_rectangle(ctx);
3492     gen7_emit_vertex_element_state(ctx);
3493     gen7_emit_vertices(ctx);
3494     intel_batchbuffer_end_atomic(batch);
3495 }
3496
3497 static void
3498 gen8_emit_vertices(VADriverContextP ctx)
3499 {
3500     struct i965_driver_data *i965 = i965_driver_data(ctx);
3501     struct intel_batchbuffer *batch = i965->batch;
3502     struct i965_render_state *render_state = &i965->render_state;
3503
3504     BEGIN_BATCH(batch, 5);
3505     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
3506     OUT_BATCH(batch, 
3507               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
3508               (0 << GEN8_VB0_MOCS_SHIFT) |
3509               GEN7_VB0_ADDRESS_MODIFYENABLE |
3510               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
3511     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
3512     OUT_BATCH(batch, 0);
3513     OUT_BATCH(batch, 12 * 4);
3514     ADVANCE_BATCH(batch);
3515
3516     /* The topology in 3DPRIMITIVE is overridden by the VF_TOPOLOGY command */
3517     BEGIN_BATCH(batch, 2);
3518     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
3519     OUT_BATCH(batch,
3520               _3DPRIM_RECTLIST);
3521     ADVANCE_BATCH(batch);
3522
3523     
3524     BEGIN_BATCH(batch, 7);
3525     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
3526     OUT_BATCH(batch,
3527               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
3528     OUT_BATCH(batch, 3); /* vertex count per instance */
3529     OUT_BATCH(batch, 0); /* start vertex offset */
3530     OUT_BATCH(batch, 1); /* single instance */
3531     OUT_BATCH(batch, 0); /* start instance location */
3532     OUT_BATCH(batch, 0);
3533     ADVANCE_BATCH(batch);
3534 }
3535
3536 static void
3537 gen8_emit_vertex_element_state(VADriverContextP ctx)
3538 {
3539     struct i965_driver_data *i965 = i965_driver_data(ctx);
3540     struct intel_batchbuffer *batch = i965->batch;
3541
3542     /*
3543      * The VUE layout
3544      * dword 0-3: pad (0, 0, 0, 0)
3545      * dword 4-7: position (x, y, 1.0, 1.0)
3546      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
3547      */
3548
3549     /* Set up our vertex elements, sourced from the single vertex buffer. */
3550     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));
3551
3552     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
3553      * We don't really know or care what they do.
3554      */
3555
3556     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3557               GEN8_VE0_VALID |
3558               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3559               (0 << VE0_OFFSET_SHIFT));
3560     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
3561               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
3562               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
3563               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
3564
3565     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
3566     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3567               GEN8_VE0_VALID |
3568               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3569               (8 << VE0_OFFSET_SHIFT));
3570     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3571               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3572               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3573               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3574
3575     /* offset 0: u, v -> {u, v, 1.0, 1.0} */
3576     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
3577               GEN8_VE0_VALID |
3578               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
3579               (0 << VE0_OFFSET_SHIFT));
3580     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
3581               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
3582               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
3583               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
3584 }
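/*
 * Editor's sketch of one vertex record as the elements above consume
 * it; element 0 stores constants and reads nothing from the buffer.
 * The struct and its name are illustrative only:
 */
#if 0
struct gen8_render_vertex {
    float u, v;     /* element 2, buffer offset 0: texture coordinate */
    float x, y;     /* element 1, buffer offset 8: position */
};                  /* 16 bytes per vertex; the 12 * 4 buffer size in
                     * gen8_emit_vertices() is exactly 3 such records */
#endif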
3585
3586 static void 
3587 gen8_emit_vs_state(VADriverContextP ctx)
3588 {
3589     struct i965_driver_data *i965 = i965_driver_data(ctx);
3590     struct intel_batchbuffer *batch = i965->batch;
3591
3592     /* disable VS constant buffer */
3593     BEGIN_BATCH(batch, 11);
3594     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
3595     OUT_BATCH(batch, 0);
3596     OUT_BATCH(batch, 0);
3597     /* Constant buffer 0 */
3598     OUT_BATCH(batch, 0);
3599     OUT_BATCH(batch, 0);
3600     /* Constant buffer 1 */
3601     OUT_BATCH(batch, 0);
3602     OUT_BATCH(batch, 0);
3603     /* Constant buffer 2 */
3604     OUT_BATCH(batch, 0);
3605     OUT_BATCH(batch, 0);
3606     /* Constant buffer 3 */
3607     OUT_BATCH(batch, 0);
3608     OUT_BATCH(batch, 0);
3609     ADVANCE_BATCH(batch);
3610         
3611     BEGIN_BATCH(batch, 9);
3612     OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
3613     OUT_BATCH(batch, 0); /* without VS kernel */
3614     OUT_BATCH(batch, 0);
3615     /* VS shader dispatch flag */
3616     OUT_BATCH(batch, 0);
3617     OUT_BATCH(batch, 0);
3618     OUT_BATCH(batch, 0);
3619     /* DW6. VS shader GRF and URB buffer definition */
3620     OUT_BATCH(batch, 0);
3621     OUT_BATCH(batch, 0); /* pass-through */
3622     OUT_BATCH(batch, 0);
3623     ADVANCE_BATCH(batch);
3624
3625     BEGIN_BATCH(batch, 2);
3626     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
3627     OUT_BATCH(batch, 0);
3628     ADVANCE_BATCH(batch);
3629
3630     BEGIN_BATCH(batch, 2);
3631     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
3632     OUT_BATCH(batch, 0);
3633     ADVANCE_BATCH(batch);
3634
3635 }
3636
3637 /*
3638  * URB layout on GEN8 
3639  * ----------------------------------------
3640  * | PS Push Constants (8KB) | VS entries |
3641  * ----------------------------------------
3642  */
3643 static void
3644 gen8_emit_urb(VADriverContextP ctx)
3645 {
3646     struct i965_driver_data *i965 = i965_driver_data(ctx);
3647     struct intel_batchbuffer *batch = i965->batch;
3648     unsigned int num_urb_entries = 64;
3649
3650     /* The minimum urb entries is 64 */
3651     /* 64 is the minimum number of VS URB entries */
3652     BEGIN_BATCH(batch, 2);
3653     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
3654     OUT_BATCH(batch, 0);
3655     ADVANCE_BATCH(batch);
3656
3657     BEGIN_BATCH(batch, 2);
3658     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
3659     OUT_BATCH(batch, 0);
3660     ADVANCE_BATCH(batch);
3661
3662     BEGIN_BATCH(batch, 2);
3663     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
3664     OUT_BATCH(batch, 0);
3665     ADVANCE_BATCH(batch);
3666
3667     BEGIN_BATCH(batch, 2);
3668     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
3669     OUT_BATCH(batch, 0);
3670     ADVANCE_BATCH(batch);
3671
3672     /* Size is 8KB and base address is 0KB */
3673     BEGIN_BATCH(batch, 2);
3674     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
3676     OUT_BATCH(batch,
3677                 (1 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
3678                 (4 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
3679     ADVANCE_BATCH(batch);
3680
3681     BEGIN_BATCH(batch, 2);
3682     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
3683     OUT_BATCH(batch, 
3684               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
3685               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
3686               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3687     ADVANCE_BATCH(batch);
3688
3689     BEGIN_BATCH(batch, 2);
3690     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
3691     OUT_BATCH(batch,
3692               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3693               (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3694     ADVANCE_BATCH(batch);
3695
3696     BEGIN_BATCH(batch, 2);
3697     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
3698     OUT_BATCH(batch,
3699               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3700               (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3701     ADVANCE_BATCH(batch);
3702
3703     BEGIN_BATCH(batch, 2);
3704     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
3705     OUT_BATCH(batch,
3706               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
3707               (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
3708     ADVANCE_BATCH(batch);
3709 }
3710
3711 static void 
3712 gen8_emit_bypass_state(VADriverContextP ctx)
3713 {
3714     struct i965_driver_data *i965 = i965_driver_data(ctx);
3715     struct intel_batchbuffer *batch = i965->batch;
3716
3717     /* bypass GS */
3718     BEGIN_BATCH(batch, 11);
3719     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
3720     OUT_BATCH(batch, 0);
3721     OUT_BATCH(batch, 0);
3722     OUT_BATCH(batch, 0);
3723     OUT_BATCH(batch, 0);
3724     OUT_BATCH(batch, 0);
3725     OUT_BATCH(batch, 0);
3726     OUT_BATCH(batch, 0);
3727     OUT_BATCH(batch, 0);
3728     OUT_BATCH(batch, 0);
3729     OUT_BATCH(batch, 0);
3730     ADVANCE_BATCH(batch);
3731
3732     BEGIN_BATCH(batch, 10);     
3733     OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
3734     /* GS shader address */
3735     OUT_BATCH(batch, 0); /* without GS kernel */
3736     OUT_BATCH(batch, 0);
3737     /* DW3. GS shader dispatch flag */
3738     OUT_BATCH(batch, 0);
3739     OUT_BATCH(batch, 0);
3740     OUT_BATCH(batch, 0);
3741     /* DW6. GS shader GRF and URB offset/length */
3742     OUT_BATCH(batch, 0);
3743     OUT_BATCH(batch, 0); /* pass-through */
3744     OUT_BATCH(batch, 0);
3745     OUT_BATCH(batch, 0);
3746     ADVANCE_BATCH(batch);
3747
3748     BEGIN_BATCH(batch, 2);
3749     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
3750     OUT_BATCH(batch, 0);
3751     ADVANCE_BATCH(batch);
3752
3753     BEGIN_BATCH(batch, 2);
3754     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
3755     OUT_BATCH(batch, 0);
3756     ADVANCE_BATCH(batch);
3757
3758     /* disable HS */
3759     BEGIN_BATCH(batch, 11);
3760     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
3761     OUT_BATCH(batch, 0);
3762     OUT_BATCH(batch, 0);
3763     OUT_BATCH(batch, 0);
3764     OUT_BATCH(batch, 0);
3765     OUT_BATCH(batch, 0);
3766     OUT_BATCH(batch, 0);
3767     OUT_BATCH(batch, 0);
3768     OUT_BATCH(batch, 0);
3769     OUT_BATCH(batch, 0);
3770     OUT_BATCH(batch, 0);
3771     ADVANCE_BATCH(batch);
3772
3773     BEGIN_BATCH(batch, 9);
3774     OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
3775     OUT_BATCH(batch, 0);
3776     /* DW2. HS pass-through */
3777     OUT_BATCH(batch, 0);
3778     /* DW3. HS shader address */
3779     OUT_BATCH(batch, 0);
3780     OUT_BATCH(batch, 0);
3781     /* DW5. HS shader flags, URB offset/length and so on */
3782     OUT_BATCH(batch, 0);
3783     OUT_BATCH(batch, 0);
3784     OUT_BATCH(batch, 0);
3785     OUT_BATCH(batch, 0);
3786     ADVANCE_BATCH(batch);
3787
3788     BEGIN_BATCH(batch, 2);
3789     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
3790     OUT_BATCH(batch, 0);
3791     ADVANCE_BATCH(batch);
3792
3793     BEGIN_BATCH(batch, 2);
3794     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
3795     OUT_BATCH(batch, 0);
3796     ADVANCE_BATCH(batch);
3797
3798     /* Disable TE */
3799     BEGIN_BATCH(batch, 4);
3800     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
3801     OUT_BATCH(batch, 0);
3802     OUT_BATCH(batch, 0);
3803     OUT_BATCH(batch, 0);
3804     ADVANCE_BATCH(batch);
3805
3806     /* Disable DS */
3807     BEGIN_BATCH(batch, 11);
3808     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
3809     OUT_BATCH(batch, 0);
3810     OUT_BATCH(batch, 0);
3811     OUT_BATCH(batch, 0);
3812     OUT_BATCH(batch, 0);
3813     OUT_BATCH(batch, 0);
3814     OUT_BATCH(batch, 0);
3815     OUT_BATCH(batch, 0);
3816     OUT_BATCH(batch, 0);
3817     OUT_BATCH(batch, 0);
3818     OUT_BATCH(batch, 0);
3819     ADVANCE_BATCH(batch);
3820
3821     BEGIN_BATCH(batch, 9);
3822     OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
3823     /* DW1. DS shader pointer */
3824     OUT_BATCH(batch, 0);
3825     OUT_BATCH(batch, 0);
3826     /* DW3-5. DS shader dispatch flags */
3827     OUT_BATCH(batch, 0);
3828     OUT_BATCH(batch, 0);
3829     OUT_BATCH(batch, 0);
3830     /* DW6-7. DS shader pass-through, GRF, URB offset/length, thread count */
3831     OUT_BATCH(batch, 0);
3832     OUT_BATCH(batch, 0);
3833     /* DW8. DS shader output URB */
3834     OUT_BATCH(batch, 0);
3835     ADVANCE_BATCH(batch);
3836
3837     BEGIN_BATCH(batch, 2);
3838     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
3839     OUT_BATCH(batch, 0);
3840     ADVANCE_BATCH(batch);
3841
3842     BEGIN_BATCH(batch, 2);
3843     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
3844     OUT_BATCH(batch, 0);
3845     ADVANCE_BATCH(batch);
3846
3847     /* Disable STREAMOUT */
3848     BEGIN_BATCH(batch, 5);
3849     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
3850     OUT_BATCH(batch, 0);
3851     OUT_BATCH(batch, 0);
3852     OUT_BATCH(batch, 0);
3853     OUT_BATCH(batch, 0);
3854     ADVANCE_BATCH(batch);
3855 }
3856
3857 static void
3858 gen8_emit_invarient_states(VADriverContextP ctx)
3859 {
3860     struct i965_driver_data *i965 = i965_driver_data(ctx);
3861     struct intel_batchbuffer *batch = i965->batch;
3862
3863     BEGIN_BATCH(batch, 1);
3864     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
3865     ADVANCE_BATCH(batch);
3866
3867     BEGIN_BATCH(batch, 2);
3868     OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
3869     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
3870               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
3871     ADVANCE_BATCH(batch);
3872
3873     /* Update 3D Multisample pattern */
3874     BEGIN_BATCH(batch, 9);
3875     OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
3876     OUT_BATCH(batch, 0);
3877     OUT_BATCH(batch, 0);
3878     OUT_BATCH(batch, 0);
3879     OUT_BATCH(batch, 0);
3880     OUT_BATCH(batch, 0);
3881     OUT_BATCH(batch, 0);
3882     OUT_BATCH(batch, 0);
3883     OUT_BATCH(batch, 0);
3884     ADVANCE_BATCH(batch);
3885
3886
3887     BEGIN_BATCH(batch, 2);
3888     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
3889     OUT_BATCH(batch, 1);
3890     ADVANCE_BATCH(batch);
3891
3892     /* Set system instruction pointer */
3893     BEGIN_BATCH(batch, 3);
3894     OUT_BATCH(batch, CMD_STATE_SIP | 0);
3895     OUT_BATCH(batch, 0);
3896     OUT_BATCH(batch, 0);
3897     ADVANCE_BATCH(batch);
3898 }
3899
3900 static void 
3901 gen8_emit_clip_state(VADriverContextP ctx)
3902 {
3903     struct i965_driver_data *i965 = i965_driver_data(ctx);
3904     struct intel_batchbuffer *batch = i965->batch;
3905
3906     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
3907     OUT_BATCH(batch, 0);
3908     OUT_BATCH(batch, 0); /* pass-through */
3909     OUT_BATCH(batch, 0);
3910 }
3911
3912 static void 
3913 gen8_emit_sf_state(VADriverContextP ctx)
3914 {
3915     struct i965_driver_data *i965 = i965_driver_data(ctx);
3916     struct intel_batchbuffer *batch = i965->batch;
3917
3918     BEGIN_BATCH(batch, 5);
3919     OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
3920     OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
3921     OUT_BATCH(batch, 0);
3922     OUT_BATCH(batch, 0);
3923     OUT_BATCH(batch, 0);
3924     ADVANCE_BATCH(batch);
3925
3926
3927     BEGIN_BATCH(batch, 4);
3928     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
3929     OUT_BATCH(batch,
3930               (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
3931               (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
3932               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
3933               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
3934               (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
3935     OUT_BATCH(batch, 0);
3936     OUT_BATCH(batch, 0);
3937     ADVANCE_BATCH(batch);
3938
3939     /* SBE for backend setup */
3940     BEGIN_BATCH(batch, 11);
3941     OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
3942     OUT_BATCH(batch, 0);
3943     OUT_BATCH(batch, 0);
3944     OUT_BATCH(batch, 0);
3945     OUT_BATCH(batch, 0);
3946     OUT_BATCH(batch, 0);
3947     OUT_BATCH(batch, 0);
3948     OUT_BATCH(batch, 0);
3949     OUT_BATCH(batch, 0);
3950     OUT_BATCH(batch, 0);
3951     OUT_BATCH(batch, 0);
3952     ADVANCE_BATCH(batch);
3953
3954     BEGIN_BATCH(batch, 4);
3955     OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
3956     OUT_BATCH(batch, 0);
3957     OUT_BATCH(batch, 0);
3958     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
3959     ADVANCE_BATCH(batch);
3960 }
3961
3962 static void 
3963 gen8_emit_wm_state(VADriverContextP ctx, int kernel)
3964 {
3965     struct i965_driver_data *i965 = i965_driver_data(ctx);
3966     struct intel_batchbuffer *batch = i965->batch;
3967     struct i965_render_state *render_state = &i965->render_state;
3968     unsigned int num_samples = 0;
3969     unsigned int max_threads;
3970
3971     max_threads = render_state->max_wm_threads - 2;
3972
3973     BEGIN_BATCH(batch, 2);
3974     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
3975     OUT_BATCH(batch,
3976               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
3977     ADVANCE_BATCH(batch);
3978
3979     
3980     if (kernel == PS_KERNEL) {
3981         BEGIN_BATCH(batch, 2);
3982         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
3983         OUT_BATCH(batch,
3984                 GEN8_PS_BLEND_HAS_WRITEABLE_RT);
3985         ADVANCE_BATCH(batch);
3986     } else if (kernel == PS_SUBPIC_KERNEL) {
3987         BEGIN_BATCH(batch, 2);
3988         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
3989         OUT_BATCH(batch,
3990                 (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
3991                  GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
3992                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
3993                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
3994                  (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
3995                  (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
3996         ADVANCE_BATCH(batch);
3997     }
3998
3999     BEGIN_BATCH(batch, 2);
4000     OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
4001     OUT_BATCH(batch,
4002               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
4003     ADVANCE_BATCH(batch);
4004
4005     BEGIN_BATCH(batch, 11);
4006     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
4007     OUT_BATCH(batch, 1);
4008     OUT_BATCH(batch, 0);
4009     /* DW3-4. Constant buffer 0 */
4010     OUT_BATCH(batch, render_state->curbe_offset);
4011     OUT_BATCH(batch, 0);
4012
4013     /* DW5-10. Constant buffers 1-3 */
4014     OUT_BATCH(batch, 0);
4015     OUT_BATCH(batch, 0);
4016     OUT_BATCH(batch, 0);
4017     OUT_BATCH(batch, 0);
4018     OUT_BATCH(batch, 0);
4019     OUT_BATCH(batch, 0);
4020     ADVANCE_BATCH(batch);
4021
4022     BEGIN_BATCH(batch, 12);
4023     OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
4024     /* PS shader address */
4025     OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);
4026
4027     OUT_BATCH(batch, 0);
4028     /* DW3. PS shader flags: sampler count / binding table entry count */
4029     OUT_BATCH(batch, 
4030               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
4031               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
4032     /* DW4-5. Scratch space */
4033     OUT_BATCH(batch, 0); /* scratch space base offset */
4034     OUT_BATCH(batch, 0);
4035     /* DW6. PS shader threads. */
4036     OUT_BATCH(batch, 
4037               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
4038               GEN7_PS_PUSH_CONSTANT_ENABLE |
4039               GEN7_PS_16_DISPATCH_ENABLE);
4040     /* DW7. PS shader GRF */
4041     OUT_BATCH(batch, 
4042               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
4043     OUT_BATCH(batch, 0); /* kernel 1 pointer */
4044     OUT_BATCH(batch, 0);
4045     OUT_BATCH(batch, 0); /* kernel 2 pointer */
4046     OUT_BATCH(batch, 0);
4047     ADVANCE_BATCH(batch);
4048
4049     BEGIN_BATCH(batch, 2);
4050     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
4051     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
4052     ADVANCE_BATCH(batch);
4053 }
4054
4055 static void
4056 gen8_emit_depth_buffer_state(VADriverContextP ctx)
4057 {
4058     struct i965_driver_data *i965 = i965_driver_data(ctx);
4059     struct intel_batchbuffer *batch = i965->batch;
4060
4061     BEGIN_BATCH(batch, 8);
4062     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
4063     OUT_BATCH(batch,
4064               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
4065               (I965_SURFACE_NULL << 29));
4066     /* DW2-3. Depth Buffer Address */
4067     OUT_BATCH(batch, 0);
4068     OUT_BATCH(batch, 0);
4069     /* DW4-7. Surface structure */
4070     OUT_BATCH(batch, 0);
4071     OUT_BATCH(batch, 0);
4072     OUT_BATCH(batch, 0);
4073     OUT_BATCH(batch, 0);
4074     ADVANCE_BATCH(batch);
4075
4076     /* Update the hierarchical depth (HiZ) buffer */
4077     BEGIN_BATCH(batch, 5);
4078     OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
4079     OUT_BATCH(batch, 0);
4080     OUT_BATCH(batch, 0);
4081     OUT_BATCH(batch, 0);
4082     OUT_BATCH(batch, 0);
4083     ADVANCE_BATCH(batch);
4084     
4085     /* Update the stencil buffer */
4086     BEGIN_BATCH(batch, 5);
4087     OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
4088     OUT_BATCH(batch, 0);
4089     OUT_BATCH(batch, 0);
4090     OUT_BATCH(batch, 0);
4091     OUT_BATCH(batch, 0);
4092     ADVANCE_BATCH(batch);
4093     
4094     BEGIN_BATCH(batch, 3);
4095     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
4096     OUT_BATCH(batch, 0);
4097     OUT_BATCH(batch, 0);
4098     ADVANCE_BATCH(batch);
4099 }
4100
4101 static void
4102 gen8_emit_depth_stencil_state(VADriverContextP ctx)
4103 {
4104     struct i965_driver_data *i965 = i965_driver_data(ctx);
4105     struct intel_batchbuffer *batch = i965->batch;
4106
4107     BEGIN_BATCH(batch, 3);
4108     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
4109     OUT_BATCH(batch, 0);
4110     OUT_BATCH(batch, 0);
4111     ADVANCE_BATCH(batch);
4112 }
4113
4114 static void
4115 gen8_emit_wm_hz_op(VADriverContextP ctx)
4116 {
4117     struct i965_driver_data *i965 = i965_driver_data(ctx);
4118     struct intel_batchbuffer *batch = i965->batch;
4119
4120     BEGIN_BATCH(batch, 5);
4121     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
4122     OUT_BATCH(batch, 0);
4123     OUT_BATCH(batch, 0);
4124     OUT_BATCH(batch, 0);
4125     OUT_BATCH(batch, 0);
4126     ADVANCE_BATCH(batch);
4127 }
4128
4129 static void
4130 gen8_emit_viewport_state_pointers(VADriverContextP ctx)
4131 {
4132     struct i965_driver_data *i965 = i965_driver_data(ctx);
4133     struct intel_batchbuffer *batch = i965->batch;
4134     struct i965_render_state *render_state = &i965->render_state;
4135
4136     BEGIN_BATCH(batch, 2);
4137     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
4138     OUT_BATCH(batch, render_state->cc_viewport_offset);
4139     ADVANCE_BATCH(batch);
4140
4141     BEGIN_BATCH(batch, 2);
4142     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
4143     OUT_BATCH(batch, 0);
4144     ADVANCE_BATCH(batch);
4145 }
4146
4147 static void
4148 gen8_emit_sampler_state_pointers(VADriverContextP ctx)
4149 {
4150     struct i965_driver_data *i965 = i965_driver_data(ctx);
4151     struct intel_batchbuffer *batch = i965->batch;
4152     struct i965_render_state *render_state = &i965->render_state;
4153
4154     BEGIN_BATCH(batch, 2);
4155     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
4156     OUT_BATCH(batch, render_state->sampler_offset);
4157     ADVANCE_BATCH(batch);
4158 }
4159
4160
4161 static void
4162 gen8_render_emit_states(VADriverContextP ctx, int kernel)
4163 {
4164     struct i965_driver_data *i965 = i965_driver_data(ctx);
4165     struct intel_batchbuffer *batch = i965->batch;
4166
4167     intel_batchbuffer_start_atomic(batch, 0x1000);
4168     intel_batchbuffer_emit_mi_flush(batch);
4169     gen8_emit_invarient_states(ctx);
4170     gen8_emit_state_base_address(ctx);
4171     gen8_emit_viewport_state_pointers(ctx);
4172     gen8_emit_urb(ctx);
4173     gen8_emit_cc_state_pointers(ctx);
4174     gen8_emit_sampler_state_pointers(ctx);
4175     gen8_emit_wm_hz_op(ctx);
4176     gen8_emit_bypass_state(ctx);
4177     gen8_emit_vs_state(ctx);
4178     gen8_emit_clip_state(ctx);
4179     gen8_emit_sf_state(ctx);
4180     gen8_emit_depth_stencil_state(ctx);
4181     gen8_emit_wm_state(ctx, kernel);
4182     gen8_emit_depth_buffer_state(ctx);
4183     gen7_emit_drawing_rectangle(ctx);
4184     gen8_emit_vertex_element_state(ctx);
4185     gen8_emit_vertices(ctx);
4186     intel_batchbuffer_end_atomic(batch);
4187 }
4188
4189 static void
4190 gen7_render_put_surface(
4191     VADriverContextP   ctx,
4192     struct object_surface *obj_surface,    
4193     const VARectangle *src_rect,
4194     const VARectangle *dst_rect,
4195     unsigned int       flags
4196 )
4197 {
4198     struct i965_driver_data *i965 = i965_driver_data(ctx);
4199     struct intel_batchbuffer *batch = i965->batch;
4200
4201     gen7_render_initialize(ctx);
4202     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4203     i965_clear_dest_region(ctx);
4204     gen7_render_emit_states(ctx, PS_KERNEL);
4205     intel_batchbuffer_flush(batch);
4206 }
4207
4208 static void
4209 gen8_render_put_surface(
4210     VADriverContextP   ctx,
4211     struct object_surface *obj_surface,    
4212     const VARectangle *src_rect,
4213     const VARectangle *dst_rect,
4214     unsigned int       flags
4215 )
4216 {
4217     struct i965_driver_data *i965 = i965_driver_data(ctx);
4218     struct intel_batchbuffer *batch = i965->batch;
4219
4220     gen8_render_initialize(ctx);
4221     gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
4222     gen8_clear_dest_region(ctx);
4223     gen8_render_emit_states(ctx, PS_KERNEL);
4224     intel_batchbuffer_flush(batch);
4225 }
4226
4227 static void
4228 gen7_subpicture_render_blend_state(VADriverContextP ctx)
4229 {
4230     struct i965_driver_data *i965 = i965_driver_data(ctx);
4231     struct i965_render_state *render_state = &i965->render_state;
4232     struct gen6_blend_state *blend_state;
4233
4234     dri_bo_unmap(render_state->cc.state);    
4235     dri_bo_map(render_state->cc.blend, 1);
4236     assert(render_state->cc.blend->virtual);
4237     blend_state = render_state->cc.blend->virtual;
4238     memset(blend_state, 0, sizeof(*blend_state));
4239     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4240     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4241     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
4242     blend_state->blend0.blend_enable = 1;
4243     blend_state->blend1.post_blend_clamp_enable = 1;
4244     blend_state->blend1.pre_blend_clamp_enable = 1;
4245     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4246     dri_bo_unmap(render_state->cc.blend);
4247 }
4248
4249 static void
4250 gen8_subpicture_render_blend_state(VADriverContextP ctx)
4251 {
4252     struct i965_driver_data *i965 = i965_driver_data(ctx);
4253     struct i965_render_state *render_state = &i965->render_state;
4254     struct gen8_global_blend_state *global_blend_state;
4255     struct gen8_blend_state_rt *blend_state;
4256     unsigned char *cc_ptr;
4257     
4258     dri_bo_map(render_state->dynamic_state.bo, 1);
4259     assert(render_state->dynamic_state.bo->virtual);
4260
4261     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
4262                         render_state->blend_state_offset;
4263
4264     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
4265
4266     memset(global_blend_state, 0, sizeof(*global_blend_state));
4267     /* Global blend state + blend_state for Render Target */
4268     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
4269     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4270     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4271     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
4272     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
4273     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
4274     blend_state->blend0.colorbuf_blend = 1;
4275     blend_state->blend1.post_blend_clamp_enable = 1;
4276     blend_state->blend1.pre_blend_clamp_enable = 1;
4277     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
4278
4279     dri_bo_unmap(render_state->dynamic_state.bo);
4280 }
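/*
 * Editor's note: the factors programmed above implement standard
 * "source over" compositing; per channel the hardware computes
 * approximately
 *
 *     out = src * src_alpha + dst * (1 - src_alpha)
 *
 * which is what makes subpictures appear alpha-blended on top of the
 * video frame.
 */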
4281
4282 static void
4283 gen7_subpicture_render_setup_states(
4284     VADriverContextP   ctx,
4285     struct object_surface *obj_surface,
4286     const VARectangle *src_rect,
4287     const VARectangle *dst_rect
4288 )
4289 {
4290     i965_render_dest_surface_state(ctx, 0);
4291     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4292     i965_render_sampler(ctx);
4293     i965_render_cc_viewport(ctx);
4294     gen7_render_color_calc_state(ctx);
4295     gen7_subpicture_render_blend_state(ctx);
4296     gen7_render_depth_stencil_state(ctx);
4297     i965_subpic_render_upload_constants(ctx, obj_surface);
4298     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4299 }
4300
4301 static void
4302 gen8_subpicture_render_setup_states(
4303     VADriverContextP   ctx,
4304     struct object_surface *obj_surface,
4305     const VARectangle *src_rect,
4306     const VARectangle *dst_rect
4307 )
4308 {
4309     i965_render_dest_surface_state(ctx, 0);
4310     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
4311     gen8_render_sampler(ctx);
4312     i965_render_cc_viewport(ctx);
4313     gen7_render_color_calc_state(ctx);
4314     gen8_subpicture_render_blend_state(ctx);
4315     i965_subpic_render_upload_constants(ctx, obj_surface);
4316     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
4317 }
4318
4319 static void
4320 gen7_render_put_subpicture(
4321     VADriverContextP   ctx,
4322     struct object_surface *obj_surface,
4323     const VARectangle *src_rect,
4324     const VARectangle *dst_rect
4325 )
4326 {
4327     struct i965_driver_data *i965 = i965_driver_data(ctx);
4328     struct intel_batchbuffer *batch = i965->batch;
4329     unsigned int index = obj_surface->subpic_render_idx;
4330     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4331
4332     assert(obj_subpic);
4333     gen7_render_initialize(ctx);
4334     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4335     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4336     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4337     intel_batchbuffer_flush(batch);
4338 }
4339
4340 static void
4341 gen8_render_put_subpicture(
4342     VADriverContextP   ctx,
4343     struct object_surface *obj_surface,
4344     const VARectangle *src_rect,
4345     const VARectangle *dst_rect
4346 )
4347 {
4348     struct i965_driver_data *i965 = i965_driver_data(ctx);
4349     struct intel_batchbuffer *batch = i965->batch;
4350     unsigned int index = obj_surface->subpic_render_idx;
4351     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
4352
4353     assert(obj_subpic);
4354     gen8_render_initialize(ctx);
4355     gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
4356     gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
4357     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
4358     intel_batchbuffer_flush(batch);
4359 }
4360
4361 /*
4362  * global functions
4363  */
4364 VAStatus 
4365 i965_DestroySurfaces(VADriverContextP ctx,
4366                      VASurfaceID *surface_list,
4367                      int num_surfaces);
4368 void
4369 intel_render_put_surface(
4370     VADriverContextP   ctx,
4371     struct object_surface *obj_surface,
4372     const VARectangle *src_rect,
4373     const VARectangle *dst_rect,
4374     unsigned int       flags
4375 )
4376 {
4377     struct i965_driver_data *i965 = i965_driver_data(ctx);
4378     int has_done_scaling = 0;
4379     VASurfaceID out_surface_id = i965_post_processing(ctx,
4380                                                       obj_surface,
4381                                                       src_rect,
4382                                                       dst_rect,
4383                                                       flags,
4384                                                       &has_done_scaling);
4385
4386     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
4387
4388     if (out_surface_id != VA_INVALID_ID) {
4389         struct object_surface *new_obj_surface = SURFACE(out_surface_id);
4390         
4391         if (new_obj_surface && new_obj_surface->bo)
4392             obj_surface = new_obj_surface;
4393
4394         if (has_done_scaling)
4395             src_rect = dst_rect;
4396     }
4397
4398     if (IS_GEN8(i965->intel.device_id))
4399         gen8_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4400     else if (IS_GEN7(i965->intel.device_id))
4401         gen7_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4402     else if (IS_GEN6(i965->intel.device_id))
4403         gen6_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4404     else
4405         i965_render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
4406
4407     if (out_surface_id != VA_INVALID_ID)
4408         i965_DestroySurfaces(ctx, &out_surface_id, 1);
4409 }
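/*
 * Editor's sketch of a hypothetical caller (not the driver's actual
 * output path; the function name, rectangles and flag choice are
 * assumptions for illustration):
 */
#if 0
static void
put_surface_example(VADriverContextP ctx, struct object_surface *obj_surface)
{
    VARectangle src_rect = { 0, 0, obj_surface->orig_width, obj_surface->orig_height };
    VARectangle dst_rect = { 0, 0, 1920, 1080 };

    /* Scales and color-converts obj_surface onto the current render
     * target, dispatching to the GEN-specific path internally. */
    intel_render_put_surface(ctx, obj_surface, &src_rect, &dst_rect, VA_FRAME_PICTURE);
}
#endif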
4410
4411 void
4412 intel_render_put_subpicture(
4413     VADriverContextP   ctx,
4414     struct object_surface *obj_surface,
4415     const VARectangle *src_rect,
4416     const VARectangle *dst_rect
4417 )
4418 {
4419     struct i965_driver_data *i965 = i965_driver_data(ctx);
4420
4421     if (IS_GEN8(i965->intel.device_id))
4422         gen8_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4423     else if (IS_GEN7(i965->intel.device_id))
4424         gen7_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4425     else if (IS_GEN6(i965->intel.device_id))
4426         gen6_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4427     else
4428         i965_render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
4429 }
4430
4431 static bool 
4432 gen8_render_init(VADriverContextP ctx)
4433 {
4434     struct i965_driver_data *i965 = i965_driver_data(ctx);
4435     struct i965_render_state *render_state = &i965->render_state;
4436     int i, kernel_size;
4437     unsigned int kernel_offset, end_offset;
4438     unsigned char *kernel_ptr;
4439     struct i965_kernel *kernel;
4440
4441
4442     if (IS_GEN8(i965->intel.device_id)) {
4443         memcpy(render_state->render_kernels, render_kernels_gen8,
4444                         sizeof(render_state->render_kernels));
4445     }
4446
4447     kernel_size = 4096;
4448
4449     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
4450         kernel = &render_state->render_kernels[i];
4451
4452         if (!kernel->size)
4453             continue;
4454
4455         kernel_size += kernel->size;
4456     }
4457
4458     render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
4459                                   "kernel shader",
4460                                   kernel_size,
4461                                   0x1000);
4462     if (render_state->instruction_state.bo == NULL) {
4463         WARN_ONCE("failed to allocate buffer space for the kernel shaders\n");
4464         return false;
4465     }
4466
4467     assert(render_state->instruction_state.bo);
4468
4469     render_state->instruction_state.bo_size = kernel_size;
4470     render_state->instruction_state.end_offset = 0;
4471     end_offset = 0;
4472
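    /*
     * Second pass: map the buffer and copy each kernel in at the next
     * 64-byte-aligned offset, recording that offset so later state setup
     * can point the GPU at the right kernel.
     */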
    dri_bo_map(render_state->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];
        kernel_offset = ALIGN(end_offset, 64);
        kernel->kernel_offset = kernel_offset;

        if (!kernel->size)
            continue;

        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

        /* advance from the aligned offset, not just by the kernel size,
         * so the padding inserted by ALIGN() is accounted for and the
         * next kernel can never overlap this one */
        end_offset = kernel_offset + kernel->size;
    }

    render_state->instruction_state.end_offset = end_offset;

    dri_bo_unmap(render_state->instruction_state.bo);

    if (IS_GEN8(i965->intel.device_id)) {
        render_state->max_wm_threads = 64;
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

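/*
 * Entry point for render initialization: pick the kernel set that matches
 * the GPU generation (Gen8 takes its own path above), upload each kernel
 * into its own buffer object, allocate the CURBE (constant) buffer, and
 * record the hardware's maximum WM thread count.
 */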
bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN8(i965->intel.device_id)) {
        return gen8_render_init(ctx);
    } else if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

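    /*
     * Maximum number of Windower/Masker (pixel shader) threads for this
     * part; the limit differs per generation and per GT variant and is
     * used later when the WM pipeline state is programmed.
     */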
    if (IS_HSW_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 102;
    } else if (IS_HSW_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 204;
    } else if (IS_HSW_GT3(i965->intel.device_id)) {
        render_state->max_wm_threads = 408;
    } else if (IS_IVB_GT1(i965->intel.device_id) || IS_BAYTRAIL(i965->intel.device_id)) {
        render_state->max_wm_threads = 48;
    } else if (IS_IVB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 172;
    } else if (IS_SNB_GT1(i965->intel.device_id)) {
        render_state->max_wm_threads = 40;
    } else if (IS_SNB_GT2(i965->intel.device_id)) {
        render_state->max_wm_threads = 80;
    } else if (IS_IRONLAKE(i965->intel.device_id)) {
        render_state->max_wm_threads = 72; /* 12 * 6 */
    } else if (IS_G4X(i965->intel.device_id)) {
        render_state->max_wm_threads = 50; /* 12 * 5 */
    } else {
        /* should never get here !!! */
        assert(0);
    }

    return true;
}

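/*
 * Release the Gen8-specific render state: the shared instruction buffer
 * plus the dynamic and indirect state buffers, which do not exist on
 * earlier generations.
 */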
static void
gen8_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;

    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;

    if (render_state->instruction_state.bo) {
        dri_bo_unreference(render_state->instruction_state.bo);
        render_state->instruction_state.bo = NULL;
    }

    if (render_state->dynamic_state.bo) {
        dri_bo_unreference(render_state->dynamic_state.bo);
        render_state->dynamic_state.bo = NULL;
    }

    if (render_state->indirect_state.bo) {
        dri_bo_unreference(render_state->indirect_state.bo);
        render_state->indirect_state.bo = NULL;
    }

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

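/*
 * Release all render state for the context.  Gen8 takes the dedicated path
 * above; older generations free the per-kernel buffer objects and the
 * fixed-function pipeline state (VS, SF, WM, CC) individually.
 */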
void
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_GEN8(i965->intel.device_id)) {
        gen8_render_terminate(ctx);
        return;
    }

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}
