15643f360f3faecebd80364345526fab77b75449
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39
40 #include <va/va_drmcommon.h>
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
50 #define SF_KERNEL_NUM_GRF       16
51 #define SF_MAX_THREADS          1
52
53 static const uint32_t sf_kernel_static[][4] = 
54 {
55 #include "shaders/render/exa_sf.g4b"
56 };
57
58 #define PS_KERNEL_NUM_GRF       48
59 #define PS_MAX_THREADS          32
60
61 #define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
62
63 static const uint32_t ps_kernel_static[][4] = 
64 {
65 #include "shaders/render/exa_wm_xy.g4b"
66 #include "shaders/render/exa_wm_src_affine.g4b"
67 #include "shaders/render/exa_wm_src_sample_planar.g4b"
68 #include "shaders/render/exa_wm_yuv_color_balance.g4b"
69 #include "shaders/render/exa_wm_yuv_rgb.g4b"
70 #include "shaders/render/exa_wm_write.g4b"
71 };
72 static const uint32_t ps_subpic_kernel_static[][4] = 
73 {
74 #include "shaders/render/exa_wm_xy.g4b"
75 #include "shaders/render/exa_wm_src_affine.g4b"
76 #include "shaders/render/exa_wm_src_sample_argb.g4b"
77 #include "shaders/render/exa_wm_write.g4b"
78 };
79
80 /* On IRONLAKE */
81 static const uint32_t sf_kernel_static_gen5[][4] = 
82 {
83 #include "shaders/render/exa_sf.g4b.gen5"
84 };
85
86 static const uint32_t ps_kernel_static_gen5[][4] = 
87 {
88 #include "shaders/render/exa_wm_xy.g4b.gen5"
89 #include "shaders/render/exa_wm_src_affine.g4b.gen5"
90 #include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
91 #include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
92 #include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
93 #include "shaders/render/exa_wm_write.g4b.gen5"
94 };
95 static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
96 {
97 #include "shaders/render/exa_wm_xy.g4b.gen5"
98 #include "shaders/render/exa_wm_src_affine.g4b.gen5"
99 #include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
100 #include "shaders/render/exa_wm_write.g4b.gen5"
101 };
102
103 /* programs for Sandybridge */
104 static const uint32_t sf_kernel_static_gen6[][4] = 
105 {
106 };
107
108 static const uint32_t ps_kernel_static_gen6[][4] = {
109 #include "shaders/render/exa_wm_src_affine.g6b"
110 #include "shaders/render/exa_wm_src_sample_planar.g6b"
111 #include "shaders/render/exa_wm_yuv_color_balance.g6b"
112 #include "shaders/render/exa_wm_yuv_rgb.g6b"
113 #include "shaders/render/exa_wm_write.g6b"
114 };
115
116 static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
117 #include "shaders/render/exa_wm_src_affine.g6b"
118 #include "shaders/render/exa_wm_src_sample_argb.g6b"
119 #include "shaders/render/exa_wm_write.g6b"
120 };
121
122 /* programs for Ivybridge */
123 static const uint32_t sf_kernel_static_gen7[][4] = 
124 {
125 };
126
127 static const uint32_t ps_kernel_static_gen7[][4] = {
128 #include "shaders/render/exa_wm_src_affine.g7b"
129 #include "shaders/render/exa_wm_src_sample_planar.g7b"
130 #include "shaders/render/exa_wm_yuv_color_balance.g7b"
131 #include "shaders/render/exa_wm_yuv_rgb.g7b"
132 #include "shaders/render/exa_wm_write.g7b"
133 };
134
135 static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
136 #include "shaders/render/exa_wm_src_affine.g7b"
137 #include "shaders/render/exa_wm_src_sample_argb.g7b"
138 #include "shaders/render/exa_wm_write.g7b"
139 };
140
141 /* Programs for Haswell */
142 static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
143 #include "shaders/render/exa_wm_src_affine.g7b"
144 #include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
145 #include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
146 #include "shaders/render/exa_wm_yuv_rgb.g7b"
147 #include "shaders/render/exa_wm_write.g7b"
148 };
149
150
151 #define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
152
153 #define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
154 #define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
155
156 static uint32_t float_to_uint (float f) 
157 {
158     union {
159         uint32_t i; 
160         float f;
161     } x;
162
163     x.f = f;
164     return x.i;
165 }
166
167 enum 
168 {
169     SF_KERNEL = 0,
170     PS_KERNEL,
171     PS_SUBPIC_KERNEL
172 };
173
174 static struct i965_kernel render_kernels_gen4[] = {
175     {
176         "SF",
177         SF_KERNEL,
178         sf_kernel_static,
179         sizeof(sf_kernel_static),
180         NULL
181     },
182     {
183         "PS",
184         PS_KERNEL,
185         ps_kernel_static,
186         sizeof(ps_kernel_static),
187         NULL
188     },
189
190     {
191         "PS_SUBPIC",
192         PS_SUBPIC_KERNEL,
193         ps_subpic_kernel_static,
194         sizeof(ps_subpic_kernel_static),
195         NULL
196     }
197 };
198
199 static struct i965_kernel render_kernels_gen5[] = {
200     {
201         "SF",
202         SF_KERNEL,
203         sf_kernel_static_gen5,
204         sizeof(sf_kernel_static_gen5),
205         NULL
206     },
207     {
208         "PS",
209         PS_KERNEL,
210         ps_kernel_static_gen5,
211         sizeof(ps_kernel_static_gen5),
212         NULL
213     },
214
215     {
216         "PS_SUBPIC",
217         PS_SUBPIC_KERNEL,
218         ps_subpic_kernel_static_gen5,
219         sizeof(ps_subpic_kernel_static_gen5),
220         NULL
221     }
222 };
223
224 static struct i965_kernel render_kernels_gen6[] = {
225     {
226         "SF",
227         SF_KERNEL,
228         sf_kernel_static_gen6,
229         sizeof(sf_kernel_static_gen6),
230         NULL
231     },
232     {
233         "PS",
234         PS_KERNEL,
235         ps_kernel_static_gen6,
236         sizeof(ps_kernel_static_gen6),
237         NULL
238     },
239
240     {
241         "PS_SUBPIC",
242         PS_SUBPIC_KERNEL,
243         ps_subpic_kernel_static_gen6,
244         sizeof(ps_subpic_kernel_static_gen6),
245         NULL
246     }
247 };
248
249 static struct i965_kernel render_kernels_gen7[] = {
250     {
251         "SF",
252         SF_KERNEL,
253         sf_kernel_static_gen7,
254         sizeof(sf_kernel_static_gen7),
255         NULL
256     },
257     {
258         "PS",
259         PS_KERNEL,
260         ps_kernel_static_gen7,
261         sizeof(ps_kernel_static_gen7),
262         NULL
263     },
264
265     {
266         "PS_SUBPIC",
267         PS_SUBPIC_KERNEL,
268         ps_subpic_kernel_static_gen7,
269         sizeof(ps_subpic_kernel_static_gen7),
270         NULL
271     }
272 };
273
274 static struct i965_kernel render_kernels_gen7_haswell[] = {
275     {
276         "SF",
277         SF_KERNEL,
278         sf_kernel_static_gen7,
279         sizeof(sf_kernel_static_gen7),
280         NULL
281     },
282     {
283         "PS",
284         PS_KERNEL,
285         ps_kernel_static_gen7_haswell,
286         sizeof(ps_kernel_static_gen7_haswell),
287         NULL
288     },
289
290     {
291         "PS_SUBPIC",
292         PS_SUBPIC_KERNEL,
293         ps_subpic_kernel_static_gen7,
294         sizeof(ps_subpic_kernel_static_gen7),
295         NULL
296     }
297 };
298
299 #define URB_VS_ENTRIES        8
300 #define URB_VS_ENTRY_SIZE     1
301
302 #define URB_GS_ENTRIES        0
303 #define URB_GS_ENTRY_SIZE     0
304
305 #define URB_CLIP_ENTRIES      0
306 #define URB_CLIP_ENTRY_SIZE   0
307
308 #define URB_SF_ENTRIES        1
309 #define URB_SF_ENTRY_SIZE     2
310
311 #define URB_CS_ENTRIES        4
312 #define URB_CS_ENTRY_SIZE     4
313
314 static float yuv_to_rgb_bt601[3][4] = {
315 {1.164,         0,      1.596,          -0.06275,},
316 {1.164,         -0.392, -0.813,         -0.50196,},
317 {1.164,         2.017,  0,              -0.50196,},
318 };
319
320 static float yuv_to_rgb_bt709[3][4] = {
321 {1.164,         0,      1.793,          -0.06275,},
322 {1.164,         -0.213, -0.533,         -0.50196,},
323 {1.164,         2.112,  0,              -0.50196,},
324 };
325
326 static float yuv_to_rgb_smpte_240[3][4] = {
327 {1.164,         0,      1.794,          -0.06275,},
328 {1.164,         -0.258, -0.5425,        -0.50196,},
329 {1.164,         2.078,  0,              -0.50196,},
330 };
331
332 static void
333 i965_render_vs_unit(VADriverContextP ctx)
334 {
335     struct i965_driver_data *i965 = i965_driver_data(ctx);
336     struct i965_render_state *render_state = &i965->render_state;
337     struct i965_vs_unit_state *vs_state;
338
339     dri_bo_map(render_state->vs.state, 1);
340     assert(render_state->vs.state->virtual);
341     vs_state = render_state->vs.state->virtual;
342     memset(vs_state, 0, sizeof(*vs_state));
343
344     if (IS_IRONLAKE(i965->intel.device_id))
345         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
346     else
347         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
348
349     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
350     vs_state->vs6.vs_enable = 0;
351     vs_state->vs6.vert_cache_disable = 1;
352     
353     dri_bo_unmap(render_state->vs.state);
354 }
355
356 static void
357 i965_render_sf_unit(VADriverContextP ctx)
358 {
359     struct i965_driver_data *i965 = i965_driver_data(ctx);
360     struct i965_render_state *render_state = &i965->render_state;
361     struct i965_sf_unit_state *sf_state;
362
363     dri_bo_map(render_state->sf.state, 1);
364     assert(render_state->sf.state->virtual);
365     sf_state = render_state->sf.state->virtual;
366     memset(sf_state, 0, sizeof(*sf_state));
367
368     sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
369     sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;
370
371     sf_state->sf1.single_program_flow = 1; /* XXX */
372     sf_state->sf1.binding_table_entry_count = 0;
373     sf_state->sf1.thread_priority = 0;
374     sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
375     sf_state->sf1.illegal_op_exception_enable = 1;
376     sf_state->sf1.mask_stack_exception_enable = 1;
377     sf_state->sf1.sw_exception_enable = 1;
378
379     /* scratch space is not used in our kernel */
380     sf_state->thread2.per_thread_scratch_space = 0;
381     sf_state->thread2.scratch_space_base_pointer = 0;
382
383     sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
384     sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
385     sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
386     sf_state->thread3.urb_entry_read_offset = 0;
387     sf_state->thread3.dispatch_grf_start_reg = 3;
388
389     sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
390     sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
391     sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
392     sf_state->thread4.stats_enable = 1;
393
394     sf_state->sf5.viewport_transform = 0; /* skip viewport */
395
396     sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
397     sf_state->sf6.scissor = 0;
398
399     sf_state->sf7.trifan_pv = 2;
400
401     sf_state->sf6.dest_org_vbias = 0x8;
402     sf_state->sf6.dest_org_hbias = 0x8;
403
404     dri_bo_emit_reloc(render_state->sf.state,
405                       I915_GEM_DOMAIN_INSTRUCTION, 0,
406                       sf_state->thread0.grf_reg_count << 1,
407                       offsetof(struct i965_sf_unit_state, thread0),
408                       render_state->render_kernels[SF_KERNEL].bo);
409
410     dri_bo_unmap(render_state->sf.state);
411 }
412
413 static void 
414 i965_render_sampler(VADriverContextP ctx)
415 {
416     struct i965_driver_data *i965 = i965_driver_data(ctx);
417     struct i965_render_state *render_state = &i965->render_state;
418     struct i965_sampler_state *sampler_state;
419     int i;
420     
421     assert(render_state->wm.sampler_count > 0);
422     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
423
424     dri_bo_map(render_state->wm.sampler, 1);
425     assert(render_state->wm.sampler->virtual);
426     sampler_state = render_state->wm.sampler->virtual;
427     for (i = 0; i < render_state->wm.sampler_count; i++) {
428         memset(sampler_state, 0, sizeof(*sampler_state));
429         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
430         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
431         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
432         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
433         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
434         sampler_state++;
435     }
436
437     dri_bo_unmap(render_state->wm.sampler);
438 }
439 static void
440 i965_subpic_render_wm_unit(VADriverContextP ctx)
441 {
442     struct i965_driver_data *i965 = i965_driver_data(ctx);
443     struct i965_render_state *render_state = &i965->render_state;
444     struct i965_wm_unit_state *wm_state;
445
446     assert(render_state->wm.sampler);
447
448     dri_bo_map(render_state->wm.state, 1);
449     assert(render_state->wm.state->virtual);
450     wm_state = render_state->wm.state->virtual;
451     memset(wm_state, 0, sizeof(*wm_state));
452
453     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
454     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;
455
456     wm_state->thread1.single_program_flow = 1; /* XXX */
457
458     if (IS_IRONLAKE(i965->intel.device_id))
459         wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
460     else
461         wm_state->thread1.binding_table_entry_count = 7;
462
463     wm_state->thread2.scratch_space_base_pointer = 0;
464     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
465
466     wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
467     wm_state->thread3.const_urb_entry_read_length = 4;
468     wm_state->thread3.const_urb_entry_read_offset = 0;
469     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
470     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
471
472     wm_state->wm4.stats_enable = 0;
473     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
474
475     if (IS_IRONLAKE(i965->intel.device_id)) {
476         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
477     } else {
478         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
479     }
480
481     wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
482     wm_state->wm5.thread_dispatch_enable = 1;
483     wm_state->wm5.enable_16_pix = 1;
484     wm_state->wm5.enable_8_pix = 0;
485     wm_state->wm5.early_depth_test = 1;
486
487     dri_bo_emit_reloc(render_state->wm.state,
488                       I915_GEM_DOMAIN_INSTRUCTION, 0,
489                       wm_state->thread0.grf_reg_count << 1,
490                       offsetof(struct i965_wm_unit_state, thread0),
491                       render_state->render_kernels[PS_SUBPIC_KERNEL].bo);
492
493     dri_bo_emit_reloc(render_state->wm.state,
494                       I915_GEM_DOMAIN_INSTRUCTION, 0,
495                       wm_state->wm4.sampler_count << 2,
496                       offsetof(struct i965_wm_unit_state, wm4),
497                       render_state->wm.sampler);
498
499     dri_bo_unmap(render_state->wm.state);
500 }
501
502
503 static void
504 i965_render_wm_unit(VADriverContextP ctx)
505 {
506     struct i965_driver_data *i965 = i965_driver_data(ctx);
507     struct i965_render_state *render_state = &i965->render_state;
508     struct i965_wm_unit_state *wm_state;
509
510     assert(render_state->wm.sampler);
511
512     dri_bo_map(render_state->wm.state, 1);
513     assert(render_state->wm.state->virtual);
514     wm_state = render_state->wm.state->virtual;
515     memset(wm_state, 0, sizeof(*wm_state));
516
517     wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
518     wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;
519
520     wm_state->thread1.single_program_flow = 1; /* XXX */
521
522     if (IS_IRONLAKE(i965->intel.device_id))
523         wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
524     else
525         wm_state->thread1.binding_table_entry_count = 7;
526
527     wm_state->thread2.scratch_space_base_pointer = 0;
528     wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
529
530     wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
531     wm_state->thread3.const_urb_entry_read_length = 4;
532     wm_state->thread3.const_urb_entry_read_offset = 0;
533     wm_state->thread3.urb_entry_read_length = 1; /* XXX */
534     wm_state->thread3.urb_entry_read_offset = 0; /* XXX */
535
536     wm_state->wm4.stats_enable = 0;
537     wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 
538
539     if (IS_IRONLAKE(i965->intel.device_id)) {
540         wm_state->wm4.sampler_count = 0;        /* hardware requirement */
541     } else {
542         wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
543     }
544
545     wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
546     wm_state->wm5.thread_dispatch_enable = 1;
547     wm_state->wm5.enable_16_pix = 1;
548     wm_state->wm5.enable_8_pix = 0;
549     wm_state->wm5.early_depth_test = 1;
550
551     dri_bo_emit_reloc(render_state->wm.state,
552                       I915_GEM_DOMAIN_INSTRUCTION, 0,
553                       wm_state->thread0.grf_reg_count << 1,
554                       offsetof(struct i965_wm_unit_state, thread0),
555                       render_state->render_kernels[PS_KERNEL].bo);
556
557     dri_bo_emit_reloc(render_state->wm.state,
558                       I915_GEM_DOMAIN_INSTRUCTION, 0,
559                       wm_state->wm4.sampler_count << 2,
560                       offsetof(struct i965_wm_unit_state, wm4),
561                       render_state->wm.sampler);
562
563     dri_bo_unmap(render_state->wm.state);
564 }
565
566 static void 
567 i965_render_cc_viewport(VADriverContextP ctx)
568 {
569     struct i965_driver_data *i965 = i965_driver_data(ctx);
570     struct i965_render_state *render_state = &i965->render_state;
571     struct i965_cc_viewport *cc_viewport;
572
573     dri_bo_map(render_state->cc.viewport, 1);
574     assert(render_state->cc.viewport->virtual);
575     cc_viewport = render_state->cc.viewport->virtual;
576     memset(cc_viewport, 0, sizeof(*cc_viewport));
577     
578     cc_viewport->min_depth = -1.e35;
579     cc_viewport->max_depth = 1.e35;
580
581     dri_bo_unmap(render_state->cc.viewport);
582 }
583
584 static void 
585 i965_subpic_render_cc_unit(VADriverContextP ctx)
586 {
587     struct i965_driver_data *i965 = i965_driver_data(ctx);
588     struct i965_render_state *render_state = &i965->render_state;
589     struct i965_cc_unit_state *cc_state;
590
591     assert(render_state->cc.viewport);
592
593     dri_bo_map(render_state->cc.state, 1);
594     assert(render_state->cc.state->virtual);
595     cc_state = render_state->cc.state->virtual;
596     memset(cc_state, 0, sizeof(*cc_state));
597
598     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
599     cc_state->cc2.depth_test = 0;       /* disable depth test */
600     cc_state->cc2.logicop_enable = 0;   /* disable logic op */
601     cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
602     cc_state->cc3.blend_enable = 1;     /* enable color blend */
603     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
604     cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
605     cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
606     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
607
608     cc_state->cc5.dither_enable = 0;    /* disable dither */
609     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
610     cc_state->cc5.statistics_enable = 1;
611     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
612     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
613     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
614
615     cc_state->cc6.clamp_post_alpha_blend = 0; 
616     cc_state->cc6.clamp_pre_alpha_blend  =0; 
617     
618     /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
619     cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
620     cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
621     cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
622    
623     /*alpha test reference*/
624     cc_state->cc7.alpha_ref.f =0.0 ;
625
626
627     dri_bo_emit_reloc(render_state->cc.state,
628                       I915_GEM_DOMAIN_INSTRUCTION, 0,
629                       0,
630                       offsetof(struct i965_cc_unit_state, cc4),
631                       render_state->cc.viewport);
632
633     dri_bo_unmap(render_state->cc.state);
634 }
635
636
637 static void 
638 i965_render_cc_unit(VADriverContextP ctx)
639 {
640     struct i965_driver_data *i965 = i965_driver_data(ctx);
641     struct i965_render_state *render_state = &i965->render_state;
642     struct i965_cc_unit_state *cc_state;
643
644     assert(render_state->cc.viewport);
645
646     dri_bo_map(render_state->cc.state, 1);
647     assert(render_state->cc.state->virtual);
648     cc_state = render_state->cc.state->virtual;
649     memset(cc_state, 0, sizeof(*cc_state));
650
651     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
652     cc_state->cc2.depth_test = 0;       /* disable depth test */
653     cc_state->cc2.logicop_enable = 1;   /* enable logic op */
654     cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
655     cc_state->cc3.blend_enable = 0;     /* disable color blend */
656     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
657     cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;
658
659     cc_state->cc5.dither_enable = 0;    /* disable dither */
660     cc_state->cc5.logicop_func = 0xc;   /* WHITE */
661     cc_state->cc5.statistics_enable = 1;
662     cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
663     cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
664     cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;
665
666     dri_bo_emit_reloc(render_state->cc.state,
667                       I915_GEM_DOMAIN_INSTRUCTION, 0,
668                       0,
669                       offsetof(struct i965_cc_unit_state, cc4),
670                       render_state->cc.viewport);
671
672     dri_bo_unmap(render_state->cc.state);
673 }
674
675 static void
676 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
677 {
678     switch (tiling) {
679     case I915_TILING_NONE:
680         ss->ss3.tiled_surface = 0;
681         ss->ss3.tile_walk = 0;
682         break;
683     case I915_TILING_X:
684         ss->ss3.tiled_surface = 1;
685         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
686         break;
687     case I915_TILING_Y:
688         ss->ss3.tiled_surface = 1;
689         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
690         break;
691     }
692 }
693
694 static void
695 i965_render_set_surface_state(
696     struct i965_surface_state *ss,
697     dri_bo                    *bo,
698     unsigned long              offset,
699     unsigned int               width,
700     unsigned int               height,
701     unsigned int               pitch,
702     unsigned int               format,
703     unsigned int               flags
704 )
705 {
706     unsigned int tiling;
707     unsigned int swizzle;
708
709     memset(ss, 0, sizeof(*ss));
710
711     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
712     case I965_PP_FLAG_BOTTOM_FIELD:
713         ss->ss0.vert_line_stride_ofs = 1;
714         /* fall-through */
715     case I965_PP_FLAG_TOP_FIELD:
716         ss->ss0.vert_line_stride = 1;
717         height /= 2;
718         break;
719     }
720
721     ss->ss0.surface_type = I965_SURFACE_2D;
722     ss->ss0.surface_format = format;
723     ss->ss0.color_blend = 1;
724
725     ss->ss1.base_addr = bo->offset + offset;
726
727     ss->ss2.width = width - 1;
728     ss->ss2.height = height - 1;
729
730     ss->ss3.pitch = pitch - 1;
731
732     dri_bo_get_tiling(bo, &tiling, &swizzle);
733     i965_render_set_surface_tiling(ss, tiling);
734 }
735
736 static void
737 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
738 {
739    switch (tiling) {
740    case I915_TILING_NONE:
741       ss->ss0.tiled_surface = 0;
742       ss->ss0.tile_walk = 0;
743       break;
744    case I915_TILING_X:
745       ss->ss0.tiled_surface = 1;
746       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
747       break;
748    case I915_TILING_Y:
749       ss->ss0.tiled_surface = 1;
750       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
751       break;
752    }
753 }
754
755 /* Set "Shader Channel Select" */
756 void
757 gen7_render_set_surface_scs(struct gen7_surface_state *ss)
758 {
759     ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
760     ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
761     ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
762     ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
763 }
764
765 static void
766 gen7_render_set_surface_state(
767     struct gen7_surface_state *ss,
768     dri_bo                    *bo,
769     unsigned long              offset,
770     int                        width,
771     int                        height,
772     int                        pitch,
773     int                        format,
774     unsigned int               flags
775 )
776 {
777     unsigned int tiling;
778     unsigned int swizzle;
779
780     memset(ss, 0, sizeof(*ss));
781
782     switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
783     case I965_PP_FLAG_BOTTOM_FIELD:
784         ss->ss0.vert_line_stride_ofs = 1;
785         /* fall-through */
786     case I965_PP_FLAG_TOP_FIELD:
787         ss->ss0.vert_line_stride = 1;
788         height /= 2;
789         break;
790     }
791
792     ss->ss0.surface_type = I965_SURFACE_2D;
793     ss->ss0.surface_format = format;
794
795     ss->ss1.base_addr = bo->offset + offset;
796
797     ss->ss2.width = width - 1;
798     ss->ss2.height = height - 1;
799
800     ss->ss3.pitch = pitch - 1;
801
802     dri_bo_get_tiling(bo, &tiling, &swizzle);
803     gen7_render_set_surface_tiling(ss, tiling);
804 }
805
806
807 static void
808 i965_render_src_surface_state(
809     VADriverContextP ctx, 
810     int              index,
811     dri_bo          *region,
812     unsigned long    offset,
813     int              w,
814     int              h,
815     int              pitch,
816     int              format,
817     unsigned int     flags
818 )
819 {
820     struct i965_driver_data *i965 = i965_driver_data(ctx);  
821     struct i965_render_state *render_state = &i965->render_state;
822     void *ss;
823     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
824
825     assert(index < MAX_RENDER_SURFACES);
826
827     dri_bo_map(ss_bo, 1);
828     assert(ss_bo->virtual);
829     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
830
831     if (IS_GEN7(i965->intel.device_id)) {
832         gen7_render_set_surface_state(ss,
833                                       region, offset,
834                                       w, h,
835                                       pitch, format, flags);
836         if (IS_HASWELL(i965->intel.device_id))
837             gen7_render_set_surface_scs(ss);
838         dri_bo_emit_reloc(ss_bo,
839                           I915_GEM_DOMAIN_SAMPLER, 0,
840                           offset,
841                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
842                           region);
843     } else {
844         i965_render_set_surface_state(ss,
845                                       region, offset,
846                                       w, h,
847                                       pitch, format, flags);
848         dri_bo_emit_reloc(ss_bo,
849                           I915_GEM_DOMAIN_SAMPLER, 0,
850                           offset,
851                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
852                           region);
853     }
854
855     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
856     dri_bo_unmap(ss_bo);
857     render_state->wm.sampler_count++;
858 }
859
860 static void
861 i965_render_src_surfaces_state(
862     VADriverContextP ctx,
863     struct object_surface *obj_surface,
864     unsigned int     flags
865 )
866 {
867     int region_pitch;
868     int rw, rh;
869     dri_bo *region;
870
871     region_pitch = obj_surface->width;
872     rw = obj_surface->orig_width;
873     rh = obj_surface->orig_height;
874     region = obj_surface->bo;
875
876     i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
877     i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
878
879     if (obj_surface->fourcc == VA_FOURCC_NV12) {
880         i965_render_src_surface_state(ctx, 3, region,
881                                       region_pitch * obj_surface->y_cb_offset,
882                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
883                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
884         i965_render_src_surface_state(ctx, 4, region,
885                                       region_pitch * obj_surface->y_cb_offset,
886                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
887                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
888     } else {
889         i965_render_src_surface_state(ctx, 3, region,
890                                       region_pitch * obj_surface->y_cb_offset,
891                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
892                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
893         i965_render_src_surface_state(ctx, 4, region,
894                                       region_pitch * obj_surface->y_cb_offset,
895                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
896                                       I965_SURFACEFORMAT_R8_UNORM, flags);
897         i965_render_src_surface_state(ctx, 5, region,
898                                       region_pitch * obj_surface->y_cr_offset,
899                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
900                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
901         i965_render_src_surface_state(ctx, 6, region,
902                                       region_pitch * obj_surface->y_cr_offset,
903                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
904                                       I965_SURFACEFORMAT_R8_UNORM, flags);
905     }
906 }
907
908 static void
909 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
910                                       struct object_surface *obj_surface)
911 {
912     dri_bo *subpic_region;
913     unsigned int index = obj_surface->subpic_render_idx;
914     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
915     struct object_image *obj_image = obj_subpic->obj_image;
916
917     assert(obj_surface);
918     assert(obj_surface->bo);
919     subpic_region = obj_image->bo;
920     /*subpicture surface*/
921     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
922     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
923 }
924
925 static void
926 i965_render_dest_surface_state(VADriverContextP ctx, int index)
927 {
928     struct i965_driver_data *i965 = i965_driver_data(ctx);  
929     struct i965_render_state *render_state = &i965->render_state;
930     struct intel_region *dest_region = render_state->draw_region;
931     void *ss;
932     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
933     int format;
934     assert(index < MAX_RENDER_SURFACES);
935
936     if (dest_region->cpp == 2) {
937         format = I965_SURFACEFORMAT_B5G6R5_UNORM;
938     } else {
939         format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
940     }
941
942     dri_bo_map(ss_bo, 1);
943     assert(ss_bo->virtual);
944     ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);
945
946     if (IS_GEN7(i965->intel.device_id)) {
947         gen7_render_set_surface_state(ss,
948                                       dest_region->bo, 0,
949                                       dest_region->width, dest_region->height,
950                                       dest_region->pitch, format, 0);
951         if (IS_HASWELL(i965->intel.device_id))
952             gen7_render_set_surface_scs(ss);
953         dri_bo_emit_reloc(ss_bo,
954                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
955                           0,
956                           SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
957                           dest_region->bo);
958     } else {
959         i965_render_set_surface_state(ss,
960                                       dest_region->bo, 0,
961                                       dest_region->width, dest_region->height,
962                                       dest_region->pitch, format, 0);
963         dri_bo_emit_reloc(ss_bo,
964                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
965                           0,
966                           SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
967                           dest_region->bo);
968     }
969
970     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
971     dri_bo_unmap(ss_bo);
972 }
973
974 static void
975 i965_fill_vertex_buffer(
976     VADriverContextP ctx,
977     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
978     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
979 )
980 {
981     struct i965_driver_data * const i965 = i965_driver_data(ctx);
982     float vb[12];
983
984     enum { X1, Y1, X2, Y2 };
985
986     static const unsigned int g_rotation_indices[][6] = {
987         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
988         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
989         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
990         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
991     };
992
993     const unsigned int * const rotation_indices =
994         g_rotation_indices[i965->rotation_attrib->value];
995
996     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
997     vb[1]  = tex_coords[rotation_indices[1]];
998     vb[2]  = vid_coords[X2];
999     vb[3]  = vid_coords[Y2];
1000
1001     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
1002     vb[5]  = tex_coords[rotation_indices[3]];
1003     vb[6]  = vid_coords[X1];
1004     vb[7]  = vid_coords[Y2];
1005
1006     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
1007     vb[9]  = tex_coords[rotation_indices[5]];
1008     vb[10] = vid_coords[X1];
1009     vb[11] = vid_coords[Y1];
1010
1011     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
1012 }
1013
1014 static void 
1015 i965_subpic_render_upload_vertex(VADriverContextP ctx,
1016                                  struct object_surface *obj_surface,
1017                                  const VARectangle *output_rect)
1018 {    
1019     unsigned int index = obj_surface->subpic_render_idx;
1020     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
1021     float tex_coords[4], vid_coords[4];
1022     VARectangle dst_rect;
1023
1024     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
1025         dst_rect = obj_subpic->dst_rect;
1026     else {
1027         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
1028         const float sy  = (float)output_rect->height / obj_surface->orig_height;
1029         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
1030         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
1031         dst_rect.width  = sx * obj_subpic->dst_rect.width;
1032         dst_rect.height = sy * obj_subpic->dst_rect.height;
1033     }
1034
1035     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
1036     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
1037     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
1038     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
1039
1040     vid_coords[0] = dst_rect.x;
1041     vid_coords[1] = dst_rect.y;
1042     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
1043     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
1044
1045     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1046 }
1047
1048 static void 
1049 i965_render_upload_vertex(
1050     VADriverContextP   ctx,
1051     struct object_surface *obj_surface,
1052     const VARectangle *src_rect,
1053     const VARectangle *dst_rect
1054 )
1055 {
1056     struct i965_driver_data *i965 = i965_driver_data(ctx);
1057     struct i965_render_state *render_state = &i965->render_state;
1058     struct intel_region *dest_region = render_state->draw_region;
1059     float tex_coords[4], vid_coords[4];
1060     int width, height;
1061
1062     width  = obj_surface->orig_width;
1063     height = obj_surface->orig_height;
1064
1065     tex_coords[0] = (float)src_rect->x / width;
1066     tex_coords[1] = (float)src_rect->y / height;
1067     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
1068     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
1069
1070     vid_coords[0] = dest_region->x + dst_rect->x;
1071     vid_coords[1] = dest_region->y + dst_rect->y;
1072     vid_coords[2] = vid_coords[0] + dst_rect->width;
1073     vid_coords[3] = vid_coords[1] + dst_rect->height;
1074
1075     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1076 }
1077
1078 #define PI  3.1415926
1079
1080 static void
1081 i965_render_upload_constants(VADriverContextP ctx,
1082                              struct object_surface *obj_surface,
1083                              unsigned int flags)
1084 {
1085     struct i965_driver_data *i965 = i965_driver_data(ctx);
1086     struct i965_render_state *render_state = &i965->render_state;
1087     unsigned short *constant_buffer;
1088     float *color_balance_base;
1089     float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
1090     float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
1091     float hue = (float)i965->hue_attrib->value / 180 * PI;
1092     float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
1093     float *yuv_to_rgb;
1094     unsigned int color_flag;
1095
1096     dri_bo_map(render_state->curbe.bo, 1);
1097     assert(render_state->curbe.bo->virtual);
1098     constant_buffer = render_state->curbe.bo->virtual;
1099
1100     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
1101         assert(obj_surface->fourcc == VA_FOURCC_Y800);
1102
1103         constant_buffer[0] = 2;
1104     } else {
1105         if (obj_surface->fourcc == VA_FOURCC_NV12)
1106             constant_buffer[0] = 1;
1107         else
1108             constant_buffer[0] = 0;
1109     }
1110
1111     if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
1112         i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
1113         i965->hue_attrib->value == DEFAULT_HUE &&
1114         i965->saturation_attrib->value == DEFAULT_SATURATION)
1115         constant_buffer[1] = 1; /* skip color balance transformation */
1116     else
1117         constant_buffer[1] = 0;
1118
1119     color_balance_base = (float *)constant_buffer + 4;
1120     *color_balance_base++ = contrast;
1121     *color_balance_base++ = brightness;
1122     *color_balance_base++ = cos(hue) * contrast * saturation;
1123     *color_balance_base++ = sin(hue) * contrast * saturation;
1124
1125     color_flag = flags & VA_SRC_COLOR_MASK;
1126     yuv_to_rgb = (float *)constant_buffer + 8;
1127     if (color_flag == VA_SRC_BT709)
1128         memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
1129     else if (color_flag == VA_SRC_SMPTE_240)
1130         memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
1131     else
1132         memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));
1133
1134     dri_bo_unmap(render_state->curbe.bo);
1135 }
1136
1137 static void
1138 i965_subpic_render_upload_constants(VADriverContextP ctx,
1139                                     struct object_surface *obj_surface)
1140 {
1141     struct i965_driver_data *i965 = i965_driver_data(ctx);
1142     struct i965_render_state *render_state = &i965->render_state;
1143     float *constant_buffer;
1144     float global_alpha = 1.0;
1145     unsigned int index = obj_surface->subpic_render_idx;
1146     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1147     
1148     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1149         global_alpha = obj_subpic->global_alpha;
1150     }
1151
1152     dri_bo_map(render_state->curbe.bo, 1);
1153
1154     assert(render_state->curbe.bo->virtual);
1155     constant_buffer = render_state->curbe.bo->virtual;
1156     *constant_buffer = global_alpha;
1157
1158     dri_bo_unmap(render_state->curbe.bo);
1159 }
1160  
1161 static void
1162 i965_surface_render_state_setup(
1163     VADriverContextP   ctx,
1164     struct object_surface *obj_surface,
1165     const VARectangle *src_rect,
1166     const VARectangle *dst_rect,
1167     unsigned int       flags
1168 )
1169 {
1170     i965_render_vs_unit(ctx);
1171     i965_render_sf_unit(ctx);
1172     i965_render_dest_surface_state(ctx, 0);
1173     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1174     i965_render_sampler(ctx);
1175     i965_render_wm_unit(ctx);
1176     i965_render_cc_viewport(ctx);
1177     i965_render_cc_unit(ctx);
1178     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1179     i965_render_upload_constants(ctx, obj_surface, flags);
1180 }
1181
1182 static void
1183 i965_subpic_render_state_setup(
1184     VADriverContextP   ctx,
1185     struct object_surface *obj_surface,
1186     const VARectangle *src_rect,
1187     const VARectangle *dst_rect
1188 )
1189 {
1190     i965_render_vs_unit(ctx);
1191     i965_render_sf_unit(ctx);
1192     i965_render_dest_surface_state(ctx, 0);
1193     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
1194     i965_render_sampler(ctx);
1195     i965_subpic_render_wm_unit(ctx);
1196     i965_render_cc_viewport(ctx);
1197     i965_subpic_render_cc_unit(ctx);
1198     i965_subpic_render_upload_constants(ctx, obj_surface);
1199     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
1200 }
1201
1202
1203 static void
1204 i965_render_pipeline_select(VADriverContextP ctx)
1205 {
1206     struct i965_driver_data *i965 = i965_driver_data(ctx);
1207     struct intel_batchbuffer *batch = i965->batch;
1208  
1209     BEGIN_BATCH(batch, 1);
1210     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1211     ADVANCE_BATCH(batch);
1212 }
1213
1214 static void
1215 i965_render_state_sip(VADriverContextP ctx)
1216 {
1217     struct i965_driver_data *i965 = i965_driver_data(ctx);
1218     struct intel_batchbuffer *batch = i965->batch;
1219
1220     BEGIN_BATCH(batch, 2);
1221     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1222     OUT_BATCH(batch, 0);
1223     ADVANCE_BATCH(batch);
1224 }
1225
1226 static void
1227 i965_render_state_base_address(VADriverContextP ctx)
1228 {
1229     struct i965_driver_data *i965 = i965_driver_data(ctx);
1230     struct intel_batchbuffer *batch = i965->batch;
1231     struct i965_render_state *render_state = &i965->render_state;
1232
1233     if (IS_IRONLAKE(i965->intel.device_id)) {
1234         BEGIN_BATCH(batch, 8);
1235         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1236         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1237         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1238         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1239         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1240         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1241         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1242         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1243         ADVANCE_BATCH(batch);
1244     } else {
1245         BEGIN_BATCH(batch, 6);
1246         OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
1247         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1248         OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1249         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1250         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1251         OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1252         ADVANCE_BATCH(batch);
1253     }
1254 }
1255
1256 static void
1257 i965_render_binding_table_pointers(VADriverContextP ctx)
1258 {
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch = i965->batch;
1261
1262     BEGIN_BATCH(batch, 6);
1263     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
1264     OUT_BATCH(batch, 0); /* vs */
1265     OUT_BATCH(batch, 0); /* gs */
1266     OUT_BATCH(batch, 0); /* clip */
1267     OUT_BATCH(batch, 0); /* sf */
1268     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1269     ADVANCE_BATCH(batch);
1270 }
1271
1272 static void 
1273 i965_render_constant_color(VADriverContextP ctx)
1274 {
1275     struct i965_driver_data *i965 = i965_driver_data(ctx);
1276     struct intel_batchbuffer *batch = i965->batch;
1277
1278     BEGIN_BATCH(batch, 5);
1279     OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
1280     OUT_BATCH(batch, float_to_uint(1.0));
1281     OUT_BATCH(batch, float_to_uint(0.0));
1282     OUT_BATCH(batch, float_to_uint(1.0));
1283     OUT_BATCH(batch, float_to_uint(1.0));
1284     ADVANCE_BATCH(batch);
1285 }
1286
1287 static void
1288 i965_render_pipelined_pointers(VADriverContextP ctx)
1289 {
1290     struct i965_driver_data *i965 = i965_driver_data(ctx);
1291     struct intel_batchbuffer *batch = i965->batch;
1292     struct i965_render_state *render_state = &i965->render_state;
1293
1294     BEGIN_BATCH(batch, 7);
1295     OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
1296     OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1297     OUT_BATCH(batch, 0);  /* disable GS */
1298     OUT_BATCH(batch, 0);  /* disable CLIP */
1299     OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1300     OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1301     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1302     ADVANCE_BATCH(batch);
1303 }
1304
1305 static void
1306 i965_render_urb_layout(VADriverContextP ctx)
1307 {
1308     struct i965_driver_data *i965 = i965_driver_data(ctx);
1309     struct intel_batchbuffer *batch = i965->batch;
1310     int urb_vs_start, urb_vs_size;
1311     int urb_gs_start, urb_gs_size;
1312     int urb_clip_start, urb_clip_size;
1313     int urb_sf_start, urb_sf_size;
1314     int urb_cs_start, urb_cs_size;
1315
1316     urb_vs_start = 0;
1317     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1318     urb_gs_start = urb_vs_start + urb_vs_size;
1319     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1320     urb_clip_start = urb_gs_start + urb_gs_size;
1321     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1322     urb_sf_start = urb_clip_start + urb_clip_size;
1323     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1324     urb_cs_start = urb_sf_start + urb_sf_size;
1325     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1326
1327     BEGIN_BATCH(batch, 3);
1328     OUT_BATCH(batch, 
1329               CMD_URB_FENCE |
1330               UF0_CS_REALLOC |
1331               UF0_SF_REALLOC |
1332               UF0_CLIP_REALLOC |
1333               UF0_GS_REALLOC |
1334               UF0_VS_REALLOC |
1335               1);
1336     OUT_BATCH(batch, 
1337               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1338               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1339               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1340     OUT_BATCH(batch,
1341               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1342               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1343     ADVANCE_BATCH(batch);
1344 }
1345
1346 static void 
1347 i965_render_cs_urb_layout(VADriverContextP ctx)
1348 {
1349     struct i965_driver_data *i965 = i965_driver_data(ctx);
1350     struct intel_batchbuffer *batch = i965->batch;
1351
1352     BEGIN_BATCH(batch, 2);
1353     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1354     OUT_BATCH(batch,
1355               ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
1356               (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
1357     ADVANCE_BATCH(batch);
1358 }
1359
1360 static void
1361 i965_render_constant_buffer(VADriverContextP ctx)
1362 {
1363     struct i965_driver_data *i965 = i965_driver_data(ctx);
1364     struct intel_batchbuffer *batch = i965->batch;
1365     struct i965_render_state *render_state = &i965->render_state;
1366
1367     BEGIN_BATCH(batch, 2);
1368     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1369     OUT_RELOC(batch, render_state->curbe.bo,
1370               I915_GEM_DOMAIN_INSTRUCTION, 0,
1371               URB_CS_ENTRY_SIZE - 1);
1372     ADVANCE_BATCH(batch);    
1373 }
1374
1375 static void
1376 i965_render_drawing_rectangle(VADriverContextP ctx)
1377 {
1378     struct i965_driver_data *i965 = i965_driver_data(ctx);
1379     struct intel_batchbuffer *batch = i965->batch;
1380     struct i965_render_state *render_state = &i965->render_state;
1381     struct intel_region *dest_region = render_state->draw_region;
1382
1383     BEGIN_BATCH(batch, 4);
1384     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
1385     OUT_BATCH(batch, 0x00000000);
1386     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
1387     OUT_BATCH(batch, 0x00000000);         
1388     ADVANCE_BATCH(batch);
1389 }
1390
1391 static void
1392 i965_render_vertex_elements(VADriverContextP ctx)
1393 {
1394     struct i965_driver_data *i965 = i965_driver_data(ctx);
1395     struct intel_batchbuffer *batch = i965->batch;
1396
1397     if (IS_IRONLAKE(i965->intel.device_id)) {
1398         BEGIN_BATCH(batch, 5);
1399         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1400         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1401         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1402                   VE0_VALID |
1403                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1404                   (0 << VE0_OFFSET_SHIFT));
1405         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1406                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1407                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1408                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1409         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1410         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1411                   VE0_VALID |
1412                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1413                   (8 << VE0_OFFSET_SHIFT));
1414         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1415                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1416                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1417                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1418         ADVANCE_BATCH(batch);
1419     } else {
1420         BEGIN_BATCH(batch, 5);
1421         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
1422         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1423         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1424                   VE0_VALID |
1425                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1426                   (0 << VE0_OFFSET_SHIFT));
1427         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1428                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1429                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1430                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1431                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1432         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1433         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1434                   VE0_VALID |
1435                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1436                   (8 << VE0_OFFSET_SHIFT));
1437         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1438                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1439                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1440                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
1441                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
1442         ADVANCE_BATCH(batch);
1443     }
1444 }
1445
1446 static void
1447 i965_render_upload_image_palette(
1448     VADriverContextP ctx,
1449     struct object_image *obj_image,
1450     unsigned int     alpha
1451 )
1452 {
1453     struct i965_driver_data *i965 = i965_driver_data(ctx);
1454     struct intel_batchbuffer *batch = i965->batch;
1455     unsigned int i;
1456
1457     assert(obj_image);
1458
1459     if (!obj_image)
1460         return;
1461
1462     if (obj_image->image.num_palette_entries == 0)
1463         return;
1464
1465     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1466     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1467     /*fill palette*/
1468     //int32_t out[16]; //0-23:color 23-31:alpha
1469     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1470         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1471     ADVANCE_BATCH(batch);
1472 }
1473
1474 static void
1475 i965_render_startup(VADriverContextP ctx)
1476 {
1477     struct i965_driver_data *i965 = i965_driver_data(ctx);
1478     struct intel_batchbuffer *batch = i965->batch;
1479     struct i965_render_state *render_state = &i965->render_state;
1480
1481     BEGIN_BATCH(batch, 11);
1482     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1483     OUT_BATCH(batch, 
1484               (0 << VB0_BUFFER_INDEX_SHIFT) |
1485               VB0_VERTEXDATA |
1486               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1487     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1488
1489     if (IS_IRONLAKE(i965->intel.device_id))
1490         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1491     else
1492         OUT_BATCH(batch, 3);
1493
1494     OUT_BATCH(batch, 0);
1495
1496     OUT_BATCH(batch, 
1497               CMD_3DPRIMITIVE |
1498               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1499               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1500               (0 << 9) |
1501               4);
1502     OUT_BATCH(batch, 3); /* vertex count per instance */
1503     OUT_BATCH(batch, 0); /* start vertex offset */
1504     OUT_BATCH(batch, 1); /* single instance */
1505     OUT_BATCH(batch, 0); /* start instance location */
1506     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1507     ADVANCE_BATCH(batch);
1508 }
1509
1510 static void 
1511 i965_clear_dest_region(VADriverContextP ctx)
1512 {
1513     struct i965_driver_data *i965 = i965_driver_data(ctx);
1514     struct intel_batchbuffer *batch = i965->batch;
1515     struct i965_render_state *render_state = &i965->render_state;
1516     struct intel_region *dest_region = render_state->draw_region;
1517     unsigned int blt_cmd, br13;
1518     int pitch;
1519
1520     blt_cmd = XY_COLOR_BLT_CMD;
1521     br13 = 0xf0 << 16;
1522     pitch = dest_region->pitch;
1523
1524     if (dest_region->cpp == 4) {
1525         br13 |= BR13_8888;
1526         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1527     } else {
1528         assert(dest_region->cpp == 2);
1529         br13 |= BR13_565;
1530     }
1531
1532     if (dest_region->tiling != I915_TILING_NONE) {
1533         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1534         pitch /= 4;
1535     }
1536
1537     br13 |= pitch;
1538
1539     if (IS_GEN6(i965->intel.device_id) ||
1540         IS_GEN7(i965->intel.device_id) ||
1541         IS_GEN8(i965->intel.device_id)) {
1542         intel_batchbuffer_start_atomic_blt(batch, 24);
1543         BEGIN_BLT_BATCH(batch, 6);
1544     } else {
1545         intel_batchbuffer_start_atomic(batch, 24);
1546         BEGIN_BATCH(batch, 6);
1547     }
1548
1549     OUT_BATCH(batch, blt_cmd);
1550     OUT_BATCH(batch, br13);
1551     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1552     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1553               (dest_region->x + dest_region->width));
1554     OUT_RELOC(batch, dest_region->bo, 
1555               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1556               0);
1557     OUT_BATCH(batch, 0x0);
1558     ADVANCE_BATCH(batch);
1559     intel_batchbuffer_end_atomic(batch);
1560 }
1561
1562 static void
1563 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1564 {
1565     struct i965_driver_data *i965 = i965_driver_data(ctx);
1566     struct intel_batchbuffer *batch = i965->batch;
1567
1568     i965_clear_dest_region(ctx);
1569     intel_batchbuffer_start_atomic(batch, 0x1000);
1570     intel_batchbuffer_emit_mi_flush(batch);
1571     i965_render_pipeline_select(ctx);
1572     i965_render_state_sip(ctx);
1573     i965_render_state_base_address(ctx);
1574     i965_render_binding_table_pointers(ctx);
1575     i965_render_constant_color(ctx);
1576     i965_render_pipelined_pointers(ctx);
1577     i965_render_urb_layout(ctx);
1578     i965_render_cs_urb_layout(ctx);
1579     i965_render_constant_buffer(ctx);
1580     i965_render_drawing_rectangle(ctx);
1581     i965_render_vertex_elements(ctx);
1582     i965_render_startup(ctx);
1583     intel_batchbuffer_end_atomic(batch);
1584 }
1585
1586 static void
1587 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1588 {
1589     struct i965_driver_data *i965 = i965_driver_data(ctx);
1590     struct intel_batchbuffer *batch = i965->batch;
1591
1592     intel_batchbuffer_start_atomic(batch, 0x1000);
1593     intel_batchbuffer_emit_mi_flush(batch);
1594     i965_render_pipeline_select(ctx);
1595     i965_render_state_sip(ctx);
1596     i965_render_state_base_address(ctx);
1597     i965_render_binding_table_pointers(ctx);
1598     i965_render_constant_color(ctx);
1599     i965_render_pipelined_pointers(ctx);
1600     i965_render_urb_layout(ctx);
1601     i965_render_cs_urb_layout(ctx);
1602     i965_render_constant_buffer(ctx);
1603     i965_render_drawing_rectangle(ctx);
1604     i965_render_vertex_elements(ctx);
1605     i965_render_startup(ctx);
1606     intel_batchbuffer_end_atomic(batch);
1607 }
1608
1609
1610 static void 
1611 i965_render_initialize(VADriverContextP ctx)
1612 {
1613     struct i965_driver_data *i965 = i965_driver_data(ctx);
1614     struct i965_render_state *render_state = &i965->render_state;
1615     dri_bo *bo;
1616
1617     /* VERTEX BUFFER */
1618     dri_bo_unreference(render_state->vb.vertex_buffer);
1619     bo = dri_bo_alloc(i965->intel.bufmgr,
1620                       "vertex buffer",
1621                       4096,
1622                       4096);
1623     assert(bo);
1624     render_state->vb.vertex_buffer = bo;
1625
1626     /* VS */
1627     dri_bo_unreference(render_state->vs.state);
1628     bo = dri_bo_alloc(i965->intel.bufmgr,
1629                       "vs state",
1630                       sizeof(struct i965_vs_unit_state),
1631                       64);
1632     assert(bo);
1633     render_state->vs.state = bo;
1634
1635     /* GS */
1636     /* CLIP */
1637     /* SF */
1638     dri_bo_unreference(render_state->sf.state);
1639     bo = dri_bo_alloc(i965->intel.bufmgr,
1640                       "sf state",
1641                       sizeof(struct i965_sf_unit_state),
1642                       64);
1643     assert(bo);
1644     render_state->sf.state = bo;
1645
1646     /* WM */
1647     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1648     bo = dri_bo_alloc(i965->intel.bufmgr,
1649                       "surface state & binding table",
1650                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1651                       4096);
1652     assert(bo);
1653     render_state->wm.surface_state_binding_table_bo = bo;
1654
1655     dri_bo_unreference(render_state->wm.sampler);
1656     bo = dri_bo_alloc(i965->intel.bufmgr,
1657                       "sampler state",
1658                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1659                       64);
1660     assert(bo);
1661     render_state->wm.sampler = bo;
1662     render_state->wm.sampler_count = 0;
1663
1664     dri_bo_unreference(render_state->wm.state);
1665     bo = dri_bo_alloc(i965->intel.bufmgr,
1666                       "wm state",
1667                       sizeof(struct i965_wm_unit_state),
1668                       64);
1669     assert(bo);
1670     render_state->wm.state = bo;
1671
1672     /* COLOR CALCULATOR */
1673     dri_bo_unreference(render_state->cc.state);
1674     bo = dri_bo_alloc(i965->intel.bufmgr,
1675                       "color calc state",
1676                       sizeof(struct i965_cc_unit_state),
1677                       64);
1678     assert(bo);
1679     render_state->cc.state = bo;
1680
1681     dri_bo_unreference(render_state->cc.viewport);
1682     bo = dri_bo_alloc(i965->intel.bufmgr,
1683                       "cc viewport",
1684                       sizeof(struct i965_cc_viewport),
1685                       64);
1686     assert(bo);
1687     render_state->cc.viewport = bo;
1688 }
1689
1690 static void
1691 i965_render_put_surface(
1692     VADriverContextP   ctx,
1693     struct object_surface *obj_surface,
1694     const VARectangle *src_rect,
1695     const VARectangle *dst_rect,
1696     unsigned int       flags
1697 )
1698 {
1699     struct i965_driver_data *i965 = i965_driver_data(ctx);
1700     struct intel_batchbuffer *batch = i965->batch;
1701
1702     i965_render_initialize(ctx);
1703     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1704     i965_surface_render_pipeline_setup(ctx);
1705     intel_batchbuffer_flush(batch);
1706 }
1707
1708 static void
1709 i965_render_put_subpicture(
1710     VADriverContextP   ctx,
1711     struct object_surface *obj_surface,
1712     const VARectangle *src_rect,
1713     const VARectangle *dst_rect
1714 )
1715 {
1716     struct i965_driver_data *i965 = i965_driver_data(ctx);
1717     struct intel_batchbuffer *batch = i965->batch;
1718     unsigned int index = obj_surface->subpic_render_idx;
1719     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1720
1721     assert(obj_subpic);
1722
1723     i965_render_initialize(ctx);
1724     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1725     i965_subpic_render_pipeline_setup(ctx);
1726     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1727     intel_batchbuffer_flush(batch);
1728 }
1729
1730 /*
1731  * for GEN6+
1732  */
1733 static void 
1734 gen6_render_initialize(VADriverContextP ctx)
1735 {
1736     struct i965_driver_data *i965 = i965_driver_data(ctx);
1737     struct i965_render_state *render_state = &i965->render_state;
1738     dri_bo *bo;
1739
1740     /* VERTEX BUFFER */
1741     dri_bo_unreference(render_state->vb.vertex_buffer);
1742     bo = dri_bo_alloc(i965->intel.bufmgr,
1743                       "vertex buffer",
1744                       4096,
1745                       4096);
1746     assert(bo);
1747     render_state->vb.vertex_buffer = bo;
1748
1749     /* WM */
1750     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1751     bo = dri_bo_alloc(i965->intel.bufmgr,
1752                       "surface state & binding table",
1753                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1754                       4096);
1755     assert(bo);
1756     render_state->wm.surface_state_binding_table_bo = bo;
1757
1758     dri_bo_unreference(render_state->wm.sampler);
1759     bo = dri_bo_alloc(i965->intel.bufmgr,
1760                       "sampler state",
1761                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1762                       4096);
1763     assert(bo);
1764     render_state->wm.sampler = bo;
1765     render_state->wm.sampler_count = 0;
1766
1767     /* COLOR CALCULATOR */
1768     dri_bo_unreference(render_state->cc.state);
1769     bo = dri_bo_alloc(i965->intel.bufmgr,
1770                       "color calc state",
1771                       sizeof(struct gen6_color_calc_state),
1772                       4096);
1773     assert(bo);
1774     render_state->cc.state = bo;
1775
1776     /* CC VIEWPORT */
1777     dri_bo_unreference(render_state->cc.viewport);
1778     bo = dri_bo_alloc(i965->intel.bufmgr,
1779                       "cc viewport",
1780                       sizeof(struct i965_cc_viewport),
1781                       4096);
1782     assert(bo);
1783     render_state->cc.viewport = bo;
1784
1785     /* BLEND STATE */
1786     dri_bo_unreference(render_state->cc.blend);
1787     bo = dri_bo_alloc(i965->intel.bufmgr,
1788                       "blend state",
1789                       sizeof(struct gen6_blend_state),
1790                       4096);
1791     assert(bo);
1792     render_state->cc.blend = bo;
1793
1794     /* DEPTH & STENCIL STATE */
1795     dri_bo_unreference(render_state->cc.depth_stencil);
1796     bo = dri_bo_alloc(i965->intel.bufmgr,
1797                       "depth & stencil state",
1798                       sizeof(struct gen6_depth_stencil_state),
1799                       4096);
1800     assert(bo);
1801     render_state->cc.depth_stencil = bo;
1802 }
1803
1804 static void
1805 gen6_render_color_calc_state(VADriverContextP ctx)
1806 {
1807     struct i965_driver_data *i965 = i965_driver_data(ctx);
1808     struct i965_render_state *render_state = &i965->render_state;
1809     struct gen6_color_calc_state *color_calc_state;
1810     
1811     dri_bo_map(render_state->cc.state, 1);
1812     assert(render_state->cc.state->virtual);
1813     color_calc_state = render_state->cc.state->virtual;
1814     memset(color_calc_state, 0, sizeof(*color_calc_state));
1815     color_calc_state->constant_r = 1.0;
1816     color_calc_state->constant_g = 0.0;
1817     color_calc_state->constant_b = 1.0;
1818     color_calc_state->constant_a = 1.0;
1819     dri_bo_unmap(render_state->cc.state);
1820 }
1821
1822 static void
1823 gen6_render_blend_state(VADriverContextP ctx)
1824 {
1825     struct i965_driver_data *i965 = i965_driver_data(ctx);
1826     struct i965_render_state *render_state = &i965->render_state;
1827     struct gen6_blend_state *blend_state;
1828     
1829     dri_bo_map(render_state->cc.blend, 1);
1830     assert(render_state->cc.blend->virtual);
1831     blend_state = render_state->cc.blend->virtual;
1832     memset(blend_state, 0, sizeof(*blend_state));
1833     blend_state->blend1.logic_op_enable = 1;
1834     blend_state->blend1.logic_op_func = 0xc;
1835     dri_bo_unmap(render_state->cc.blend);
1836 }
1837
1838 static void
1839 gen6_render_depth_stencil_state(VADriverContextP ctx)
1840 {
1841     struct i965_driver_data *i965 = i965_driver_data(ctx);
1842     struct i965_render_state *render_state = &i965->render_state;
1843     struct gen6_depth_stencil_state *depth_stencil_state;
1844     
1845     dri_bo_map(render_state->cc.depth_stencil, 1);
1846     assert(render_state->cc.depth_stencil->virtual);
1847     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1848     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1849     dri_bo_unmap(render_state->cc.depth_stencil);
1850 }
1851
1852 static void
1853 gen6_render_setup_states(
1854     VADriverContextP   ctx,
1855     struct object_surface *obj_surface,
1856     const VARectangle *src_rect,
1857     const VARectangle *dst_rect,
1858     unsigned int       flags
1859 )
1860 {
1861     i965_render_dest_surface_state(ctx, 0);
1862     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1863     i965_render_sampler(ctx);
1864     i965_render_cc_viewport(ctx);
1865     gen6_render_color_calc_state(ctx);
1866     gen6_render_blend_state(ctx);
1867     gen6_render_depth_stencil_state(ctx);
1868     i965_render_upload_constants(ctx, obj_surface, flags);
1869     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1870 }
1871
1872 static void
1873 gen6_emit_invarient_states(VADriverContextP ctx)
1874 {
1875     struct i965_driver_data *i965 = i965_driver_data(ctx);
1876     struct intel_batchbuffer *batch = i965->batch;
1877
1878     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1879
1880     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1881     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1882               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1883     OUT_BATCH(batch, 0);
1884
1885     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1886     OUT_BATCH(batch, 1);
1887
1888     /* Set system instruction pointer */
1889     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1890     OUT_BATCH(batch, 0);
1891 }
1892
1893 static void
1894 gen6_emit_state_base_address(VADriverContextP ctx)
1895 {
1896     struct i965_driver_data *i965 = i965_driver_data(ctx);
1897     struct intel_batchbuffer *batch = i965->batch;
1898     struct i965_render_state *render_state = &i965->render_state;
1899
1900     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1901     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1902     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1903     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1904     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1905     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1906     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1907     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1908     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1909     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1910 }
1911
1912 static void
1913 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1914 {
1915     struct i965_driver_data *i965 = i965_driver_data(ctx);
1916     struct intel_batchbuffer *batch = i965->batch;
1917     struct i965_render_state *render_state = &i965->render_state;
1918
1919     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1920               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1921               (4 - 2));
1922     OUT_BATCH(batch, 0);
1923     OUT_BATCH(batch, 0);
1924     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1925 }
1926
1927 static void
1928 gen6_emit_urb(VADriverContextP ctx)
1929 {
1930     struct i965_driver_data *i965 = i965_driver_data(ctx);
1931     struct intel_batchbuffer *batch = i965->batch;
1932
1933     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1934     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1935               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1936     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1937               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1938 }
1939
1940 static void
1941 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1942 {
1943     struct i965_driver_data *i965 = i965_driver_data(ctx);
1944     struct intel_batchbuffer *batch = i965->batch;
1945     struct i965_render_state *render_state = &i965->render_state;
1946
1947     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1948     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1949     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1950     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1951 }
1952
1953 static void
1954 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1955 {
1956     struct i965_driver_data *i965 = i965_driver_data(ctx);
1957     struct intel_batchbuffer *batch = i965->batch;
1958     struct i965_render_state *render_state = &i965->render_state;
1959
1960     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1961               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1962               (4 - 2));
1963     OUT_BATCH(batch, 0); /* VS */
1964     OUT_BATCH(batch, 0); /* GS */
1965     OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1966 }
1967
1968 static void
1969 gen6_emit_binding_table(VADriverContextP ctx)
1970 {
1971     struct i965_driver_data *i965 = i965_driver_data(ctx);
1972     struct intel_batchbuffer *batch = i965->batch;
1973
1974     /* Binding table pointers */
1975     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1976               GEN6_BINDING_TABLE_MODIFY_PS |
1977               (4 - 2));
1978     OUT_BATCH(batch, 0);                /* vs */
1979     OUT_BATCH(batch, 0);                /* gs */
1980     /* Only the PS uses the binding table */
1981     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1982 }
1983
1984 static void
1985 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1986 {
1987     struct i965_driver_data *i965 = i965_driver_data(ctx);
1988     struct intel_batchbuffer *batch = i965->batch;
1989
1990     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1991     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1992               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1993     OUT_BATCH(batch, 0);
1994     OUT_BATCH(batch, 0);
1995     OUT_BATCH(batch, 0);
1996     OUT_BATCH(batch, 0);
1997     OUT_BATCH(batch, 0);
1998
1999     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2000     OUT_BATCH(batch, 0);
2001 }
2002
2003 static void
2004 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2005 {
2006     i965_render_drawing_rectangle(ctx);
2007 }
2008
2009 static void 
2010 gen6_emit_vs_state(VADriverContextP ctx)
2011 {
2012     struct i965_driver_data *i965 = i965_driver_data(ctx);
2013     struct intel_batchbuffer *batch = i965->batch;
2014
2015     /* disable VS constant buffer */
2016     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2017     OUT_BATCH(batch, 0);
2018     OUT_BATCH(batch, 0);
2019     OUT_BATCH(batch, 0);
2020     OUT_BATCH(batch, 0);
2021         
2022     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2023     OUT_BATCH(batch, 0); /* without VS kernel */
2024     OUT_BATCH(batch, 0);
2025     OUT_BATCH(batch, 0);
2026     OUT_BATCH(batch, 0);
2027     OUT_BATCH(batch, 0); /* pass-through */
2028 }
2029
2030 static void 
2031 gen6_emit_gs_state(VADriverContextP ctx)
2032 {
2033     struct i965_driver_data *i965 = i965_driver_data(ctx);
2034     struct intel_batchbuffer *batch = i965->batch;
2035
2036     /* disable GS constant buffer */
2037     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2038     OUT_BATCH(batch, 0);
2039     OUT_BATCH(batch, 0);
2040     OUT_BATCH(batch, 0);
2041     OUT_BATCH(batch, 0);
2042         
2043     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2044     OUT_BATCH(batch, 0); /* without GS kernel */
2045     OUT_BATCH(batch, 0);
2046     OUT_BATCH(batch, 0);
2047     OUT_BATCH(batch, 0);
2048     OUT_BATCH(batch, 0);
2049     OUT_BATCH(batch, 0); /* pass-through */
2050 }
2051
2052 static void 
2053 gen6_emit_clip_state(VADriverContextP ctx)
2054 {
2055     struct i965_driver_data *i965 = i965_driver_data(ctx);
2056     struct intel_batchbuffer *batch = i965->batch;
2057
2058     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2059     OUT_BATCH(batch, 0);
2060     OUT_BATCH(batch, 0); /* pass-through */
2061     OUT_BATCH(batch, 0);
2062 }
2063
2064 static void 
2065 gen6_emit_sf_state(VADriverContextP ctx)
2066 {
2067     struct i965_driver_data *i965 = i965_driver_data(ctx);
2068     struct intel_batchbuffer *batch = i965->batch;
2069
2070     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2071     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2072               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2073               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2074     OUT_BATCH(batch, 0);
2075     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2076     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2077     OUT_BATCH(batch, 0);
2078     OUT_BATCH(batch, 0);
2079     OUT_BATCH(batch, 0);
2080     OUT_BATCH(batch, 0);
2081     OUT_BATCH(batch, 0); /* DW9 */
2082     OUT_BATCH(batch, 0);
2083     OUT_BATCH(batch, 0);
2084     OUT_BATCH(batch, 0);
2085     OUT_BATCH(batch, 0);
2086     OUT_BATCH(batch, 0); /* DW14 */
2087     OUT_BATCH(batch, 0);
2088     OUT_BATCH(batch, 0);
2089     OUT_BATCH(batch, 0);
2090     OUT_BATCH(batch, 0);
2091     OUT_BATCH(batch, 0); /* DW19 */
2092 }
2093
2094 static void 
2095 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2096 {
2097     struct i965_driver_data *i965 = i965_driver_data(ctx);
2098     struct intel_batchbuffer *batch = i965->batch;
2099     struct i965_render_state *render_state = &i965->render_state;
2100
2101     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2102               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2103               (5 - 2));
2104     OUT_RELOC(batch, 
2105               render_state->curbe.bo,
2106               I915_GEM_DOMAIN_INSTRUCTION, 0,
2107               (URB_CS_ENTRY_SIZE-1));
2108     OUT_BATCH(batch, 0);
2109     OUT_BATCH(batch, 0);
2110     OUT_BATCH(batch, 0);
2111
2112     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2113     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2114               I915_GEM_DOMAIN_INSTRUCTION, 0,
2115               0);
2116     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2117               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2118     OUT_BATCH(batch, 0);
2119     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2120     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2121               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2122               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2123     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2124               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2125     OUT_BATCH(batch, 0);
2126     OUT_BATCH(batch, 0);
2127 }
2128
2129 static void
2130 gen6_emit_vertex_element_state(VADriverContextP ctx)
2131 {
2132     struct i965_driver_data *i965 = i965_driver_data(ctx);
2133     struct intel_batchbuffer *batch = i965->batch;
2134
2135     /* Set up our vertex elements, sourced from the single vertex buffer. */
2136     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2137     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2138     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2139               GEN6_VE0_VALID |
2140               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2141               (0 << VE0_OFFSET_SHIFT));
2142     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2143               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2144               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2145               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2146     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2147     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2148               GEN6_VE0_VALID |
2149               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2150               (8 << VE0_OFFSET_SHIFT));
2151     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2152               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2153               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2154               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2155 }
2156
2157 static void
2158 gen6_emit_vertices(VADriverContextP ctx)
2159 {
2160     struct i965_driver_data *i965 = i965_driver_data(ctx);
2161     struct intel_batchbuffer *batch = i965->batch;
2162     struct i965_render_state *render_state = &i965->render_state;
2163
2164     BEGIN_BATCH(batch, 11);
2165     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2166     OUT_BATCH(batch, 
2167               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2168               GEN6_VB0_VERTEXDATA |
2169               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2170     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2171     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2172     OUT_BATCH(batch, 0);
2173
2174     OUT_BATCH(batch, 
2175               CMD_3DPRIMITIVE |
2176               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2177               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2178               (0 << 9) |
2179               4);
2180     OUT_BATCH(batch, 3); /* vertex count per instance */
2181     OUT_BATCH(batch, 0); /* start vertex offset */
2182     OUT_BATCH(batch, 1); /* single instance */
2183     OUT_BATCH(batch, 0); /* start instance location */
2184     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2185     ADVANCE_BATCH(batch);
2186 }
2187
2188 static void
2189 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2190 {
2191     struct i965_driver_data *i965 = i965_driver_data(ctx);
2192     struct intel_batchbuffer *batch = i965->batch;
2193
2194     intel_batchbuffer_start_atomic(batch, 0x1000);
2195     intel_batchbuffer_emit_mi_flush(batch);
2196     gen6_emit_invarient_states(ctx);
2197     gen6_emit_state_base_address(ctx);
2198     gen6_emit_viewport_state_pointers(ctx);
2199     gen6_emit_urb(ctx);
2200     gen6_emit_cc_state_pointers(ctx);
2201     gen6_emit_sampler_state_pointers(ctx);
2202     gen6_emit_vs_state(ctx);
2203     gen6_emit_gs_state(ctx);
2204     gen6_emit_clip_state(ctx);
2205     gen6_emit_sf_state(ctx);
2206     gen6_emit_wm_state(ctx, kernel);
2207     gen6_emit_binding_table(ctx);
2208     gen6_emit_depth_buffer_state(ctx);
2209     gen6_emit_drawing_rectangle(ctx);
2210     gen6_emit_vertex_element_state(ctx);
2211     gen6_emit_vertices(ctx);
2212     intel_batchbuffer_end_atomic(batch);
2213 }
2214
2215 static void
2216 gen6_render_put_surface(
2217     VADriverContextP   ctx,
2218     struct object_surface *obj_surface,
2219     const VARectangle *src_rect,
2220     const VARectangle *dst_rect,
2221     unsigned int       flags
2222 )
2223 {
2224     struct i965_driver_data *i965 = i965_driver_data(ctx);
2225     struct intel_batchbuffer *batch = i965->batch;
2226
2227     gen6_render_initialize(ctx);
2228     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2229     i965_clear_dest_region(ctx);
2230     gen6_render_emit_states(ctx, PS_KERNEL);
2231     intel_batchbuffer_flush(batch);
2232 }
2233
2234 static void
2235 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2236 {
2237     struct i965_driver_data *i965 = i965_driver_data(ctx);
2238     struct i965_render_state *render_state = &i965->render_state;
2239     struct gen6_blend_state *blend_state;
2240
2241     dri_bo_unmap(render_state->cc.state);    
2242     dri_bo_map(render_state->cc.blend, 1);
2243     assert(render_state->cc.blend->virtual);
2244     blend_state = render_state->cc.blend->virtual;
2245     memset(blend_state, 0, sizeof(*blend_state));
2246     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2247     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2248     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2249     blend_state->blend0.blend_enable = 1;
2250     blend_state->blend1.post_blend_clamp_enable = 1;
2251     blend_state->blend1.pre_blend_clamp_enable = 1;
2252     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2253     dri_bo_unmap(render_state->cc.blend);
2254 }
2255
2256 static void
2257 gen6_subpicture_render_setup_states(
2258     VADriverContextP   ctx,
2259     struct object_surface *obj_surface,
2260     const VARectangle *src_rect,
2261     const VARectangle *dst_rect
2262 )
2263 {
2264     i965_render_dest_surface_state(ctx, 0);
2265     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2266     i965_render_sampler(ctx);
2267     i965_render_cc_viewport(ctx);
2268     gen6_render_color_calc_state(ctx);
2269     gen6_subpicture_render_blend_state(ctx);
2270     gen6_render_depth_stencil_state(ctx);
2271     i965_subpic_render_upload_constants(ctx, obj_surface);
2272     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2273 }
2274
2275 static void
2276 gen6_render_put_subpicture(
2277     VADriverContextP   ctx,
2278     struct object_surface *obj_surface,
2279     const VARectangle *src_rect,
2280     const VARectangle *dst_rect
2281 )
2282 {
2283     struct i965_driver_data *i965 = i965_driver_data(ctx);
2284     struct intel_batchbuffer *batch = i965->batch;
2285     unsigned int index = obj_surface->subpic_render_idx;
2286     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2287
2288     assert(obj_subpic);
2289     gen6_render_initialize(ctx);
2290     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2291     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2292     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2293     intel_batchbuffer_flush(batch);
2294 }
2295
2296 /*
2297  * for GEN7
2298  */
2299 static void 
2300 gen7_render_initialize(VADriverContextP ctx)
2301 {
2302     struct i965_driver_data *i965 = i965_driver_data(ctx);
2303     struct i965_render_state *render_state = &i965->render_state;
2304     dri_bo *bo;
2305
2306     /* VERTEX BUFFER */
2307     dri_bo_unreference(render_state->vb.vertex_buffer);
2308     bo = dri_bo_alloc(i965->intel.bufmgr,
2309                       "vertex buffer",
2310                       4096,
2311                       4096);
2312     assert(bo);
2313     render_state->vb.vertex_buffer = bo;
2314
2315     /* WM */
2316     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2317     bo = dri_bo_alloc(i965->intel.bufmgr,
2318                       "surface state & binding table",
2319                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2320                       4096);
2321     assert(bo);
2322     render_state->wm.surface_state_binding_table_bo = bo;
2323
2324     dri_bo_unreference(render_state->wm.sampler);
2325     bo = dri_bo_alloc(i965->intel.bufmgr,
2326                       "sampler state",
2327                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2328                       4096);
2329     assert(bo);
2330     render_state->wm.sampler = bo;
2331     render_state->wm.sampler_count = 0;
2332
2333     /* COLOR CALCULATOR */
2334     dri_bo_unreference(render_state->cc.state);
2335     bo = dri_bo_alloc(i965->intel.bufmgr,
2336                       "color calc state",
2337                       sizeof(struct gen6_color_calc_state),
2338                       4096);
2339     assert(bo);
2340     render_state->cc.state = bo;
2341
2342     /* CC VIEWPORT */
2343     dri_bo_unreference(render_state->cc.viewport);
2344     bo = dri_bo_alloc(i965->intel.bufmgr,
2345                       "cc viewport",
2346                       sizeof(struct i965_cc_viewport),
2347                       4096);
2348     assert(bo);
2349     render_state->cc.viewport = bo;
2350
2351     /* BLEND STATE */
2352     dri_bo_unreference(render_state->cc.blend);
2353     bo = dri_bo_alloc(i965->intel.bufmgr,
2354                       "blend state",
2355                       sizeof(struct gen6_blend_state),
2356                       4096);
2357     assert(bo);
2358     render_state->cc.blend = bo;
2359
2360     /* DEPTH & STENCIL STATE */
2361     dri_bo_unreference(render_state->cc.depth_stencil);
2362     bo = dri_bo_alloc(i965->intel.bufmgr,
2363                       "depth & stencil state",
2364                       sizeof(struct gen6_depth_stencil_state),
2365                       4096);
2366     assert(bo);
2367     render_state->cc.depth_stencil = bo;
2368 }
2369
2370 /*
2371  * for GEN8
2372  */
2373 #define ALIGNMENT       64
2374
2375 static void
2376 gen7_render_color_calc_state(VADriverContextP ctx)
2377 {
2378     struct i965_driver_data *i965 = i965_driver_data(ctx);
2379     struct i965_render_state *render_state = &i965->render_state;
2380     struct gen6_color_calc_state *color_calc_state;
2381     
2382     dri_bo_map(render_state->cc.state, 1);
2383     assert(render_state->cc.state->virtual);
2384     color_calc_state = render_state->cc.state->virtual;
2385     memset(color_calc_state, 0, sizeof(*color_calc_state));
2386     color_calc_state->constant_r = 1.0;
2387     color_calc_state->constant_g = 0.0;
2388     color_calc_state->constant_b = 1.0;
2389     color_calc_state->constant_a = 1.0;
2390     dri_bo_unmap(render_state->cc.state);
2391 }
2392
2393 static void
2394 gen7_render_blend_state(VADriverContextP ctx)
2395 {
2396     struct i965_driver_data *i965 = i965_driver_data(ctx);
2397     struct i965_render_state *render_state = &i965->render_state;
2398     struct gen6_blend_state *blend_state;
2399     
2400     dri_bo_map(render_state->cc.blend, 1);
2401     assert(render_state->cc.blend->virtual);
2402     blend_state = render_state->cc.blend->virtual;
2403     memset(blend_state, 0, sizeof(*blend_state));
2404     blend_state->blend1.logic_op_enable = 1;
2405     blend_state->blend1.logic_op_func = 0xc;
2406     blend_state->blend1.pre_blend_clamp_enable = 1;
2407     dri_bo_unmap(render_state->cc.blend);
2408 }
2409
2410 static void
2411 gen7_render_depth_stencil_state(VADriverContextP ctx)
2412 {
2413     struct i965_driver_data *i965 = i965_driver_data(ctx);
2414     struct i965_render_state *render_state = &i965->render_state;
2415     struct gen6_depth_stencil_state *depth_stencil_state;
2416     
2417     dri_bo_map(render_state->cc.depth_stencil, 1);
2418     assert(render_state->cc.depth_stencil->virtual);
2419     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2420     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2421     dri_bo_unmap(render_state->cc.depth_stencil);
2422 }
2423
2424 static void 
2425 gen7_render_sampler(VADriverContextP ctx)
2426 {
2427     struct i965_driver_data *i965 = i965_driver_data(ctx);
2428     struct i965_render_state *render_state = &i965->render_state;
2429     struct gen7_sampler_state *sampler_state;
2430     int i;
2431     
2432     assert(render_state->wm.sampler_count > 0);
2433     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2434
2435     dri_bo_map(render_state->wm.sampler, 1);
2436     assert(render_state->wm.sampler->virtual);
2437     sampler_state = render_state->wm.sampler->virtual;
2438     for (i = 0; i < render_state->wm.sampler_count; i++) {
2439         memset(sampler_state, 0, sizeof(*sampler_state));
2440         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2441         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2442         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2443         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2444         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2445         sampler_state++;
2446     }
2447
2448     dri_bo_unmap(render_state->wm.sampler);
2449 }
2450
2451
2452 static void
2453 gen7_render_setup_states(
2454     VADriverContextP   ctx,
2455     struct object_surface *obj_surface,
2456     const VARectangle *src_rect,
2457     const VARectangle *dst_rect,
2458     unsigned int       flags
2459 )
2460 {
2461     i965_render_dest_surface_state(ctx, 0);
2462     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2463     gen7_render_sampler(ctx);
2464     i965_render_cc_viewport(ctx);
2465     gen7_render_color_calc_state(ctx);
2466     gen7_render_blend_state(ctx);
2467     gen7_render_depth_stencil_state(ctx);
2468     i965_render_upload_constants(ctx, obj_surface, flags);
2469     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2470 }
2471
2472
2473 static void
2474 gen7_emit_invarient_states(VADriverContextP ctx)
2475 {
2476     struct i965_driver_data *i965 = i965_driver_data(ctx);
2477     struct intel_batchbuffer *batch = i965->batch;
2478
2479     BEGIN_BATCH(batch, 1);
2480     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2481     ADVANCE_BATCH(batch);
2482
2483     BEGIN_BATCH(batch, 4);
2484     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2485     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2486               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2487     OUT_BATCH(batch, 0);
2488     OUT_BATCH(batch, 0);
2489     ADVANCE_BATCH(batch);
2490
2491     BEGIN_BATCH(batch, 2);
2492     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2493     OUT_BATCH(batch, 1);
2494     ADVANCE_BATCH(batch);
2495
2496     /* Set system instruction pointer */
2497     BEGIN_BATCH(batch, 2);
2498     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2499     OUT_BATCH(batch, 0);
2500     ADVANCE_BATCH(batch);
2501 }
2502
2503 static void
2504 gen7_emit_state_base_address(VADriverContextP ctx)
2505 {
2506     struct i965_driver_data *i965 = i965_driver_data(ctx);
2507     struct intel_batchbuffer *batch = i965->batch;
2508     struct i965_render_state *render_state = &i965->render_state;
2509
2510     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2511     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2512     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2513     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2514     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2515     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2516     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2517     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2518     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2519     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2520 }
2521
2522 static void
2523 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2524 {
2525     struct i965_driver_data *i965 = i965_driver_data(ctx);
2526     struct intel_batchbuffer *batch = i965->batch;
2527     struct i965_render_state *render_state = &i965->render_state;
2528
2529     BEGIN_BATCH(batch, 2);
2530     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2531     OUT_RELOC(batch,
2532               render_state->cc.viewport,
2533               I915_GEM_DOMAIN_INSTRUCTION, 0,
2534               0);
2535     ADVANCE_BATCH(batch);
2536
2537     BEGIN_BATCH(batch, 2);
2538     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2539     OUT_BATCH(batch, 0);
2540     ADVANCE_BATCH(batch);
2541 }
2542
2543 /*
2544  * URB layout on GEN7 
2545  * ----------------------------------------
2546  * | PS Push Constants (8KB) | VS entries |
2547  * ----------------------------------------
2548  */
2549 static void
2550 gen7_emit_urb(VADriverContextP ctx)
2551 {
2552     struct i965_driver_data *i965 = i965_driver_data(ctx);
2553     struct intel_batchbuffer *batch = i965->batch;
2554     unsigned int num_urb_entries = 32;
2555
2556     if (IS_HASWELL(i965->intel.device_id))
2557         num_urb_entries = 64;
2558
2559     BEGIN_BATCH(batch, 2);
2560     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2561     OUT_BATCH(batch, 8); /* in 1KBs */
2562     ADVANCE_BATCH(batch);
2563
2564     BEGIN_BATCH(batch, 2);
2565     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2566     OUT_BATCH(batch, 
2567               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2568               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2569               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2570    ADVANCE_BATCH(batch);
2571
2572    BEGIN_BATCH(batch, 2);
2573    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2574    OUT_BATCH(batch,
2575              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2576              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2577    ADVANCE_BATCH(batch);
2578
2579    BEGIN_BATCH(batch, 2);
2580    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2581    OUT_BATCH(batch,
2582              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2583              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2584    ADVANCE_BATCH(batch);
2585
2586    BEGIN_BATCH(batch, 2);
2587    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2588    OUT_BATCH(batch,
2589              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2590              (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2591    ADVANCE_BATCH(batch);
2592 }
2593
2594 static void
2595 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2596 {
2597     struct i965_driver_data *i965 = i965_driver_data(ctx);
2598     struct intel_batchbuffer *batch = i965->batch;
2599     struct i965_render_state *render_state = &i965->render_state;
2600
2601     BEGIN_BATCH(batch, 2);
2602     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2603     OUT_RELOC(batch,
2604               render_state->cc.state,
2605               I915_GEM_DOMAIN_INSTRUCTION, 0,
2606               1);
2607     ADVANCE_BATCH(batch);
2608
2609     BEGIN_BATCH(batch, 2);
2610     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2611     OUT_RELOC(batch,
2612               render_state->cc.blend,
2613               I915_GEM_DOMAIN_INSTRUCTION, 0,
2614               1);
2615     ADVANCE_BATCH(batch);
2616
2617     BEGIN_BATCH(batch, 2);
2618     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2619     OUT_RELOC(batch,
2620               render_state->cc.depth_stencil,
2621               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2622               1);
2623     ADVANCE_BATCH(batch);
2624 }
2625
2626 static void
2627 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2628 {
2629     struct i965_driver_data *i965 = i965_driver_data(ctx);
2630     struct intel_batchbuffer *batch = i965->batch;
2631     struct i965_render_state *render_state = &i965->render_state;
2632
2633     BEGIN_BATCH(batch, 2);
2634     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2635     OUT_RELOC(batch,
2636               render_state->wm.sampler,
2637               I915_GEM_DOMAIN_INSTRUCTION, 0,
2638               0);
2639     ADVANCE_BATCH(batch);
2640 }
2641
2642 static void
2643 gen7_emit_binding_table(VADriverContextP ctx)
2644 {
2645     struct i965_driver_data *i965 = i965_driver_data(ctx);
2646     struct intel_batchbuffer *batch = i965->batch;
2647
2648     BEGIN_BATCH(batch, 2);
2649     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2650     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2651     ADVANCE_BATCH(batch);
2652 }
2653
2654 static void
2655 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2656 {
2657     struct i965_driver_data *i965 = i965_driver_data(ctx);
2658     struct intel_batchbuffer *batch = i965->batch;
2659
2660     BEGIN_BATCH(batch, 7);
2661     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2662     OUT_BATCH(batch,
2663               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2664               (I965_SURFACE_NULL << 29));
2665     OUT_BATCH(batch, 0);
2666     OUT_BATCH(batch, 0);
2667     OUT_BATCH(batch, 0);
2668     OUT_BATCH(batch, 0);
2669     OUT_BATCH(batch, 0);
2670     ADVANCE_BATCH(batch);
2671
2672     BEGIN_BATCH(batch, 3);
2673     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2674     OUT_BATCH(batch, 0);
2675     OUT_BATCH(batch, 0);
2676     ADVANCE_BATCH(batch);
2677 }
2678
2679 static void
2680 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2681 {
2682     i965_render_drawing_rectangle(ctx);
2683 }
2684
2685 static void 
2686 gen7_emit_vs_state(VADriverContextP ctx)
2687 {
2688     struct i965_driver_data *i965 = i965_driver_data(ctx);
2689     struct intel_batchbuffer *batch = i965->batch;
2690
2691     /* disable VS constant buffer */
2692     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2693     OUT_BATCH(batch, 0);
2694     OUT_BATCH(batch, 0);
2695     OUT_BATCH(batch, 0);
2696     OUT_BATCH(batch, 0);
2697     OUT_BATCH(batch, 0);
2698     OUT_BATCH(batch, 0);
2699         
2700     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2701     OUT_BATCH(batch, 0); /* without VS kernel */
2702     OUT_BATCH(batch, 0);
2703     OUT_BATCH(batch, 0);
2704     OUT_BATCH(batch, 0);
2705     OUT_BATCH(batch, 0); /* pass-through */
2706 }
2707
2708 static void 
2709 gen7_emit_bypass_state(VADriverContextP ctx)
2710 {
2711     struct i965_driver_data *i965 = i965_driver_data(ctx);
2712     struct intel_batchbuffer *batch = i965->batch;
2713
2714     /* bypass GS */
2715     BEGIN_BATCH(batch, 7);
2716     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2717     OUT_BATCH(batch, 0);
2718     OUT_BATCH(batch, 0);
2719     OUT_BATCH(batch, 0);
2720     OUT_BATCH(batch, 0);
2721     OUT_BATCH(batch, 0);
2722     OUT_BATCH(batch, 0);
2723     ADVANCE_BATCH(batch);
2724
2725     BEGIN_BATCH(batch, 7);      
2726     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2727     OUT_BATCH(batch, 0); /* without GS kernel */
2728     OUT_BATCH(batch, 0);
2729     OUT_BATCH(batch, 0);
2730     OUT_BATCH(batch, 0);
2731     OUT_BATCH(batch, 0);
2732     OUT_BATCH(batch, 0); /* pass-through */
2733     ADVANCE_BATCH(batch);
2734
2735     BEGIN_BATCH(batch, 2);
2736     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2737     OUT_BATCH(batch, 0);
2738     ADVANCE_BATCH(batch);
2739
2740     /* disable HS */
2741     BEGIN_BATCH(batch, 7);
2742     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2743     OUT_BATCH(batch, 0);
2744     OUT_BATCH(batch, 0);
2745     OUT_BATCH(batch, 0);
2746     OUT_BATCH(batch, 0);
2747     OUT_BATCH(batch, 0);
2748     OUT_BATCH(batch, 0);
2749     ADVANCE_BATCH(batch);
2750
2751     BEGIN_BATCH(batch, 7);
2752     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2753     OUT_BATCH(batch, 0);
2754     OUT_BATCH(batch, 0);
2755     OUT_BATCH(batch, 0);
2756     OUT_BATCH(batch, 0);
2757     OUT_BATCH(batch, 0);
2758     OUT_BATCH(batch, 0);
2759     ADVANCE_BATCH(batch);
2760
2761     BEGIN_BATCH(batch, 2);
2762     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2763     OUT_BATCH(batch, 0);
2764     ADVANCE_BATCH(batch);
2765
2766     /* Disable TE */
2767     BEGIN_BATCH(batch, 4);
2768     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2769     OUT_BATCH(batch, 0);
2770     OUT_BATCH(batch, 0);
2771     OUT_BATCH(batch, 0);
2772     ADVANCE_BATCH(batch);
2773
2774     /* Disable DS */
2775     BEGIN_BATCH(batch, 7);
2776     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2777     OUT_BATCH(batch, 0);
2778     OUT_BATCH(batch, 0);
2779     OUT_BATCH(batch, 0);
2780     OUT_BATCH(batch, 0);
2781     OUT_BATCH(batch, 0);
2782     OUT_BATCH(batch, 0);
2783     ADVANCE_BATCH(batch);
2784
2785     BEGIN_BATCH(batch, 6);
2786     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2787     OUT_BATCH(batch, 0);
2788     OUT_BATCH(batch, 0);
2789     OUT_BATCH(batch, 0);
2790     OUT_BATCH(batch, 0);
2791     OUT_BATCH(batch, 0);
2792     ADVANCE_BATCH(batch);
2793
2794     BEGIN_BATCH(batch, 2);
2795     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2796     OUT_BATCH(batch, 0);
2797     ADVANCE_BATCH(batch);
2798
2799     /* Disable STREAMOUT */
2800     BEGIN_BATCH(batch, 3);
2801     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2802     OUT_BATCH(batch, 0);
2803     OUT_BATCH(batch, 0);
2804     ADVANCE_BATCH(batch);
2805 }
2806
2807 static void 
2808 gen7_emit_clip_state(VADriverContextP ctx)
2809 {
2810     struct i965_driver_data *i965 = i965_driver_data(ctx);
2811     struct intel_batchbuffer *batch = i965->batch;
2812
2813     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2814     OUT_BATCH(batch, 0);
2815     OUT_BATCH(batch, 0); /* pass-through */
2816     OUT_BATCH(batch, 0);
2817 }
2818
2819 static void 
2820 gen7_emit_sf_state(VADriverContextP ctx)
2821 {
2822     struct i965_driver_data *i965 = i965_driver_data(ctx);
2823     struct intel_batchbuffer *batch = i965->batch;
2824
2825     BEGIN_BATCH(batch, 14);
2826     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2827     OUT_BATCH(batch,
2828               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2829               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2830               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2831     OUT_BATCH(batch, 0);
2832     OUT_BATCH(batch, 0);
2833     OUT_BATCH(batch, 0); /* DW4 */
2834     OUT_BATCH(batch, 0);
2835     OUT_BATCH(batch, 0);
2836     OUT_BATCH(batch, 0);
2837     OUT_BATCH(batch, 0);
2838     OUT_BATCH(batch, 0); /* DW9 */
2839     OUT_BATCH(batch, 0);
2840     OUT_BATCH(batch, 0);
2841     OUT_BATCH(batch, 0);
2842     OUT_BATCH(batch, 0);
2843     ADVANCE_BATCH(batch);
2844
2845     BEGIN_BATCH(batch, 7);
2846     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2847     OUT_BATCH(batch, 0);
2848     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2849     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2850     OUT_BATCH(batch, 0);
2851     OUT_BATCH(batch, 0);
2852     OUT_BATCH(batch, 0);
2853     ADVANCE_BATCH(batch);
2854 }
2855
2856 static void 
2857 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2858 {
2859     struct i965_driver_data *i965 = i965_driver_data(ctx);
2860     struct intel_batchbuffer *batch = i965->batch;
2861     struct i965_render_state *render_state = &i965->render_state;
2862     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2863     unsigned int num_samples = 0;
2864
2865     if (IS_HASWELL(i965->intel.device_id)) {
2866         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2867         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2868     }
2869
2870     BEGIN_BATCH(batch, 3);
2871     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2872     OUT_BATCH(batch,
2873               GEN7_WM_DISPATCH_ENABLE |
2874               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2875     OUT_BATCH(batch, 0);
2876     ADVANCE_BATCH(batch);
2877
2878     BEGIN_BATCH(batch, 7);
2879     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2880     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2881     OUT_BATCH(batch, 0);
2882     OUT_RELOC(batch, 
2883               render_state->curbe.bo,
2884               I915_GEM_DOMAIN_INSTRUCTION, 0,
2885               0);
2886     OUT_BATCH(batch, 0);
2887     OUT_BATCH(batch, 0);
2888     OUT_BATCH(batch, 0);
2889     ADVANCE_BATCH(batch);
2890
2891     BEGIN_BATCH(batch, 8);
2892     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2893     OUT_RELOC(batch, 
2894               render_state->render_kernels[kernel].bo,
2895               I915_GEM_DOMAIN_INSTRUCTION, 0,
2896               0);
2897     OUT_BATCH(batch, 
2898               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2899               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2900     OUT_BATCH(batch, 0); /* scratch space base offset */
2901     OUT_BATCH(batch, 
2902               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2903               GEN7_PS_PUSH_CONSTANT_ENABLE |
2904               GEN7_PS_ATTRIBUTE_ENABLE |
2905               GEN7_PS_16_DISPATCH_ENABLE);
2906     OUT_BATCH(batch, 
2907               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2908     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2909     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2910     ADVANCE_BATCH(batch);
2911 }
2912
2913 static void
2914 gen7_emit_vertex_element_state(VADriverContextP ctx)
2915 {
2916     struct i965_driver_data *i965 = i965_driver_data(ctx);
2917     struct intel_batchbuffer *batch = i965->batch;
2918
2919     /* Set up our vertex elements, sourced from the single vertex buffer. */
2920     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2921     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2922     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2923               GEN6_VE0_VALID |
2924               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2925               (0 << VE0_OFFSET_SHIFT));
2926     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2927               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2928               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2929               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2930     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2931     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2932               GEN6_VE0_VALID |
2933               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2934               (8 << VE0_OFFSET_SHIFT));
2935     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2936               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2937               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2938               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2939 }
2940
2941 static void
2942 gen7_emit_vertices(VADriverContextP ctx)
2943 {
2944     struct i965_driver_data *i965 = i965_driver_data(ctx);
2945     struct intel_batchbuffer *batch = i965->batch;
2946     struct i965_render_state *render_state = &i965->render_state;
2947
2948     BEGIN_BATCH(batch, 5);
2949     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2950     OUT_BATCH(batch, 
2951               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2952               GEN6_VB0_VERTEXDATA |
2953               GEN7_VB0_ADDRESS_MODIFYENABLE |
2954               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2955     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2956     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2957     OUT_BATCH(batch, 0);
2958     ADVANCE_BATCH(batch);
2959
2960     BEGIN_BATCH(batch, 7);
2961     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2962     OUT_BATCH(batch,
2963               _3DPRIM_RECTLIST |
2964               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2965     OUT_BATCH(batch, 3); /* vertex count per instance */
2966     OUT_BATCH(batch, 0); /* start vertex offset */
2967     OUT_BATCH(batch, 1); /* single instance */
2968     OUT_BATCH(batch, 0); /* start instance location */
2969     OUT_BATCH(batch, 0);
2970     ADVANCE_BATCH(batch);
2971 }
2972
2973 static void
2974 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2975 {
2976     struct i965_driver_data *i965 = i965_driver_data(ctx);
2977     struct intel_batchbuffer *batch = i965->batch;
2978
2979     intel_batchbuffer_start_atomic(batch, 0x1000);
2980     intel_batchbuffer_emit_mi_flush(batch);
2981     gen7_emit_invarient_states(ctx);
2982     gen7_emit_state_base_address(ctx);
2983     gen7_emit_viewport_state_pointers(ctx);
2984     gen7_emit_urb(ctx);
2985     gen7_emit_cc_state_pointers(ctx);
2986     gen7_emit_sampler_state_pointers(ctx);
2987     gen7_emit_bypass_state(ctx);
2988     gen7_emit_vs_state(ctx);
2989     gen7_emit_clip_state(ctx);
2990     gen7_emit_sf_state(ctx);
2991     gen7_emit_wm_state(ctx, kernel);
2992     gen7_emit_binding_table(ctx);
2993     gen7_emit_depth_buffer_state(ctx);
2994     gen7_emit_drawing_rectangle(ctx);
2995     gen7_emit_vertex_element_state(ctx);
2996     gen7_emit_vertices(ctx);
2997     intel_batchbuffer_end_atomic(batch);
2998 }
2999
3000
3001 static void
3002 gen7_render_put_surface(
3003     VADriverContextP   ctx,
3004     struct object_surface *obj_surface,    
3005     const VARectangle *src_rect,
3006     const VARectangle *dst_rect,
3007     unsigned int       flags
3008 )
3009 {
3010     struct i965_driver_data *i965 = i965_driver_data(ctx);
3011     struct intel_batchbuffer *batch = i965->batch;
3012
3013     gen7_render_initialize(ctx);
3014     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
3015     i965_clear_dest_region(ctx);
3016     gen7_render_emit_states(ctx, PS_KERNEL);
3017     intel_batchbuffer_flush(batch);
3018 }
3019
3020
3021 static void
3022 gen7_subpicture_render_blend_state(VADriverContextP ctx)
3023 {
3024     struct i965_driver_data *i965 = i965_driver_data(ctx);
3025     struct i965_render_state *render_state = &i965->render_state;
3026     struct gen6_blend_state *blend_state;
3027
3028     dri_bo_unmap(render_state->cc.state);    
3029     dri_bo_map(render_state->cc.blend, 1);
3030     assert(render_state->cc.blend->virtual);
3031     blend_state = render_state->cc.blend->virtual;
3032     memset(blend_state, 0, sizeof(*blend_state));
3033     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
3034     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
3035     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
3036     blend_state->blend0.blend_enable = 1;
3037     blend_state->blend1.post_blend_clamp_enable = 1;
3038     blend_state->blend1.pre_blend_clamp_enable = 1;
3039     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
3040     dri_bo_unmap(render_state->cc.blend);
3041 }
3042
3043 static void
3044 gen7_subpicture_render_setup_states(
3045     VADriverContextP   ctx,
3046     struct object_surface *obj_surface,
3047     const VARectangle *src_rect,
3048     const VARectangle *dst_rect
3049 )
3050 {
3051     i965_render_dest_surface_state(ctx, 0);
3052     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
3053     i965_render_sampler(ctx);
3054     i965_render_cc_viewport(ctx);
3055     gen7_render_color_calc_state(ctx);
3056     gen7_subpicture_render_blend_state(ctx);
3057     gen7_render_depth_stencil_state(ctx);
3058     i965_subpic_render_upload_constants(ctx, obj_surface);
3059     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
3060 }
3061
3062 static void
3063 gen7_render_put_subpicture(
3064     VADriverContextP   ctx,
3065     struct object_surface *obj_surface,
3066     const VARectangle *src_rect,
3067     const VARectangle *dst_rect
3068 )
3069 {
3070     struct i965_driver_data *i965 = i965_driver_data(ctx);
3071     struct intel_batchbuffer *batch = i965->batch;
3072     unsigned int index = obj_surface->subpic_render_idx;
3073     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
3074
3075     assert(obj_subpic);
3076     gen7_render_initialize(ctx);
3077     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
3078     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
3079     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
3080     intel_batchbuffer_flush(batch);
3081 }
3082
3083
3084 void
3085 intel_render_put_surface(
3086     VADriverContextP   ctx,
3087     struct object_surface *obj_surface,
3088     const VARectangle *src_rect,
3089     const VARectangle *dst_rect,
3090     unsigned int       flags
3091 )
3092 {
3093     struct i965_driver_data *i965 = i965_driver_data(ctx);
3094     struct i965_render_state *render_state = &i965->render_state;
3095     int has_done_scaling = 0;
3096     VASurfaceID out_surface_id = i965_post_processing(ctx,
3097                                                       obj_surface,
3098                                                       src_rect,
3099                                                       dst_rect,
3100                                                       flags,
3101                                                       &has_done_scaling);
3102
3103     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
3104
3105     if (out_surface_id != VA_INVALID_ID) {
3106         struct object_surface *new_obj_surface = SURFACE(out_surface_id);
3107         
3108         if (new_obj_surface && new_obj_surface->bo)
3109             obj_surface = new_obj_surface;
3110
3111         if (has_done_scaling)
3112             src_rect = dst_rect;
3113     }
3114
3115     render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
3116
3117     if (out_surface_id != VA_INVALID_ID)
3118         i965_DestroySurfaces(ctx, &out_surface_id, 1);
3119 }
3120
3121 void
3122 intel_render_put_subpicture(
3123     VADriverContextP   ctx,
3124     struct object_surface *obj_surface,
3125     const VARectangle *src_rect,
3126     const VARectangle *dst_rect
3127 )
3128 {
3129     struct i965_driver_data *i965 = i965_driver_data(ctx);
3130     struct i965_render_state *render_state = &i965->render_state;
3131
3132     render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
3133 }
3134
3135
3136 bool 
3137 i965_render_init(VADriverContextP ctx)
3138 {
3139     struct i965_driver_data *i965 = i965_driver_data(ctx);
3140     struct i965_render_state *render_state = &i965->render_state;
3141     int i;
3142
3143     /* kernel */
3144     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
3145                                  sizeof(render_kernels_gen5[0])));
3146     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
3147                                  sizeof(render_kernels_gen6[0])));
3148
3149     if (IS_GEN8(i965->intel.device_id)) {
3150         return gen8_render_init(ctx);
3151     } else  if (IS_GEN7(i965->intel.device_id)) {
3152         memcpy(render_state->render_kernels,
3153                (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
3154                sizeof(render_state->render_kernels));
3155         render_state->render_put_surface = gen7_render_put_surface;
3156         render_state->render_put_subpicture = gen7_render_put_subpicture;
3157     } else if (IS_GEN6(i965->intel.device_id)) {
3158         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
3159         render_state->render_put_surface = gen6_render_put_surface;
3160         render_state->render_put_subpicture = gen6_render_put_subpicture;
3161     } else if (IS_IRONLAKE(i965->intel.device_id)) {
3162         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
3163         render_state->render_put_surface = i965_render_put_surface;
3164         render_state->render_put_subpicture = i965_render_put_subpicture;
3165     } else {
3166         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
3167         render_state->render_put_surface = i965_render_put_surface;
3168         render_state->render_put_subpicture = i965_render_put_subpicture;
3169     }
3170
3171     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3172         struct i965_kernel *kernel = &render_state->render_kernels[i];
3173
3174         if (!kernel->size)
3175             continue;
3176
3177         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
3178                                   kernel->name, 
3179                                   kernel->size, 0x1000);
3180         assert(kernel->bo);
3181         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
3182     }
3183
3184     /* constant buffer */
3185     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
3186                       "constant buffer",
3187                       4096, 64);
3188     assert(render_state->curbe.bo);
3189
3190     return true;
3191 }
3192
3193 void 
3194 i965_render_terminate(VADriverContextP ctx)
3195 {
3196     int i;
3197     struct i965_driver_data *i965 = i965_driver_data(ctx);
3198     struct i965_render_state *render_state = &i965->render_state;
3199
3200     if (IS_GEN8(i965->intel.device_id)) {
3201         gen8_render_terminate(ctx);
3202         return;
3203     } 
3204
3205     dri_bo_unreference(render_state->curbe.bo);
3206     render_state->curbe.bo = NULL;
3207
3208     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3209         struct i965_kernel *kernel = &render_state->render_kernels[i];
3210         
3211         dri_bo_unreference(kernel->bo);
3212         kernel->bo = NULL;
3213     }
3214
3215     dri_bo_unreference(render_state->vb.vertex_buffer);
3216     render_state->vb.vertex_buffer = NULL;
3217     dri_bo_unreference(render_state->vs.state);
3218     render_state->vs.state = NULL;
3219     dri_bo_unreference(render_state->sf.state);
3220     render_state->sf.state = NULL;
3221     dri_bo_unreference(render_state->wm.sampler);
3222     render_state->wm.sampler = NULL;
3223     dri_bo_unreference(render_state->wm.state);
3224     render_state->wm.state = NULL;
3225     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
3226     dri_bo_unreference(render_state->cc.viewport);
3227     render_state->cc.viewport = NULL;
3228     dri_bo_unreference(render_state->cc.state);
3229     render_state->cc.state = NULL;
3230     dri_bo_unreference(render_state->cc.blend);
3231     render_state->cc.blend = NULL;
3232     dri_bo_unreference(render_state->cc.depth_stencil);
3233     render_state->cc.depth_stencil = NULL;
3234
3235     if (render_state->draw_region) {
3236         dri_bo_unreference(render_state->draw_region->bo);
3237         free(render_state->draw_region);
3238         render_state->draw_region = NULL;
3239     }
3240 }
3241