CHV: Add PCIID placeholders for CHV
src/i965_render.c (platform/upstream/libva-intel-driver.git)
/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

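/*
 * Layout of the surface-state/binding-table bo: one padded surface-state
 * block per render surface, followed by the binding table itself, whose
 * entries point back at those blocks.
 */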
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

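/* Reinterpret the bits of a float as a uint32_t through a union (the usual
 * C type-punning idiom), so float constants can be emitted into the batch
 * buffer via OUT_BATCH(). */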
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

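/*
 * The URB_*_ENTRIES/URB_*_ENTRY_SIZE pairs below partition the URB into
 * back-to-back VS/GS/CLIP/SF/CS regions; i965_render_urb_layout() turns
 * the sums of entries * entry size into the cumulative fence offsets
 * programmed with CMD_URB_FENCE.
 */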
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

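/*
 * YUV->RGB conversion matrices, one row per output channel {R, G, B}.
 * The first three columns are the coefficients for Y, Cb and Cr; the
 * fourth column carries the normalized input offsets consumed by the
 * exa_wm_yuv_rgb shaders (-0.06275 = -16/255 for luma, -0.50196 =
 * -128/255 for chroma), so e.g. BT.601 computes
 *
 *   R = 1.164 * (Y - 16/255) + 1.596 * (Cr - 128/255)
 *   G = 1.164 * (Y - 16/255) - 0.392 * (Cb - 128/255) - 0.813 * (Cr - 128/255)
 *   B = 1.164 * (Y - 16/255) + 2.017 * (Cb - 128/255)
 *
 * (The exact way the shader folds the offsets in is an assumption here;
 * the coefficients are the standard video-range expansion matrices.)
 */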
static float yuv_to_rgb_bt601[3][4] = {
    {1.164,         0,      1.596,          -0.06275,},
    {1.164,         -0.392, -0.813,         -0.50196,},
    {1.164,         2.017,  0,              -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
    {1.164,         0,      1.793,          -0.06275,},
    {1.164,         -0.213, -0.533,         -0.50196,},
    {1.164,         2.112,  0,              -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
    {1.164,         0,      1.794,          -0.06275,},
    {1.164,         -0.258, -0.5425,        -0.50196,},
    {1.164,         2.078,  0,              -0.50196,},
};

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store the alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" */
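/* After the memset() in the callers every field is HSW_SCS_ZERO, which
 * would make the sampler return zero for all channels, so on Haswell each
 * channel is routed back to itself explicitly. */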
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

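/*
 * The vertex buffer holds three vertices of a RECTLIST, each four floats
 * {s, t, x, y}, written in bottom-right, bottom-left, top-left order; the
 * hardware derives the fourth corner of the rectangle. Rotation only
 * changes which texture coordinates land on which corner.
 */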
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

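/*
 * CURBE layout filled in by i965_render_upload_constants() for the
 * exa_wm_* shaders:
 *   16-bit word 0   source sampling mode (0: planar Y/U/V, 1: NV12, 2: Y800)
 *   16-bit word 1   non-zero to skip the color-balance transformation
 *   floats 4..7     color balance: contrast, brightness,
 *                   cos(hue)*contrast*saturation, sin(hue)*contrast*saturation
 *   floats 8..19    the 3x4 YUV->RGB matrix selected from the tables above
 */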
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                   /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

1449 static void
1450 i965_render_upload_image_palette(
1451     VADriverContextP ctx,
1452     struct object_image *obj_image,
1453     unsigned int     alpha
1454 )
1455 {
1456     struct i965_driver_data *i965 = i965_driver_data(ctx);
1457     struct intel_batchbuffer *batch = i965->batch;
1458     unsigned int i;
1459
1460     assert(obj_image);
1461
1462     if (!obj_image)
1463         return;
1464
1465     if (obj_image->image.num_palette_entries == 0)
1466         return;
1467
1468     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1469     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1470     /* fill palette: bits 31:24 carry alpha, bits 23:0 the RGB entry */
1472     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1473         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1474     ADVANCE_BATCH(batch);
1475 }
1476
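/*
 * Kick off the actual draw: bind the vertex buffer written by
 * i965_render_upload_vertex() and emit a single RECTLIST primitive of
 * three vertices.  The buffer holds tightly packed { x, y, s, t } floats
 * (16-byte pitch); on IRONLAKE the end-address dword is emitted as a
 * relocation, while the older path writes a maximum-index value of 3
 * instead.
 */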
1477 static void
1478 i965_render_startup(VADriverContextP ctx)
1479 {
1480     struct i965_driver_data *i965 = i965_driver_data(ctx);
1481     struct intel_batchbuffer *batch = i965->batch;
1482     struct i965_render_state *render_state = &i965->render_state;
1483
1484     BEGIN_BATCH(batch, 11);
1485     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1486     OUT_BATCH(batch, 
1487               (0 << VB0_BUFFER_INDEX_SHIFT) |
1488               VB0_VERTEXDATA |
1489               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1490     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1491
1492     if (IS_IRONLAKE(i965->intel.device_info))
1493         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1494     else
1495         OUT_BATCH(batch, 3);
1496
1497     OUT_BATCH(batch, 0);
1498
1499     OUT_BATCH(batch, 
1500               CMD_3DPRIMITIVE |
1501               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1502               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1503               (0 << 9) |
1504               4);
1505     OUT_BATCH(batch, 3); /* vertex count per instance */
1506     OUT_BATCH(batch, 0); /* start vertex offset */
1507     OUT_BATCH(batch, 1); /* single instance */
1508     OUT_BATCH(batch, 0); /* start instance location */
1509     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1510     ADVANCE_BATCH(batch);
1511 }
1512
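/*
 * Clear the destination drawable with a solid-color XY_COLOR_BLT before
 * compositing.  BR13 packs the raster operation (0xf0, PATCOPY), the
 * color depth and the destination pitch; for tiled targets the pitch is
 * programmed in dwords, hence the divide by four.  GEN6/GEN7 route the
 * blit through the BLT ring, hence the _blt variant of the batch start.
 */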
1513 static void 
1514 i965_clear_dest_region(VADriverContextP ctx)
1515 {
1516     struct i965_driver_data *i965 = i965_driver_data(ctx);
1517     struct intel_batchbuffer *batch = i965->batch;
1518     struct i965_render_state *render_state = &i965->render_state;
1519     struct intel_region *dest_region = render_state->draw_region;
1520     unsigned int blt_cmd, br13;
1521     int pitch;
1522
1523     blt_cmd = XY_COLOR_BLT_CMD;
1524     br13 = 0xf0 << 16;
1525     pitch = dest_region->pitch;
1526
1527     if (dest_region->cpp == 4) {
1528         br13 |= BR13_8888;
1529         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1530     } else {
1531         assert(dest_region->cpp == 2);
1532         br13 |= BR13_565;
1533     }
1534
1535     if (dest_region->tiling != I915_TILING_NONE) {
1536         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1537         pitch /= 4;
1538     }
1539
1540     br13 |= pitch;
1541
1542     if (IS_GEN6(i965->intel.device_info) ||
1543         IS_GEN7(i965->intel.device_info)) {
1544         intel_batchbuffer_start_atomic_blt(batch, 24);
1545         BEGIN_BLT_BATCH(batch, 6);
1546     } else {
1547         intel_batchbuffer_start_atomic(batch, 24);
1548         BEGIN_BATCH(batch, 6);
1549     }
1550
1551     OUT_BATCH(batch, blt_cmd);
1552     OUT_BATCH(batch, br13);
1553     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1554     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1555               (dest_region->x + dest_region->width));
1556     OUT_RELOC(batch, dest_region->bo, 
1557               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1558               0);
1559     OUT_BATCH(batch, 0x0);
1560     ADVANCE_BATCH(batch);
1561     intel_batchbuffer_end_atomic(batch);
1562 }
1563
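/*
 * Pre-GEN6 pipeline emission: clear the target, then emit the classic
 * pointer-based pipeline (the unit state lives in the bos set up by
 * i965_render_initialize()) followed by the RECTLIST draw, all inside a
 * single atomic batch that opens with an MI_FLUSH.
 */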
1564 static void
1565 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1566 {
1567     struct i965_driver_data *i965 = i965_driver_data(ctx);
1568     struct intel_batchbuffer *batch = i965->batch;
1569
1570     i965_clear_dest_region(ctx);
1571     intel_batchbuffer_start_atomic(batch, 0x1000);
1572     intel_batchbuffer_emit_mi_flush(batch);
1573     i965_render_pipeline_select(ctx);
1574     i965_render_state_sip(ctx);
1575     i965_render_state_base_address(ctx);
1576     i965_render_binding_table_pointers(ctx);
1577     i965_render_constant_color(ctx);
1578     i965_render_pipelined_pointers(ctx);
1579     i965_render_urb_layout(ctx);
1580     i965_render_cs_urb_layout(ctx);
1581     i965_render_constant_buffer(ctx);
1582     i965_render_drawing_rectangle(ctx);
1583     i965_render_vertex_elements(ctx);
1584     i965_render_startup(ctx);
1585     intel_batchbuffer_end_atomic(batch);
1586 }
1587
1588 static void
1589 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1590 {
1591     struct i965_driver_data *i965 = i965_driver_data(ctx);
1592     struct intel_batchbuffer *batch = i965->batch;
1593
1594     intel_batchbuffer_start_atomic(batch, 0x1000);
1595     intel_batchbuffer_emit_mi_flush(batch);
1596     i965_render_pipeline_select(ctx);
1597     i965_render_state_sip(ctx);
1598     i965_render_state_base_address(ctx);
1599     i965_render_binding_table_pointers(ctx);
1600     i965_render_constant_color(ctx);
1601     i965_render_pipelined_pointers(ctx);
1602     i965_render_urb_layout(ctx);
1603     i965_render_cs_urb_layout(ctx);
1604     i965_render_constant_buffer(ctx);
1605     i965_render_drawing_rectangle(ctx);
1606     i965_render_vertex_elements(ctx);
1607     i965_render_startup(ctx);
1608     intel_batchbuffer_end_atomic(batch);
1609 }
1610
1611
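/*
 * (Re)allocate every buffer object the pre-GEN6 fixed-function pipeline
 * needs: the vertex buffer, the VS/SF/WM/CC unit state, the combined
 * surface-state/binding-table block, the sampler states and the CC
 * viewport.  GS and CLIP get no state since both stages stay disabled.
 */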
1612 static void 
1613 i965_render_initialize(VADriverContextP ctx)
1614 {
1615     struct i965_driver_data *i965 = i965_driver_data(ctx);
1616     struct i965_render_state *render_state = &i965->render_state;
1617     dri_bo *bo;
1618
1619     /* VERTEX BUFFER */
1620     dri_bo_unreference(render_state->vb.vertex_buffer);
1621     bo = dri_bo_alloc(i965->intel.bufmgr,
1622                       "vertex buffer",
1623                       4096,
1624                       4096);
1625     assert(bo);
1626     render_state->vb.vertex_buffer = bo;
1627
1628     /* VS */
1629     dri_bo_unreference(render_state->vs.state);
1630     bo = dri_bo_alloc(i965->intel.bufmgr,
1631                       "vs state",
1632                       sizeof(struct i965_vs_unit_state),
1633                       64);
1634     assert(bo);
1635     render_state->vs.state = bo;
1636
1637     /* GS */
1638     /* CLIP */
1639     /* SF */
1640     dri_bo_unreference(render_state->sf.state);
1641     bo = dri_bo_alloc(i965->intel.bufmgr,
1642                       "sf state",
1643                       sizeof(struct i965_sf_unit_state),
1644                       64);
1645     assert(bo);
1646     render_state->sf.state = bo;
1647
1648     /* WM */
1649     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1650     bo = dri_bo_alloc(i965->intel.bufmgr,
1651                       "surface state & binding table",
1652                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1653                       4096);
1654     assert(bo);
1655     render_state->wm.surface_state_binding_table_bo = bo;
1656
1657     dri_bo_unreference(render_state->wm.sampler);
1658     bo = dri_bo_alloc(i965->intel.bufmgr,
1659                       "sampler state",
1660                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1661                       64);
1662     assert(bo);
1663     render_state->wm.sampler = bo;
1664     render_state->wm.sampler_count = 0;
1665
1666     dri_bo_unreference(render_state->wm.state);
1667     bo = dri_bo_alloc(i965->intel.bufmgr,
1668                       "wm state",
1669                       sizeof(struct i965_wm_unit_state),
1670                       64);
1671     assert(bo);
1672     render_state->wm.state = bo;
1673
1674     /* COLOR CALCULATOR */
1675     dri_bo_unreference(render_state->cc.state);
1676     bo = dri_bo_alloc(i965->intel.bufmgr,
1677                       "color calc state",
1678                       sizeof(struct i965_cc_unit_state),
1679                       64);
1680     assert(bo);
1681     render_state->cc.state = bo;
1682
1683     dri_bo_unreference(render_state->cc.viewport);
1684     bo = dri_bo_alloc(i965->intel.bufmgr,
1685                       "cc viewport",
1686                       sizeof(struct i965_cc_viewport),
1687                       64);
1688     assert(bo);
1689     render_state->cc.viewport = bo;
1690 }
1691
1692 static void
1693 i965_render_put_surface(
1694     VADriverContextP   ctx,
1695     struct object_surface *obj_surface,
1696     const VARectangle *src_rect,
1697     const VARectangle *dst_rect,
1698     unsigned int       flags
1699 )
1700 {
1701     struct i965_driver_data *i965 = i965_driver_data(ctx);
1702     struct intel_batchbuffer *batch = i965->batch;
1703
1704     i965_render_initialize(ctx);
1705     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1706     i965_surface_render_pipeline_setup(ctx);
1707     intel_batchbuffer_flush(batch);
1708 }
1709
1710 static void
1711 i965_render_put_subpicture(
1712     VADriverContextP   ctx,
1713     struct object_surface *obj_surface,
1714     const VARectangle *src_rect,
1715     const VARectangle *dst_rect
1716 )
1717 {
1718     struct i965_driver_data *i965 = i965_driver_data(ctx);
1719     struct intel_batchbuffer *batch = i965->batch;
1720     unsigned int index = obj_surface->subpic_render_idx;
1721     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1722
1723     assert(obj_subpic);
1724
1725     i965_render_initialize(ctx);
1726     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1727     i965_subpic_render_pipeline_setup(ctx);
1728     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1729     intel_batchbuffer_flush(batch);
1730 }
1731
1732 /*
1733  * for GEN6+
1734  */
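/*
 * From GEN6 on the fixed-function unit descriptors disappear: pipeline
 * state is emitted inline in the batch, and the remaining indirect
 * objects (color calc, blend, depth/stencil, viewport) live in small
 * dedicated bos that gen6_render_initialize() (re)allocates below.
 */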
1735 static void 
1736 gen6_render_initialize(VADriverContextP ctx)
1737 {
1738     struct i965_driver_data *i965 = i965_driver_data(ctx);
1739     struct i965_render_state *render_state = &i965->render_state;
1740     dri_bo *bo;
1741
1742     /* VERTEX BUFFER */
1743     dri_bo_unreference(render_state->vb.vertex_buffer);
1744     bo = dri_bo_alloc(i965->intel.bufmgr,
1745                       "vertex buffer",
1746                       4096,
1747                       4096);
1748     assert(bo);
1749     render_state->vb.vertex_buffer = bo;
1750
1751     /* WM */
1752     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1753     bo = dri_bo_alloc(i965->intel.bufmgr,
1754                       "surface state & binding table",
1755                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1756                       4096);
1757     assert(bo);
1758     render_state->wm.surface_state_binding_table_bo = bo;
1759
1760     dri_bo_unreference(render_state->wm.sampler);
1761     bo = dri_bo_alloc(i965->intel.bufmgr,
1762                       "sampler state",
1763                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1764                       4096);
1765     assert(bo);
1766     render_state->wm.sampler = bo;
1767     render_state->wm.sampler_count = 0;
1768
1769     /* COLOR CALCULATOR */
1770     dri_bo_unreference(render_state->cc.state);
1771     bo = dri_bo_alloc(i965->intel.bufmgr,
1772                       "color calc state",
1773                       sizeof(struct gen6_color_calc_state),
1774                       4096);
1775     assert(bo);
1776     render_state->cc.state = bo;
1777
1778     /* CC VIEWPORT */
1779     dri_bo_unreference(render_state->cc.viewport);
1780     bo = dri_bo_alloc(i965->intel.bufmgr,
1781                       "cc viewport",
1782                       sizeof(struct i965_cc_viewport),
1783                       4096);
1784     assert(bo);
1785     render_state->cc.viewport = bo;
1786
1787     /* BLEND STATE */
1788     dri_bo_unreference(render_state->cc.blend);
1789     bo = dri_bo_alloc(i965->intel.bufmgr,
1790                       "blend state",
1791                       sizeof(struct gen6_blend_state),
1792                       4096);
1793     assert(bo);
1794     render_state->cc.blend = bo;
1795
1796     /* DEPTH & STENCIL STATE */
1797     dri_bo_unreference(render_state->cc.depth_stencil);
1798     bo = dri_bo_alloc(i965->intel.bufmgr,
1799                       "depth & stencil state",
1800                       sizeof(struct gen6_depth_stencil_state),
1801                       4096);
1802     assert(bo);
1803     render_state->cc.depth_stencil = bo;
1804 }
1805
1806 static void
1807 gen6_render_color_calc_state(VADriverContextP ctx)
1808 {
1809     struct i965_driver_data *i965 = i965_driver_data(ctx);
1810     struct i965_render_state *render_state = &i965->render_state;
1811     struct gen6_color_calc_state *color_calc_state;
1812     
1813     dri_bo_map(render_state->cc.state, 1);
1814     assert(render_state->cc.state->virtual);
1815     color_calc_state = render_state->cc.state->virtual;
1816     memset(color_calc_state, 0, sizeof(*color_calc_state));
1817     color_calc_state->constant_r = 1.0;
1818     color_calc_state->constant_g = 0.0;
1819     color_calc_state->constant_b = 1.0;
1820     color_calc_state->constant_a = 1.0;
1821     dri_bo_unmap(render_state->cc.state);
1822 }
1823
1824 static void
1825 gen6_render_blend_state(VADriverContextP ctx)
1826 {
1827     struct i965_driver_data *i965 = i965_driver_data(ctx);
1828     struct i965_render_state *render_state = &i965->render_state;
1829     struct gen6_blend_state *blend_state;
1830     
1831     dri_bo_map(render_state->cc.blend, 1);
1832     assert(render_state->cc.blend->virtual);
1833     blend_state = render_state->cc.blend->virtual;
1834     memset(blend_state, 0, sizeof(*blend_state));
1835     blend_state->blend1.logic_op_enable = 1;
1836     blend_state->blend1.logic_op_func = 0xc;
1837     dri_bo_unmap(render_state->cc.blend);
1838 }
1839
1840 static void
1841 gen6_render_depth_stencil_state(VADriverContextP ctx)
1842 {
1843     struct i965_driver_data *i965 = i965_driver_data(ctx);
1844     struct i965_render_state *render_state = &i965->render_state;
1845     struct gen6_depth_stencil_state *depth_stencil_state;
1846     
1847     dri_bo_map(render_state->cc.depth_stencil, 1);
1848     assert(render_state->cc.depth_stencil->virtual);
1849     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1850     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1851     dri_bo_unmap(render_state->cc.depth_stencil);
1852 }
1853
1854 static void
1855 gen6_render_setup_states(
1856     VADriverContextP   ctx,
1857     struct object_surface *obj_surface,
1858     const VARectangle *src_rect,
1859     const VARectangle *dst_rect,
1860     unsigned int       flags
1861 )
1862 {
1863     i965_render_dest_surface_state(ctx, 0);
1864     i965_render_src_surfaces_state(ctx, obj_surface, flags);
1865     i965_render_sampler(ctx);
1866     i965_render_cc_viewport(ctx);
1867     gen6_render_color_calc_state(ctx);
1868     gen6_render_blend_state(ctx);
1869     gen6_render_depth_stencil_state(ctx);
1870     i965_render_upload_constants(ctx, obj_surface, flags);
1871     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1872 }
1873
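/*
 * The gen6_emit_* helpers below write packets with bare OUT_BATCH();
 * they rely on gen6_render_emit_states() having reserved enough batch
 * space up front via intel_batchbuffer_start_atomic(batch, 0x1000).
 */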
1874 static void
1875 gen6_emit_invarient_states(VADriverContextP ctx)
1876 {
1877     struct i965_driver_data *i965 = i965_driver_data(ctx);
1878     struct intel_batchbuffer *batch = i965->batch;
1879
1880     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1881
1882     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1883     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1884               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1885     OUT_BATCH(batch, 0);
1886
1887     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1888     OUT_BATCH(batch, 1);
1889
1890     /* Set system instruction pointer */
1891     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1892     OUT_BATCH(batch, 0);
1893 }
1894
1895 static void
1896 gen6_emit_state_base_address(VADriverContextP ctx)
1897 {
1898     struct i965_driver_data *i965 = i965_driver_data(ctx);
1899     struct intel_batchbuffer *batch = i965->batch;
1900     struct i965_render_state *render_state = &i965->render_state;
1901
1902     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1903     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1904     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1905     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1906     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1907     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1908     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1909     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1910     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1911     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1912 }
1913
1914 static void
1915 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1916 {
1917     struct i965_driver_data *i965 = i965_driver_data(ctx);
1918     struct intel_batchbuffer *batch = i965->batch;
1919     struct i965_render_state *render_state = &i965->render_state;
1920
1921     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1922               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1923               (4 - 2));
1924     OUT_BATCH(batch, 0);
1925     OUT_BATCH(batch, 0);
1926     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1927 }
1928
1929 static void
1930 gen6_emit_urb(VADriverContextP ctx)
1931 {
1932     struct i965_driver_data *i965 = i965_driver_data(ctx);
1933     struct intel_batchbuffer *batch = i965->batch;
1934
1935     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1936     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1937               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1938     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1939               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1940 }
1941
1942 static void
1943 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1944 {
1945     struct i965_driver_data *i965 = i965_driver_data(ctx);
1946     struct intel_batchbuffer *batch = i965->batch;
1947     struct i965_render_state *render_state = &i965->render_state;
1948
1949     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1950     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1951     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1952     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1953 }
1954
1955 static void
1956 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1957 {
1958     struct i965_driver_data *i965 = i965_driver_data(ctx);
1959     struct intel_batchbuffer *batch = i965->batch;
1960     struct i965_render_state *render_state = &i965->render_state;
1961
1962     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1963               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1964               (4 - 2));
1965     OUT_BATCH(batch, 0); /* VS */
1966     OUT_BATCH(batch, 0); /* GS */
1967     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1968 }
1969
1970 static void
1971 gen6_emit_binding_table(VADriverContextP ctx)
1972 {
1973     struct i965_driver_data *i965 = i965_driver_data(ctx);
1974     struct intel_batchbuffer *batch = i965->batch;
1975
1976     /* Binding table pointers */
1977     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1978               GEN6_BINDING_TABLE_MODIFY_PS |
1979               (4 - 2));
1980     OUT_BATCH(batch, 0);                /* vs */
1981     OUT_BATCH(batch, 0);                /* gs */
1982     /* Only the PS uses the binding table */
1983     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1984 }
1985
1986 static void
1987 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1988 {
1989     struct i965_driver_data *i965 = i965_driver_data(ctx);
1990     struct intel_batchbuffer *batch = i965->batch;
1991
1992     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1993     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1994               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1995     OUT_BATCH(batch, 0);
1996     OUT_BATCH(batch, 0);
1997     OUT_BATCH(batch, 0);
1998     OUT_BATCH(batch, 0);
1999     OUT_BATCH(batch, 0);
2000
2001     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2002     OUT_BATCH(batch, 0);
2003 }
2004
2005 static void
2006 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2007 {
2008     i965_render_drawing_rectangle(ctx);
2009 }
2010
2011 static void 
2012 gen6_emit_vs_state(VADriverContextP ctx)
2013 {
2014     struct i965_driver_data *i965 = i965_driver_data(ctx);
2015     struct intel_batchbuffer *batch = i965->batch;
2016
2017     /* disable VS constant buffer */
2018     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2019     OUT_BATCH(batch, 0);
2020     OUT_BATCH(batch, 0);
2021     OUT_BATCH(batch, 0);
2022     OUT_BATCH(batch, 0);
2023         
2024     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2025     OUT_BATCH(batch, 0); /* without VS kernel */
2026     OUT_BATCH(batch, 0);
2027     OUT_BATCH(batch, 0);
2028     OUT_BATCH(batch, 0);
2029     OUT_BATCH(batch, 0); /* pass-through */
2030 }
2031
2032 static void 
2033 gen6_emit_gs_state(VADriverContextP ctx)
2034 {
2035     struct i965_driver_data *i965 = i965_driver_data(ctx);
2036     struct intel_batchbuffer *batch = i965->batch;
2037
2038     /* disable GS constant buffer */
2039     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2040     OUT_BATCH(batch, 0);
2041     OUT_BATCH(batch, 0);
2042     OUT_BATCH(batch, 0);
2043     OUT_BATCH(batch, 0);
2044         
2045     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2046     OUT_BATCH(batch, 0); /* without GS kernel */
2047     OUT_BATCH(batch, 0);
2048     OUT_BATCH(batch, 0);
2049     OUT_BATCH(batch, 0);
2050     OUT_BATCH(batch, 0);
2051     OUT_BATCH(batch, 0); /* pass-through */
2052 }
2053
2054 static void 
2055 gen6_emit_clip_state(VADriverContextP ctx)
2056 {
2057     struct i965_driver_data *i965 = i965_driver_data(ctx);
2058     struct intel_batchbuffer *batch = i965->batch;
2059
2060     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2061     OUT_BATCH(batch, 0);
2062     OUT_BATCH(batch, 0); /* pass-through */
2063     OUT_BATCH(batch, 0);
2064 }
2065
2066 static void 
2067 gen6_emit_sf_state(VADriverContextP ctx)
2068 {
2069     struct i965_driver_data *i965 = i965_driver_data(ctx);
2070     struct intel_batchbuffer *batch = i965->batch;
2071
2072     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2073     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2074               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2075               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2076     OUT_BATCH(batch, 0);
2077     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2078     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2079     OUT_BATCH(batch, 0);
2080     OUT_BATCH(batch, 0);
2081     OUT_BATCH(batch, 0);
2082     OUT_BATCH(batch, 0);
2083     OUT_BATCH(batch, 0); /* DW9 */
2084     OUT_BATCH(batch, 0);
2085     OUT_BATCH(batch, 0);
2086     OUT_BATCH(batch, 0);
2087     OUT_BATCH(batch, 0);
2088     OUT_BATCH(batch, 0); /* DW14 */
2089     OUT_BATCH(batch, 0);
2090     OUT_BATCH(batch, 0);
2091     OUT_BATCH(batch, 0);
2092     OUT_BATCH(batch, 0);
2093     OUT_BATCH(batch, 0); /* DW19 */
2094 }
2095
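/*
 * Program the pixel-shader stage: bind the CURBE bo as PS constant
 * buffer 0, point 3DSTATE_WM at the selected render kernel, and enable
 * SIMD16 dispatch starting at GRF 6 with the device's maximum WM thread
 * count.  'kernel' selects PS_KERNEL or PS_SUBPIC_KERNEL.
 */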
2096 static void 
2097 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2098 {
2099     struct i965_driver_data *i965 = i965_driver_data(ctx);
2100     struct intel_batchbuffer *batch = i965->batch;
2101     struct i965_render_state *render_state = &i965->render_state;
2102
2103     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2104               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2105               (5 - 2));
2106     OUT_RELOC(batch, 
2107               render_state->curbe.bo,
2108               I915_GEM_DOMAIN_INSTRUCTION, 0,
2109               (URB_CS_ENTRY_SIZE-1));
2110     OUT_BATCH(batch, 0);
2111     OUT_BATCH(batch, 0);
2112     OUT_BATCH(batch, 0);
2113
2114     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2115     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2116               I915_GEM_DOMAIN_INSTRUCTION, 0,
2117               0);
2118     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2119               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2120     OUT_BATCH(batch, 0);
2121     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2122     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2123               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2124               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2125     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2126               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2127     OUT_BATCH(batch, 0);
2128     OUT_BATCH(batch, 0);
2129 }
2130
2131 static void
2132 gen6_emit_vertex_element_state(VADriverContextP ctx)
2133 {
2134     struct i965_driver_data *i965 = i965_driver_data(ctx);
2135     struct intel_batchbuffer *batch = i965->batch;
2136
2137     /* Set up our vertex elements, sourced from the single vertex buffer. */
2138     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2139     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2140     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2141               GEN6_VE0_VALID |
2142               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2143               (0 << VE0_OFFSET_SHIFT));
2144     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2145               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2146               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2147               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2148     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2149     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2150               GEN6_VE0_VALID |
2151               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2152               (8 << VE0_OFFSET_SHIFT));
2153     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2154               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2155               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2156               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2157 }
2158
2159 static void
2160 gen6_emit_vertices(VADriverContextP ctx)
2161 {
2162     struct i965_driver_data *i965 = i965_driver_data(ctx);
2163     struct intel_batchbuffer *batch = i965->batch;
2164     struct i965_render_state *render_state = &i965->render_state;
2165
2166     BEGIN_BATCH(batch, 11);
2167     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2168     OUT_BATCH(batch, 
2169               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2170               GEN6_VB0_VERTEXDATA |
2171               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2172     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2173     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2174     OUT_BATCH(batch, 0);
2175
2176     OUT_BATCH(batch, 
2177               CMD_3DPRIMITIVE |
2178               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2179               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2180               (0 << 9) |
2181               4);
2182     OUT_BATCH(batch, 3); /* vertex count per instance */
2183     OUT_BATCH(batch, 0); /* start vertex offset */
2184     OUT_BATCH(batch, 1); /* single instance */
2185     OUT_BATCH(batch, 0); /* start instance location */
2186     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2187     ADVANCE_BATCH(batch);
2188 }
2189
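/*
 * Emit the complete GEN6 3D pipeline setup followed by the draw call,
 * bracketed by an atomic batch and a leading MI_FLUSH.  The packet order
 * below loosely follows the programming sequence the hardware
 * documentation prescribes; reordering it is not safe in general.
 */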
2190 static void
2191 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2192 {
2193     struct i965_driver_data *i965 = i965_driver_data(ctx);
2194     struct intel_batchbuffer *batch = i965->batch;
2195
2196     intel_batchbuffer_start_atomic(batch, 0x1000);
2197     intel_batchbuffer_emit_mi_flush(batch);
2198     gen6_emit_invarient_states(ctx);
2199     gen6_emit_state_base_address(ctx);
2200     gen6_emit_viewport_state_pointers(ctx);
2201     gen6_emit_urb(ctx);
2202     gen6_emit_cc_state_pointers(ctx);
2203     gen6_emit_sampler_state_pointers(ctx);
2204     gen6_emit_vs_state(ctx);
2205     gen6_emit_gs_state(ctx);
2206     gen6_emit_clip_state(ctx);
2207     gen6_emit_sf_state(ctx);
2208     gen6_emit_wm_state(ctx, kernel);
2209     gen6_emit_binding_table(ctx);
2210     gen6_emit_depth_buffer_state(ctx);
2211     gen6_emit_drawing_rectangle(ctx);
2212     gen6_emit_vertex_element_state(ctx);
2213     gen6_emit_vertices(ctx);
2214     intel_batchbuffer_end_atomic(batch);
2215 }
2216
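/*
 * Top-level GEN6 PutSurface: refresh the state bos, build all indirect
 * state for this blit, clear the destination region, emit the pipeline
 * and flush so the rendering is submitted before the caller proceeds.
 */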
2217 static void
2218 gen6_render_put_surface(
2219     VADriverContextP   ctx,
2220     struct object_surface *obj_surface,
2221     const VARectangle *src_rect,
2222     const VARectangle *dst_rect,
2223     unsigned int       flags
2224 )
2225 {
2226     struct i965_driver_data *i965 = i965_driver_data(ctx);
2227     struct intel_batchbuffer *batch = i965->batch;
2228
2229     gen6_render_initialize(ctx);
2230     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2231     i965_clear_dest_region(ctx);
2232     gen6_render_emit_states(ctx, PS_KERNEL);
2233     intel_batchbuffer_flush(batch);
2234 }
2235
2236 static void
2237 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2238 {
2239     struct i965_driver_data *i965 = i965_driver_data(ctx);
2240     struct i965_render_state *render_state = &i965->render_state;
2241     struct gen6_blend_state *blend_state;
2242
2243     dri_bo_unmap(render_state->cc.state);    
2244     dri_bo_map(render_state->cc.blend, 1);
2245     assert(render_state->cc.blend->virtual);
2246     blend_state = render_state->cc.blend->virtual;
2247     memset(blend_state, 0, sizeof(*blend_state));
2248     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2249     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2250     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2251     blend_state->blend0.blend_enable = 1;
2252     blend_state->blend1.post_blend_clamp_enable = 1;
2253     blend_state->blend1.pre_blend_clamp_enable = 1;
2254     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2255     dri_bo_unmap(render_state->cc.blend);
2256 }
2257
2258 static void
2259 gen6_subpicture_render_setup_states(
2260     VADriverContextP   ctx,
2261     struct object_surface *obj_surface,
2262     const VARectangle *src_rect,
2263     const VARectangle *dst_rect
2264 )
2265 {
2266     i965_render_dest_surface_state(ctx, 0);
2267     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2268     i965_render_sampler(ctx);
2269     i965_render_cc_viewport(ctx);
2270     gen6_render_color_calc_state(ctx);
2271     gen6_subpicture_render_blend_state(ctx);
2272     gen6_render_depth_stencil_state(ctx);
2273     i965_subpic_render_upload_constants(ctx, obj_surface);
2274     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2275 }
2276
2277 static void
2278 gen6_render_put_subpicture(
2279     VADriverContextP   ctx,
2280     struct object_surface *obj_surface,
2281     const VARectangle *src_rect,
2282     const VARectangle *dst_rect
2283 )
2284 {
2285     struct i965_driver_data *i965 = i965_driver_data(ctx);
2286     struct intel_batchbuffer *batch = i965->batch;
2287     unsigned int index = obj_surface->subpic_render_idx;
2288     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2289
2290     assert(obj_subpic);
2291     gen6_render_initialize(ctx);
2292     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2293     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2294     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2295     intel_batchbuffer_flush(batch);
2296 }
2297
2298 /*
2299  * for GEN7
2300  */
2301 static void 
2302 gen7_render_initialize(VADriverContextP ctx)
2303 {
2304     struct i965_driver_data *i965 = i965_driver_data(ctx);
2305     struct i965_render_state *render_state = &i965->render_state;
2306     dri_bo *bo;
2307
2308     /* VERTEX BUFFER */
2309     dri_bo_unreference(render_state->vb.vertex_buffer);
2310     bo = dri_bo_alloc(i965->intel.bufmgr,
2311                       "vertex buffer",
2312                       4096,
2313                       4096);
2314     assert(bo);
2315     render_state->vb.vertex_buffer = bo;
2316
2317     /* WM */
2318     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2319     bo = dri_bo_alloc(i965->intel.bufmgr,
2320                       "surface state & binding table",
2321                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2322                       4096);
2323     assert(bo);
2324     render_state->wm.surface_state_binding_table_bo = bo;
2325
2326     dri_bo_unreference(render_state->wm.sampler);
2327     bo = dri_bo_alloc(i965->intel.bufmgr,
2328                       "sampler state",
2329                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2330                       4096);
2331     assert(bo);
2332     render_state->wm.sampler = bo;
2333     render_state->wm.sampler_count = 0;
2334
2335     /* COLOR CALCULATOR */
2336     dri_bo_unreference(render_state->cc.state);
2337     bo = dri_bo_alloc(i965->intel.bufmgr,
2338                       "color calc state",
2339                       sizeof(struct gen6_color_calc_state),
2340                       4096);
2341     assert(bo);
2342     render_state->cc.state = bo;
2343
2344     /* CC VIEWPORT */
2345     dri_bo_unreference(render_state->cc.viewport);
2346     bo = dri_bo_alloc(i965->intel.bufmgr,
2347                       "cc viewport",
2348                       sizeof(struct i965_cc_viewport),
2349                       4096);
2350     assert(bo);
2351     render_state->cc.viewport = bo;
2352
2353     /* BLEND STATE */
2354     dri_bo_unreference(render_state->cc.blend);
2355     bo = dri_bo_alloc(i965->intel.bufmgr,
2356                       "blend state",
2357                       sizeof(struct gen6_blend_state),
2358                       4096);
2359     assert(bo);
2360     render_state->cc.blend = bo;
2361
2362     /* DEPTH & STENCIL STATE */
2363     dri_bo_unreference(render_state->cc.depth_stencil);
2364     bo = dri_bo_alloc(i965->intel.bufmgr,
2365                       "depth & stencil state",
2366                       sizeof(struct gen6_depth_stencil_state),
2367                       4096);
2368     assert(bo);
2369     render_state->cc.depth_stencil = bo;
2370 }
2371
2372 /*
2373  * for GEN8 (ALIGNMENT below is used by the GEN8 state setup later in
2374  * this file; the gen7_* helpers that follow are still GEN7 code)
2375  */
2376 #define ALIGNMENT       64
2376
2377 static void
2378 gen7_render_color_calc_state(VADriverContextP ctx)
2379 {
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct i965_render_state *render_state = &i965->render_state;
2382     struct gen6_color_calc_state *color_calc_state;
2383     
2384     dri_bo_map(render_state->cc.state, 1);
2385     assert(render_state->cc.state->virtual);
2386     color_calc_state = render_state->cc.state->virtual;
2387     memset(color_calc_state, 0, sizeof(*color_calc_state));
2388     color_calc_state->constant_r = 1.0;
2389     color_calc_state->constant_g = 0.0;
2390     color_calc_state->constant_b = 1.0;
2391     color_calc_state->constant_a = 1.0;
2392     dri_bo_unmap(render_state->cc.state);
2393 }
2394
2395 static void
2396 gen7_render_blend_state(VADriverContextP ctx)
2397 {
2398     struct i965_driver_data *i965 = i965_driver_data(ctx);
2399     struct i965_render_state *render_state = &i965->render_state;
2400     struct gen6_blend_state *blend_state;
2401     
2402     dri_bo_map(render_state->cc.blend, 1);
2403     assert(render_state->cc.blend->virtual);
2404     blend_state = render_state->cc.blend->virtual;
2405     memset(blend_state, 0, sizeof(*blend_state));
2406     blend_state->blend1.logic_op_enable = 1;
2407     blend_state->blend1.logic_op_func = 0xc;
2408     blend_state->blend1.pre_blend_clamp_enable = 1;
2409     dri_bo_unmap(render_state->cc.blend);
2410 }
2411
2412 static void
2413 gen7_render_depth_stencil_state(VADriverContextP ctx)
2414 {
2415     struct i965_driver_data *i965 = i965_driver_data(ctx);
2416     struct i965_render_state *render_state = &i965->render_state;
2417     struct gen6_depth_stencil_state *depth_stencil_state;
2418     
2419     dri_bo_map(render_state->cc.depth_stencil, 1);
2420     assert(render_state->cc.depth_stencil->virtual);
2421     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2422     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2423     dri_bo_unmap(render_state->cc.depth_stencil);
2424 }
2425
2426 static void 
2427 gen7_render_sampler(VADriverContextP ctx)
2428 {
2429     struct i965_driver_data *i965 = i965_driver_data(ctx);
2430     struct i965_render_state *render_state = &i965->render_state;
2431     struct gen7_sampler_state *sampler_state;
2432     int i;
2433     
2434     assert(render_state->wm.sampler_count > 0);
2435     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2436
2437     dri_bo_map(render_state->wm.sampler, 1);
2438     assert(render_state->wm.sampler->virtual);
2439     sampler_state = render_state->wm.sampler->virtual;
2440     for (i = 0; i < render_state->wm.sampler_count; i++) {
2441         memset(sampler_state, 0, sizeof(*sampler_state));
2442         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2443         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2444         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2445         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2446         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2447         sampler_state++;
2448     }
2449
2450     dri_bo_unmap(render_state->wm.sampler);
2451 }
2452
2453
2454 static void
2455 gen7_render_setup_states(
2456     VADriverContextP   ctx,
2457     struct object_surface *obj_surface,
2458     const VARectangle *src_rect,
2459     const VARectangle *dst_rect,
2460     unsigned int       flags
2461 )
2462 {
2463     i965_render_dest_surface_state(ctx, 0);
2464     i965_render_src_surfaces_state(ctx, obj_surface, flags);
2465     gen7_render_sampler(ctx);
2466     i965_render_cc_viewport(ctx);
2467     gen7_render_color_calc_state(ctx);
2468     gen7_render_blend_state(ctx);
2469     gen7_render_depth_stencil_state(ctx);
2470     i965_render_upload_constants(ctx, obj_surface, flags);
2471     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2472 }
2473
2474
2475 static void
2476 gen7_emit_invarient_states(VADriverContextP ctx)
2477 {
2478     struct i965_driver_data *i965 = i965_driver_data(ctx);
2479     struct intel_batchbuffer *batch = i965->batch;
2480
2481     BEGIN_BATCH(batch, 1);
2482     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2483     ADVANCE_BATCH(batch);
2484
2485     BEGIN_BATCH(batch, 4);
2486     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2487     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2488               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2489     OUT_BATCH(batch, 0);
2490     OUT_BATCH(batch, 0);
2491     ADVANCE_BATCH(batch);
2492
2493     BEGIN_BATCH(batch, 2);
2494     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2495     OUT_BATCH(batch, 1);
2496     ADVANCE_BATCH(batch);
2497
2498     /* Set system instruction pointer */
2499     BEGIN_BATCH(batch, 2);
2500     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2501     OUT_BATCH(batch, 0);
2502     ADVANCE_BATCH(batch);
2503 }
2504
2505 static void
2506 gen7_emit_state_base_address(VADriverContextP ctx)
2507 {
2508     struct i965_driver_data *i965 = i965_driver_data(ctx);
2509     struct intel_batchbuffer *batch = i965->batch;
2510     struct i965_render_state *render_state = &i965->render_state;
2511
2512     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2513     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2514     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2515     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2516     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2517     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2518     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2519     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2520     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2521     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2522 }
2523
2524 static void
2525 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2526 {
2527     struct i965_driver_data *i965 = i965_driver_data(ctx);
2528     struct intel_batchbuffer *batch = i965->batch;
2529     struct i965_render_state *render_state = &i965->render_state;
2530
2531     BEGIN_BATCH(batch, 2);
2532     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2533     OUT_RELOC(batch,
2534               render_state->cc.viewport,
2535               I915_GEM_DOMAIN_INSTRUCTION, 0,
2536               0);
2537     ADVANCE_BATCH(batch);
2538
2539     BEGIN_BATCH(batch, 2);
2540     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2541     OUT_BATCH(batch, 0);
2542     ADVANCE_BATCH(batch);
2543 }
2544
2545 /*
2546  * URB layout on GEN7 
2547  * ----------------------------------------
2548  * | PS Push Constants (8KB) | VS entries |
2549  * ----------------------------------------
2550  */
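/*
 * The first 8KB are handed to the PS push-constant allocator, so the VS
 * entries start at URB address 1 (the starting-address fields appear to
 * be in 8KB units).  HS and DS receive zero-sized allocations further
 * up, and Haswell doubles the VS entry count from 32 to 64.
 */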
2551 static void
2552 gen7_emit_urb(VADriverContextP ctx)
2553 {
2554     struct i965_driver_data *i965 = i965_driver_data(ctx);
2555     struct intel_batchbuffer *batch = i965->batch;
2556     unsigned int num_urb_entries = 32;
2557
2558     if (IS_HASWELL(i965->intel.device_info))
2559         num_urb_entries = 64;
2560
2561     BEGIN_BATCH(batch, 2);
2562     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2563     OUT_BATCH(batch, 8); /* in 1KBs */
2564     ADVANCE_BATCH(batch);
2565
2566     BEGIN_BATCH(batch, 2);
2567     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2568     OUT_BATCH(batch, 
2569               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2570               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2571               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2572     ADVANCE_BATCH(batch);
2573
2574     BEGIN_BATCH(batch, 2);
2575     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2576     OUT_BATCH(batch,
2577               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2578               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2579     ADVANCE_BATCH(batch);
2580
2581     BEGIN_BATCH(batch, 2);
2582     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2583     OUT_BATCH(batch,
2584               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2585               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2586     ADVANCE_BATCH(batch);
2587
2588     BEGIN_BATCH(batch, 2);
2589     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2590     OUT_BATCH(batch,
2591               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2592               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2593     ADVANCE_BATCH(batch);
2594 }
2595
2596 static void
2597 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2598 {
2599     struct i965_driver_data *i965 = i965_driver_data(ctx);
2600     struct intel_batchbuffer *batch = i965->batch;
2601     struct i965_render_state *render_state = &i965->render_state;
2602
2603     BEGIN_BATCH(batch, 2);
2604     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2605     OUT_RELOC(batch,
2606               render_state->cc.state,
2607               I915_GEM_DOMAIN_INSTRUCTION, 0,
2608               1);
2609     ADVANCE_BATCH(batch);
2610
2611     BEGIN_BATCH(batch, 2);
2612     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2613     OUT_RELOC(batch,
2614               render_state->cc.blend,
2615               I915_GEM_DOMAIN_INSTRUCTION, 0,
2616               1);
2617     ADVANCE_BATCH(batch);
2618
2619     BEGIN_BATCH(batch, 2);
2620     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2621     OUT_RELOC(batch,
2622               render_state->cc.depth_stencil,
2623               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2624               1);
2625     ADVANCE_BATCH(batch);
2626 }
2627
2628 static void
2629 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2630 {
2631     struct i965_driver_data *i965 = i965_driver_data(ctx);
2632     struct intel_batchbuffer *batch = i965->batch;
2633     struct i965_render_state *render_state = &i965->render_state;
2634
2635     BEGIN_BATCH(batch, 2);
2636     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2637     OUT_RELOC(batch,
2638               render_state->wm.sampler,
2639               I915_GEM_DOMAIN_INSTRUCTION, 0,
2640               0);
2641     ADVANCE_BATCH(batch);
2642 }
2643
2644 static void
2645 gen7_emit_binding_table(VADriverContextP ctx)
2646 {
2647     struct i965_driver_data *i965 = i965_driver_data(ctx);
2648     struct intel_batchbuffer *batch = i965->batch;
2649
2650     BEGIN_BATCH(batch, 2);
2651     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2652     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2653     ADVANCE_BATCH(batch);
2654 }
2655
2656 static void
2657 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2658 {
2659     struct i965_driver_data *i965 = i965_driver_data(ctx);
2660     struct intel_batchbuffer *batch = i965->batch;
2661
2662     BEGIN_BATCH(batch, 7);
2663     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2664     OUT_BATCH(batch,
2665               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2666               (I965_SURFACE_NULL << 29));
2667     OUT_BATCH(batch, 0);
2668     OUT_BATCH(batch, 0);
2669     OUT_BATCH(batch, 0);
2670     OUT_BATCH(batch, 0);
2671     OUT_BATCH(batch, 0);
2672     ADVANCE_BATCH(batch);
2673
2674     BEGIN_BATCH(batch, 3);
2675     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2676     OUT_BATCH(batch, 0);
2677     OUT_BATCH(batch, 0);
2678     ADVANCE_BATCH(batch);
2679 }
2680
2681 static void
2682 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2683 {
2684     i965_render_drawing_rectangle(ctx);
2685 }
2686
2687 static void 
2688 gen7_emit_vs_state(VADriverContextP ctx)
2689 {
2690     struct i965_driver_data *i965 = i965_driver_data(ctx);
2691     struct intel_batchbuffer *batch = i965->batch;
2692
2693     /* disable VS constant buffer */
2694     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2695     OUT_BATCH(batch, 0);
2696     OUT_BATCH(batch, 0);
2697     OUT_BATCH(batch, 0);
2698     OUT_BATCH(batch, 0);
2699     OUT_BATCH(batch, 0);
2700     OUT_BATCH(batch, 0);
2701         
2702     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2703     OUT_BATCH(batch, 0); /* without VS kernel */
2704     OUT_BATCH(batch, 0);
2705     OUT_BATCH(batch, 0);
2706     OUT_BATCH(batch, 0);
2707     OUT_BATCH(batch, 0); /* pass-through */
2708 }
2709
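/*
 * GEN7 exposes the tessellation stages, so bypassing the geometry path
 * takes more packets than on GEN6: GS, HS, TE, DS and STREAMOUT are all
 * disabled explicitly and each stage gets a null binding table, leaving
 * the pass-through VS feeding SF directly.
 */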
2710 static void 
2711 gen7_emit_bypass_state(VADriverContextP ctx)
2712 {
2713     struct i965_driver_data *i965 = i965_driver_data(ctx);
2714     struct intel_batchbuffer *batch = i965->batch;
2715
2716     /* bypass GS */
2717     BEGIN_BATCH(batch, 7);
2718     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2719     OUT_BATCH(batch, 0);
2720     OUT_BATCH(batch, 0);
2721     OUT_BATCH(batch, 0);
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     OUT_BATCH(batch, 0);
2725     ADVANCE_BATCH(batch);
2726
2727     BEGIN_BATCH(batch, 7);      
2728     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2729     OUT_BATCH(batch, 0); /* without GS kernel */
2730     OUT_BATCH(batch, 0);
2731     OUT_BATCH(batch, 0);
2732     OUT_BATCH(batch, 0);
2733     OUT_BATCH(batch, 0);
2734     OUT_BATCH(batch, 0); /* pass-through */
2735     ADVANCE_BATCH(batch);
2736
2737     BEGIN_BATCH(batch, 2);
2738     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2739     OUT_BATCH(batch, 0);
2740     ADVANCE_BATCH(batch);
2741
2742     /* disable HS */
2743     BEGIN_BATCH(batch, 7);
2744     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2745     OUT_BATCH(batch, 0);
2746     OUT_BATCH(batch, 0);
2747     OUT_BATCH(batch, 0);
2748     OUT_BATCH(batch, 0);
2749     OUT_BATCH(batch, 0);
2750     OUT_BATCH(batch, 0);
2751     ADVANCE_BATCH(batch);
2752
2753     BEGIN_BATCH(batch, 7);
2754     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2755     OUT_BATCH(batch, 0);
2756     OUT_BATCH(batch, 0);
2757     OUT_BATCH(batch, 0);
2758     OUT_BATCH(batch, 0);
2759     OUT_BATCH(batch, 0);
2760     OUT_BATCH(batch, 0);
2761     ADVANCE_BATCH(batch);
2762
2763     BEGIN_BATCH(batch, 2);
2764     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2765     OUT_BATCH(batch, 0);
2766     ADVANCE_BATCH(batch);
2767
2768     /* Disable TE */
2769     BEGIN_BATCH(batch, 4);
2770     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2771     OUT_BATCH(batch, 0);
2772     OUT_BATCH(batch, 0);
2773     OUT_BATCH(batch, 0);
2774     ADVANCE_BATCH(batch);
2775
2776     /* Disable DS */
2777     BEGIN_BATCH(batch, 7);
2778     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2779     OUT_BATCH(batch, 0);
2780     OUT_BATCH(batch, 0);
2781     OUT_BATCH(batch, 0);
2782     OUT_BATCH(batch, 0);
2783     OUT_BATCH(batch, 0);
2784     OUT_BATCH(batch, 0);
2785     ADVANCE_BATCH(batch);
2786
2787     BEGIN_BATCH(batch, 6);
2788     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2789     OUT_BATCH(batch, 0);
2790     OUT_BATCH(batch, 0);
2791     OUT_BATCH(batch, 0);
2792     OUT_BATCH(batch, 0);
2793     OUT_BATCH(batch, 0);
2794     ADVANCE_BATCH(batch);
2795
2796     BEGIN_BATCH(batch, 2);
2797     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2798     OUT_BATCH(batch, 0);
2799     ADVANCE_BATCH(batch);
2800
2801     /* Disable STREAMOUT */
2802     BEGIN_BATCH(batch, 3);
2803     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2804     OUT_BATCH(batch, 0);
2805     OUT_BATCH(batch, 0);
2806     ADVANCE_BATCH(batch);
2807 }
2808
2809 static void 
2810 gen7_emit_clip_state(VADriverContextP ctx)
2811 {
2812     struct i965_driver_data *i965 = i965_driver_data(ctx);
2813     struct intel_batchbuffer *batch = i965->batch;
2814
2815     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2816     OUT_BATCH(batch, 0);
2817     OUT_BATCH(batch, 0); /* pass-through */
2818     OUT_BATCH(batch, 0);
2819 }
2820
2821 static void 
2822 gen7_emit_sf_state(VADriverContextP ctx)
2823 {
2824     struct i965_driver_data *i965 = i965_driver_data(ctx);
2825     struct intel_batchbuffer *batch = i965->batch;
2826
2827     BEGIN_BATCH(batch, 14);
2828     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2829     OUT_BATCH(batch,
2830               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2831               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2832               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2833     OUT_BATCH(batch, 0);
2834     OUT_BATCH(batch, 0);
2835     OUT_BATCH(batch, 0); /* DW4 */
2836     OUT_BATCH(batch, 0);
2837     OUT_BATCH(batch, 0);
2838     OUT_BATCH(batch, 0);
2839     OUT_BATCH(batch, 0);
2840     OUT_BATCH(batch, 0); /* DW9 */
2841     OUT_BATCH(batch, 0);
2842     OUT_BATCH(batch, 0);
2843     OUT_BATCH(batch, 0);
2844     OUT_BATCH(batch, 0);
2845     ADVANCE_BATCH(batch);
2846
2847     BEGIN_BATCH(batch, 7);
2848     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2849     OUT_BATCH(batch, 0);
2850     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2851     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2852     OUT_BATCH(batch, 0);
2853     OUT_BATCH(batch, 0);
2854     OUT_BATCH(batch, 0);
2855     ADVANCE_BATCH(batch);
2856 }
2857
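/*
 * As on GEN6, this binds the CURBE as the PS push-constant buffer and
 * selects the render kernel, but the dispatch fields moved into
 * 3DSTATE_PS.  Haswell shifts the max-thread field and additionally
 * requires an explicit sample mask, handled via the two variables below.
 */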
2858 static void 
2859 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2860 {
2861     struct i965_driver_data *i965 = i965_driver_data(ctx);
2862     struct intel_batchbuffer *batch = i965->batch;
2863     struct i965_render_state *render_state = &i965->render_state;
2864     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2865     unsigned int num_samples = 0;
2866
2867     if (IS_HASWELL(i965->intel.device_info)) {
2868         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2869         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2870     }
2871
2872     BEGIN_BATCH(batch, 3);
2873     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2874     OUT_BATCH(batch,
2875               GEN7_WM_DISPATCH_ENABLE |
2876               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2877     OUT_BATCH(batch, 0);
2878     ADVANCE_BATCH(batch);
2879
2880     BEGIN_BATCH(batch, 7);
2881     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2882     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2883     OUT_BATCH(batch, 0);
2884     OUT_RELOC(batch, 
2885               render_state->curbe.bo,
2886               I915_GEM_DOMAIN_INSTRUCTION, 0,
2887               0);
2888     OUT_BATCH(batch, 0);
2889     OUT_BATCH(batch, 0);
2890     OUT_BATCH(batch, 0);
2891     ADVANCE_BATCH(batch);
2892
2893     BEGIN_BATCH(batch, 8);
2894     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2895     OUT_RELOC(batch, 
2896               render_state->render_kernels[kernel].bo,
2897               I915_GEM_DOMAIN_INSTRUCTION, 0,
2898               0);
2899     OUT_BATCH(batch, 
2900               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2901               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2902     OUT_BATCH(batch, 0); /* scratch space base offset */
2903     OUT_BATCH(batch, 
2904               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2905               GEN7_PS_PUSH_CONSTANT_ENABLE |
2906               GEN7_PS_ATTRIBUTE_ENABLE |
2907               GEN7_PS_16_DISPATCH_ENABLE);
2908     OUT_BATCH(batch, 
2909               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2910     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2911     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2912     ADVANCE_BATCH(batch);
2913 }
2914
2915 static void
2916 gen7_emit_vertex_element_state(VADriverContextP ctx)
2917 {
2918     struct i965_driver_data *i965 = i965_driver_data(ctx);
2919     struct intel_batchbuffer *batch = i965->batch;
2920
2921     /* Set up our vertex elements, sourced from the single vertex buffer. */
2922     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2923     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2924     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2925               GEN6_VE0_VALID |
2926               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2927               (0 << VE0_OFFSET_SHIFT));
2928     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2929               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2930               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2931               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2932     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2933     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2934               GEN6_VE0_VALID |
2935               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2936               (8 << VE0_OFFSET_SHIFT));
2937     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2938               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2939               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2940               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2941 }
2942
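/*
 * Same RECTLIST draw as before, with two GEN7-specific twists: the
 * vertex buffer packet must set GEN7_VB0_ADDRESS_MODIFYENABLE for its
 * addresses to take effect, and 3DPRIMITIVE now carries the topology in
 * DW1 rather than in the opcode dword.
 */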
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices x 16 bytes */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

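/*
 * Emit the complete Gen7 3D pipeline state in the order the hardware
 * expects, inside an atomic batch section so the whole sequence stays
 * in a single batch buffer submission.
 */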
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

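/*
 * Render a video surface with the Gen7 pipeline: build all state,
 * clear the destination region, emit the batch and submit it.
 */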
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

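/*
 * Blend state for subpicture compositing: classic source-over alpha
 * blending (src * alpha + dst * (1 - alpha)), with pre- and post-blend
 * clamping to [0, 1].
 */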
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    dri_bo_unmap(render_state->cc.state); /* release any outstanding mapping of the CC state buffer */
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}

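/*
 * State setup for subpicture compositing: the generic surface, sampler
 * and viewport setup is reused, with the alpha-blend state above and
 * the subpicture constants and vertices swapped in.
 */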
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen7_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen7_render_initialize(ctx);
    gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

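/*
 * Generic entry point: give post-processing a chance to produce a new
 * (e.g. pre-scaled) surface first, render from that surface if one was
 * produced, then release it again.
 */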
void
intel_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int has_done_scaling = 0;
    VASurfaceID out_surface_id = i965_post_processing(ctx,
                                                      obj_surface,
                                                      src_rect,
                                                      dst_rect,
                                                      flags,
                                                      &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID) {
        struct object_surface *new_obj_surface = SURFACE(out_surface_id);

        if (new_obj_surface && new_obj_surface->bo)
            obj_surface = new_obj_surface;

        if (has_done_scaling)
            src_rect = dst_rect; /* post-processing already scaled: the remaining blit is 1:1 */
    }

    render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);

    if (out_surface_id != VA_INVALID_ID)
        i965_DestroySurfaces(ctx, &out_surface_id, 1);
}

void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
}

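/*
 * Release every buffer object owned by the render state so the driver
 * shuts down without leaking GPU memory.
 */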
static void
genx_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

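/*
 * Select the render kernels and the put_surface/put_subpicture hooks
 * for the detected GPU generation, upload the kernels into buffer
 * objects and allocate the constant (CURBE) buffer.
 */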
bool
genx_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_info)) {
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen7_render_put_surface;
        render_state->render_put_subpicture = gen7_render_put_subpicture;
    } else if (IS_GEN6(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
        render_state->render_put_surface = gen6_render_put_surface;
        render_state->render_put_subpicture = gen6_render_put_subpicture;
    } else if (IS_IRONLAKE(i965->intel.device_info)) {
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    } else {
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
        render_state->render_put_surface = i965_render_put_surface;
        render_state->render_put_subpicture = i965_render_put_subpicture;
    }

    render_state->render_terminate = genx_render_terminate;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return true;
}

bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    return i965->codec_info->render_init(ctx);
}

void
i965_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    render_state->render_terminate(ctx);
}