53237e4624844041e2f137de205547face28a81b
[platform/upstream/libva-intel-driver.git] / src / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_drmcommon.h>
40
41 #include "intel_batchbuffer.h"
42 #include "intel_driver.h"
43 #include "i965_defines.h"
44 #include "i965_drv_video.h"
45 #include "i965_structs.h"
46
47 #include "i965_render.h"
48
/* SF (strips & fans) kernel configuration: GRF register count and the
 * maximum thread count (programmed as count - 1 in the SF unit state). */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/* SF kernel binary for Gen4, generated from shader assembly. */
static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

/* Pixel shader kernel configuration. */
#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

/* Convert a GRF register count to the hardware encoding:
 * number of 16-register blocks, minus one. */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Gen4 video pixel shader: sample planar YUV, convert to RGB, write out. */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Gen4 subpicture pixel shader: sample ARGB and write out directly. */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE (Gen5): same kernels, Gen5 instruction encoding. */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
99
/* programs for Sandybridge (Gen6).  The fixed-function SF stage is not
 * used for video rendering on Gen6+, so the SF kernel is empty. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

/* Gen6 video pixel shader: planar YUV -> RGB. */
static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Gen6 subpicture pixel shader: direct ARGB sampling. */
static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge (Gen7).  SF kernel is empty, as on Gen6. */
static const uint32_t sf_kernel_static_gen7[][4] = 
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell: only the planar sampling step differs from Gen7. */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Surface states are laid out in a single bo at a fixed stride.  The
 * stride is the larger of the pre-Gen7 and Gen7 state sizes (32-byte
 * aligned) so the offsets below work on every generation; the binding
 * table follows the last surface state slot. */
#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
149
/* Reinterpret the bit pattern of a 32-bit float as a uint32_t, for
 * packing floating-point values into hardware command dwords. */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
160
/* Indices into the per-generation render_kernels_*[] arrays below. */
enum 
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
167
/* Render kernels for Gen4.  Entries: name, kernel index (enum above),
 * kernel binary, binary size, and the kernel bo (NULL here; presumably
 * allocated when the kernels are uploaded — not visible in this file chunk). */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};
192
/* Render kernels for Ironlake (Gen5); same layout as render_kernels_gen4. */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};
217
/* Render kernels for Sandybridge (Gen6); the SF entry is an empty kernel. */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};
242
/* Render kernels for Ivybridge (Gen7); the SF entry is an empty kernel. */
static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};
267
/* Render kernels for Haswell: reuses the Gen7 SF and subpicture kernels,
 * with a Haswell-specific video pixel shader. */
static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};
292
/* URB (unified return buffer) partitioning for the fixed-function
 * pipeline: entry counts and per-entry sizes for each stage.  GS and
 * CLIP get no entries (those stages are unused here).  The "allocation
 * size" fields in the unit states are programmed as (size - 1). */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
307
/*
 * Fill in the VS unit state object.  The VS stage itself is disabled
 * (vertices pass through untransformed); only the URB bookkeeping and
 * the vertex-cache disable are programmed.
 */
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    /* Ironlake uses a different encoding for this field (the >> 2
     * suggests units of 4 entries — per hardware spec). */
    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    /* Allocation size is encoded as (size - 1). */
    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;              /* VS disabled: pass-through */
    vs_state->vs6.vert_cache_disable = 1;
    
    dri_bo_unmap(render_state->vs.state);
}
331
/*
 * Fill in the SF (strips & fans) unit state: hook up the SF kernel,
 * size its URB allocation, disable viewport transform, culling and
 * scissoring, and emit the relocation for the kernel start pointer.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* Kernel start pointer is in 64-byte units; the reloc below patches
     * in the final kernel bo address at execution time. */
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    /* max_threads and allocation size are encoded minus one. */
    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* 0x8 bias presumably selects the half-pixel destination origin —
     * confirm against the hardware spec. */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Patch the thread0 dword with the SF kernel address, preserving
     * the grf_reg_count bits packed in the same dword. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
388
/*
 * Fill in one sampler state per bound source surface: bilinear
 * min/mag filtering with all three texture coordinates clamped.
 * wm.sampler_count is accumulated by i965_render_src_surface_state()
 * as surfaces are bound.
 */
static void 
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;
    
    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
/*
 * Fill in the WM (windower/masker) unit state for subpicture rendering:
 * point it at the subpicture PS kernel, program sampler and thread
 * limits, and emit relocations for the kernel and sampler state bos.
 * Differs from i965_render_wm_unit() only in the kernel used and the
 * thread3 constant-URB / GRF-start settings.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel start pointer is in 64-byte units; patched by the reloc below. */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* scratch space is not used by the PS kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is in 32-byte units; patched by the reloc below. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count field counts groups of 4 samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;    /* 16-pixel dispatch only */
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 with the subpicture kernel address (low bits keep
     * grf_reg_count, which shares the dword). */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    /* Patch wm4 with the sampler state bo address. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
478
479
/*
 * Fill in the WM (windower/masker) unit state for normal video
 * rendering: point it at the video PS kernel, program sampler and
 * thread limits, and emit relocations for the kernel and sampler bos.
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel start pointer is in 64-byte units; patched by the reloc below. */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    /* scratch space is not used by the PS kernel */
    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is in 32-byte units; patched by the reloc below. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count field counts groups of 4 samplers */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;    /* 16-pixel dispatch only */
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Patch thread0 with the PS kernel address (low bits keep
     * grf_reg_count, which shares the dword). */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    /* Patch wm4 with the sampler state bo address. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
543
/*
 * Fill in the CC viewport with an effectively unbounded depth range,
 * so the color calculator never rejects pixels on depth.
 */
static void 
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));
    
    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}
561
/*
 * Fill in the CC (color calculator) unit state for subpicture
 * rendering: stencil/depth/alpha tests and logic ops all disabled,
 * color blending enabled with src-alpha / inv-src-alpha factors
 * (standard "over" compositing of the subpicture onto the video).
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;/* 0: ALPHATEST_UNORM8 (unused: test disabled) */
    cc_state->cc3.alpha_test_func = 5;/* COMPAREFUNCTION_LESS (unused: test disabled) */
    /* CC viewport pointer is in 32-byte units; patched by the reloc below. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* 0xc looks like LOGICOP_COPY, not
                                         * WHITE as previously commented —
                                         * unused anyway (logic op disabled) */
    cc_state->cc5.statistics_enable = 1;
    /* Independent-alpha factors: unused since ia_blend_enable is 0. */
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;


    /* Patch cc4 with the CC viewport bo address. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
613
614
/*
 * Fill in the CC (color calculator) unit state for normal video
 * rendering: all tests and blending disabled; the source color is
 * copied straight to the target via the logic-op path.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* CC viewport pointer is in 32-byte units; patched by the reloc below. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* 0xc looks like LOGICOP_COPY (pass
                                         * source through), not WHITE as
                                         * previously commented — confirm
                                         * against the hardware spec */
    cc_state->cc5.statistics_enable = 1;
    /* Independent-alpha factors: unused since ia_blend_enable is 0. */
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* Patch cc4 with the CC viewport bo address. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
652
653 static void
654 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
655 {
656     switch (tiling) {
657     case I915_TILING_NONE:
658         ss->ss3.tiled_surface = 0;
659         ss->ss3.tile_walk = 0;
660         break;
661     case I915_TILING_X:
662         ss->ss3.tiled_surface = 1;
663         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
664         break;
665     case I915_TILING_Y:
666         ss->ss3.tiled_surface = 1;
667         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
668         break;
669     }
670 }
671
/*
 * Fill in a pre-Gen7 SURFACE_STATE for a 2D surface.
 *
 * ss      - surface state to fill (zeroed first)
 * bo      - backing buffer object
 * offset  - byte offset of this plane within bo
 * width/height/pitch - plane dimensions (stored minus one in the state)
 * format  - an I965_SURFACEFORMAT_* value
 * flags   - I965_PP_FLAG_{TOP,BOTTOM}_FIELD for field rendering
 *
 * Note: ss1.base_addr is only a presumed address; the caller must emit
 * a relocation against bo at the ss1 dword (see
 * i965_render_src_surface_state()).
 */
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    /* Field rendering: sample every other line (halving the effective
     * height); the bottom field additionally starts one line down. */
    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    /* Dimensions are encoded minus one. */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}
713
714 static void
715 gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
716 {
717    switch (tiling) {
718    case I915_TILING_NONE:
719       ss->ss0.tiled_surface = 0;
720       ss->ss0.tile_walk = 0;
721       break;
722    case I915_TILING_X:
723       ss->ss0.tiled_surface = 1;
724       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
725       break;
726    case I915_TILING_Y:
727       ss->ss0.tiled_surface = 1;
728       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
729       break;
730    }
731 }
732
/*
 * Set "Shader Channel Select" to the identity mapping (R->R, G->G,
 * B->B, A->A).  Called for Haswell surfaces, where SCS must be
 * programmed explicitly (see i965_render_src_surface_state()).
 * The "chanel" spelling follows the struct field names.
 */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}
742
/*
 * Fill in a Gen7 SURFACE_STATE for a 2D surface; Gen7 analogue of
 * i965_render_set_surface_state() (tiling bits live in ss0 and there
 * is no color_blend field).
 *
 * ss1.base_addr is only a presumed address; the caller must emit a
 * relocation against bo at the ss1 dword.
 */
static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    /* Field rendering: sample every other line (halving the effective
     * height); the bottom field additionally starts one line down. */
    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    /* Dimensions are encoded minus one. */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}
783
/*
 * Bind one source plane as PS surface @index: fill the generation-
 * appropriate SURFACE_STATE in the shared surface-state/binding-table
 * bo, emit the base-address relocation, point the binding table entry
 * at it, and account for one more sampler (wm.sampler_count, later
 * consumed by i965_render_sampler()).
 */
static void
i965_render_src_surface_state(
    VADriverContextP ctx, 
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        /* Haswell additionally requires explicit shader channel selects. */
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        /* Patch ss1 (base address) with the real region address. */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        /* Patch ss1 (base address) with the real region address. */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    /* Point the binding table entry at the surface state we just wrote. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
836
/*
 * Bind every plane of a video surface as a PS source surface.  Each
 * plane is bound twice — Y at indices 1/2, chroma at 3/4 (and 5/6 for
 * planar formats) — presumably matching the binding layout the planar
 * sampling kernels expect.  NV12 binds its interleaved UV plane as
 * R8G8; other (planar) formats bind separate R8 U and V planes at
 * y_cb_offset and y_cr_offset.
 */
static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    VASurfaceID      surface,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct object_surface *obj_surface;
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    region_pitch = obj_surface->width;      /* Y plane pitch */
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}
889
890 static void
891 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
892                               VASurfaceID surface)
893 {
894     struct i965_driver_data *i965 = i965_driver_data(ctx);  
895     struct object_surface *obj_surface = SURFACE(surface);
896     dri_bo *subpic_region;
897     unsigned int index = obj_surface->subpic_render_idx;
898     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
899     struct object_image *obj_image = IMAGE(obj_subpic->image);
900     assert(obj_surface);
901     assert(obj_surface->bo);
902     subpic_region = obj_image->bo;
903     /*subpicture surface*/
904     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
905     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
906 }
907
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    /* Render-target format follows the drawable depth:
     * 16bpp -> RGB565, otherwise BGRA8888. */
    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    /* Map the combined surface-state/binding-table BO and fill in the
     * surface-state slot for this binding index. */
    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        /* Haswell additionally requires the shader channel selects. */
        if (IS_HASWELL(i965->intel.device_id))
            gen7_render_set_surface_scs(ss);
        /* Relocate the surface base address field (ss1) against the
         * destination BO so the kernel can patch the final GPU address. */
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    /* Point the binding-table entry at the surface state just written. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
956
957 static void
958 i965_fill_vertex_buffer(
959     VADriverContextP ctx,
960     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
961     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
962 )
963 {
964     struct i965_driver_data * const i965 = i965_driver_data(ctx);
965     float vb[12];
966
967     enum { X1, Y1, X2, Y2 };
968
969     static const unsigned int g_rotation_indices[][6] = {
970         [VA_ROTATION_NONE] = { X1, Y1, X2, Y1, X2, Y2 },
971         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
972         [VA_ROTATION_180]  = { X2, Y2, X1, Y2, X1, Y1 },
973         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
974     };
975
976     const unsigned int * const rotation_indices =
977         g_rotation_indices[i965->rotation_attrib->value];
978
979     vb[0]  = tex_coords[X1]; /* top-left corner */
980     vb[1]  = tex_coords[Y1];
981     vb[2]  = vid_coords[rotation_indices[0]];
982     vb[3]  = vid_coords[rotation_indices[1]];
983
984     vb[4]  = tex_coords[X2]; /* top-right corner */
985     vb[5]  = tex_coords[Y1];
986     vb[6]  = vid_coords[rotation_indices[2]];
987     vb[7]  = vid_coords[rotation_indices[3]];
988
989     vb[8]  = tex_coords[X2]; /* bottom-right corner */
990     vb[9]  = tex_coords[Y2];
991     vb[10] = vid_coords[rotation_indices[4]];
992     vb[11] = vid_coords[rotation_indices[5]];
993
994     dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
995 }
996
997 static void 
998 i965_subpic_render_upload_vertex(VADriverContextP ctx,
999                                  VASurfaceID surface,
1000                                  const VARectangle *output_rect)
1001 {    
1002     struct i965_driver_data  *i965         = i965_driver_data(ctx);
1003     struct object_surface    *obj_surface  = SURFACE(surface);
1004     unsigned int index = obj_surface->subpic_render_idx;
1005     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic[index]);
1006     float tex_coords[4], vid_coords[4];
1007     VARectangle dst_rect;
1008
1009     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
1010         dst_rect = obj_subpic->dst_rect;
1011     else {
1012         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
1013         const float sy  = (float)output_rect->height / obj_surface->orig_height;
1014         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
1015         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
1016         dst_rect.width  = sx * obj_subpic->dst_rect.width;
1017         dst_rect.height = sy * obj_subpic->dst_rect.height;
1018     }
1019
1020     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
1021     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
1022     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
1023     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
1024
1025     vid_coords[0] = dst_rect.x;
1026     vid_coords[1] = dst_rect.y;
1027     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
1028     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
1029
1030     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1031 }
1032
1033 static void 
1034 i965_render_upload_vertex(
1035     VADriverContextP   ctx,
1036     VASurfaceID        surface,
1037     const VARectangle *src_rect,
1038     const VARectangle *dst_rect
1039 )
1040 {
1041     struct i965_driver_data *i965 = i965_driver_data(ctx);
1042     struct i965_render_state *render_state = &i965->render_state;
1043     struct intel_region *dest_region = render_state->draw_region;
1044     struct object_surface *obj_surface;
1045     float tex_coords[4], vid_coords[4];
1046     int width, height;
1047
1048     obj_surface = SURFACE(surface);
1049     assert(surface);
1050
1051     width  = obj_surface->orig_width;
1052     height = obj_surface->orig_height;
1053
1054     tex_coords[0] = (float)src_rect->x / width;
1055     tex_coords[1] = (float)src_rect->y / height;
1056     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
1057     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
1058
1059     vid_coords[0] = dest_region->x + dst_rect->x;
1060     vid_coords[1] = dest_region->y + dst_rect->y;
1061     vid_coords[2] = vid_coords[0] + dst_rect->width;
1062     vid_coords[3] = vid_coords[1] + dst_rect->height;
1063
1064     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
1065 }
1066
1067 static void
1068 i965_render_upload_constants(VADriverContextP ctx,
1069                              VASurfaceID surface)
1070 {
1071     struct i965_driver_data *i965 = i965_driver_data(ctx);
1072     struct i965_render_state *render_state = &i965->render_state;
1073     unsigned short *constant_buffer;
1074     struct object_surface *obj_surface = SURFACE(surface);
1075
1076     dri_bo_map(render_state->curbe.bo, 1);
1077     assert(render_state->curbe.bo->virtual);
1078     constant_buffer = render_state->curbe.bo->virtual;
1079
1080     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
1081         assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
1082                obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
1083         *constant_buffer = 2;
1084     } else {
1085         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
1086             *constant_buffer = 1;
1087         else
1088             *constant_buffer = 0;
1089     }
1090
1091     dri_bo_unmap(render_state->curbe.bo);
1092 }
1093
1094 static void
1095 i965_subpic_render_upload_constants(VADriverContextP ctx,
1096                              VASurfaceID surface)
1097 {
1098     struct i965_driver_data *i965 = i965_driver_data(ctx);
1099     struct i965_render_state *render_state = &i965->render_state;
1100     float *constant_buffer;
1101     float global_alpha = 1.0;
1102     struct object_surface *obj_surface = SURFACE(surface);
1103     unsigned int index = obj_surface->subpic_render_idx;
1104
1105     if(obj_surface->subpic[index] != VA_INVALID_ID){
1106         struct object_subpic *obj_subpic= SUBPIC(obj_surface->subpic[index]);
1107         if(obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA){
1108            global_alpha = obj_subpic->global_alpha;
1109         }
1110      }   
1111
1112     dri_bo_map(render_state->curbe.bo, 1);
1113
1114     assert(render_state->curbe.bo->virtual);
1115     constant_buffer = render_state->curbe.bo->virtual;
1116     *constant_buffer = global_alpha;
1117
1118     dri_bo_unmap(render_state->curbe.bo);
1119 }
1120  
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    /* Build every piece of indirect state needed to draw the video
     * surface: fixed-function unit states, destination and source
     * surface states, sampler, CC state, then the vertex data and the
     * pixel-shader constants. */
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, surface);
}
1141
static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /* Subpicture variant of the render-state build: uses the
     * subpicture-specific WM/CC unit states and uploads the
     * global-alpha constant instead of the chroma-select constant.
     * NOTE(review): src_rect is unused here — the subpicture's own
     * src_rect is consumed inside i965_subpic_render_upload_vertex(). */
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, surface);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
1161
1162
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Route subsequent state commands to the 3D pipeline (as opposed to
     * the media pipeline). */
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}
1173
static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Program the System Instruction Pointer to 0 — no system routine
     * (exception handler) is installed for this pipeline. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1185
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Program STATE_BASE_ADDRESS so that surface-state offsets (and the
     * binding table) are relative to our combined surface-state BO; all
     * other base addresses stay at 0. Ironlake's packet is two dwords
     * longer than the gen4/gen6+ form. */
    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        /* Surface state base -> our surface-state/binding-table BO. */
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        /* Surface state base -> our surface-state/binding-table BO. */
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}
1215
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Only the pixel-shader (WM) stage uses a binding table here; the
     * other stages get a NULL pointer. The offset is relative to the
     * surface-state base programmed in STATE_BASE_ADDRESS. */
    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
1231
static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Program the blend constant color as R=1, G=0, B=1, A=1. */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}
1246
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point the hardware at the unit-state BOs built earlier for VS, SF,
     * WM and CC; the GS and CLIP stages stay disabled. */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
1264
1265 static void
1266 i965_render_urb_layout(VADriverContextP ctx)
1267 {
1268     struct i965_driver_data *i965 = i965_driver_data(ctx);
1269     struct intel_batchbuffer *batch = i965->batch;
1270     int urb_vs_start, urb_vs_size;
1271     int urb_gs_start, urb_gs_size;
1272     int urb_clip_start, urb_clip_size;
1273     int urb_sf_start, urb_sf_size;
1274     int urb_cs_start, urb_cs_size;
1275
1276     urb_vs_start = 0;
1277     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1278     urb_gs_start = urb_vs_start + urb_vs_size;
1279     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1280     urb_clip_start = urb_gs_start + urb_gs_size;
1281     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1282     urb_sf_start = urb_clip_start + urb_clip_size;
1283     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1284     urb_cs_start = urb_sf_start + urb_sf_size;
1285     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1286
1287     BEGIN_BATCH(batch, 3);
1288     OUT_BATCH(batch, 
1289               CMD_URB_FENCE |
1290               UF0_CS_REALLOC |
1291               UF0_SF_REALLOC |
1292               UF0_CLIP_REALLOC |
1293               UF0_GS_REALLOC |
1294               UF0_VS_REALLOC |
1295               1);
1296     OUT_BATCH(batch, 
1297               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1298               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1299               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1300     OUT_BATCH(batch,
1301               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1302               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1303     ADVANCE_BATCH(batch);
1304 }
1305
static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Configure the constant (CURBE) URB allocation: URB_CS_ENTRIES
     * entries of URB_CS_ENTRY_SIZE units each; the size field is encoded
     * as size - 1. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
1319
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Bind the CURBE BO written by the upload_constants helpers.
     * NOTE(review): (1 << 8) appears to be the "buffer valid" bit and the
     * reloc delta carries the length (entries - 1) — confirm against the
     * CONSTANT_BUFFER packet definition. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}
1334
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    /* Set the clip rectangle to cover the whole drawable: (0,0) through
     * (width-1, height-1). Note `<<` binds tighter than `|`, so ymax
     * correctly lands in the high 16 bits. */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000); /* xmin = ymin = 0 */
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000); /* drawing origin */
    ADVANCE_BATCH(batch);
}
1350
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Describe the two vertex elements fetched from each 16-byte vertex:
     * two floats at offset 0 and two floats at offset 8, each padded to
     * vec4 with 1.0 in the z/w components. The two branches differ only
     * in the destination-element-offset field, which Ironlake lacks.
     * NOTE(review): i965_fill_vertex_buffer() stores {s,t,x,y} per
     * vertex, so element 0 actually carries the texture coordinates —
     * the legacy "X,Y" / "S0,T0" labels below look swapped; confirm
     * against the SF/WM kernel attribute layout before relying on them. */
    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}
1405
1406 static void
1407 i965_render_upload_image_palette(
1408     VADriverContextP ctx,
1409     VAImageID        image_id,
1410     unsigned int     alpha
1411 )
1412 {
1413     struct i965_driver_data *i965 = i965_driver_data(ctx);
1414     struct intel_batchbuffer *batch = i965->batch;
1415     unsigned int i;
1416
1417     struct object_image *obj_image = IMAGE(image_id);
1418     assert(obj_image);
1419
1420     if (obj_image->image.num_palette_entries == 0)
1421         return;
1422
1423     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
1424     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1425     /*fill palette*/
1426     //int32_t out[16]; //0-23:color 23-31:alpha
1427     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1428         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
1429     ADVANCE_BATCH(batch);
1430 }
1431
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Bind the vertex buffer (pitch 16 bytes = 4 floats per vertex) and
     * kick a three-vertex RECTLIST primitive. */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* Ironlake takes the buffer end address (12 floats) here; other gens
     * appear to take the max vertex index (3) instead — confirm per-gen
     * VERTEX_BUFFERS packet layout. */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0); /* NOTE(review): likely instance step rate / MBZ */

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
1467
1468 static void 
1469 i965_clear_dest_region(VADriverContextP ctx)
1470 {
1471     struct i965_driver_data *i965 = i965_driver_data(ctx);
1472     struct intel_batchbuffer *batch = i965->batch;
1473     struct i965_render_state *render_state = &i965->render_state;
1474     struct intel_region *dest_region = render_state->draw_region;
1475     unsigned int blt_cmd, br13;
1476     int pitch;
1477
1478     blt_cmd = XY_COLOR_BLT_CMD;
1479     br13 = 0xf0 << 16;
1480     pitch = dest_region->pitch;
1481
1482     if (dest_region->cpp == 4) {
1483         br13 |= BR13_8888;
1484         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
1485     } else {
1486         assert(dest_region->cpp == 2);
1487         br13 |= BR13_565;
1488     }
1489
1490     if (dest_region->tiling != I915_TILING_NONE) {
1491         blt_cmd |= XY_COLOR_BLT_DST_TILED;
1492         pitch /= 4;
1493     }
1494
1495     br13 |= pitch;
1496
1497     if (IS_GEN6(i965->intel.device_id) ||
1498         IS_GEN7(i965->intel.device_id)) {
1499         intel_batchbuffer_start_atomic_blt(batch, 24);
1500         BEGIN_BLT_BATCH(batch, 6);
1501     } else {
1502         intel_batchbuffer_start_atomic(batch, 24);
1503         BEGIN_BATCH(batch, 6);
1504     }
1505
1506     OUT_BATCH(batch, blt_cmd);
1507     OUT_BATCH(batch, br13);
1508     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
1509     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
1510               (dest_region->x + dest_region->width));
1511     OUT_RELOC(batch, dest_region->bo, 
1512               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1513               0);
1514     OUT_BATCH(batch, 0x0);
1515     ADVANCE_BATCH(batch);
1516     intel_batchbuffer_end_atomic(batch);
1517 }
1518
1519 static void
1520 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1521 {
1522     struct i965_driver_data *i965 = i965_driver_data(ctx);
1523     struct intel_batchbuffer *batch = i965->batch;
1524
1525     i965_clear_dest_region(ctx);
1526     intel_batchbuffer_start_atomic(batch, 0x1000);
1527     intel_batchbuffer_emit_mi_flush(batch);
1528     i965_render_pipeline_select(ctx);
1529     i965_render_state_sip(ctx);
1530     i965_render_state_base_address(ctx);
1531     i965_render_binding_table_pointers(ctx);
1532     i965_render_constant_color(ctx);
1533     i965_render_pipelined_pointers(ctx);
1534     i965_render_urb_layout(ctx);
1535     i965_render_cs_urb_layout(ctx);
1536     i965_render_constant_buffer(ctx);
1537     i965_render_drawing_rectangle(ctx);
1538     i965_render_vertex_elements(ctx);
1539     i965_render_startup(ctx);
1540     intel_batchbuffer_end_atomic(batch);
1541 }
1542
1543 static void
1544 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1545 {
1546     struct i965_driver_data *i965 = i965_driver_data(ctx);
1547     struct intel_batchbuffer *batch = i965->batch;
1548
1549     intel_batchbuffer_start_atomic(batch, 0x1000);
1550     intel_batchbuffer_emit_mi_flush(batch);
1551     i965_render_pipeline_select(ctx);
1552     i965_render_state_sip(ctx);
1553     i965_render_state_base_address(ctx);
1554     i965_render_binding_table_pointers(ctx);
1555     i965_render_constant_color(ctx);
1556     i965_render_pipelined_pointers(ctx);
1557     i965_render_urb_layout(ctx);
1558     i965_render_cs_urb_layout(ctx);
1559     i965_render_drawing_rectangle(ctx);
1560     i965_render_vertex_elements(ctx);
1561     i965_render_startup(ctx);
1562     intel_batchbuffer_end_atomic(batch);
1563 }
1564
1565
1566 static void 
1567 i965_render_initialize(VADriverContextP ctx)
1568 {
1569     struct i965_driver_data *i965 = i965_driver_data(ctx);
1570     struct i965_render_state *render_state = &i965->render_state;
1571     dri_bo *bo;
1572
1573     /* VERTEX BUFFER */
1574     dri_bo_unreference(render_state->vb.vertex_buffer);
1575     bo = dri_bo_alloc(i965->intel.bufmgr,
1576                       "vertex buffer",
1577                       4096,
1578                       4096);
1579     assert(bo);
1580     render_state->vb.vertex_buffer = bo;
1581
1582     /* VS */
1583     dri_bo_unreference(render_state->vs.state);
1584     bo = dri_bo_alloc(i965->intel.bufmgr,
1585                       "vs state",
1586                       sizeof(struct i965_vs_unit_state),
1587                       64);
1588     assert(bo);
1589     render_state->vs.state = bo;
1590
1591     /* GS */
1592     /* CLIP */
1593     /* SF */
1594     dri_bo_unreference(render_state->sf.state);
1595     bo = dri_bo_alloc(i965->intel.bufmgr,
1596                       "sf state",
1597                       sizeof(struct i965_sf_unit_state),
1598                       64);
1599     assert(bo);
1600     render_state->sf.state = bo;
1601
1602     /* WM */
1603     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1604     bo = dri_bo_alloc(i965->intel.bufmgr,
1605                       "surface state & binding table",
1606                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1607                       4096);
1608     assert(bo);
1609     render_state->wm.surface_state_binding_table_bo = bo;
1610
1611     dri_bo_unreference(render_state->wm.sampler);
1612     bo = dri_bo_alloc(i965->intel.bufmgr,
1613                       "sampler state",
1614                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1615                       64);
1616     assert(bo);
1617     render_state->wm.sampler = bo;
1618     render_state->wm.sampler_count = 0;
1619
1620     dri_bo_unreference(render_state->wm.state);
1621     bo = dri_bo_alloc(i965->intel.bufmgr,
1622                       "wm state",
1623                       sizeof(struct i965_wm_unit_state),
1624                       64);
1625     assert(bo);
1626     render_state->wm.state = bo;
1627
1628     /* COLOR CALCULATOR */
1629     dri_bo_unreference(render_state->cc.state);
1630     bo = dri_bo_alloc(i965->intel.bufmgr,
1631                       "color calc state",
1632                       sizeof(struct i965_cc_unit_state),
1633                       64);
1634     assert(bo);
1635     render_state->cc.state = bo;
1636
1637     dri_bo_unreference(render_state->cc.viewport);
1638     bo = dri_bo_alloc(i965->intel.bufmgr,
1639                       "cc viewport",
1640                       sizeof(struct i965_cc_viewport),
1641                       64);
1642     assert(bo);
1643     render_state->cc.viewport = bo;
1644 }
1645
static void
i965_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Top-level blit of a decoded surface to the drawable: (re)allocate
     * the state BOs, build the indirect state, emit the 3D pipeline and
     * submit the batch. */
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(batch);
}
1663
1664 static void
1665 i965_render_put_subpicture(
1666     VADriverContextP   ctx,
1667     VASurfaceID        surface,
1668     const VARectangle *src_rect,
1669     const VARectangle *dst_rect
1670 )
1671 {
1672     struct i965_driver_data *i965 = i965_driver_data(ctx);
1673     struct intel_batchbuffer *batch = i965->batch;
1674     struct object_surface *obj_surface = SURFACE(surface);
1675     unsigned int index = obj_surface->subpic_render_idx;
1676     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
1677
1678     assert(obj_subpic);
1679
1680     i965_render_initialize(ctx);
1681     i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
1682     i965_subpic_render_pipeline_setup(ctx);
1683     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1684     intel_batchbuffer_flush(batch);
1685 }
1686
1687 /*
1688  * for GEN6+
1689  */
1690 static void 
1691 gen6_render_initialize(VADriverContextP ctx)
1692 {
1693     struct i965_driver_data *i965 = i965_driver_data(ctx);
1694     struct i965_render_state *render_state = &i965->render_state;
1695     dri_bo *bo;
1696
1697     /* VERTEX BUFFER */
1698     dri_bo_unreference(render_state->vb.vertex_buffer);
1699     bo = dri_bo_alloc(i965->intel.bufmgr,
1700                       "vertex buffer",
1701                       4096,
1702                       4096);
1703     assert(bo);
1704     render_state->vb.vertex_buffer = bo;
1705
1706     /* WM */
1707     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1708     bo = dri_bo_alloc(i965->intel.bufmgr,
1709                       "surface state & binding table",
1710                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1711                       4096);
1712     assert(bo);
1713     render_state->wm.surface_state_binding_table_bo = bo;
1714
1715     dri_bo_unreference(render_state->wm.sampler);
1716     bo = dri_bo_alloc(i965->intel.bufmgr,
1717                       "sampler state",
1718                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1719                       4096);
1720     assert(bo);
1721     render_state->wm.sampler = bo;
1722     render_state->wm.sampler_count = 0;
1723
1724     /* COLOR CALCULATOR */
1725     dri_bo_unreference(render_state->cc.state);
1726     bo = dri_bo_alloc(i965->intel.bufmgr,
1727                       "color calc state",
1728                       sizeof(struct gen6_color_calc_state),
1729                       4096);
1730     assert(bo);
1731     render_state->cc.state = bo;
1732
1733     /* CC VIEWPORT */
1734     dri_bo_unreference(render_state->cc.viewport);
1735     bo = dri_bo_alloc(i965->intel.bufmgr,
1736                       "cc viewport",
1737                       sizeof(struct i965_cc_viewport),
1738                       4096);
1739     assert(bo);
1740     render_state->cc.viewport = bo;
1741
1742     /* BLEND STATE */
1743     dri_bo_unreference(render_state->cc.blend);
1744     bo = dri_bo_alloc(i965->intel.bufmgr,
1745                       "blend state",
1746                       sizeof(struct gen6_blend_state),
1747                       4096);
1748     assert(bo);
1749     render_state->cc.blend = bo;
1750
1751     /* DEPTH & STENCIL STATE */
1752     dri_bo_unreference(render_state->cc.depth_stencil);
1753     bo = dri_bo_alloc(i965->intel.bufmgr,
1754                       "depth & stencil state",
1755                       sizeof(struct gen6_depth_stencil_state),
1756                       4096);
1757     assert(bo);
1758     render_state->cc.depth_stencil = bo;
1759 }
1760
1761 static void
1762 gen6_render_color_calc_state(VADriverContextP ctx)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct i965_render_state *render_state = &i965->render_state;
1766     struct gen6_color_calc_state *color_calc_state;
1767     
1768     dri_bo_map(render_state->cc.state, 1);
1769     assert(render_state->cc.state->virtual);
1770     color_calc_state = render_state->cc.state->virtual;
1771     memset(color_calc_state, 0, sizeof(*color_calc_state));
1772     color_calc_state->constant_r = 1.0;
1773     color_calc_state->constant_g = 0.0;
1774     color_calc_state->constant_b = 1.0;
1775     color_calc_state->constant_a = 1.0;
1776     dri_bo_unmap(render_state->cc.state);
1777 }
1778
1779 static void
1780 gen6_render_blend_state(VADriverContextP ctx)
1781 {
1782     struct i965_driver_data *i965 = i965_driver_data(ctx);
1783     struct i965_render_state *render_state = &i965->render_state;
1784     struct gen6_blend_state *blend_state;
1785     
1786     dri_bo_map(render_state->cc.blend, 1);
1787     assert(render_state->cc.blend->virtual);
1788     blend_state = render_state->cc.blend->virtual;
1789     memset(blend_state, 0, sizeof(*blend_state));
1790     blend_state->blend1.logic_op_enable = 1;
1791     blend_state->blend1.logic_op_func = 0xc;
1792     dri_bo_unmap(render_state->cc.blend);
1793 }
1794
1795 static void
1796 gen6_render_depth_stencil_state(VADriverContextP ctx)
1797 {
1798     struct i965_driver_data *i965 = i965_driver_data(ctx);
1799     struct i965_render_state *render_state = &i965->render_state;
1800     struct gen6_depth_stencil_state *depth_stencil_state;
1801     
1802     dri_bo_map(render_state->cc.depth_stencil, 1);
1803     assert(render_state->cc.depth_stencil->virtual);
1804     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1805     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1806     dri_bo_unmap(render_state->cc.depth_stencil);
1807 }
1808
/*
 * Build all indirect state needed for a GEN6 PutSurface: destination
 * and source surface states, samplers, CC viewport, color-calc /
 * blend / depth-stencil state, push constants and vertex data.
 * Must run before gen6_render_emit_states().
 */
static void
gen6_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0); /* render target at binding index 0 */
    i965_render_src_surfaces_state(ctx, surface, flags);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}
1828
/*
 * Emit the GEN6 state that never varies for this driver: select the
 * 3D pipeline, program single-sample (non-MSAA) rasterization, and
 * clear the system instruction pointer.
 * (Function name keeps the historical "invarient" spelling.)
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1); /* only sample 0 enabled */

    /* Set system instruction pointer */
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
}
1849
/*
 * Emit STATE_BASE_ADDRESS (10 dwords).  Only the surface state base
 * is relocated, to the combined surface-state/binding-table BO; every
 * other base and upper bound is left at zero with the modify bit set,
 * so those addresses are treated as absolute.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1868
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS with only the CC-viewport
 * modify bit set: the CC viewport pointer is relocated to cc.viewport,
 * while the CLIP and SF viewport slots (the two zero dwords) are left
 * untouched by the hardware.
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(batch, 0); /* CLIP viewport (not modified) */
    OUT_BATCH(batch, 0); /* SF viewport (not modified) */
    OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1883
/*
 * Partition the URB for GEN6: 24 VS entries of minimal size (24 is
 * the hardware minimum on GEN6, per the inline comment) and zero GS
 * entries since no geometry shader thread is used.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1896
/*
 * Emit 3DSTATE_CC_STATE_POINTERS, pointing the hardware at the blend,
 * depth-stencil and color-calc BOs.  The low bit OR'ed into each
 * relocation marks the pointer as modified/valid.
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1909
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS with only the PS modify bit:
 * the pixel shader sampler table is relocated to wm.sampler, while
 * the VS and GS slots stay unmodified.
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0); /* VS */
    OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1924
/*
 * Emit the binding table pointers.  Only the PS slot is programmed
 * (to BINDING_TABLE_OFFSET inside the surface-state BO); VS and GS do
 * not read any surfaces in this pipeline.
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Binding table pointers */
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(batch, 0);                /* vs */
    OUT_BATCH(batch, 0);                /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
}
1940
/*
 * Program a NULL depth buffer (surface type NULL, D32_FLOAT format,
 * all address/size dwords zero) and zero clear params — depth testing
 * is unused for video rendering but the state must still be emitted.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(batch, 0);
}
1959
/*
 * Emit the drawing rectangle; the command layout is unchanged from
 * earlier generations, so this just delegates to the i965 helper.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1965
/*
 * Disable the vertex shader stage: no VS constant buffer and no VS
 * kernel, so vertices pass through untouched.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
1986
/*
 * Disable the geometry shader stage: no GS constant buffer and no GS
 * kernel, so primitives pass through untouched.
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable GS constant buffer */
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
        
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
}
2008
/*
 * Disable the clip stage: all-zero 3DSTATE_CLIP leaves clipping off
 * and primitives pass through unclipped (the screen-aligned RECTLIST
 * never needs clipping).
 */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}
2020
/*
 * Program the 20-dword GEN6 3DSTATE_SF: one attribute output, one URB
 * entry read starting at offset 0, culling disabled, and trifan
 * provoking vertex 2.  Everything else (scissor, line/point state,
 * attribute overrides in DW9-DW19) stays zero.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW14 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW19 */
}
2050
/*
 * Program the pixel-shader push constants (sourced from curbe.bo) and
 * 3DSTATE_WM for the selected PS kernel: dispatch starts at GRF 6,
 * up to 40 threads (encoded as N-1), SIMD16 dispatch, one SF output,
 * perspective pixel barycentric interpolation.
 *
 * @kernel indexes render_state->render_kernels (PS_KERNEL or
 * PS_SUBPIC_KERNEL).
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    /* NOTE: the macro name below carries an upstream "SHITF" typo for
     * SAMPLER_COUNT; it is kept as declared in the headers. */
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
}
2085
/*
 * Define the two vertex elements read from the single vertex buffer:
 * element 0 is the position (X,Y) at byte offset 0 and element 1 is
 * the texture coordinate (S,T) at byte offset 8; both are R32G32_FLOAT
 * padded with 1.0 in the Z and W components.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
2113
/*
 * Bind the vertex buffer (buffer index 0, 16-byte pitch: 4 floats per
 * vertex) and kick a 3-vertex RECTLIST primitive — the standard way a
 * full-screen/destination quad is drawn on this hardware.  The second
 * relocation (offset 12*4 = 3 vertices * 4 floats) marks the end
 * address of the buffer.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
2144
/*
 * Emit the complete GEN6 3D pipeline into the batch buffer, in the
 * order the hardware expects, bracketed by an atomic batch section
 * (0x1000 bytes reserved so the sequence is never split across batch
 * buffers) and preceded by an MI_FLUSH.  The indirect state referenced
 * here must already have been built by gen6_render_setup_states() /
 * gen6_subpicture_render_setup_states().
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2171
2172 static void
2173 gen6_render_put_surface(
2174     VADriverContextP   ctx,
2175     VASurfaceID        surface,
2176     const VARectangle *src_rect,
2177     const VARectangle *dst_rect,
2178     unsigned int       flags
2179 )
2180 {
2181     struct i965_driver_data *i965 = i965_driver_data(ctx);
2182     struct intel_batchbuffer *batch = i965->batch;
2183
2184     gen6_render_initialize(ctx);
2185     gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
2186     i965_clear_dest_region(ctx);
2187     gen6_render_emit_states(ctx, PS_KERNEL);
2188     intel_batchbuffer_flush(batch);
2189 }
2190
2191 static void
2192 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2193 {
2194     struct i965_driver_data *i965 = i965_driver_data(ctx);
2195     struct i965_render_state *render_state = &i965->render_state;
2196     struct gen6_blend_state *blend_state;
2197
2198     dri_bo_unmap(render_state->cc.state);    
2199     dri_bo_map(render_state->cc.blend, 1);
2200     assert(render_state->cc.blend->virtual);
2201     blend_state = render_state->cc.blend->virtual;
2202     memset(blend_state, 0, sizeof(*blend_state));
2203     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2204     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2205     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2206     blend_state->blend0.blend_enable = 1;
2207     blend_state->blend1.post_blend_clamp_enable = 1;
2208     blend_state->blend1.pre_blend_clamp_enable = 1;
2209     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2210     dri_bo_unmap(render_state->cc.blend);
2211 }
2212
/*
 * Build all indirect state for GEN6 subpicture rendering.  Same
 * sequence as gen6_render_setup_states() but with subpicture source
 * surfaces, alpha blending enabled, and subpicture-specific constants
 * and vertex data.
 */
static void
gen6_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_dest_surface_state(ctx, 0); /* render target at binding index 0 */
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, surface);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
2231
2232 static void
2233 gen6_render_put_subpicture(
2234     VADriverContextP   ctx,
2235     VASurfaceID        surface,
2236     const VARectangle *src_rect,
2237     const VARectangle *dst_rect
2238 )
2239 {
2240     struct i965_driver_data *i965 = i965_driver_data(ctx);
2241     struct intel_batchbuffer *batch = i965->batch;
2242     struct object_surface *obj_surface = SURFACE(surface);
2243     unsigned int index = obj_surface->subpic_render_idx;
2244     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
2245
2246     assert(obj_subpic);
2247     gen6_render_initialize(ctx);
2248     gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2249     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2250     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2251     intel_batchbuffer_flush(batch);
2252 }
2253
2254 /*
2255  * for GEN7
2256  */
2257 static void 
2258 gen7_render_initialize(VADriverContextP ctx)
2259 {
2260     struct i965_driver_data *i965 = i965_driver_data(ctx);
2261     struct i965_render_state *render_state = &i965->render_state;
2262     dri_bo *bo;
2263
2264     /* VERTEX BUFFER */
2265     dri_bo_unreference(render_state->vb.vertex_buffer);
2266     bo = dri_bo_alloc(i965->intel.bufmgr,
2267                       "vertex buffer",
2268                       4096,
2269                       4096);
2270     assert(bo);
2271     render_state->vb.vertex_buffer = bo;
2272
2273     /* WM */
2274     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2275     bo = dri_bo_alloc(i965->intel.bufmgr,
2276                       "surface state & binding table",
2277                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2278                       4096);
2279     assert(bo);
2280     render_state->wm.surface_state_binding_table_bo = bo;
2281
2282     dri_bo_unreference(render_state->wm.sampler);
2283     bo = dri_bo_alloc(i965->intel.bufmgr,
2284                       "sampler state",
2285                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2286                       4096);
2287     assert(bo);
2288     render_state->wm.sampler = bo;
2289     render_state->wm.sampler_count = 0;
2290
2291     /* COLOR CALCULATOR */
2292     dri_bo_unreference(render_state->cc.state);
2293     bo = dri_bo_alloc(i965->intel.bufmgr,
2294                       "color calc state",
2295                       sizeof(struct gen6_color_calc_state),
2296                       4096);
2297     assert(bo);
2298     render_state->cc.state = bo;
2299
2300     /* CC VIEWPORT */
2301     dri_bo_unreference(render_state->cc.viewport);
2302     bo = dri_bo_alloc(i965->intel.bufmgr,
2303                       "cc viewport",
2304                       sizeof(struct i965_cc_viewport),
2305                       4096);
2306     assert(bo);
2307     render_state->cc.viewport = bo;
2308
2309     /* BLEND STATE */
2310     dri_bo_unreference(render_state->cc.blend);
2311     bo = dri_bo_alloc(i965->intel.bufmgr,
2312                       "blend state",
2313                       sizeof(struct gen6_blend_state),
2314                       4096);
2315     assert(bo);
2316     render_state->cc.blend = bo;
2317
2318     /* DEPTH & STENCIL STATE */
2319     dri_bo_unreference(render_state->cc.depth_stencil);
2320     bo = dri_bo_alloc(i965->intel.bufmgr,
2321                       "depth & stencil state",
2322                       sizeof(struct gen6_depth_stencil_state),
2323                       4096);
2324     assert(bo);
2325     render_state->cc.depth_stencil = bo;
2326 }
2327
2328 static void
2329 gen7_render_color_calc_state(VADriverContextP ctx)
2330 {
2331     struct i965_driver_data *i965 = i965_driver_data(ctx);
2332     struct i965_render_state *render_state = &i965->render_state;
2333     struct gen6_color_calc_state *color_calc_state;
2334     
2335     dri_bo_map(render_state->cc.state, 1);
2336     assert(render_state->cc.state->virtual);
2337     color_calc_state = render_state->cc.state->virtual;
2338     memset(color_calc_state, 0, sizeof(*color_calc_state));
2339     color_calc_state->constant_r = 1.0;
2340     color_calc_state->constant_g = 0.0;
2341     color_calc_state->constant_b = 1.0;
2342     color_calc_state->constant_a = 1.0;
2343     dri_bo_unmap(render_state->cc.state);
2344 }
2345
2346 static void
2347 gen7_render_blend_state(VADriverContextP ctx)
2348 {
2349     struct i965_driver_data *i965 = i965_driver_data(ctx);
2350     struct i965_render_state *render_state = &i965->render_state;
2351     struct gen6_blend_state *blend_state;
2352     
2353     dri_bo_map(render_state->cc.blend, 1);
2354     assert(render_state->cc.blend->virtual);
2355     blend_state = render_state->cc.blend->virtual;
2356     memset(blend_state, 0, sizeof(*blend_state));
2357     blend_state->blend1.logic_op_enable = 1;
2358     blend_state->blend1.logic_op_func = 0xc;
2359     blend_state->blend1.pre_blend_clamp_enable = 1;
2360     dri_bo_unmap(render_state->cc.blend);
2361 }
2362
2363 static void
2364 gen7_render_depth_stencil_state(VADriverContextP ctx)
2365 {
2366     struct i965_driver_data *i965 = i965_driver_data(ctx);
2367     struct i965_render_state *render_state = &i965->render_state;
2368     struct gen6_depth_stencil_state *depth_stencil_state;
2369     
2370     dri_bo_map(render_state->cc.depth_stencil, 1);
2371     assert(render_state->cc.depth_stencil->virtual);
2372     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2373     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2374     dri_bo_unmap(render_state->cc.depth_stencil);
2375 }
2376
2377 static void 
2378 gen7_render_sampler(VADriverContextP ctx)
2379 {
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct i965_render_state *render_state = &i965->render_state;
2382     struct gen7_sampler_state *sampler_state;
2383     int i;
2384     
2385     assert(render_state->wm.sampler_count > 0);
2386     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2387
2388     dri_bo_map(render_state->wm.sampler, 1);
2389     assert(render_state->wm.sampler->virtual);
2390     sampler_state = render_state->wm.sampler->virtual;
2391     for (i = 0; i < render_state->wm.sampler_count; i++) {
2392         memset(sampler_state, 0, sizeof(*sampler_state));
2393         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2394         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2395         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2396         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2397         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2398         sampler_state++;
2399     }
2400
2401     dri_bo_unmap(render_state->wm.sampler);
2402 }
2403
/*
 * Build all indirect state needed for a GEN7 PutSurface.  Same
 * sequence as the GEN6 variant, but using the GEN7 sampler layout and
 * the GEN7 CC/blend/depth-stencil fillers.
 */
static void
gen7_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_dest_surface_state(ctx, 0); /* render target at binding index 0 */
    i965_render_src_surfaces_state(ctx, surface, flags);
    gen7_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx, surface);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
}
2423
/*
 * Emit the GEN7 state that never varies for this driver: select the
 * 3D pipeline, program single-sample (non-MSAA) rasterization, and
 * clear the system instruction pointer.  Unlike the GEN6 variant this
 * one brackets each command with BEGIN/ADVANCE_BATCH, and
 * 3DSTATE_MULTISAMPLE grew to 4 dwords on GEN7.
 */
static void
gen7_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1); /* only sample 0 enabled */
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2453
/*
 * Emit STATE_BASE_ADDRESS for GEN7 (10 dwords, same layout as GEN6).
 * Only the surface state base is relocated, to the combined
 * surface-state/binding-table BO; every other base and upper bound is
 * left at zero with the modify bit set.
 */
static void
gen7_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
2472
static void
gen7_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point the CC viewport at the bo filled in by i965_render_cc_viewport() */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.viewport,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);

    /* No SF/CLIP viewport is used: a zero pointer */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2493
2494 /*
2495  * URB layout on GEN7 
2496  * ----------------------------------------
2497  * | PS Push Constants (8KB) | VS entries |
2498  * ----------------------------------------
2499  */
static void
gen7_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    /* Haswell has a larger URB, so it gets twice as many VS entries */
    unsigned int num_urb_entries = 32;

    if (IS_HASWELL(i965->intel.device_id))
        num_urb_entries = 64;

    /* Reserve 8KB of the URB for PS push constants (see the layout
     * diagram above this function) */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch, 8); /* in 1KBs */
    ADVANCE_BATCH(batch);

    /* VS entries start after the push-constant region */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch, 
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   /* GS/HS/DS are unused (pass-through pipeline): zero entries each */
   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);

   BEGIN_BATCH(batch, 2);
   OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
   OUT_BATCH(batch,
             (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
   ADVANCE_BATCH(batch);
}
2544
static void
gen7_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point the hardware at the color-calc, blend and depth/stencil
     * state buffers prepared by the gen7_render_*_state() helpers.
     * NOTE(review): the reloc delta of 1 presumably sets the low
     * "pointer valid/modify" bit of each state pointer — confirm
     * against the Ivy Bridge PRM.
     */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.state,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.blend,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              1);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
    OUT_RELOC(batch,
              render_state->cc.depth_stencil,
              I915_GEM_DOMAIN_INSTRUCTION, 0, 
              1);
    ADVANCE_BATCH(batch);
}
2576
static void
gen7_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Point the pixel shader at the sampler state built by
     * i965_render_sampler(); only the PS stage uses samplers here. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_RELOC(batch,
              render_state->wm.sampler,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2592
static void
gen7_emit_binding_table(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* The PS binding table lives at a fixed offset within the
     * surface-state bo set up as the surface state base address. */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}
2604
static void
gen7_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* No depth testing is needed for video rendering: program a NULL
     * depth buffer (surface type NULL) and zeroed clear params. */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2629
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    /* The drawing rectangle command is unchanged from earlier gens;
     * reuse the common emitter. */
    i965_render_drawing_rectangle(ctx);
}
2635
2636 static void 
2637 gen7_emit_vs_state(VADriverContextP ctx)
2638 {
2639     struct i965_driver_data *i965 = i965_driver_data(ctx);
2640     struct intel_batchbuffer *batch = i965->batch;
2641
2642     /* disable VS constant buffer */
2643     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2644     OUT_BATCH(batch, 0);
2645     OUT_BATCH(batch, 0);
2646     OUT_BATCH(batch, 0);
2647     OUT_BATCH(batch, 0);
2648     OUT_BATCH(batch, 0);
2649     OUT_BATCH(batch, 0);
2650         
2651     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2652     OUT_BATCH(batch, 0); /* without VS kernel */
2653     OUT_BATCH(batch, 0);
2654     OUT_BATCH(batch, 0);
2655     OUT_BATCH(batch, 0);
2656     OUT_BATCH(batch, 0); /* pass-through */
2657 }
2658
static void 
gen7_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Disable every geometry stage the video render path does not use:
     * GS, HS, TE, DS and stream-out.  Each stage gets a zeroed constant
     * buffer, a zeroed stage-state packet and (where applicable) a NULL
     * binding table, so vertices flow straight from VF to SF/WM.
     */

    /* bypass GS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);      
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2757
2758 static void 
2759 gen7_emit_clip_state(VADriverContextP ctx)
2760 {
2761     struct i965_driver_data *i965 = i965_driver_data(ctx);
2762     struct intel_batchbuffer *batch = i965->batch;
2763
2764     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2765     OUT_BATCH(batch, 0);
2766     OUT_BATCH(batch, 0); /* pass-through */
2767     OUT_BATCH(batch, 0);
2768 }
2769
static void 
gen7_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* SBE: forward one attribute (the texture coordinate) from the URB
     * entry to the pixel shader; read length 1, offset 0. */
    BEGIN_BATCH(batch, 14);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
    OUT_BATCH(batch,
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW4 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* DW9 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SF: no culling; provoking vertex 2 for trifans */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2806
static void 
gen7_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
    unsigned int num_samples = 0;

    /* Haswell moved the PS max-thread field and added a sample mask */
    if (IS_HASWELL(i965->intel.device_id)) {
        max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
        num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
    }

    /* WM: dispatch enabled, perspective pixel barycentric interpolation */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
    OUT_BATCH(batch,
              GEN7_WM_DISPATCH_ENABLE |
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS push constants come from the curbe bo
     * (filled by i965_render_upload_constants) */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
    OUT_BATCH(batch, 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS: point at the selected render kernel; SIMD16 dispatch starting
     * at GRF 6.
     * NOTE(review): the "(86 - 1)" max-thread count and the binding
     * table entry count of 5 are magic values — presumably matched to
     * the IVB EU count and the surfaces bound by the setup code;
     * confirm against the PRM before changing.
     */
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
    OUT_RELOC(batch, 
              render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(batch, 
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 
              ((86 - 1) << max_threads_shift) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_ATTRIBUTE_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    OUT_BATCH(batch, 
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    ADVANCE_BATCH(batch);
}
2863
2864 static void
2865 gen7_emit_vertex_element_state(VADriverContextP ctx)
2866 {
2867     struct i965_driver_data *i965 = i965_driver_data(ctx);
2868     struct intel_batchbuffer *batch = i965->batch;
2869
2870     /* Set up our vertex elements, sourced from the single vertex buffer. */
2871     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2872     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2873     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2874               GEN6_VE0_VALID |
2875               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2876               (0 << VE0_OFFSET_SHIFT));
2877     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2878               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2879               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2880               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2881     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2882     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2883               GEN6_VE0_VALID |
2884               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2885               (8 << VE0_OFFSET_SHIFT));
2886     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2887               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2888               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2889               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2890 }
2891
static void
gen7_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    /* Bind the vertex buffer: 16-byte stride (4 floats per vertex),
     * end address 12 dwords in (3 vertices x 4 floats). */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Draw a single RECTLIST primitive (3 vertices, 1 instance) */
    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST |
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2923
static void
gen7_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Emit the full Gen7 3D pipeline setup and the draw call as one
     * atomic batch section (0x1000 dwords reserved), using the given
     * pixel-shader kernel index.  The order of the emitters below
     * matters: state base address and viewports must precede the
     * stage state that references them.
     */
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_emit_invarient_states(ctx);
    gen7_emit_state_base_address(ctx);
    gen7_emit_viewport_state_pointers(ctx);
    gen7_emit_urb(ctx);
    gen7_emit_cc_state_pointers(ctx);
    gen7_emit_sampler_state_pointers(ctx);
    gen7_emit_bypass_state(ctx);
    gen7_emit_vs_state(ctx);
    gen7_emit_clip_state(ctx);
    gen7_emit_sf_state(ctx);
    gen7_emit_wm_state(ctx, kernel);
    gen7_emit_binding_table(ctx);
    gen7_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen7_emit_vertex_element_state(ctx);
    gen7_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}
2950
static void
gen7_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* Render src_rect of the surface into dst_rect of the drawable:
     * allocate/reset render state, build all indirect state, clear the
     * area outside dst_rect, emit the batch and flush it to the GPU. */
    gen7_render_initialize(ctx);
    gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
    i965_clear_dest_region(ctx);
    gen7_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}
2969
static void
gen7_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* Fill cc.blend with standard "source over" alpha blending
     * (src * alpha + dst * (1 - alpha)) so the subpicture is
     * composited onto the frame, with pre/post blend clamping to
     * [0, 1].
     *
     * NOTE(review): the unmap of cc.state below looks like copy-paste
     * residue — this function maps cc.blend, not cc.state.  It is
     * presumably a harmless no-op if cc.state is already unmapped by
     * gen7_render_color_calc_state(); verify before removing.
     */
    dri_bo_unmap(render_state->cc.state);    
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
2991
static void
gen7_subpicture_render_setup_states(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    /* Build the indirect state for subpicture rendering.  Differs from
     * the main-surface path in the source surfaces (the subpicture
     * image), the blend state (alpha blending enabled) and the
     * constants/vertices uploaded. */
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen7_render_color_calc_state(ctx);
    gen7_subpicture_render_blend_state(ctx);
    gen7_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_constants(ctx, surface);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}
3010
3011 static void
3012 gen7_render_put_subpicture(
3013     VADriverContextP   ctx,
3014     VASurfaceID        surface,
3015     const VARectangle *src_rect,
3016     const VARectangle *dst_rect
3017 )
3018 {
3019     struct i965_driver_data *i965 = i965_driver_data(ctx);
3020     struct intel_batchbuffer *batch = i965->batch;
3021     struct object_surface *obj_surface = SURFACE(surface);
3022     unsigned int index = obj_surface->subpic_render_idx;
3023     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic[index]);
3024
3025     assert(obj_subpic);
3026     gen7_render_initialize(ctx);
3027     gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
3028     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
3029     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
3030     intel_batchbuffer_flush(batch);
3031 }
3032
3033
3034 /*
3035  * global functions
3036  */
3037 VAStatus 
3038 i965_DestroySurfaces(VADriverContextP ctx,
3039                      VASurfaceID *surface_list,
3040                      int num_surfaces);
void
intel_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    /* Run post-processing first; it may return a new (possibly already
     * scaled) surface to render instead of the original one. */
    VASurfaceID in_surface_id = surface;
    VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID)
        in_surface_id = out_surface_id;

    /* If PP already scaled, the source rectangle is now dst_rect-sized */
    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);

    /* Release the temporary surface created by post-processing */
    if (in_surface_id != surface)
        i965_DestroySurfaces(ctx, &in_surface_id, 1);
}
3070
3071 void
3072 intel_render_put_subpicture(
3073     VADriverContextP   ctx,
3074     VASurfaceID        surface,
3075     const VARectangle *src_rect,
3076     const VARectangle *dst_rect
3077 )
3078 {
3079     struct i965_driver_data *i965 = i965_driver_data(ctx);
3080
3081     if (IS_GEN7(i965->intel.device_id))
3082         gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3083     else if (IS_GEN6(i965->intel.device_id))
3084         gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3085     else
3086         i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
3087 }
3088
Bool 
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* Select the shader kernel table for this hardware generation,
     * upload each kernel into its own bo, and allocate the constant
     * (CURBE) buffer.
     *
     * NOTE(review): allocation failures are handled with assert() only,
     * which compiles out in NDEBUG builds — a failed dri_bo_alloc would
     * then be dereferenced later; consider returning False instead.
     */

    /* kernel */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
                                 sizeof(render_kernels_gen6[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels,
               (IS_HASWELL(i965->intel.device_id) ? render_kernels_gen7_haswell : render_kernels_gen7),
               sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        /* Some table slots may be empty for a given generation */
        if (!kernel->size)
            continue;

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
                                  kernel->name, 
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 64);
    assert(render_state->curbe.bo);

    return True;
}
3134
3135 Bool 
3136 i965_render_terminate(VADriverContextP ctx)
3137 {
3138     int i;
3139     struct i965_driver_data *i965 = i965_driver_data(ctx);
3140     struct i965_render_state *render_state = &i965->render_state;
3141
3142     dri_bo_unreference(render_state->curbe.bo);
3143     render_state->curbe.bo = NULL;
3144
3145     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3146         struct i965_kernel *kernel = &render_state->render_kernels[i];
3147         
3148         dri_bo_unreference(kernel->bo);
3149         kernel->bo = NULL;
3150     }
3151
3152     dri_bo_unreference(render_state->vb.vertex_buffer);
3153     render_state->vb.vertex_buffer = NULL;
3154     dri_bo_unreference(render_state->vs.state);
3155     render_state->vs.state = NULL;
3156     dri_bo_unreference(render_state->sf.state);
3157     render_state->sf.state = NULL;
3158     dri_bo_unreference(render_state->wm.sampler);
3159     render_state->wm.sampler = NULL;
3160     dri_bo_unreference(render_state->wm.state);
3161     render_state->wm.state = NULL;
3162     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
3163     dri_bo_unreference(render_state->cc.viewport);
3164     render_state->cc.viewport = NULL;
3165     dri_bo_unreference(render_state->cc.state);
3166     render_state->cc.state = NULL;
3167     dri_bo_unreference(render_state->cc.blend);
3168     render_state->cc.blend = NULL;
3169     dri_bo_unreference(render_state->cc.depth_stencil);
3170     render_state->cc.depth_stencil = NULL;
3171
3172     if (render_state->draw_region) {
3173         dri_bo_unreference(render_state->draw_region->bo);
3174         free(render_state->draw_region);
3175         render_state->draw_region = NULL;
3176     }
3177
3178     return True;
3179 }
3180