i965_drv_video: store kernel info in the corresponding context
[platform/upstream/libva.git] / i965_drv_video / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_backend.h>
40 #include "va/x11/va_dricommon.h"
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
/* GRF (general register file) usage and thread limits for the SF kernel. */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/* Strips-and-fans kernel for the original i965 (Gen4); the .g4b files are
 * generated EU binaries, stored as rows of four dwords (one instruction). */
static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

/* GRF usage and thread limits for the pixel shader (WM) kernels. */
#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

/* Convert a GRF register count to the hardware's "blocks of 16" encoding. */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Pixel shader: sample planar YUV and convert to RGB. */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Pixel shader for subpictures: sample ARGB and write it out directly. */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE (Gen5): same programs, reassembled for the Gen5 ISA. */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge (Gen6). The SF kernel is empty: Gen6 uses
 * fixed-function setup instead of an SF thread. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Surface state objects are padded to a 32-byte boundary; the binding
 * table (one dword per surface) lives right after the last surface state
 * in the same bo. */
#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
122
/* Return the raw IEEE-754 bit pattern of a 32-bit float.
 * The bit-cast is done with memcpy, which the compiler folds to a plain
 * register move and which avoids any strict-aliasing concerns. */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
133
/* Indices into the render_kernels_gen* tables below (and into the
 * render_state->render_kernels array used by the unit-state setup). */
enum 
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};
140
/* Kernel descriptors for Gen4, indexed by the enum above.
 * Initializer order follows struct i965_kernel: name, interface index,
 * static binary, binary size in bytes, and the bo that receives the
 * uploaded kernel (NULL until upload) — field order presumed from the
 * initializers here; confirm against i965_drv_video.h. */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};
165
/* Kernel descriptors for Ironlake (Gen5); same layout as the Gen4 table
 * but pointing at the Gen5 binaries. */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};
190
/* Kernel descriptors for Sandybridge (Gen6). Note the SF entry points at
 * an empty array (Gen6 has no SF thread); it is kept so the table stays
 * index-compatible with the other generations. */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};
215
/* Static partitioning of the URB (unified return buffer) between the
 * fixed-function stages. GS and CLIP are unused by this driver, so they
 * get zero entries. Entry sizes are presumably in URB rows as consumed
 * by the *_ENTRY_SIZE - 1 encodings below — confirm against the 965 PRM. */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
230
231 static void
232 i965_render_vs_unit(VADriverContextP ctx)
233 {
234     struct i965_driver_data *i965 = i965_driver_data(ctx);
235     struct i965_render_state *render_state = &i965->render_state;
236     struct i965_vs_unit_state *vs_state;
237
238     dri_bo_map(render_state->vs.state, 1);
239     assert(render_state->vs.state->virtual);
240     vs_state = render_state->vs.state->virtual;
241     memset(vs_state, 0, sizeof(*vs_state));
242
243     if (IS_IRONLAKE(i965->intel.device_id))
244         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
245     else
246         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
247
248     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
249     vs_state->vs6.vs_enable = 0;
250     vs_state->vs6.vert_cache_disable = 1;
251     
252     dri_bo_unmap(render_state->vs.state);
253 }
254
/* Program the SF (strips-and-fans) unit state: point it at the SF kernel,
 * describe its thread/URB usage, and disable viewport transform, culling
 * and scissoring since the driver emits already-transformed rectangles. */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* Kernel start pointer is in 64-byte units (hence >> 6); the bo offset
     * written here is only a placeholder — the reloc emitted below patches
     * in the real GPU address at execution time. */
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    /* Provoking vertex for triangle fans. */
    sf_state->sf7.trifan_pv = 2;

    /* Half-pixel (0.5) destination origin bias, in 1/16 units. */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Relocate the kernel pointer in thread0; the grf_reg_count bits
     * already written share the dword, so they ride along in the delta. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
311
312 static void 
313 i965_render_sampler(VADriverContextP ctx)
314 {
315     struct i965_driver_data *i965 = i965_driver_data(ctx);
316     struct i965_render_state *render_state = &i965->render_state;
317     struct i965_sampler_state *sampler_state;
318     int i;
319     
320     assert(render_state->wm.sampler_count > 0);
321     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
322
323     dri_bo_map(render_state->wm.sampler, 1);
324     assert(render_state->wm.sampler->virtual);
325     sampler_state = render_state->wm.sampler->virtual;
326     for (i = 0; i < render_state->wm.sampler_count; i++) {
327         memset(sampler_state, 0, sizeof(*sampler_state));
328         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
329         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
330         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
331         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
332         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
333         sampler_state++;
334     }
335
336     dri_bo_unmap(render_state->wm.sampler);
337 }
/* Program the WM (pixel shader) unit state for subpicture rendering,
 * pointing at the PS_SUBPIC kernel.
 * NOTE(review): nearly identical to i965_render_wm_unit below except for
 * the kernel index and dispatch_grf_start_reg — a candidate for sharing. */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel pointer in 64-byte units; patched by the reloc below. */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is in 32-byte units (>> 5); placeholder,
     * patched by the second reloc below. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in multiples of 4 samplers. */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
401
402
/* Program the WM (pixel shader) unit state for normal video rendering,
 * pointing at the planar-YUV->RGB PS kernel. */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel pointer in 64-byte units; patched by the reloc below. */
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is in 32-byte units (>> 5); placeholder,
     * patched by the second reloc below. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in multiples of 4 samplers. */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
466
467 static void 
468 i965_render_cc_viewport(VADriverContextP ctx)
469 {
470     struct i965_driver_data *i965 = i965_driver_data(ctx);
471     struct i965_render_state *render_state = &i965->render_state;
472     struct i965_cc_viewport *cc_viewport;
473
474     dri_bo_map(render_state->cc.viewport, 1);
475     assert(render_state->cc.viewport->virtual);
476     cc_viewport = render_state->cc.viewport->virtual;
477     memset(cc_viewport, 0, sizeof(*cc_viewport));
478     
479     cc_viewport->min_depth = -1.e35;
480     cc_viewport->max_depth = 1.e35;
481
482     dri_bo_unmap(render_state->cc.viewport);
483 }
484
/* Program the CC (color calculator) unit for subpicture rendering:
 * blending is enabled so the ARGB subpicture composites over the video
 * using src-alpha / inv-src-alpha. */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* Viewport pointer in 32-byte units; relocated below. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    /* Independent-alpha factors are set but ia_blend_enable is 0 above,
     * so these only matter if that bit is ever flipped on. */
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;


    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
536
537
/* Program the CC (color calculator) unit for normal video rendering:
 * no blending — the converted RGB pixels are written straight to the
 * render target. */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* Viewport pointer in 32-byte units; relocated below. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    /* 0xc is the COPY logic op (source passes through unchanged). */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
575
576 static void
577 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
578 {
579     switch (tiling) {
580     case I915_TILING_NONE:
581         ss->ss3.tiled_surface = 0;
582         ss->ss3.tile_walk = 0;
583         break;
584     case I915_TILING_X:
585         ss->ss3.tiled_surface = 1;
586         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
587         break;
588     case I915_TILING_Y:
589         ss->ss3.tiled_surface = 1;
590         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
591         break;
592     }
593 }
594
/* Emit one source (sampled) surface state at binding-table slot `index`
 * describing `region` + `offset` as a 2D surface of the given geometry
 * and format, and point the binding table entry at it.
 * Side effect: bumps wm.sampler_count — one sampler is allocated per
 * source surface state emitted. */
static void
i965_render_src_surface_state(VADriverContextP ctx, 
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    unsigned int tiling;
    unsigned int swizzle;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    /* Surface states are packed at 32-byte-padded offsets in ss_bo. */
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    /* Placeholder address; the reloc below patches in the real one. */
    ss->ss1.base_addr = region->offset + offset;

    /* Hardware stores width/height/pitch minus one. */
    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(region, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      region);

    /* Binding table lives after the surface states in the same bo. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
650
/* Bind the source video surface's planes (Y, then U/V or interleaved UV)
 * as sampled surfaces. If a post-processed copy exists (pp_out_bo) it is
 * used instead of the raw decode output. Each plane is bound twice,
 * presumably because the PS kernel reads each channel from two binding
 * slots — TODO confirm against the shader sources. */
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (!render_state->inited) {
        /* NOTE(review): before the render state is initialized the U and V
         * planes are bound at swapped slot numbers (U at 5/6, V at 3/4)
         * relative to the inited path below — looks intentional but the
         * reason is not visible here; confirm before touching. */
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (render_state->interleaved_uv) {
            /* NV12-style layout: one interleaved UV plane after the Y plane. */
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}
703
704 static void
705 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
706                               VASurfaceID surface)
707 {
708     struct i965_driver_data *i965 = i965_driver_data(ctx);  
709     struct object_surface *obj_surface = SURFACE(surface);
710     int w, h;
711     dri_bo *region;
712     dri_bo *subpic_region;
713     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
714     struct object_image *obj_image = IMAGE(obj_subpic->image);
715     assert(obj_surface);
716     assert(obj_surface->bo);
717     w = obj_surface->width;
718     h = obj_surface->height;
719     region = obj_surface->bo;
720     subpic_region = obj_image->bo;
721     /*subpicture surface*/
722     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
723     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
724 }
725
/* Emit the render-target surface state for the destination drawable at
 * binding-table slot `index`, choosing B5G6R5 or B8G8R8A8 from the
 * drawable's bytes-per-pixel. */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    /* 16bpp drawables get RGB565; everything else is treated as 32bpp. */
    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
        } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    /* Placeholder address; the reloc below patches in the real one. */
    ss->ss1.base_addr = dest_region->bo->offset;

    /* Hardware stores width/height/pitch minus one. */
    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    /* Render target: both read and write domains are RENDER. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
779
780 static void 
781 i965_subpic_render_upload_vertex(VADriverContextP ctx,
782                                  VASurfaceID surface,
783                                  const VARectangle *output_rect)
784 {    
785     struct i965_driver_data  *i965         = i965_driver_data(ctx);
786     struct i965_render_state *render_state = &i965->render_state;
787     struct object_surface    *obj_surface  = SURFACE(surface);
788     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
789
790     const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
791     const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
792     float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
793     int i = 0;
794
795     VARectangle dst_rect;
796     dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
797     dst_rect.y      = output_rect->y + sx * (float)obj_subpic->dst_rect.y;
798     dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
799     dst_rect.height = sy * (float)obj_subpic->dst_rect.height;
800
801     dri_bo_map(render_state->vb.vertex_buffer, 1);
802     assert(render_state->vb.vertex_buffer->virtual);
803     vb = render_state->vb.vertex_buffer->virtual;
804
805     tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
806     ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
807     tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
808     ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;
809
810     x1 = (float)dst_rect.x;
811     y1 = (float)dst_rect.y;
812     x2 = (float)(dst_rect.x + dst_rect.width);
813     y2 = (float)(dst_rect.y + dst_rect.height);
814
815     vb[i++] = tx2;
816     vb[i++] = ty2;
817     vb[i++] = x2;
818     vb[i++] = y2;
819
820     vb[i++] = tx1;
821     vb[i++] = ty2;
822     vb[i++] = x1;
823     vb[i++] = y2;
824
825     vb[i++] = tx1;
826     vb[i++] = ty1;
827     vb[i++] = x1;
828     vb[i++] = y1;
829     dri_bo_unmap(render_state->vb.vertex_buffer);
830 }
831
832 static void 
833 i965_render_upload_vertex(VADriverContextP ctx,
834                           VASurfaceID surface,
835                           short srcx,
836                           short srcy,
837                           unsigned short srcw,
838                           unsigned short srch,
839                           short destx,
840                           short desty,
841                           unsigned short destw,
842                           unsigned short desth)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     struct i965_render_state *render_state = &i965->render_state;
846     struct intel_region *dest_region = render_state->draw_region;
847     struct object_surface *obj_surface;
848     float *vb;
849
850     float u1, v1, u2, v2;
851     int i, width, height;
852     int box_x1 = dest_region->x + destx;
853     int box_y1 = dest_region->y + desty;
854     int box_x2 = box_x1 + destw;
855     int box_y2 = box_y1 + desth;
856
857     obj_surface = SURFACE(surface);
858     assert(surface);
859     width = obj_surface->orig_width;
860     height = obj_surface->orig_height;
861
862     u1 = (float)srcx / width;
863     v1 = (float)srcy / height;
864     u2 = (float)(srcx + srcw) / width;
865     v2 = (float)(srcy + srch) / height;
866
867     dri_bo_map(render_state->vb.vertex_buffer, 1);
868     assert(render_state->vb.vertex_buffer->virtual);
869     vb = render_state->vb.vertex_buffer->virtual;
870
871     i = 0;
872     vb[i++] = u2;
873     vb[i++] = v2;
874     vb[i++] = (float)box_x2;
875     vb[i++] = (float)box_y2;
876     
877     vb[i++] = u1;
878     vb[i++] = v2;
879     vb[i++] = (float)box_x1;
880     vb[i++] = (float)box_y2;
881
882     vb[i++] = u1;
883     vb[i++] = v1;
884     vb[i++] = (float)box_x1;
885     vb[i++] = (float)box_y1;
886
887     dri_bo_unmap(render_state->vb.vertex_buffer);
888 }
889
890 static void
891 i965_render_upload_constants(VADriverContextP ctx)
892 {
893     struct i965_driver_data *i965 = i965_driver_data(ctx);
894     struct i965_render_state *render_state = &i965->render_state;
895     unsigned short *constant_buffer;
896
897     if (render_state->curbe.upload)
898         return;
899
900     dri_bo_map(render_state->curbe.bo, 1);
901     assert(render_state->curbe.bo->virtual);
902     constant_buffer = render_state->curbe.bo->virtual;
903
904     if (render_state->interleaved_uv)
905         *constant_buffer = 1;
906     else
907         *constant_buffer = 0;
908
909     dri_bo_unmap(render_state->curbe.bo);
910     render_state->curbe.upload = 1;
911 }
912
/*
 * Build all fixed-function unit state, surface/sampler state, vertex data
 * and constants needed to render 'surface' onto the destination box.
 * Consumed afterwards by i965_surface_render_pipeline_setup().
 */
static void
i965_surface_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}
938 static void
939 i965_subpic_render_state_setup(VADriverContextP ctx,
940                         VASurfaceID surface,
941                         short srcx,
942                         short srcy,
943                         unsigned short srcw,
944                         unsigned short srch,
945                         short destx,
946                         short desty,
947                         unsigned short destw,
948                         unsigned short desth)
949 {
950     i965_render_vs_unit(ctx);
951     i965_render_sf_unit(ctx);
952     i965_render_dest_surface_state(ctx, 0);
953     i965_subpic_render_src_surfaces_state(ctx, surface);
954     i965_render_sampler(ctx);
955     i965_subpic_render_wm_unit(ctx);
956     i965_render_cc_viewport(ctx);
957     i965_subpic_render_cc_unit(ctx);
958
959     VARectangle output_rect;
960     output_rect.x      = destx;
961     output_rect.y      = desty;
962     output_rect.width  = destw;
963     output_rect.height = desth;
964     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
965 }
966
967
/* Select the 3D pipeline (as opposed to the media pipeline) for the
 * commands that follow. */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(ctx);
}
975
/* Program the System Instruction Pointer to 0 — no system routine is used. */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
    ADVANCE_BATCH(ctx);
}
984
/*
 * Program STATE_BASE_ADDRESS: surface state offsets become relative to the
 * surface-state/binding-table buffer, every other base stays at 0 (with the
 * MODIFY bit set so the value takes effect).  Ironlake's version of the
 * command is two dwords longer than the older one.
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 8);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 6);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    }
}
1013
/* Point each stage at its binding table.  Only the WM (pixel shader) stage
 * uses one here; VS/GS/CLIP/SF get none. */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(ctx, 0); /* vs */
    OUT_BATCH(ctx, 0); /* gs */
    OUT_BATCH(ctx, 0); /* clip */
    OUT_BATCH(ctx, 0); /* sf */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(ctx);
}
1026
/* Set the constant blend color to R=1, G=0, B=1, A=1. */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 5);
    OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(0.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    ADVANCE_BATCH(ctx);
}
1038
/* Point the fixed-function units at their unit state buffers.
 * GS and CLIP are disabled (zero pointer, no enable bit). */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 7);
    OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(ctx, 0);  /* disable GS */
    OUT_BATCH(ctx, 0);  /* disable CLIP */
    OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}
1055
1056 static void
1057 i965_render_urb_layout(VADriverContextP ctx)
1058 {
1059     int urb_vs_start, urb_vs_size;
1060     int urb_gs_start, urb_gs_size;
1061     int urb_clip_start, urb_clip_size;
1062     int urb_sf_start, urb_sf_size;
1063     int urb_cs_start, urb_cs_size;
1064
1065     urb_vs_start = 0;
1066     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1067     urb_gs_start = urb_vs_start + urb_vs_size;
1068     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1069     urb_clip_start = urb_gs_start + urb_gs_size;
1070     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1071     urb_sf_start = urb_clip_start + urb_clip_size;
1072     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1073     urb_cs_start = urb_sf_start + urb_sf_size;
1074     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1075
1076     BEGIN_BATCH(ctx, 3);
1077     OUT_BATCH(ctx, 
1078               CMD_URB_FENCE |
1079               UF0_CS_REALLOC |
1080               UF0_SF_REALLOC |
1081               UF0_CLIP_REALLOC |
1082               UF0_GS_REALLOC |
1083               UF0_VS_REALLOC |
1084               1);
1085     OUT_BATCH(ctx, 
1086               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1087               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1088               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1089     OUT_BATCH(ctx,
1090               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1091               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1092     ADVANCE_BATCH(ctx);
1093 }
1094
/* Configure the constant (CURBE) region of the URB: URB_CS_ENTRIES entries
 * of URB_CS_ENTRY_SIZE each. */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}
1105
/* Bind the CURBE buffer filled by i965_render_upload_constants().
 * NOTE(review): (1 << 8) appears to be the "buffer valid" bit and the low
 * bits of the reloc delta the buffer length (URB_CS_ENTRY_SIZE - 1) — confirm
 * against the CONSTANT_BUFFER command description in the PRM. */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(ctx, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(ctx);    
}
1119
/* Set the drawing rectangle to cover the whole destination region:
 * origin (0,0), max corner (width-1, height-1) packed as y<<16 | x. */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(ctx, 4);
    OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(ctx, 0x00000000);         
    ADVANCE_BATCH(ctx);
}
1134
/*
 * Describe the two vertex elements, both read from vertex buffer 0:
 * (X, Y) at byte offset 0 and (S0, T0) at byte offset 8, each a pair of
 * 32-bit floats with the missing Z/W components filled with 1.0.
 * Pre-Ironlake parts additionally require explicit destination element
 * offsets in the second dword of each element.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(ctx);
    }
}
1188
/*
 * Upload the palette of an indexed-format image to the sampler.  Each
 * entry packs the caller-supplied alpha in bits 24-31 over the 24-bit
 * palette color in bits 0-23.  No-op for images without a palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* Fill the palette: bits 0-23 color, bits 24-31 alpha. */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(ctx);
}
1213
/*
 * Bind the vertex buffer (4 floats = 16 bytes per vertex) and issue the
 * draw: a single RECTLIST primitive of 3 sequential vertices.
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* NOTE(review): Ironlake gets an end address here (12 floats = 3
     * vertices past the start); older parts get the value 3 — presumably a
     * max vertex index.  Confirm against the VERTEX_BUFFERS description. */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(ctx, 3);

    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1248
/*
 * Clear the whole destination region to solid color 0 with an XY_COLOR_BLT.
 * Handles 32bpp and 16bpp destinations and tiled surfaces; on GEN6 the
 * command goes to the BLT ring.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;  /* raster operation: solid pattern copy */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        /* Only 32bpp and 16bpp destinations are supported. */
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;  /* tiled destinations program pitch in dwords */
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id))
        BEGIN_BLT_BATCH(ctx, 6);
    else
        BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, blt_cmd);
    OUT_BATCH(ctx, br13);
    OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(ctx, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(ctx, 0x0);  /* fill color */
    ADVANCE_BATCH(ctx);
}
1292
/*
 * Clear the destination, then emit the full 3D pipeline programming and
 * the draw itself as one atomic batch.  The command order is fixed by the
 * hardware; do not reorder these calls.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1313
/*
 * Emit the 3D pipeline programming and the draw for the subpicture path.
 * Same sequence as the surface path, but without clearing the destination
 * and without binding a constant buffer.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1332
1333
1334 static void 
1335 i965_render_initialize(VADriverContextP ctx)
1336 {
1337     struct i965_driver_data *i965 = i965_driver_data(ctx);
1338     struct i965_render_state *render_state = &i965->render_state;
1339     dri_bo *bo;
1340
1341     /* VERTEX BUFFER */
1342     dri_bo_unreference(render_state->vb.vertex_buffer);
1343     bo = dri_bo_alloc(i965->intel.bufmgr,
1344                       "vertex buffer",
1345                       4096,
1346                       4096);
1347     assert(bo);
1348     render_state->vb.vertex_buffer = bo;
1349
1350     /* VS */
1351     dri_bo_unreference(render_state->vs.state);
1352     bo = dri_bo_alloc(i965->intel.bufmgr,
1353                       "vs state",
1354                       sizeof(struct i965_vs_unit_state),
1355                       64);
1356     assert(bo);
1357     render_state->vs.state = bo;
1358
1359     /* GS */
1360     /* CLIP */
1361     /* SF */
1362     dri_bo_unreference(render_state->sf.state);
1363     bo = dri_bo_alloc(i965->intel.bufmgr,
1364                       "sf state",
1365                       sizeof(struct i965_sf_unit_state),
1366                       64);
1367     assert(bo);
1368     render_state->sf.state = bo;
1369
1370     /* WM */
1371     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1372     bo = dri_bo_alloc(i965->intel.bufmgr,
1373                       "surface state & binding table",
1374                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1375                       4096);
1376     assert(bo);
1377     render_state->wm.surface_state_binding_table_bo = bo;
1378
1379     dri_bo_unreference(render_state->wm.sampler);
1380     bo = dri_bo_alloc(i965->intel.bufmgr,
1381                       "sampler state",
1382                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1383                       64);
1384     assert(bo);
1385     render_state->wm.sampler = bo;
1386     render_state->wm.sampler_count = 0;
1387
1388     dri_bo_unreference(render_state->wm.state);
1389     bo = dri_bo_alloc(i965->intel.bufmgr,
1390                       "wm state",
1391                       sizeof(struct i965_wm_unit_state),
1392                       64);
1393     assert(bo);
1394     render_state->wm.state = bo;
1395
1396     /* COLOR CALCULATOR */
1397     dri_bo_unreference(render_state->cc.state);
1398     bo = dri_bo_alloc(i965->intel.bufmgr,
1399                       "color calc state",
1400                       sizeof(struct i965_cc_unit_state),
1401                       64);
1402     assert(bo);
1403     render_state->cc.state = bo;
1404
1405     dri_bo_unreference(render_state->cc.viewport);
1406     bo = dri_bo_alloc(i965->intel.bufmgr,
1407                       "cc viewport",
1408                       sizeof(struct i965_cc_viewport),
1409                       64);
1410     assert(bo);
1411     render_state->cc.viewport = bo;
1412 }
1413
/*
 * Render the source rectangle of 'surface' onto the destination box of the
 * current draw region: allocate state buffers, build all state, emit the
 * pipeline + draw, and flush the batch.
 * NOTE(review): the 'flag' argument is unused here — confirm whether it is
 * reserved for deinterlacing/CSC flags at the vtable level.
 */
static void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                            srcx, srcy, srcw, srch,
                            destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(ctx);
}
1434
1435 static void
1436 i965_render_put_subpicture(VADriverContextP ctx,
1437                            VASurfaceID surface,
1438                            short srcx,
1439                            short srcy,
1440                            unsigned short srcw,
1441                            unsigned short srch,
1442                            short destx,
1443                            short desty,
1444                            unsigned short destw,
1445                            unsigned short desth)
1446 {
1447     struct i965_driver_data *i965 = i965_driver_data(ctx);
1448     struct object_surface *obj_surface = SURFACE(surface);
1449     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1450     assert(obj_subpic);
1451
1452     i965_render_initialize(ctx);
1453     i965_subpic_render_state_setup(ctx, surface,
1454                                    srcx, srcy, srcw, srch,
1455                                    destx, desty, destw, desth);
1456     i965_subpic_render_pipeline_setup(ctx);
1457     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1458     intel_batchbuffer_flush(ctx);
1459 }
1460
1461 /*
1462  * for GEN6+
1463  */
1464 static void 
1465 gen6_render_initialize(VADriverContextP ctx)
1466 {
1467     struct i965_driver_data *i965 = i965_driver_data(ctx);
1468     struct i965_render_state *render_state = &i965->render_state;
1469     dri_bo *bo;
1470
1471     /* VERTEX BUFFER */
1472     dri_bo_unreference(render_state->vb.vertex_buffer);
1473     bo = dri_bo_alloc(i965->intel.bufmgr,
1474                       "vertex buffer",
1475                       4096,
1476                       4096);
1477     assert(bo);
1478     render_state->vb.vertex_buffer = bo;
1479
1480     /* WM */
1481     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1482     bo = dri_bo_alloc(i965->intel.bufmgr,
1483                       "surface state & binding table",
1484                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1485                       4096);
1486     assert(bo);
1487     render_state->wm.surface_state_binding_table_bo = bo;
1488
1489     dri_bo_unreference(render_state->wm.sampler);
1490     bo = dri_bo_alloc(i965->intel.bufmgr,
1491                       "sampler state",
1492                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1493                       4096);
1494     assert(bo);
1495     render_state->wm.sampler = bo;
1496     render_state->wm.sampler_count = 0;
1497
1498     /* COLOR CALCULATOR */
1499     dri_bo_unreference(render_state->cc.state);
1500     bo = dri_bo_alloc(i965->intel.bufmgr,
1501                       "color calc state",
1502                       sizeof(struct gen6_color_calc_state),
1503                       4096);
1504     assert(bo);
1505     render_state->cc.state = bo;
1506
1507     /* CC VIEWPORT */
1508     dri_bo_unreference(render_state->cc.viewport);
1509     bo = dri_bo_alloc(i965->intel.bufmgr,
1510                       "cc viewport",
1511                       sizeof(struct i965_cc_viewport),
1512                       4096);
1513     assert(bo);
1514     render_state->cc.viewport = bo;
1515
1516     /* BLEND STATE */
1517     dri_bo_unreference(render_state->cc.blend);
1518     bo = dri_bo_alloc(i965->intel.bufmgr,
1519                       "blend state",
1520                       sizeof(struct gen6_blend_state),
1521                       4096);
1522     assert(bo);
1523     render_state->cc.blend = bo;
1524
1525     /* DEPTH & STENCIL STATE */
1526     dri_bo_unreference(render_state->cc.depth_stencil);
1527     bo = dri_bo_alloc(i965->intel.bufmgr,
1528                       "depth & stencil state",
1529                       sizeof(struct gen6_depth_stencil_state),
1530                       4096);
1531     assert(bo);
1532     render_state->cc.depth_stencil = bo;
1533 }
1534
1535 static void
1536 gen6_render_color_calc_state(VADriverContextP ctx)
1537 {
1538     struct i965_driver_data *i965 = i965_driver_data(ctx);
1539     struct i965_render_state *render_state = &i965->render_state;
1540     struct gen6_color_calc_state *color_calc_state;
1541     
1542     dri_bo_map(render_state->cc.state, 1);
1543     assert(render_state->cc.state->virtual);
1544     color_calc_state = render_state->cc.state->virtual;
1545     memset(color_calc_state, 0, sizeof(*color_calc_state));
1546     color_calc_state->constant_r = 1.0;
1547     color_calc_state->constant_g = 0.0;
1548     color_calc_state->constant_b = 1.0;
1549     color_calc_state->constant_a = 1.0;
1550     dri_bo_unmap(render_state->cc.state);
1551 }
1552
1553 static void
1554 gen6_render_blend_state(VADriverContextP ctx)
1555 {
1556     struct i965_driver_data *i965 = i965_driver_data(ctx);
1557     struct i965_render_state *render_state = &i965->render_state;
1558     struct gen6_blend_state *blend_state;
1559     
1560     dri_bo_map(render_state->cc.blend, 1);
1561     assert(render_state->cc.blend->virtual);
1562     blend_state = render_state->cc.blend->virtual;
1563     memset(blend_state, 0, sizeof(*blend_state));
1564     blend_state->blend1.logic_op_enable = 1;
1565     blend_state->blend1.logic_op_func = 0xc;
1566     dri_bo_unmap(render_state->cc.blend);
1567 }
1568
1569 static void
1570 gen6_render_depth_stencil_state(VADriverContextP ctx)
1571 {
1572     struct i965_driver_data *i965 = i965_driver_data(ctx);
1573     struct i965_render_state *render_state = &i965->render_state;
1574     struct gen6_depth_stencil_state *depth_stencil_state;
1575     
1576     dri_bo_map(render_state->cc.depth_stencil, 1);
1577     assert(render_state->cc.depth_stencil->virtual);
1578     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1579     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1580     dri_bo_unmap(render_state->cc.depth_stencil);
1581 }
1582
/*
 * Build all GEN6 state buffers needed to render 'surface' onto the
 * destination box: surfaces, samplers, viewport, color-calc, blend,
 * depth-stencil, constants and vertex data.
 */
static void
gen6_render_setup_states(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
                         unsigned short srcw,
                         unsigned short srch,
                         short destx,
                         short desty,
                         unsigned short destw,
                         unsigned short desth)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}
1607
/*
 * Emit pipeline-invariant GEN6 state: 3D pipeline select, 1-sample
 * multisampling with center pixel location, full sample mask, and a zero
 * SIP.  NOTE(review): no BEGIN_BATCH/ADVANCE_BATCH here — presumably the
 * caller brackets the whole GEN6 command stream; confirm at the call site.
 * (The function name's "invarient" spelling is kept for ABI stability.)
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(ctx, 1);

    /* Set system instruction pointer */
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
}
1625
/*
 * Emit the 10-dword GEN6 STATE_BASE_ADDRESS: surface state offsets become
 * relative to the surface-state/binding-table buffer, all other bases and
 * bounds stay 0 with the MODIFY bit set.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1643
/*
 * Point the hardware at the color-calculator viewport state.  Only the
 * CC viewport is modified; the clip and SF viewport pointers are left
 * as zero (not modified by this command).
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(ctx, 0); /* clip viewport: unused */
    OUT_BATCH(ctx, 0); /* SF viewport: unused */
    OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1657
/*
 * Partition the URB: give the VS 24 entries of minimal size (the GEN6
 * minimum, per the inline comment) and allocate nothing to the GS,
 * since no GS thread is used in this pipeline.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1667
/*
 * Emit 3DSTATE_CC_STATE_POINTERS referencing the blend, depth/stencil
 * and color-calc state bos.  The relocation delta of 1 sets bit 0 of
 * each emitted pointer (the per-pointer modify-enable bit in this
 * command's dword layout).
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1679
/*
 * Bind the sampler state to the pixel shader stage only; the VS and GS
 * sampler pointers are emitted as zero and not modified.
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0); /* VS */
    OUT_BATCH(ctx, 0); /* GS */
    OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1693
/*
 * Emit the binding-table pointers.  Only the PS entry is programmed
 * (with BINDING_TABLE_OFFSET relative to the surface-state base set in
 * gen6_emit_state_base_address); VS and GS have no surfaces bound.
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    /* Binding table pointers */
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0);          /* vs */
    OUT_BATCH(ctx, 0);          /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
}
1706
/*
 * Declare a null depth buffer (surface type NULL, D32_FLOAT format,
 * all address/size dwords zero) and a zero clear value — depth and
 * stencil are not used by the video render path.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(ctx, 0);
}
1722
/*
 * The drawing-rectangle command is identical to the pre-GEN6 one, so
 * just reuse the common i965 helper.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1728
/*
 * Disable the vertex shader stage: no VS constant buffer and no VS
 * kernel, leaving the VS in pass-through mode.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    /* disable VS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(ctx, 0); /* without VS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1746
/*
 * Disable the geometry shader stage: no GS constant buffer and no GS
 * kernel, leaving the GS in pass-through mode.
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    /* disable GS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(ctx, 0); /* without GS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1765
/*
 * Disable the clip stage (all-zero 3DSTATE_CLIP): geometry is emitted
 * as screen-space rectangles, so no clipping is needed.
 */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
    OUT_BATCH(ctx, 0);
}
1774
/*
 * Program the strips-and-fans / setup stage: one attribute output with
 * a one-entry URB read, culling disabled, and the tri-fan provoking
 * vertex set to 2.  All viewport transform, scissor, line and point
 * fields stay zero.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW9 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW14 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW19 */
}
1801
/*
 * Program the pixel shader stage.  Binds the CURBE bo as PS constant
 * buffer 0, points the WM at the requested render kernel (index into
 * render_state->render_kernels), and enables SIMD16 dispatch with up
 * to 40 threads, one sampler and a 5-entry binding table.
 *
 * kernel: index of the PS kernel to run (e.g. PS_KERNEL or
 *         PS_SUBPIC_KERNEL as passed by gen6_render_emit_states).
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(ctx, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(ctx, render_state->render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(ctx, 0);
    /* payload data starts at GRF register 6 */
    OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    /* max-threads field is encoded as (threads - 1) */
    OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
}
1835
/*
 * Describe the vertex layout for the single vertex buffer: two
 * R32G32_FLOAT elements per vertex — position (X, Y) at byte offset 0
 * and texture coordinates (S0, T0) at byte offset 8 — each padded to
 * a vec4 with constant 1.0 in the last two components.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
1860
/*
 * Bind the vertex buffer and kick the draw.  The buffer holds three
 * vertices of 4 floats each (pitch 4*4 bytes); the second relocation
 * is the buffer end address at byte offset 12*4 = 3 vertices * 16
 * bytes.  A single RECTLIST instance of 3 sequential vertices is then
 * emitted with 3DPRIMITIVE.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1890
/*
 * Build the complete GEN6 render batch as one atomic section: flush,
 * then invariant state, base addresses, per-stage state, binding
 * table, depth buffer, drawing rectangle, vertex layout and finally
 * the draw itself.  The emission order matters — later commands
 * depend on state programmed by earlier ones (e.g. the binding-table
 * offset is relative to the surface-state base).
 *
 * kernel: PS kernel index forwarded to gen6_emit_wm_state().
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1914
/*
 * Render a video surface on GEN6: allocate/refresh state bos, fill the
 * indirect state for the src/dst rectangles, clear the destination
 * region outside the draw, emit the batch with the standard PS kernel
 * and flush it to the kernel driver.
 *
 * NOTE(review): the `flag` argument is not used on this path — it is
 * only consumed by i965_post_processing() in the caller.
 */
static void
gen6_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(ctx);
}
1936
/*
 * Fill the GEN6 blend state for subpicture rendering: standard
 * source-over alpha blending (src factor SRC_ALPHA, dst factor
 * INV_SRC_ALPHA, ADD) with pre/post-blend clamping to [0, 1].
 */
static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* NOTE(review): this unmaps cc.state, which this function never
     * maps — presumably it balances a mapping left open by an earlier
     * setup step; verify, otherwise this is an unbalanced unmap. */
    dri_bo_unmap(render_state->cc.state);    
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
1958
/*
 * Prepare all indirect state for blending a subpicture onto a surface:
 * destination surface state, subpicture source surfaces, sampler,
 * CC viewport, color-calc, alpha-blend and depth/stencil state, and
 * the vertex data for the destination rectangle.
 *
 * NOTE(review): the srcx/srcy/srcw/srch parameters are not used here;
 * only the destination rectangle is forwarded to the vertex upload.
 */
static void
gen6_subpicture_render_setup_states(VADriverContextP ctx,
                                    VASurfaceID surface,
                                    short srcx,
                                    short srcy,
                                    unsigned short srcw,
                                    unsigned short srch,
                                    short destx,
                                    short desty,
                                    unsigned short destw,
                                    unsigned short desth)
{
    VARectangle output_rect;

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;

    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_subpicture_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}
1987
1988 static void
1989 gen6_render_put_subpicture(VADriverContextP ctx,
1990                            VASurfaceID surface,
1991                            short srcx,
1992                            short srcy,
1993                            unsigned short srcw,
1994                            unsigned short srch,
1995                            short destx,
1996                            short desty,
1997                            unsigned short destw,
1998                            unsigned short desth)
1999 {
2000     struct i965_driver_data *i965 = i965_driver_data(ctx);
2001     struct object_surface *obj_surface = SURFACE(surface);
2002     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2003
2004     assert(obj_subpic);
2005     gen6_render_initialize(ctx);
2006     gen6_subpicture_render_setup_states(ctx, surface,
2007                                         srcx, srcy, srcw, srch,
2008                                         destx, desty, destw, desth);
2009     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2010     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2011     intel_batchbuffer_flush(ctx);
2012 }
2013
2014 /*
2015  * global functions
2016  */
2017 void
2018 intel_render_put_surface(VADriverContextP ctx,
2019                         VASurfaceID surface,
2020                         short srcx,
2021                         short srcy,
2022                         unsigned short srcw,
2023                         unsigned short srch,
2024                         short destx,
2025                         short desty,
2026                         unsigned short destw,
2027                         unsigned short desth,
2028                         unsigned int flag)
2029 {
2030     struct i965_driver_data *i965 = i965_driver_data(ctx);
2031
2032     i965_post_processing(ctx, surface,
2033                          srcx, srcy, srcw, srch,
2034                          destx, desty, destw, desth,
2035                          flag);
2036
2037     if (IS_GEN6(i965->intel.device_id))
2038         gen6_render_put_surface(ctx, surface,
2039                                 srcx, srcy, srcw, srch,
2040                                 destx, desty, destw, desth,
2041                                 flag);
2042     else
2043         i965_render_put_surface(ctx, surface,
2044                                 srcx, srcy, srcw, srch,
2045                                 destx, desty, destw, desth,
2046                                 flag);
2047 }
2048
2049 void
2050 intel_render_put_subpicture(VADriverContextP ctx,
2051                            VASurfaceID surface,
2052                            short srcx,
2053                            short srcy,
2054                            unsigned short srcw,
2055                            unsigned short srch,
2056                            short destx,
2057                            short desty,
2058                            unsigned short destw,
2059                            unsigned short desth)
2060 {
2061     struct i965_driver_data *i965 = i965_driver_data(ctx);
2062
2063     if (IS_GEN6(i965->intel.device_id))
2064         gen6_render_put_subpicture(ctx, surface,
2065                                    srcx, srcy, srcw, srch,
2066                                    destx, desty, destw, desth);
2067     else
2068         i965_render_put_subpicture(ctx, surface,
2069                                    srcx, srcy, srcw, srch,
2070                                    destx, desty, destw, desth);
2071 }
2072
2073 Bool 
2074 i965_render_init(VADriverContextP ctx)
2075 {
2076     struct i965_driver_data *i965 = i965_driver_data(ctx);
2077     struct i965_render_state *render_state = &i965->render_state;
2078     int i;
2079
2080     /* kernel */
2081     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2082                                  sizeof(render_kernels_gen5[0])));
2083     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2084                                  sizeof(render_kernels_gen6[0])));
2085
2086     if (IS_GEN6(i965->intel.device_id))
2087         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
2088     else if (IS_IRONLAKE(i965->intel.device_id))
2089         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
2090     else
2091         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
2092
2093     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2094         struct i965_kernel *kernel = &render_state->render_kernels[i];
2095
2096         if (!kernel->size)
2097             continue;
2098
2099         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2100                                   kernel->name, 
2101                                   kernel->size, 0x1000);
2102         assert(kernel->bo);
2103         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2104     }
2105
2106     /* constant buffer */
2107     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2108                       "constant buffer",
2109                       4096, 64);
2110     assert(render_state->curbe.bo);
2111     render_state->curbe.upload = 0;
2112
2113     return True;
2114 }
2115
2116 Bool 
2117 i965_render_terminate(VADriverContextP ctx)
2118 {
2119     int i;
2120     struct i965_driver_data *i965 = i965_driver_data(ctx);
2121     struct i965_render_state *render_state = &i965->render_state;
2122
2123     dri_bo_unreference(render_state->curbe.bo);
2124     render_state->curbe.bo = NULL;
2125
2126     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2127         struct i965_kernel *kernel = &render_state->render_kernels[i];
2128         
2129         dri_bo_unreference(kernel->bo);
2130         kernel->bo = NULL;
2131     }
2132
2133     dri_bo_unreference(render_state->vb.vertex_buffer);
2134     render_state->vb.vertex_buffer = NULL;
2135     dri_bo_unreference(render_state->vs.state);
2136     render_state->vs.state = NULL;
2137     dri_bo_unreference(render_state->sf.state);
2138     render_state->sf.state = NULL;
2139     dri_bo_unreference(render_state->wm.sampler);
2140     render_state->wm.sampler = NULL;
2141     dri_bo_unreference(render_state->wm.state);
2142     render_state->wm.state = NULL;
2143     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2144     dri_bo_unreference(render_state->cc.viewport);
2145     render_state->cc.viewport = NULL;
2146     dri_bo_unreference(render_state->cc.state);
2147     render_state->cc.state = NULL;
2148     dri_bo_unreference(render_state->cc.blend);
2149     render_state->cc.blend = NULL;
2150     dri_bo_unreference(render_state->cc.depth_stencil);
2151     render_state->cc.depth_stencil = NULL;
2152
2153     if (render_state->draw_region) {
2154         dri_bo_unreference(render_state->draw_region->bo);
2155         free(render_state->draw_region);
2156         render_state->draw_region = NULL;
2157     }
2158
2159     return True;
2160 }
2161