i965_drv_video: use the same structure for all kernels
[platform/upstream/libva.git] / i965_drv_video / i965_render.c
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_backend.h>
40 #include "va/x11/va_dricommon.h"
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
/* GRF register budget and thread limits for the Gen4 SF kernel. */
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/* Gen4 SF kernel binary, assembled offline into .g4b tables. */
static const uint32_t sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

/* Convert a GRF register count to the encoded "blocks of 16, minus one"
 * form expected by the thread0 state fields. */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Gen4 pixel shader for video rendering: sample planar YUV, convert to RGB
 * and write out. */
static const uint32_t ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Gen4 pixel shader for subpictures: sample ARGB directly (no CSC). */
static const uint32_t ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
78
/* On IRONLAKE (Gen5): same kernel set, rebuilt for the Gen5 ISA. */
static const uint32_t sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

/* Gen5 pixel shader for video rendering (planar YUV -> RGB). */
static const uint32_t ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
/* Gen5 pixel shader for subpictures (ARGB passthrough). */
static const uint32_t ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
100
/* programs for Sandybridge (Gen6) */

/* Empty table: NOTE(review) the Gen6 path apparently needs no SF kernel
 * (and no exa_wm_xy stage below) — confirm against the Gen6 pipeline setup. */
static const uint32_t sf_kernel_static_gen6[][4] = 
{
};

/* Gen6 pixel shader for video rendering (planar YUV -> RGB). */
static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Gen6 pixel shader for subpictures (ARGB passthrough). */
static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};
118
/* Surface state objects are padded to a 32-byte boundary; the binding table
 * lives immediately after the MAX_RENDER_SURFACES state slots in the same bo. */
#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
/* Parenthesize the argument so expression arguments (e.g. `i + 1`) scale the
 * whole expression rather than only its first operand. */
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
122
/* Reinterpret the bit pattern of a 32-bit float as a uint32_t.
 * memcpy is the standards-blessed way to type-pun without aliasing issues. */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
133
/* Indices into the per-generation render_kernels tables below. */
enum 
{
    SF_KERNEL = 0,      /* SF (setup) stage kernel */
    PS_KERNEL,          /* pixel shader: planar YUV video rendering */
    PS_SUBPIC_KERNEL    /* pixel shader: ARGB subpicture blending */
};
140
/* Gen4 kernel table: name, index, kernel binary, binary size, and a bo
 * slot (NULL here; filled in when the kernels are uploaded). */
static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};
165
/* Gen5 (Ironlake) kernel table; same layout as render_kernels_gen4. */
static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};
190
/* Gen6 (Sandybridge) kernel table; note the SF entry is an empty binary. */
static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};
215
/* Points at one of the per-generation tables above; presumably selected
 * during render init based on the device id — confirm against the
 * (not visible here) init path. */
static struct i965_kernel *render_kernels = NULL;

/* All generations define the same number of kernels, so gen4's count
 * serves for every table. */
#define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))

/* URB partitioning per fixed-function stage: number of entries and entry
 * size (in URB rows).  GS and CLIP are disabled (zero entries). */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
234
/*
 * Fill the VS unit state object.  The VS stage itself is disabled
 * (vs_enable = 0); only the URB bookkeeping is programmed so the
 * pipeline remains valid.
 */
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    /* Ironlake programs nr_urb_entries in quads (hence >> 2) —
     * NOTE(review): confirm encoding against the Gen5 PRM. */
    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;            /* no VS thread dispatch */
    vs_state->vs6.vert_cache_disable = 1;
    
    dri_bo_unmap(render_state->vs.state);
}
258
/*
 * Fill the SF unit state: point thread0 at the SF kernel, program thread
 * and URB parameters, and disable viewport transform, culling and
 * scissoring.  A relocation ties the kernel pointer to the kernel bo's
 * final GPU address.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* kernel start pointer is expressed in 64-byte units */
    sf_state->thread0.kernel_start_pointer = render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1; /* field holds count - 1 */
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* The reloc delta re-encodes grf_reg_count, which shares the thread0
     * dword with the kernel pointer; it must therefore be emitted after
     * grf_reg_count is written above. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
315
/*
 * Initialize wm.sampler_count sampler state entries: bilinear min/mag
 * filtering with clamped texture coordinates on all three axes.
 * sampler_count is accumulated by the source-surface setup code (one
 * sampler per bound source surface).
 */
static void 
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;
    
    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
/*
 * Fill the WM (pixel shader) unit state for subpicture blending: point
 * thread0 at the ARGB subpic kernel and hook up the shared sampler state.
 * Two relocations are emitted, one for the kernel bo and one for the
 * sampler bo.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel start pointer is expressed in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is expressed in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in groups of 4 */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* The reloc deltas re-encode the low bits (grf_reg_count /
     * sampler_count) sharing a dword with each pointer, so both relocs
     * must come after those fields are written above. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
405
406
/*
 * Fill the WM (pixel shader) unit state for video rendering: point
 * thread0 at the planar YUV->RGB kernel and hook up the shared sampler
 * state.  Mirrors i965_subpic_render_wm_unit() except for the kernel,
 * dispatch_grf_start_reg (2 vs 3) and const URB read length (1 vs 0).
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* kernel start pointer is expressed in 64-byte units */
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* sampler state pointer is expressed in 32-byte units */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count is encoded in groups of 4 */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Relocs must follow the field writes whose values they re-encode in
     * their deltas (grf_reg_count, sampler_count). */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
470
/*
 * Program the color-calculator viewport with an effectively unbounded
 * depth range so depth never restricts rendering.
 */
static void 
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));
    
    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}
488
/*
 * Fill the CC unit state for subpicture blending: depth/stencil/alpha
 * tests off, color blending on with src-over-dst factors
 * (SRC_ALPHA / INV_SRC_ALPHA).  A relocation points cc4 at the shared
 * CC viewport.
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0 ;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0;//0:ALPHATEST_UNORM8;       /*store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;//COMPAREFUNCTION_LESS;       /*pass if less than the reference */
    /* cc_viewport_state_offset is in 32-byte units; patched by the reloc below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend  =0; 
    
    /*final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor*/
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /*alpha test reference*/
    cc_state->cc7.alpha_ref.f =0.0 ;


    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
540
541
/*
 * Fill the CC unit state for plain video rendering: depth/stencil/alpha
 * tests and color blending all off; the logic op is enabled instead.
 * A relocation points cc4 at the shared CC viewport.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* cc_viewport_state_offset is in 32-byte units; patched by the reloc below */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    /* NOTE(review): 0xc is normally the COPY logic op; the "WHITE" comment
     * looks wrong — confirm against the logic-op encoding table. */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
579
580 static void
581 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
582 {
583     switch (tiling) {
584     case I915_TILING_NONE:
585         ss->ss3.tiled_surface = 0;
586         ss->ss3.tile_walk = 0;
587         break;
588     case I915_TILING_X:
589         ss->ss3.tiled_surface = 1;
590         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
591         break;
592     case I915_TILING_Y:
593         ss->ss3.tiled_surface = 1;
594         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
595         break;
596     }
597 }
598
/*
 * Write the surface state at slot #index describing one source plane
 * (Y, U, V or interleaved UV), point binding-table entry #index at it,
 * and emit a relocation for the plane's base address.
 *
 * offset is the plane's byte offset inside region; w/h are the plane's
 * dimensions, pitch its row stride in bytes, format an
 * I965_SURFACEFORMAT_* value.  Each call also bumps wm.sampler_count,
 * which later sizes the sampler array (one sampler per bound surface).
 */
static void
i965_render_src_surface_state(VADriverContextP ctx, 
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    unsigned int tiling;
    unsigned int swizzle;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    /* Patched by the relocation below once region has its final address. */
    ss->ss1.base_addr = region->offset + offset;

    /* Hardware fields hold size - 1. */
    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(region, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      region);

    /* Binding table lives in the same bo, after the surface-state slots. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
654
/*
 * Bind the decoded picture (or its post-processed copy, if present) as
 * source surfaces.  Slots 1/2 carry the luma plane; slots 3..6 carry
 * chroma.  The chroma offsets (w*h and w*h + w*h/4) assume a planar
 * 4:2:0 layout; the interleaved_uv path assumes NV12-style packed UV.
 *
 * NOTE(review): the !inited branch swaps which slots get U vs V
 * (5/6 vs 3/4) — presumably matching a different shader binding order;
 * confirm against the PS kernels and the init path.
 */
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    /* Prefer the post-processed output buffer when one exists. */
    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (!render_state->inited) {
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (render_state->interleaved_uv) {
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}
707
708 static void
709 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
710                               VASurfaceID surface)
711 {
712     struct i965_driver_data *i965 = i965_driver_data(ctx);  
713     struct object_surface *obj_surface = SURFACE(surface);
714     int w, h;
715     dri_bo *region;
716     dri_bo *subpic_region;
717     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
718     struct object_image *obj_image = IMAGE(obj_subpic->image);
719     assert(obj_surface);
720     assert(obj_surface->bo);
721     w = obj_surface->width;
722     h = obj_surface->height;
723     region = obj_surface->bo;
724     subpic_region = obj_image->bo;
725     /*subpicture surface*/
726     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
727     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
728 }
729
/*
 * Write the render-target surface state describing the drawable
 * (render_state->draw_region) and point binding-table entry #index at it.
 * 16bpp drawables are described as B5G6R5; anything else as B8G8R8A8.
 */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    /* Pick the surface format from the drawable's bytes per pixel. */
    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
        } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    /* Patched by the relocation below. */
    ss->ss1.base_addr = dest_region->bo->offset;

    /* Hardware fields hold size - 1. */
    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    /* Binding table lives in the same bo, after the surface-state slots. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
783
784 static void 
785 i965_subpic_render_upload_vertex(VADriverContextP ctx,
786                                  VASurfaceID surface,
787                                  const VARectangle *output_rect)
788 {    
789     struct i965_driver_data  *i965         = i965_driver_data(ctx);
790     struct i965_render_state *render_state = &i965->render_state;
791     struct object_surface    *obj_surface  = SURFACE(surface);
792     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
793
794     const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
795     const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
796     float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
797     int i = 0;
798
799     VARectangle dst_rect;
800     dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
801     dst_rect.y      = output_rect->y + sx * (float)obj_subpic->dst_rect.y;
802     dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
803     dst_rect.height = sy * (float)obj_subpic->dst_rect.height;
804
805     dri_bo_map(render_state->vb.vertex_buffer, 1);
806     assert(render_state->vb.vertex_buffer->virtual);
807     vb = render_state->vb.vertex_buffer->virtual;
808
809     tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
810     ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
811     tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
812     ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;
813
814     x1 = (float)dst_rect.x;
815     y1 = (float)dst_rect.y;
816     x2 = (float)(dst_rect.x + dst_rect.width);
817     y2 = (float)(dst_rect.y + dst_rect.height);
818
819     vb[i++] = tx2;
820     vb[i++] = ty2;
821     vb[i++] = x2;
822     vb[i++] = y2;
823
824     vb[i++] = tx1;
825     vb[i++] = ty2;
826     vb[i++] = x1;
827     vb[i++] = y2;
828
829     vb[i++] = tx1;
830     vb[i++] = ty1;
831     vb[i++] = x1;
832     vb[i++] = y1;
833     dri_bo_unmap(render_state->vb.vertex_buffer);
834 }
835
836 static void 
837 i965_render_upload_vertex(VADriverContextP ctx,
838                           VASurfaceID surface,
839                           short srcx,
840                           short srcy,
841                           unsigned short srcw,
842                           unsigned short srch,
843                           short destx,
844                           short desty,
845                           unsigned short destw,
846                           unsigned short desth)
847 {
848     struct i965_driver_data *i965 = i965_driver_data(ctx);
849     struct i965_render_state *render_state = &i965->render_state;
850     struct intel_region *dest_region = render_state->draw_region;
851     struct object_surface *obj_surface;
852     float *vb;
853
854     float u1, v1, u2, v2;
855     int i, width, height;
856     int box_x1 = dest_region->x + destx;
857     int box_y1 = dest_region->y + desty;
858     int box_x2 = box_x1 + destw;
859     int box_y2 = box_y1 + desth;
860
861     obj_surface = SURFACE(surface);
862     assert(surface);
863     width = obj_surface->orig_width;
864     height = obj_surface->orig_height;
865
866     u1 = (float)srcx / width;
867     v1 = (float)srcy / height;
868     u2 = (float)(srcx + srcw) / width;
869     v2 = (float)(srcy + srch) / height;
870
871     dri_bo_map(render_state->vb.vertex_buffer, 1);
872     assert(render_state->vb.vertex_buffer->virtual);
873     vb = render_state->vb.vertex_buffer->virtual;
874
875     i = 0;
876     vb[i++] = u2;
877     vb[i++] = v2;
878     vb[i++] = (float)box_x2;
879     vb[i++] = (float)box_y2;
880     
881     vb[i++] = u1;
882     vb[i++] = v2;
883     vb[i++] = (float)box_x1;
884     vb[i++] = (float)box_y2;
885
886     vb[i++] = u1;
887     vb[i++] = v1;
888     vb[i++] = (float)box_x1;
889     vb[i++] = (float)box_y1;
890
891     dri_bo_unmap(render_state->vb.vertex_buffer);
892 }
893
894 static void
895 i965_render_upload_constants(VADriverContextP ctx)
896 {
897     struct i965_driver_data *i965 = i965_driver_data(ctx);
898     struct i965_render_state *render_state = &i965->render_state;
899     unsigned short *constant_buffer;
900
901     if (render_state->curbe.upload)
902         return;
903
904     dri_bo_map(render_state->curbe.bo, 1);
905     assert(render_state->curbe.bo->virtual);
906     constant_buffer = render_state->curbe.bo->virtual;
907
908     if (render_state->interleaved_uv)
909         *constant_buffer = 1;
910     else
911         *constant_buffer = 0;
912
913     dri_bo_unmap(render_state->curbe.bo);
914     render_state->curbe.upload = 1;
915 }
916
/*
 * Build all indirect state needed to render a video surface: fixed-function
 * unit states, destination/source surface states, samplers, CC state, the
 * vertex data and the CURBE constants.
 */
static void
i965_surface_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}
942 static void
943 i965_subpic_render_state_setup(VADriverContextP ctx,
944                         VASurfaceID surface,
945                         short srcx,
946                         short srcy,
947                         unsigned short srcw,
948                         unsigned short srch,
949                         short destx,
950                         short desty,
951                         unsigned short destw,
952                         unsigned short desth)
953 {
954     i965_render_vs_unit(ctx);
955     i965_render_sf_unit(ctx);
956     i965_render_dest_surface_state(ctx, 0);
957     i965_subpic_render_src_surfaces_state(ctx, surface);
958     i965_render_sampler(ctx);
959     i965_subpic_render_wm_unit(ctx);
960     i965_render_cc_viewport(ctx);
961     i965_subpic_render_cc_unit(ctx);
962
963     VARectangle output_rect;
964     output_rect.x      = destx;
965     output_rect.y      = desty;
966     output_rect.width  = destw;
967     output_rect.height = desth;
968     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
969 }
970
971
/* Select the 3D pipeline for the commands that follow. */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(ctx);
}
979
/* Program the System Instruction Pointer to zero. */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
    ADVANCE_BATCH(ctx);
}
988
/*
 * Emit STATE_BASE_ADDRESS.  Only the surface state base is relocated, to
 * the combined surface-state/binding-table bo; all other bases stay zero.
 * Ironlake takes a longer packet (two extra dwords).
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 8);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 6);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    }
}
1017
/*
 * Set the per-stage binding table pointers.  Only the last (WM/PS) stage
 * gets a real binding table; VS/GS/CLIP/SF use none.
 */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(ctx, 0); /* vs */
    OUT_BATCH(ctx, 0); /* gs */
    OUT_BATCH(ctx, 0); /* clip */
    OUT_BATCH(ctx, 0); /* sf */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(ctx);
}
1030
/* Program the constant color register: R=1.0, G=0.0, B=1.0, A=1.0. */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 5);
    OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(0.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    OUT_BATCH(ctx, float_to_uint(1.0));
    ADVANCE_BATCH(ctx);
}
1042
/*
 * Hook the previously-built unit state buffers (VS, SF, WM, CC) into the
 * pipeline; GS and CLIP are disabled.
 */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 7);
    OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(ctx, 0);  /* disable GS */
    OUT_BATCH(ctx, 0);  /* disable CLIP */
    OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}
1059
1060 static void
1061 i965_render_urb_layout(VADriverContextP ctx)
1062 {
1063     int urb_vs_start, urb_vs_size;
1064     int urb_gs_start, urb_gs_size;
1065     int urb_clip_start, urb_clip_size;
1066     int urb_sf_start, urb_sf_size;
1067     int urb_cs_start, urb_cs_size;
1068
1069     urb_vs_start = 0;
1070     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1071     urb_gs_start = urb_vs_start + urb_vs_size;
1072     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1073     urb_clip_start = urb_gs_start + urb_gs_size;
1074     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1075     urb_sf_start = urb_clip_start + urb_clip_size;
1076     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1077     urb_cs_start = urb_sf_start + urb_sf_size;
1078     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1079
1080     BEGIN_BATCH(ctx, 3);
1081     OUT_BATCH(ctx, 
1082               CMD_URB_FENCE |
1083               UF0_CS_REALLOC |
1084               UF0_SF_REALLOC |
1085               UF0_CLIP_REALLOC |
1086               UF0_GS_REALLOC |
1087               UF0_VS_REALLOC |
1088               1);
1089     OUT_BATCH(ctx, 
1090               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1091               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1092               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1093     OUT_BATCH(ctx,
1094               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1095               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1096     ADVANCE_BATCH(ctx);
1097 }
1098
/* Describe the constant (CS) URB allocation: entry size and entry count. */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}
1109
/*
 * Point CONSTANT_BUFFER at the CURBE bo; the relocation delta carries the
 * buffer length field (URB_CS_ENTRY_SIZE - 1) in the low bits.
 */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(ctx, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(ctx);    
}
1123
/*
 * Set the drawing rectangle to cover the whole destination region.
 * Note '<<' binds tighter than '|', so the third dword is
 * ((height - 1) << 16) | (width - 1) as intended.
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(ctx, 4);
    OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(ctx, 0x00000000);
    OUT_BATCH(ctx, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(ctx, 0x00000000);         
    ADVANCE_BATCH(ctx);
}
1138
/*
 * Describe the two vertex elements fetched from the vertex buffer:
 * X,Y at byte offset 0 and S0,T0 at byte offset 8, each padded with 1.0
 * in the remaining components.  Pre-Ironlake hardware additionally needs
 * explicit destination element offsets in VE1.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(ctx);
    }
}
1192
1193 static void
1194 i965_render_upload_image_palette(
1195     VADriverContextP ctx,
1196     VAImageID        image_id,
1197     unsigned int     alpha
1198 )
1199 {
1200     struct i965_driver_data *i965 = i965_driver_data(ctx);
1201     unsigned int i;
1202
1203     struct object_image *obj_image = IMAGE(image_id);
1204     assert(obj_image);
1205
1206     if (obj_image->image.num_palette_entries == 0)
1207         return;
1208
1209     BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries);
1210     OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
1211     /*fill palette*/
1212     //int32_t out[16]; //0-23:color 23-31:alpha
1213     for (i = 0; i < obj_image->image.num_palette_entries; i++)
1214         OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]);
1215     ADVANCE_BATCH(ctx);
1216 }
1217
/*
 * Emit the vertex buffer packet and kick off a 3-vertex RECTLIST using the
 * data written by the *_upload_vertex() helpers (pitch = 4 floats/vertex).
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    /* Ironlake wants an end-address relocation; older parts take a max index. */
    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(ctx, 3);

    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1252
/*
 * Clear the whole destination region to black with an XY_COLOR_BLT.
 * br13 carries raster op (0xf0 = PATCOPY), color depth and pitch; for
 * tiled targets the pitch field is programmed in dwords (hence /4).
 * On GEN6 the blit must go to the BLT ring.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id))
        BEGIN_BLT_BATCH(ctx, 6);
    else
        BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, blt_cmd);
    OUT_BATCH(ctx, br13);
    OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(ctx, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(ctx, 0x0); /* fill color */
    ADVANCE_BATCH(ctx);
}
1296
/*
 * Emit the full 3D pipeline setup and draw for a surface render, as one
 * atomic batch so it cannot be split by a flush.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1317
/*
 * Pipeline setup and draw for subpicture blending.  Same sequence as the
 * surface path, but without clearing the destination and without the
 * constant buffer upload.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1336
1337
1338 static void 
1339 i965_render_initialize(VADriverContextP ctx)
1340 {
1341     struct i965_driver_data *i965 = i965_driver_data(ctx);
1342     struct i965_render_state *render_state = &i965->render_state;
1343     dri_bo *bo;
1344
1345     /* VERTEX BUFFER */
1346     dri_bo_unreference(render_state->vb.vertex_buffer);
1347     bo = dri_bo_alloc(i965->intel.bufmgr,
1348                       "vertex buffer",
1349                       4096,
1350                       4096);
1351     assert(bo);
1352     render_state->vb.vertex_buffer = bo;
1353
1354     /* VS */
1355     dri_bo_unreference(render_state->vs.state);
1356     bo = dri_bo_alloc(i965->intel.bufmgr,
1357                       "vs state",
1358                       sizeof(struct i965_vs_unit_state),
1359                       64);
1360     assert(bo);
1361     render_state->vs.state = bo;
1362
1363     /* GS */
1364     /* CLIP */
1365     /* SF */
1366     dri_bo_unreference(render_state->sf.state);
1367     bo = dri_bo_alloc(i965->intel.bufmgr,
1368                       "sf state",
1369                       sizeof(struct i965_sf_unit_state),
1370                       64);
1371     assert(bo);
1372     render_state->sf.state = bo;
1373
1374     /* WM */
1375     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1376     bo = dri_bo_alloc(i965->intel.bufmgr,
1377                       "surface state & binding table",
1378                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1379                       4096);
1380     assert(bo);
1381     render_state->wm.surface_state_binding_table_bo = bo;
1382
1383     dri_bo_unreference(render_state->wm.sampler);
1384     bo = dri_bo_alloc(i965->intel.bufmgr,
1385                       "sampler state",
1386                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1387                       64);
1388     assert(bo);
1389     render_state->wm.sampler = bo;
1390     render_state->wm.sampler_count = 0;
1391
1392     dri_bo_unreference(render_state->wm.state);
1393     bo = dri_bo_alloc(i965->intel.bufmgr,
1394                       "wm state",
1395                       sizeof(struct i965_wm_unit_state),
1396                       64);
1397     assert(bo);
1398     render_state->wm.state = bo;
1399
1400     /* COLOR CALCULATOR */
1401     dri_bo_unreference(render_state->cc.state);
1402     bo = dri_bo_alloc(i965->intel.bufmgr,
1403                       "color calc state",
1404                       sizeof(struct i965_cc_unit_state),
1405                       64);
1406     assert(bo);
1407     render_state->cc.state = bo;
1408
1409     dri_bo_unreference(render_state->cc.viewport);
1410     bo = dri_bo_alloc(i965->intel.bufmgr,
1411                       "cc viewport",
1412                       sizeof(struct i965_cc_viewport),
1413                       64);
1414     assert(bo);
1415     render_state->cc.viewport = bo;
1416 }
1417
/*
 * Top-level entry: render the given sub-rectangle of a video surface onto
 * the current draw region.  'flag' is accepted for interface compatibility
 * but not used by this path.
 */
static void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                            srcx, srcy, srcw, srch,
                            destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(ctx);
}
1438
1439 static void
1440 i965_render_put_subpicture(VADriverContextP ctx,
1441                            VASurfaceID surface,
1442                            short srcx,
1443                            short srcy,
1444                            unsigned short srcw,
1445                            unsigned short srch,
1446                            short destx,
1447                            short desty,
1448                            unsigned short destw,
1449                            unsigned short desth)
1450 {
1451     struct i965_driver_data *i965 = i965_driver_data(ctx);
1452     struct object_surface *obj_surface = SURFACE(surface);
1453     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1454     assert(obj_subpic);
1455
1456     i965_render_initialize(ctx);
1457     i965_subpic_render_state_setup(ctx, surface,
1458                                    srcx, srcy, srcw, srch,
1459                                    destx, desty, destw, desth);
1460     i965_subpic_render_pipeline_setup(ctx);
1461     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1462     intel_batchbuffer_flush(ctx);
1463 }
1464
1465 /*
1466  * for GEN6+
1467  */
/*
 * (Re)allocate all buffer objects used by the GEN6 render path: vertex
 * buffer, surface-state/binding-table block, samplers, and the GEN6-style
 * CC, viewport, blend and depth/stencil state buffers.  Old buffers, if
 * any, are unreferenced first.
 */
static void 
gen6_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      4096);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct gen6_color_calc_state),
                      4096);
    assert(bo);
    render_state->cc.state = bo;

    /* CC VIEWPORT */
    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      4096);
    assert(bo);
    render_state->cc.viewport = bo;

    /* BLEND STATE */
    dri_bo_unreference(render_state->cc.blend);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "blend state",
                      sizeof(struct gen6_blend_state),
                      4096);
    assert(bo);
    render_state->cc.blend = bo;

    /* DEPTH & STENCIL STATE */
    dri_bo_unreference(render_state->cc.depth_stencil);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "depth & stencil state",
                      sizeof(struct gen6_depth_stencil_state),
                      4096);
    assert(bo);
    render_state->cc.depth_stencil = bo;
}
1538
1539 static void
1540 gen6_render_color_calc_state(VADriverContextP ctx)
1541 {
1542     struct i965_driver_data *i965 = i965_driver_data(ctx);
1543     struct i965_render_state *render_state = &i965->render_state;
1544     struct gen6_color_calc_state *color_calc_state;
1545     
1546     dri_bo_map(render_state->cc.state, 1);
1547     assert(render_state->cc.state->virtual);
1548     color_calc_state = render_state->cc.state->virtual;
1549     memset(color_calc_state, 0, sizeof(*color_calc_state));
1550     color_calc_state->constant_r = 1.0;
1551     color_calc_state->constant_g = 0.0;
1552     color_calc_state->constant_b = 1.0;
1553     color_calc_state->constant_a = 1.0;
1554     dri_bo_unmap(render_state->cc.state);
1555 }
1556
1557 static void
1558 gen6_render_blend_state(VADriverContextP ctx)
1559 {
1560     struct i965_driver_data *i965 = i965_driver_data(ctx);
1561     struct i965_render_state *render_state = &i965->render_state;
1562     struct gen6_blend_state *blend_state;
1563     
1564     dri_bo_map(render_state->cc.blend, 1);
1565     assert(render_state->cc.blend->virtual);
1566     blend_state = render_state->cc.blend->virtual;
1567     memset(blend_state, 0, sizeof(*blend_state));
1568     blend_state->blend1.logic_op_enable = 1;
1569     blend_state->blend1.logic_op_func = 0xc;
1570     dri_bo_unmap(render_state->cc.blend);
1571 }
1572
1573 static void
1574 gen6_render_depth_stencil_state(VADriverContextP ctx)
1575 {
1576     struct i965_driver_data *i965 = i965_driver_data(ctx);
1577     struct i965_render_state *render_state = &i965->render_state;
1578     struct gen6_depth_stencil_state *depth_stencil_state;
1579     
1580     dri_bo_map(render_state->cc.depth_stencil, 1);
1581     assert(render_state->cc.depth_stencil->virtual);
1582     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1583     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1584     dri_bo_unmap(render_state->cc.depth_stencil);
1585 }
1586
/*
 * Build all indirect state for a GEN6 surface render: surface states,
 * samplers, viewport, the GEN6 CC/blend/depth-stencil blocks, CURBE
 * constants and the vertex data.
 */
static void
gen6_render_setup_states(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
                         unsigned short srcw,
                         unsigned short srch,
                         short destx,
                         short desty,
                         unsigned short destw,
                         unsigned short desth)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}
1611
/*
 * Emit GEN6 invariant state: 3D pipeline select, single-sample
 * multisampling, sample mask 1 and SIP = 0.  (The function name keeps the
 * historical "invarient" spelling.)
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(ctx, 1);

    /* Set system instruction pointer */
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
}
1629
/*
 * GEN6 STATE_BASE_ADDRESS: only the surface state base is relocated, to
 * the surface-state/binding-table bo; other bases and all upper bounds
 * keep the modify bit with a zero address.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1647
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS with only the color-calculator
 * viewport modified; the CLIP and SF viewport slots are left at zero.
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(ctx, 0); /* CLIP viewport: unused */
    OUT_BATCH(ctx, 0); /* SF viewport: unused */
    OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1661
/*
 * Partition the URB: give the VS 24 entries of minimum size (the GEN6
 * hardware minimum) and nothing to the GS, since no GS thread is used.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1671
/*
 * Point the pipeline at the blend, depth/stencil and color-calc state
 * buffers.  The relocation delta of 1 sets bit 0 of each pointer, which
 * on GEN6 marks the pointer as valid (modify enable).
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1683
/*
 * Emit the sampler state pointers; only the PS slot is modified, since
 * the VS and GS stages do no sampling in this driver.
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0); /* VS */
    OUT_BATCH(ctx, 0); /* GS */
    OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1697
/*
 * Emit the binding table pointers.  Only the PS binding table is set;
 * the offset is relative to the surface state base address programmed
 * in gen6_emit_state_base_address().
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    /* Binding table pointers */
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0);          /* vs */
    OUT_BATCH(ctx, 0);          /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
}
1710
/*
 * Program a null depth buffer (surface type NULL) and zero clear
 * params -- depth/stencil testing is not used for video rendering.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(ctx, 0);
}
1726
/*
 * The drawing rectangle command is identical on GEN6, so reuse the
 * common i965 helper.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1732
/*
 * Disable the vertex shader stage: no VS constant buffer and no VS
 * kernel, so vertices pass through unmodified.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    /* disable VS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(ctx, 0); /* without VS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1750
/*
 * Disable the geometry shader stage: no GS constant buffer and no GS
 * kernel, so primitives pass through unmodified.
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    /* disable GS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(ctx, 0); /* without GS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1769
/*
 * Disable the clip stage entirely; geometry is pre-clipped to the
 * drawing rectangle, so clipping is pass-through.
 */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
    OUT_BATCH(ctx, 0);
}
1778
/*
 * Program the strips-and-fans / setup stage: one attribute output,
 * one URB entry read, no culling.  The provoking-vertex choice for
 * trifans matches the RECTLIST vertex ordering used by this driver.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW9 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW14 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW19 */
}
1805
/*
 * Program the windower / pixel shader stage.
 *
 * @kernel indexes into render_kernels[] (PS_KERNEL or PS_SUBPIC_KERNEL).
 * The CURBE bo is bound as PS constant buffer 0; the PS runs in
 * SIMD16 dispatch with up to 40 threads and its payload starting at
 * GRF register 6.
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(ctx, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(ctx, render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
}
1839
/*
 * Describe the vertex layout: two R32G32_FLOAT elements per vertex
 * from vertex buffer 0 -- position (X, Y) at byte offset 0 and
 * texture coordinates (S0, T0) at byte offset 8.  The missing Z/W
 * components are filled with 1.0.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
1864
/*
 * Bind the vertex buffer (16-byte stride: X, Y, S, T floats; buffer
 * end address at 12 dwords) and issue a 3DPRIMITIVE drawing one
 * 3-vertex RECTLIST with sequential vertex fetch.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1894
/*
 * Emit the full GEN6 3D pipeline state and the draw call into one
 * atomic batchbuffer section.  The emission order follows the GEN6
 * programming sequence and must not be rearranged.
 *
 * @kernel selects the pixel shader (PS_KERNEL or PS_SUBPIC_KERNEL),
 * forwarded to gen6_emit_wm_state().
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1918
/*
 * GEN6 PutSurface implementation: set up all render state for scaling
 * the src rectangle of @surface onto the dest rectangle of the
 * drawable, clear the destination region, emit the pipeline with the
 * regular pixel shader kernel and flush the batch.
 *
 * @flag is currently unused here (accepted for interface parity).
 */
static void
gen6_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth);
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(ctx);
}
1940
/*
 * Fill the GEN6 blend state for subpicture rendering: standard
 * source-over alpha blending (src * alpha + dst * (1 - alpha)) with
 * pre/post-blend color clamping to [0, 1].
 */
static void
gen6_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_blend_state *blend_state;

    /* NOTE(review): cc.state is not mapped anywhere in this function --
     * this unmap looks like a copy/paste leftover; verify whether a
     * caller leaves cc.state mapped before removing it. */
    dri_bo_unmap(render_state->cc.state);    
    dri_bo_map(render_state->cc.blend, 1);
    assert(render_state->cc.blend->virtual);
    blend_state = render_state->cc.blend->virtual;
    memset(blend_state, 0, sizeof(*blend_state));
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.blend_enable = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
    dri_bo_unmap(render_state->cc.blend);
}
1962
1963 static void
1964 gen6_subpicture_render_setup_states(VADriverContextP ctx,
1965                                     VASurfaceID surface,
1966                                     short srcx,
1967                                     short srcy,
1968                                     unsigned short srcw,
1969                                     unsigned short srch,
1970                                     short destx,
1971                                     short desty,
1972                                     unsigned short destw,
1973                                     unsigned short desth)
1974 {
1975     VARectangle output_rect;
1976
1977     output_rect.x      = destx;
1978     output_rect.y      = desty;
1979     output_rect.width  = destw;
1980     output_rect.height = desth;
1981
1982     i965_render_dest_surface_state(ctx, 0);
1983     i965_subpic_render_src_surfaces_state(ctx, surface);
1984     i965_render_sampler(ctx);
1985     i965_render_cc_viewport(ctx);
1986     gen6_render_color_calc_state(ctx);
1987     gen6_subpicture_render_blend_state(ctx);
1988     gen6_render_depth_stencil_state(ctx);
1989     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
1990 }
1991
1992 static void
1993 gen6_render_put_subpicture(VADriverContextP ctx,
1994                            VASurfaceID surface,
1995                            short srcx,
1996                            short srcy,
1997                            unsigned short srcw,
1998                            unsigned short srch,
1999                            short destx,
2000                            short desty,
2001                            unsigned short destw,
2002                            unsigned short desth)
2003 {
2004     struct i965_driver_data *i965 = i965_driver_data(ctx);
2005     struct object_surface *obj_surface = SURFACE(surface);
2006     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2007
2008     assert(obj_subpic);
2009     gen6_render_initialize(ctx);
2010     gen6_subpicture_render_setup_states(ctx, surface,
2011                                         srcx, srcy, srcw, srch,
2012                                         destx, desty, destw, desth);
2013     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2014     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2015     intel_batchbuffer_flush(ctx);
2016 }
2017
2018 /*
2019  * global functions
2020  */
/*
 * Public PutSurface entry point.  Post-processing always runs first;
 * the actual rendering is then dispatched to the GEN6 path on Sandy
 * Bridge and to the common i965 path on older hardware.
 */
void
intel_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    i965_post_processing(ctx, surface,
                         srcx, srcy, srcw, srch,
                         destx, desty, destw, desth,
                         flag);

    if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, surface,
                                srcx, srcy, srcw, srch,
                                destx, desty, destw, desth,
                                flag);
    else
        i965_render_put_surface(ctx, surface,
                                srcx, srcy, srcw, srch,
                                destx, desty, destw, desth,
                                flag);
}
2052
/*
 * Public PutSubpicture entry point: dispatch to the GEN6 path on
 * Sandy Bridge, otherwise to the common i965 path.
 */
void
intel_render_put_subpicture(VADriverContextP ctx,
                           VASurfaceID surface,
                           short srcx,
                           short srcy,
                           unsigned short srcw,
                           unsigned short srch,
                           short destx,
                           short desty,
                           unsigned short destw,
                           unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, surface,
                                   srcx, srcy, srcw, srch,
                                   destx, desty, destw, desth);
    else
        i965_render_put_subpicture(ctx, surface,
                                   srcx, srcy, srcw, srch,
                                   destx, desty, destw, desth);
}
2076
2077 Bool 
2078 i965_render_init(VADriverContextP ctx)
2079 {
2080     struct i965_driver_data *i965 = i965_driver_data(ctx);
2081     struct i965_render_state *render_state = &i965->render_state;
2082     int i;
2083
2084     /* kernel */
2085     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2086                                  sizeof(render_kernels_gen5[0])));
2087     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2088                                  sizeof(render_kernels_gen6[0])));
2089
2090     if (IS_GEN6(i965->intel.device_id))
2091         render_kernels = render_kernels_gen6;
2092     else if (IS_IRONLAKE(i965->intel.device_id))
2093         render_kernels = render_kernels_gen5;
2094     else
2095         render_kernels = render_kernels_gen4;
2096
2097     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2098         struct i965_kernel *kernel = &render_kernels[i];
2099
2100         if (!kernel->size)
2101             continue;
2102
2103         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2104                                   kernel->name, 
2105                                   kernel->size, 0x1000);
2106         assert(kernel->bo);
2107         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2108     }
2109
2110     /* constant buffer */
2111     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2112                       "constant buffer",
2113                       4096, 64);
2114     assert(render_state->curbe.bo);
2115     render_state->curbe.upload = 0;
2116
2117     return True;
2118 }
2119
2120 Bool 
2121 i965_render_terminate(VADriverContextP ctx)
2122 {
2123     int i;
2124     struct i965_driver_data *i965 = i965_driver_data(ctx);
2125     struct i965_render_state *render_state = &i965->render_state;
2126
2127     dri_bo_unreference(render_state->curbe.bo);
2128     render_state->curbe.bo = NULL;
2129
2130     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2131         struct i965_kernel *kernel = &render_kernels[i];
2132         
2133         dri_bo_unreference(kernel->bo);
2134         kernel->bo = NULL;
2135     }
2136
2137     dri_bo_unreference(render_state->vb.vertex_buffer);
2138     render_state->vb.vertex_buffer = NULL;
2139     dri_bo_unreference(render_state->vs.state);
2140     render_state->vs.state = NULL;
2141     dri_bo_unreference(render_state->sf.state);
2142     render_state->sf.state = NULL;
2143     dri_bo_unreference(render_state->wm.sampler);
2144     render_state->wm.sampler = NULL;
2145     dri_bo_unreference(render_state->wm.state);
2146     render_state->wm.state = NULL;
2147     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2148     dri_bo_unreference(render_state->cc.viewport);
2149     render_state->cc.viewport = NULL;
2150     dri_bo_unreference(render_state->cc.state);
2151     render_state->cc.state = NULL;
2152     dri_bo_unreference(render_state->cc.blend);
2153     render_state->cc.blend = NULL;
2154     dri_bo_unreference(render_state->cc.depth_stencil);
2155     render_state->cc.depth_stencil = NULL;
2156
2157     if (render_state->draw_region) {
2158         dri_bo_unreference(render_state->draw_region->bo);
2159         free(render_state->draw_region);
2160         render_state->draw_region = NULL;
2161     }
2162
2163     return True;
2164 }
2165