/*
 * Imported from platform/upstream/libva.git,
 * commit c34f6aef9baebf933cf853cf5898e6d54b0142f2:
 * i965_drv_video/i965_render.c
 */
1 /*
2  * Copyright © 2006 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 /*
31  * Most of rendering codes are ported from xf86-video-intel/src/i965_video.c
32  */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include <va/va_backend.h>
40 #include "va/x11/va_dricommon.h"
41
42 #include "intel_batchbuffer.h"
43 #include "intel_driver.h"
44 #include "i965_defines.h"
45 #include "i965_drv_video.h"
46 #include "i965_structs.h"
47
48 #include "i965_render.h"
49
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

/* Strips-and-fans (SF) kernel for Gen4 (965); binary emitted by the
 * shader assembler into the .g4b header. */
static const unsigned int sf_kernel_static[][4] = 
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

/* Number of 16-register GRF blocks used by a kernel, minus one
 * (the encoding the hardware thread state expects). */
#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)

/* Gen4 pixel shader: sample planar YUV, convert to RGB, write out. */
static const unsigned int ps_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
/* Gen4 pixel shader for subpictures: sample ARGB and write directly
 * (no YUV->RGB conversion). */
static const unsigned int ps_subpic_kernel_static[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE (Gen5): same programs, reassembled for the Gen5 ISA. */
static const unsigned int sf_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const unsigned int ps_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const unsigned int ps_subpic_kernel_static_gen5[][4] = 
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge (Gen6); the SF stage needs no kernel there,
 * hence the empty table. */
static const unsigned int sf_kernel_static_gen6[][4] = 
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Surface states are padded to 32 bytes and packed back to back in one
 * bo, followed immediately by the binding table. */
#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
122
/* Reinterpret the bit pattern of a 32-bit float as a uint32_t. */
static uint32_t float_to_uint (float f) 
{
    uint32_t bits;

    /* memcpy-based type punning; compilers lower this to a plain move. */
    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
133
/* Indices into the render_kernels[] table selected below. */
enum 
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

/* A compiled kernel blob plus the GPU buffer object it is uploaded to. */
struct render_kernel
{
    char *name;                   /* human-readable name, for debugging */
    const unsigned int (*bin)[4]; /* kernel binary, 128-bit instructions */
    int size;                     /* size of bin, in bytes */
    dri_bo *bo;                   /* NULL until the kernel is uploaded */
};
148
/* Gen4 (965) kernel table, indexed by SF_KERNEL/PS_KERNEL/PS_SUBPIC_KERNEL. */
static struct render_kernel render_kernels_gen4[] = {
    {
        "SF",
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

/* Gen5 (Ironlake) kernel table; same layout as gen4. */
static struct render_kernel render_kernels_gen5[] = {
    {
        "SF",
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

/* Gen6 (Sandybridge) kernel table; same layout as gen4. */
static struct render_kernel render_kernels_gen6[] = {
    {
        "SF",
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

/* Points at one of the render_kernels_gen* tables above; presumably set
 * from the device id at init time — the selection code is outside this
 * chunk. */
static struct render_kernel *render_kernels = NULL;

/* All three per-generation tables have the same entry count. */
#define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))
218
/*
 * URB (unified return buffer) partitioning for the fixed-function
 * pipeline.  GS and CLIP get zero entries because this render path does
 * not use them.  NOTE(review): entry-size units are hardware-defined
 * rows — confirm against the 965 PRM.
 */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
233
234 static void
235 i965_render_vs_unit(VADriverContextP ctx)
236 {
237     struct i965_driver_data *i965 = i965_driver_data(ctx);
238     struct i965_render_state *render_state = &i965->render_state;
239     struct i965_vs_unit_state *vs_state;
240
241     dri_bo_map(render_state->vs.state, 1);
242     assert(render_state->vs.state->virtual);
243     vs_state = render_state->vs.state->virtual;
244     memset(vs_state, 0, sizeof(*vs_state));
245
246     if (IS_IRONLAKE(i965->intel.device_id))
247         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
248     else
249         vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
250
251     vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
252     vs_state->vs6.vs_enable = 0;
253     vs_state->vs6.vert_cache_disable = 1;
254     
255     dri_bo_unmap(render_state->vs.state);
256 }
257
/*
 * Fill in the strips-and-fans (SF) fixed-function unit state: point the
 * unit at the SF kernel and describe its thread/URB configuration.
 */
static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    /* Kernel start pointer is stored in 64-byte units, hence the >> 6. */
    sf_state->thread0.kernel_start_pointer = render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1; /* field is max-1 encoded */
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    /* Destination origin bias — NOTE(review): 0x8 looks like a 0.5
     * pixel-center bias in fixed point; confirm encoding in the PRM. */
    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    /* Relocation keeps the kernel start pointer valid wherever the
     * kernel bo ends up in the aperture. */
    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}
314
315 static void 
316 i965_render_sampler(VADriverContextP ctx)
317 {
318     struct i965_driver_data *i965 = i965_driver_data(ctx);
319     struct i965_render_state *render_state = &i965->render_state;
320     struct i965_sampler_state *sampler_state;
321     int i;
322     
323     assert(render_state->wm.sampler_count > 0);
324     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
325
326     dri_bo_map(render_state->wm.sampler, 1);
327     assert(render_state->wm.sampler->virtual);
328     sampler_state = render_state->wm.sampler->virtual;
329     for (i = 0; i < render_state->wm.sampler_count; i++) {
330         memset(sampler_state, 0, sizeof(*sampler_state));
331         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
332         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
333         sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
334         sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
335         sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
336         sampler_state++;
337     }
338
339     dri_bo_unmap(render_state->wm.sampler);
340 }
/*
 * Fill in the WM (pixel shader) unit state for subpicture rendering:
 * point the unit at the PS_SUBPIC kernel and the shared sampler state.
 */
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel start pointer is stored in 64-byte units. */
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is stored in 32-byte units. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count field holds the count in groups of 4 */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Relocations keep the kernel and sampler-state pointers valid
     * wherever those bos land in the aperture. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
404
405
/*
 * Fill in the WM (pixel shader) unit state for normal video rendering:
 * identical to the subpicture variant except it uses the planar-YUV
 * PS kernel and reads one constant URB entry.
 */
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    /* Kernel start pointer is stored in 64-byte units. */
    wm_state->thread0.kernel_start_pointer = render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    /* Sampler state pointer is stored in 32-byte units. */
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5; 

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        /* sampler_count field holds the count in groups of 4 */
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    /* Relocations keep the kernel and sampler-state pointers valid
     * wherever those bos land in the aperture. */
    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}
469
470 static void 
471 i965_render_cc_viewport(VADriverContextP ctx)
472 {
473     struct i965_driver_data *i965 = i965_driver_data(ctx);
474     struct i965_render_state *render_state = &i965->render_state;
475     struct i965_cc_viewport *cc_viewport;
476
477     dri_bo_map(render_state->cc.viewport, 1);
478     assert(render_state->cc.viewport->virtual);
479     cc_viewport = render_state->cc.viewport->virtual;
480     memset(cc_viewport, 0, sizeof(*cc_viewport));
481     
482     cc_viewport->min_depth = -1.e35;
483     cc_viewport->max_depth = 1.e35;
484
485     dri_bo_unmap(render_state->cc.viewport);
486 }
487
/*
 * CC (color calculator) unit state for subpicture rendering: alpha
 * blending is enabled so the subpicture composites over the video
 * (final = src*SRC_ALPHA + dst*(1-SRC_ALPHA)).
 */
static void 
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* 0: ALPHATEST_UNORM8 — store alpha value with UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS — pass if less than the reference */
    /* Viewport pointer is stored in 32-byte units. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0; 
    cc_state->cc6.clamp_pre_alpha_blend = 0; 
    
    /* final color = src_color*src_blend_factor +/- dst_color*dest_color_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
   
    /* alpha test reference (unused while alpha_test is off) */
    cc_state->cc7.alpha_ref.f = 0.0;

    /* Relocation keeps the viewport pointer in cc4 valid wherever the
     * viewport bo lands. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
539
540
/*
 * CC (color calculator) unit state for normal video rendering: blending
 * is disabled — pixels are written straight through via a logic op.
 */
static void 
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    /* Viewport pointer is stored in 32-byte units. */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    /* Relocation keeps the viewport pointer in cc4 valid wherever the
     * viewport bo lands. */
    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}
578
579 static void
580 i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
581 {
582     switch (tiling) {
583     case I915_TILING_NONE:
584         ss->ss3.tiled_surface = 0;
585         ss->ss3.tile_walk = 0;
586         break;
587     case I915_TILING_X:
588         ss->ss3.tiled_surface = 1;
589         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
590         break;
591     case I915_TILING_Y:
592         ss->ss3.tiled_surface = 1;
593         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
594         break;
595     }
596 }
597
/*
 * Write one SURFACE_STATE describing a source (texture) plane at the
 * given binding-table index, and point the binding table entry at it.
 * Also bumps the sampler count: each bound source plane gets a sampler.
 *
 * region/offset select the plane within the bo; w/h/pitch describe it;
 * format is an I965_SURFACEFORMAT_* value.
 */
static void
i965_render_src_surface_state(VADriverContextP ctx, 
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    unsigned int tiling;
    unsigned int swizzle;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    /* SURFACE_STATE dimensions are size-minus-one encoded. */
    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(region, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);

    /* Relocation keeps the base address valid wherever the region bo
     * lands in the aperture. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      region);

    /* Binding table entry holds the byte offset of the surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}
653
/*
 * Bind the planes of the source video surface as textures.  The Y plane
 * goes in binding slots 1-2 and chroma in slots 3-6 (each plane is bound
 * twice).  If a post-processing output bo exists it is used instead of
 * the raw surface bo.
 */
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;       /* allocated (padded) dimensions */
    int rw, rh;     /* original (displayed) dimensions */
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (!render_state->inited) {
        /* NOTE(review): before init, U is bound at slots 5/6 and V at
         * 3/4 — the reverse of the initialized planar path below.
         * Looks intentional but confirm against the shader bindings. */
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (render_state->interleaved_uv) {
            /* NV12-style layout: one interleaved UV plane after Y. */
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            /* Planar layout: U plane after Y, V plane after U. */
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}
706
707 static void
708 i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
709                               VASurfaceID surface)
710 {
711     struct i965_driver_data *i965 = i965_driver_data(ctx);  
712     struct object_surface *obj_surface = SURFACE(surface);
713     int w, h;
714     dri_bo *region;
715     dri_bo *subpic_region;
716     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
717     struct object_image *obj_image = IMAGE(obj_subpic->image);
718     assert(obj_surface);
719     assert(obj_surface->bo);
720     w = obj_surface->width;
721     h = obj_surface->height;
722     region = obj_surface->bo;
723     subpic_region = obj_image->bo;
724     /*subpicture surface*/
725     i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
726     i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
727 }
728
/*
 * Write the SURFACE_STATE for the render target (the drawable region
 * being rendered into) at the given binding-table index, choosing a
 * 16bpp or 32bpp format from the region's bytes-per-pixel.
 */
static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    /* 2 bytes per pixel => RGB565, otherwise assume 32-bit BGRA. */
    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = dest_region->bo->offset;

    /* SURFACE_STATE dimensions are size-minus-one encoded. */
    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    /* Render target: read and write domains are both RENDER. */
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    /* Binding table entry holds the byte offset of the surface state. */
    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
782
783 static void 
784 i965_subpic_render_upload_vertex(VADriverContextP ctx,
785                                  VASurfaceID surface,
786                                  const VARectangle *output_rect)
787 {    
788     struct i965_driver_data  *i965         = i965_driver_data(ctx);
789     struct i965_render_state *render_state = &i965->render_state;
790     struct object_surface    *obj_surface  = SURFACE(surface);
791     struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
792
793     const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
794     const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
795     float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
796     int i = 0;
797
798     VARectangle dst_rect;
799     dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
800     dst_rect.y      = output_rect->y + sx * (float)obj_subpic->dst_rect.y;
801     dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
802     dst_rect.height = sy * (float)obj_subpic->dst_rect.height;
803
804     dri_bo_map(render_state->vb.vertex_buffer, 1);
805     assert(render_state->vb.vertex_buffer->virtual);
806     vb = render_state->vb.vertex_buffer->virtual;
807
808     tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
809     ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
810     tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
811     ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;
812
813     x1 = (float)dst_rect.x;
814     y1 = (float)dst_rect.y;
815     x2 = (float)(dst_rect.x + dst_rect.width);
816     y2 = (float)(dst_rect.y + dst_rect.height);
817
818     vb[i++] = tx2;
819     vb[i++] = ty2;
820     vb[i++] = x2;
821     vb[i++] = y2;
822
823     vb[i++] = tx1;
824     vb[i++] = ty2;
825     vb[i++] = x1;
826     vb[i++] = y2;
827
828     vb[i++] = tx1;
829     vb[i++] = ty1;
830     vb[i++] = x1;
831     vb[i++] = y1;
832     dri_bo_unmap(render_state->vb.vertex_buffer);
833 }
834
835 static void 
836 i965_render_upload_vertex(VADriverContextP ctx,
837                           VASurfaceID surface,
838                           short srcx,
839                           short srcy,
840                           unsigned short srcw,
841                           unsigned short srch,
842                           short destx,
843                           short desty,
844                           unsigned short destw,
845                           unsigned short desth)
846 {
847     struct i965_driver_data *i965 = i965_driver_data(ctx);
848     struct i965_render_state *render_state = &i965->render_state;
849     struct intel_region *dest_region = render_state->draw_region;
850     struct object_surface *obj_surface;
851     float *vb;
852
853     float u1, v1, u2, v2;
854     int i, width, height;
855     int box_x1 = dest_region->x + destx;
856     int box_y1 = dest_region->y + desty;
857     int box_x2 = box_x1 + destw;
858     int box_y2 = box_y1 + desth;
859
860     obj_surface = SURFACE(surface);
861     assert(surface);
862     width = obj_surface->orig_width;
863     height = obj_surface->orig_height;
864
865     u1 = (float)srcx / width;
866     v1 = (float)srcy / height;
867     u2 = (float)(srcx + srcw) / width;
868     v2 = (float)(srcy + srch) / height;
869
870     dri_bo_map(render_state->vb.vertex_buffer, 1);
871     assert(render_state->vb.vertex_buffer->virtual);
872     vb = render_state->vb.vertex_buffer->virtual;
873
874     i = 0;
875     vb[i++] = u2;
876     vb[i++] = v2;
877     vb[i++] = (float)box_x2;
878     vb[i++] = (float)box_y2;
879     
880     vb[i++] = u1;
881     vb[i++] = v2;
882     vb[i++] = (float)box_x1;
883     vb[i++] = (float)box_y2;
884
885     vb[i++] = u1;
886     vb[i++] = v1;
887     vb[i++] = (float)box_x1;
888     vb[i++] = (float)box_y1;
889
890     dri_bo_unmap(render_state->vb.vertex_buffer);
891 }
892
893 static void
894 i965_render_upload_constants(VADriverContextP ctx)
895 {
896     struct i965_driver_data *i965 = i965_driver_data(ctx);
897     struct i965_render_state *render_state = &i965->render_state;
898     unsigned short *constant_buffer;
899
900     if (render_state->curbe.upload)
901         return;
902
903     dri_bo_map(render_state->curbe.bo, 1);
904     assert(render_state->curbe.bo->virtual);
905     constant_buffer = render_state->curbe.bo->virtual;
906
907     if (render_state->interleaved_uv)
908         *constant_buffer = 1;
909     else
910         *constant_buffer = 0;
911
912     dri_bo_unmap(render_state->curbe.bo);
913     render_state->curbe.upload = 1;
914 }
915
/*
 * Prepare all indirect state needed to render @surface: fixed-function
 * unit states, source/destination surface states, samplers, vertices
 * and CURBE constants.  No batch commands are emitted here.
 */
static void
i965_surface_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}
941 static void
942 i965_subpic_render_state_setup(VADriverContextP ctx,
943                         VASurfaceID surface,
944                         short srcx,
945                         short srcy,
946                         unsigned short srcw,
947                         unsigned short srch,
948                         short destx,
949                         short desty,
950                         unsigned short destw,
951                         unsigned short desth)
952 {
953     i965_render_vs_unit(ctx);
954     i965_render_sf_unit(ctx);
955     i965_render_dest_surface_state(ctx, 0);
956     i965_subpic_render_src_surfaces_state(ctx, surface);
957     i965_render_sampler(ctx);
958     i965_subpic_render_wm_unit(ctx);
959     i965_render_cc_viewport(ctx);
960     i965_subpic_render_cc_unit(ctx);
961
962     VARectangle output_rect;
963     output_rect.x      = destx;
964     output_rect.y      = desty;
965     output_rect.width  = destw;
966     output_rect.height = desth;
967     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
968 }
969
970
/* Select the 3D pipeline (as opposed to the media pipeline). */
static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 1);
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(ctx);
}
978
/* Program a zero System Instruction Pointer (no exception handler). */
static void
i965_render_state_sip(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
    ADVANCE_BATCH(ctx);
}
987
/*
 * Emit STATE_BASE_ADDRESS.  Only the surface state base points at a
 * real buffer (the combined surface-state/binding-table BO); all other
 * bases stay zero with only the modify-enable bit set.  Ironlake takes
 * two extra dwords, hence the longer command variant.
 */
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 8);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); /* general state base */
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 6);
        OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY); /* general state base */
        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* surface state base */
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(ctx);
    }
}
1016
/*
 * Point the pixel shader stage at the binding table inside the
 * surface-state BO; all other stages use no surfaces.
 */
static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(ctx, 0); /* vs */
    OUT_BATCH(ctx, 0); /* gs */
    OUT_BATCH(ctx, 0); /* clip */
    OUT_BATCH(ctx, 0); /* sf */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET); /* ps */
    ADVANCE_BATCH(ctx);
}
1029
/* Program the blend constant color to {R,G,B,A} = {1, 0, 1, 1}. */
static void 
i965_render_constant_color(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 5);
    OUT_BATCH(ctx, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(ctx, float_to_uint(1.0)); /* R */
    OUT_BATCH(ctx, float_to_uint(0.0)); /* G */
    OUT_BATCH(ctx, float_to_uint(1.0)); /* B */
    OUT_BATCH(ctx, float_to_uint(1.0)); /* A */
    ADVANCE_BATCH(ctx);
}
1041
/*
 * Hook the previously-built unit state buffers (VS, SF, WM, CC) into
 * the pipeline; GS and CLIP stages are disabled.
 */
static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 7);
    OUT_BATCH(ctx, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(ctx, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(ctx, 0);  /* disable GS */
    OUT_BATCH(ctx, 0);  /* disable CLIP */
    OUT_RELOC(ctx, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(ctx);
}
1058
1059 static void
1060 i965_render_urb_layout(VADriverContextP ctx)
1061 {
1062     int urb_vs_start, urb_vs_size;
1063     int urb_gs_start, urb_gs_size;
1064     int urb_clip_start, urb_clip_size;
1065     int urb_sf_start, urb_sf_size;
1066     int urb_cs_start, urb_cs_size;
1067
1068     urb_vs_start = 0;
1069     urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
1070     urb_gs_start = urb_vs_start + urb_vs_size;
1071     urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
1072     urb_clip_start = urb_gs_start + urb_gs_size;
1073     urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
1074     urb_sf_start = urb_clip_start + urb_clip_size;
1075     urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
1076     urb_cs_start = urb_sf_start + urb_sf_size;
1077     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
1078
1079     BEGIN_BATCH(ctx, 3);
1080     OUT_BATCH(ctx, 
1081               CMD_URB_FENCE |
1082               UF0_CS_REALLOC |
1083               UF0_SF_REALLOC |
1084               UF0_CLIP_REALLOC |
1085               UF0_GS_REALLOC |
1086               UF0_VS_REALLOC |
1087               1);
1088     OUT_BATCH(ctx, 
1089               ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
1090               ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
1091               ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
1092     OUT_BATCH(ctx,
1093               ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
1094               ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
1095     ADVANCE_BATCH(ctx);
1096 }
1097
/* Configure the constant (CURBE) URB allocation for the CS unit. */
static void 
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CS_URB_STATE | 0);
    OUT_BATCH(ctx,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |          /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));                /* Number of URB Entries */
    ADVANCE_BATCH(ctx);
}
1108
/*
 * Point CONSTANT_BUFFER at the CURBE BO.  The relocation delta encodes
 * the buffer length field (URB_CS_ENTRY_SIZE - 1) in the low bits of
 * the relocated address; bit 8 of the command enables the valid bit.
 */
static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 2);
    OUT_BATCH(ctx, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(ctx, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(ctx);    
}
1122
/*
 * Set the drawing rectangle to the whole destination region.
 * Dword 2 packs the inclusive max corner: (height-1) << 16 | (width-1).
 */
static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(ctx, 4);
    OUT_BATCH(ctx, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(ctx, 0x00000000); /* min x/y: (0, 0) */
    OUT_BATCH(ctx, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(ctx, 0x00000000); /* drawing rectangle origin */
    ADVANCE_BATCH(ctx);
}
1137
/*
 * Describe the two vertex elements pulled from the vertex buffer:
 * element 0 is the screen position (X, Y) at offset 0, element 1 is the
 * texture coordinate (S0, T0) at offset 8; both are padded to vec4 with
 * constant 1.0 in Z/W.  Pre-Ironlake parts additionally require the
 * destination element offset fields.
 */
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(ctx);
    } else {
        BEGIN_BATCH(ctx, 5);
        OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(ctx, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(ctx);
    }
}
1191
/*
 * Load the sampler palette from the given image, combined with a
 * constant alpha.  Each entry is packed as alpha in bits 31:24 over the
 * 24-bit color from the image palette.  No-op for images without a
 * palette.
 */
static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(ctx, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(ctx, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* Fill palette entries: bits 23:0 color, bits 31:24 alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(ctx, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(ctx);
}
1216
/*
 * Bind the vertex buffer and kick off the draw: a single RECTLIST
 * primitive of 3 sequential vertices, each 4 floats (16 bytes) wide.
 * Ironlake expects an end-address relocation where older parts take a
 * max-index dword.
 */
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices */
    else
        OUT_BATCH(ctx, 3); /* max index */

    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1251
/*
 * Clear the destination region to black using an XY_COLOR_BLT.
 * Handles 16bpp (565) and 32bpp (8888) destinations; for tiled
 * destinations the BLT pitch is programmed in dwords, hence pitch / 4.
 * On GEN6 the blit must go to the BLT ring.
 */
static void 
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);  
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster op: PATCOPY */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* tiled pitch is in dwords */
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id))
        BEGIN_BLT_BATCH(ctx, 6); /* GEN6: blits go to the BLT ring */
    else
        BEGIN_BATCH(ctx, 6);
    OUT_BATCH(ctx, blt_cmd);
    OUT_BATCH(ctx, br13);
    OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(ctx, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(ctx, dest_region->bo, 
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(ctx, 0x0); /* fill color: black */
    ADVANCE_BATCH(ctx);
}
1295
/*
 * Emit the full 3D pipeline programming sequence for a surface render
 * and issue the draw, atomically within one batch buffer.
 */
static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1316
/*
 * Emit the 3D pipeline programming sequence for a subpicture blend and
 * issue the draw.  Unlike the surface path, the destination is not
 * cleared and no CURBE constant buffer is bound.
 */
static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1335
1336
1337 static void 
1338 i965_render_initialize(VADriverContextP ctx)
1339 {
1340     struct i965_driver_data *i965 = i965_driver_data(ctx);
1341     struct i965_render_state *render_state = &i965->render_state;
1342     dri_bo *bo;
1343
1344     /* VERTEX BUFFER */
1345     dri_bo_unreference(render_state->vb.vertex_buffer);
1346     bo = dri_bo_alloc(i965->intel.bufmgr,
1347                       "vertex buffer",
1348                       4096,
1349                       4096);
1350     assert(bo);
1351     render_state->vb.vertex_buffer = bo;
1352
1353     /* VS */
1354     dri_bo_unreference(render_state->vs.state);
1355     bo = dri_bo_alloc(i965->intel.bufmgr,
1356                       "vs state",
1357                       sizeof(struct i965_vs_unit_state),
1358                       64);
1359     assert(bo);
1360     render_state->vs.state = bo;
1361
1362     /* GS */
1363     /* CLIP */
1364     /* SF */
1365     dri_bo_unreference(render_state->sf.state);
1366     bo = dri_bo_alloc(i965->intel.bufmgr,
1367                       "sf state",
1368                       sizeof(struct i965_sf_unit_state),
1369                       64);
1370     assert(bo);
1371     render_state->sf.state = bo;
1372
1373     /* WM */
1374     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1375     bo = dri_bo_alloc(i965->intel.bufmgr,
1376                       "surface state & binding table",
1377                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1378                       4096);
1379     assert(bo);
1380     render_state->wm.surface_state_binding_table_bo = bo;
1381
1382     dri_bo_unreference(render_state->wm.sampler);
1383     bo = dri_bo_alloc(i965->intel.bufmgr,
1384                       "sampler state",
1385                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1386                       64);
1387     assert(bo);
1388     render_state->wm.sampler = bo;
1389     render_state->wm.sampler_count = 0;
1390
1391     dri_bo_unreference(render_state->wm.state);
1392     bo = dri_bo_alloc(i965->intel.bufmgr,
1393                       "wm state",
1394                       sizeof(struct i965_wm_unit_state),
1395                       64);
1396     assert(bo);
1397     render_state->wm.state = bo;
1398
1399     /* COLOR CALCULATOR */
1400     dri_bo_unreference(render_state->cc.state);
1401     bo = dri_bo_alloc(i965->intel.bufmgr,
1402                       "color calc state",
1403                       sizeof(struct i965_cc_unit_state),
1404                       64);
1405     assert(bo);
1406     render_state->cc.state = bo;
1407
1408     dri_bo_unreference(render_state->cc.viewport);
1409     bo = dri_bo_alloc(i965->intel.bufmgr,
1410                       "cc viewport",
1411                       sizeof(struct i965_cc_viewport),
1412                       64);
1413     assert(bo);
1414     render_state->cc.viewport = bo;
1415 }
1416
/*
 * Render @surface into the current draw region: rebuild all indirect
 * state, emit the pipeline and flush the batch.
 * NOTE(review): the @flag parameter is currently unused.
 */
static void
i965_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    i965_render_initialize(ctx);
    i965_surface_render_state_setup(ctx, surface,
                            srcx, srcy, srcw, srch,
                            destx, desty, destw, desth);
    i965_surface_render_pipeline_setup(ctx);
    intel_batchbuffer_flush(ctx);
}
1437
1438 static void
1439 i965_render_put_subpicture(VADriverContextP ctx,
1440                            VASurfaceID surface,
1441                            short srcx,
1442                            short srcy,
1443                            unsigned short srcw,
1444                            unsigned short srch,
1445                            short destx,
1446                            short desty,
1447                            unsigned short destw,
1448                            unsigned short desth)
1449 {
1450     struct i965_driver_data *i965 = i965_driver_data(ctx);
1451     struct object_surface *obj_surface = SURFACE(surface);
1452     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1453     assert(obj_subpic);
1454
1455     i965_render_initialize(ctx);
1456     i965_subpic_render_state_setup(ctx, surface,
1457                                    srcx, srcy, srcw, srch,
1458                                    destx, desty, destw, desth);
1459     i965_subpic_render_pipeline_setup(ctx);
1460     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1461     intel_batchbuffer_flush(ctx);
1462 }
1463
1464 /*
1465  * for GEN6+
1466  */
1467 static void 
1468 gen6_render_initialize(VADriverContextP ctx)
1469 {
1470     struct i965_driver_data *i965 = i965_driver_data(ctx);
1471     struct i965_render_state *render_state = &i965->render_state;
1472     dri_bo *bo;
1473
1474     /* VERTEX BUFFER */
1475     dri_bo_unreference(render_state->vb.vertex_buffer);
1476     bo = dri_bo_alloc(i965->intel.bufmgr,
1477                       "vertex buffer",
1478                       4096,
1479                       4096);
1480     assert(bo);
1481     render_state->vb.vertex_buffer = bo;
1482
1483     /* WM */
1484     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1485     bo = dri_bo_alloc(i965->intel.bufmgr,
1486                       "surface state & binding table",
1487                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1488                       4096);
1489     assert(bo);
1490     render_state->wm.surface_state_binding_table_bo = bo;
1491
1492     dri_bo_unreference(render_state->wm.sampler);
1493     bo = dri_bo_alloc(i965->intel.bufmgr,
1494                       "sampler state",
1495                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1496                       4096);
1497     assert(bo);
1498     render_state->wm.sampler = bo;
1499     render_state->wm.sampler_count = 0;
1500
1501     /* COLOR CALCULATOR */
1502     dri_bo_unreference(render_state->cc.state);
1503     bo = dri_bo_alloc(i965->intel.bufmgr,
1504                       "color calc state",
1505                       sizeof(struct gen6_color_calc_state),
1506                       4096);
1507     assert(bo);
1508     render_state->cc.state = bo;
1509
1510     /* CC VIEWPORT */
1511     dri_bo_unreference(render_state->cc.viewport);
1512     bo = dri_bo_alloc(i965->intel.bufmgr,
1513                       "cc viewport",
1514                       sizeof(struct i965_cc_viewport),
1515                       4096);
1516     assert(bo);
1517     render_state->cc.viewport = bo;
1518
1519     /* BLEND STATE */
1520     dri_bo_unreference(render_state->cc.blend);
1521     bo = dri_bo_alloc(i965->intel.bufmgr,
1522                       "blend state",
1523                       sizeof(struct gen6_blend_state),
1524                       4096);
1525     assert(bo);
1526     render_state->cc.blend = bo;
1527
1528     /* DEPTH & STENCIL STATE */
1529     dri_bo_unreference(render_state->cc.depth_stencil);
1530     bo = dri_bo_alloc(i965->intel.bufmgr,
1531                       "depth & stencil state",
1532                       sizeof(struct gen6_depth_stencil_state),
1533                       4096);
1534     assert(bo);
1535     render_state->cc.depth_stencil = bo;
1536 }
1537
1538 static void
1539 gen6_render_color_calc_state(VADriverContextP ctx)
1540 {
1541     struct i965_driver_data *i965 = i965_driver_data(ctx);
1542     struct i965_render_state *render_state = &i965->render_state;
1543     struct gen6_color_calc_state *color_calc_state;
1544     
1545     dri_bo_map(render_state->cc.state, 1);
1546     assert(render_state->cc.state->virtual);
1547     color_calc_state = render_state->cc.state->virtual;
1548     memset(color_calc_state, 0, sizeof(*color_calc_state));
1549     color_calc_state->constant_r = 1.0;
1550     color_calc_state->constant_g = 0.0;
1551     color_calc_state->constant_b = 1.0;
1552     color_calc_state->constant_a = 1.0;
1553     dri_bo_unmap(render_state->cc.state);
1554 }
1555
1556 static void
1557 gen6_render_blend_state(VADriverContextP ctx)
1558 {
1559     struct i965_driver_data *i965 = i965_driver_data(ctx);
1560     struct i965_render_state *render_state = &i965->render_state;
1561     struct gen6_blend_state *blend_state;
1562     
1563     dri_bo_map(render_state->cc.blend, 1);
1564     assert(render_state->cc.blend->virtual);
1565     blend_state = render_state->cc.blend->virtual;
1566     memset(blend_state, 0, sizeof(*blend_state));
1567     blend_state->blend1.logic_op_enable = 1;
1568     blend_state->blend1.logic_op_func = 0xc;
1569     dri_bo_unmap(render_state->cc.blend);
1570 }
1571
1572 static void
1573 gen6_render_depth_stencil_state(VADriverContextP ctx)
1574 {
1575     struct i965_driver_data *i965 = i965_driver_data(ctx);
1576     struct i965_render_state *render_state = &i965->render_state;
1577     struct gen6_depth_stencil_state *depth_stencil_state;
1578     
1579     dri_bo_map(render_state->cc.depth_stencil, 1);
1580     assert(render_state->cc.depth_stencil->virtual);
1581     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1582     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1583     dri_bo_unmap(render_state->cc.depth_stencil);
1584 }
1585
/*
 * Prepare all indirect state for a GEN6+ surface render: surface and
 * sampler states, viewport, color calc / blend / depth-stencil states,
 * CURBE constants and vertex data.  No batch commands are emitted.
 */
static void
gen6_render_setup_states(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
                         unsigned short srcw,
                         unsigned short srch,
                         short destx,
                         short desty,
                         unsigned short destw,
                         unsigned short desth)
{
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_cc_viewport(ctx);
    gen6_render_color_calc_state(ctx);
    gen6_render_blend_state(ctx);
    gen6_render_depth_stencil_state(ctx);
    i965_render_upload_constants(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
}
1610
/*
 * Emit GEN6 invariant state: select the 3D pipeline, disable
 * multisampling (1 sample/pixel, full sample mask) and clear the
 * system instruction pointer.  (Name keeps the historical spelling.)
 */
static void
gen6_emit_invarient_states(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);

    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(ctx, 1);

    /* Set system instruction pointer */
    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
    OUT_BATCH(ctx, 0);
}
1628
/*
 * Emit GEN6 STATE_BASE_ADDRESS.  Only the surface state base points at
 * a real buffer (the combined surface-state/binding-table BO); all
 * other bases and bounds carry only the modify-enable bit.
 */
static void
gen6_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
}
1646
/*
 * Point the pipeline at the color-calculator viewport state.  Only the
 * CC viewport is modified (see the MODIFY_CC flag); the clip and SF
 * viewport slots are emitted as zero and left untouched.
 */
static void
gen6_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
              (4 - 2));
    OUT_BATCH(ctx, 0);  /* clip viewport: not modified */
    OUT_BATCH(ctx, 0);  /* SF viewport: not modified */
    OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1660
/*
 * Configure URB allocation: minimal VS entries (size field holds
 * size - 1, so 0 encodes one 128-bit row) and no GS entries, since
 * the GS stage is disabled in this pipeline.
 */
static void
gen6_emit_urb(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
    OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
    OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
1670
/*
 * Emit 3DSTATE_CC_STATE_POINTERS referencing the blend, depth/stencil
 * and color-calc state buffers built earlier.  The low bit set in each
 * relocation delta is presumably the per-pointer "modify enable" bit of
 * this packet — TODO confirm against the SNB PRM.
 */
static void
gen6_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
    OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
}
1682
/*
 * Point the pixel shader at the sampler state table; VS and GS sampler
 * slots are emitted as zero (only the PS samples in this pipeline, see
 * the MODIFY_PS flag).
 */
static void
gen6_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0); /* VS */
    OUT_BATCH(ctx, 0); /* GS */
    OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
}
1696
/*
 * Emit the binding-table pointers packet.  Only the PS slot carries a
 * real offset (BINDING_TABLE_OFFSET, relative to the surface-state base
 * programmed in gen6_emit_state_base_address); VS/GS slots are zero.
 */
static void
gen6_emit_binding_table(VADriverContextP ctx)
{
    /* Binding table pointers */
    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(ctx, 0);          /* vs */
    OUT_BATCH(ctx, 0);          /* gs */
    /* Only the PS uses the binding table */
    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
}
1709
/*
 * Declare a null depth buffer (surface type NULL, no address) and a
 * zero depth-clear value — depth testing is unused by the video blit.
 */
static void
gen6_emit_depth_buffer_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
    OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    /* Depth clear value: unused with a null depth buffer. */
    OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
    OUT_BATCH(ctx, 0);
}
1725
/*
 * Emit the drawing rectangle; the packet format is unchanged from
 * earlier generations, so the common i965 helper is reused as-is.
 */
static void
gen6_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}
1731
/*
 * Disable the vertex shader stage: no VS constant buffer and no VS
 * kernel, so vertices pass through to the next stage untouched.
 */
static void 
gen6_emit_vs_state(VADriverContextP ctx)
{
    /* disable VS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
    OUT_BATCH(ctx, 0); /* without VS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1749
/*
 * Disable the geometry shader stage: no GS constant buffer and no GS
 * kernel (matches the zero GS URB allocation in gen6_emit_urb).
 */
static void 
gen6_emit_gs_state(VADriverContextP ctx)
{
    /* disable GS constant buffer */
    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
        
    OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
    OUT_BATCH(ctx, 0); /* without GS kernel */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
}
1768
/*
 * Disable clipping: all-zero 3DSTATE_CLIP, so geometry passes through
 * (the drawing rectangle still bounds the output).
 */
static void 
gen6_emit_clip_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* pass-through */
    OUT_BATCH(ctx, 0);
}
1777
/*
 * Emit 3DSTATE_SF (20 dwords): one attribute output (the texture
 * coordinate), one URB row read per vertex, culling off, and vertex 2
 * as the provoking vertex for trifans.  All remaining dwords
 * (viewport transform, point/line state, attribute swizzles) are zero.
 */
static void 
gen6_emit_sf_state(VADriverContextP ctx)
{
    OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
    OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW9 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW14 */
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0); /* DW19 */
}
1804
/*
 * Program the pixel shader (WM) stage.
 *
 * @kernel  index into render_kernels[] selecting the PS kernel bo
 *          (e.g. PS_KERNEL or PS_SUBPIC_KERNEL).
 *
 * Constant buffer 0 is bound to the CURBE bo; the kernel runs in
 * SIMD16 mode with dispatch starting at GRF 6, up to 40 threads
 * (the field encodes max_threads - 1), one sampler and 5 binding-table
 * entries, consuming one SF output with perspective pixel barycentrics.
 */
static void 
gen6_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS |
              GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
              (5 - 2));
    OUT_RELOC(ctx, 
              render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
    OUT_RELOC(ctx, render_kernels[kernel].bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    /* NOTE: "SHITF" is a typo carried by the macro's definition. */
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
    OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    OUT_BATCH(ctx, 0);
    OUT_BATCH(ctx, 0);
}
1838
/*
 * Describe the vertex layout: two R32G32_FLOAT elements per vertex,
 * both sourced from vertex buffer 0 — position (X, Y) at byte offset 0
 * and texture coordinate (S0, T0) at byte offset 8.  Each element is
 * expanded to a vec4 with 1.0 in the Z and W components.
 */
static void
gen6_emit_vertex_element_state(VADriverContextP ctx)
{
    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN6_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}
1863
/*
 * Bind the vertex buffer and kick off the draw: a single RECTLIST
 * primitive of 3 vertices (hardware derives the 4th corner).  Each
 * vertex is 4 floats (x, y, s, t) = 16 bytes, so the buffer end
 * address is 12 * 4 bytes past the start (3 vertices).  11 dwords
 * total: 5 for VERTEX_BUFFERS + 6 for 3DPRIMITIVE.
 */
static void
gen6_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(ctx, 11);
    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(ctx, 
              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
              GEN6_VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    OUT_BATCH(ctx, 0);

    OUT_BATCH(ctx, 
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(ctx, 3); /* vertex count per instance */
    OUT_BATCH(ctx, 0); /* start vertex offset */
    OUT_BATCH(ctx, 1); /* single instance */
    OUT_BATCH(ctx, 0); /* start instance location */
    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(ctx);
}
1893
/*
 * Build the complete GEN6 render batch: flush, then emit the full 3D
 * pipeline state in hardware-required order, finishing with the draw.
 *
 * @kernel  PS kernel index forwarded to gen6_emit_wm_state().
 *
 * The emission order below is part of the contract with the hardware;
 * do not reorder the calls.
 */
static void
gen6_render_emit_states(VADriverContextP ctx, int kernel)
{
    intel_batchbuffer_start_atomic(ctx, 0x1000);
    intel_batchbuffer_emit_mi_flush(ctx);
    gen6_emit_invarient_states(ctx);
    gen6_emit_state_base_address(ctx);
    gen6_emit_viewport_state_pointers(ctx);
    gen6_emit_urb(ctx);
    gen6_emit_cc_state_pointers(ctx);
    gen6_emit_sampler_state_pointers(ctx);
    gen6_emit_vs_state(ctx);
    gen6_emit_gs_state(ctx);
    gen6_emit_clip_state(ctx);
    gen6_emit_sf_state(ctx);
    gen6_emit_wm_state(ctx, kernel);
    gen6_emit_binding_table(ctx);
    gen6_emit_depth_buffer_state(ctx);
    gen6_emit_drawing_rectangle(ctx);
    gen6_emit_vertex_element_state(ctx);
    gen6_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(ctx);
}
1917
/*
 * GEN6 (Sandybridge) implementation of PutSurface: prepare the render
 * state objects, scale/blit the src rectangle of the video surface to
 * the dest rectangle of the drawable, and flush the batch.
 *
 * @surface        source VA surface
 * @srcx..srch     source rectangle within the surface
 * @destx..desth   destination rectangle within the drawable
 * @flag           rendering flags (currently unused here; consumed by
 *                 post-processing in the caller)
 */
static void
gen6_render_put_surface(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth,
                        unsigned int flag)
{
    gen6_render_initialize(ctx);
    gen6_render_setup_states(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth);
    /* Clear the drawable area outside the destination rectangle. */
    i965_clear_dest_region(ctx);
    gen6_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(ctx);
}
1939
1940 static void
1941 gen6_subpicture_render_blend_state(VADriverContextP ctx)
1942 {
1943     struct i965_driver_data *i965 = i965_driver_data(ctx);
1944     struct i965_render_state *render_state = &i965->render_state;
1945     struct gen6_blend_state *blend_state;
1946
1947     dri_bo_unmap(render_state->cc.state);    
1948     dri_bo_map(render_state->cc.blend, 1);
1949     assert(render_state->cc.blend->virtual);
1950     blend_state = render_state->cc.blend->virtual;
1951     memset(blend_state, 0, sizeof(*blend_state));
1952     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1953     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1954     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
1955     blend_state->blend0.blend_enable = 1;
1956     blend_state->blend1.post_blend_clamp_enable = 1;
1957     blend_state->blend1.pre_blend_clamp_enable = 1;
1958     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1959     dri_bo_unmap(render_state->cc.blend);
1960 }
1961
1962 static void
1963 gen6_subpicture_render_setup_states(VADriverContextP ctx,
1964                                     VASurfaceID surface,
1965                                     short srcx,
1966                                     short srcy,
1967                                     unsigned short srcw,
1968                                     unsigned short srch,
1969                                     short destx,
1970                                     short desty,
1971                                     unsigned short destw,
1972                                     unsigned short desth)
1973 {
1974     VARectangle output_rect;
1975
1976     output_rect.x      = destx;
1977     output_rect.y      = desty;
1978     output_rect.width  = destw;
1979     output_rect.height = desth;
1980
1981     i965_render_dest_surface_state(ctx, 0);
1982     i965_subpic_render_src_surfaces_state(ctx, surface);
1983     i965_render_sampler(ctx);
1984     i965_render_cc_viewport(ctx);
1985     gen6_render_color_calc_state(ctx);
1986     gen6_subpicture_render_blend_state(ctx);
1987     gen6_render_depth_stencil_state(ctx);
1988     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
1989 }
1990
1991 static void
1992 gen6_render_put_subpicture(VADriverContextP ctx,
1993                            VASurfaceID surface,
1994                            short srcx,
1995                            short srcy,
1996                            unsigned short srcw,
1997                            unsigned short srch,
1998                            short destx,
1999                            short desty,
2000                            unsigned short destw,
2001                            unsigned short desth)
2002 {
2003     struct i965_driver_data *i965 = i965_driver_data(ctx);
2004     struct object_surface *obj_surface = SURFACE(surface);
2005     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2006
2007     assert(obj_subpic);
2008     gen6_render_initialize(ctx);
2009     gen6_subpicture_render_setup_states(ctx, surface,
2010                                         srcx, srcy, srcw, srch,
2011                                         destx, desty, destw, desth);
2012     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2013     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2014     intel_batchbuffer_flush(ctx);
2015 }
2016
2017 /*
2018  * global functions
2019  */
2020 void
2021 intel_render_put_surface(VADriverContextP ctx,
2022                         VASurfaceID surface,
2023                         short srcx,
2024                         short srcy,
2025                         unsigned short srcw,
2026                         unsigned short srch,
2027                         short destx,
2028                         short desty,
2029                         unsigned short destw,
2030                         unsigned short desth,
2031                         unsigned int flag)
2032 {
2033     struct i965_driver_data *i965 = i965_driver_data(ctx);
2034
2035     i965_post_processing(ctx, surface,
2036                          srcx, srcy, srcw, srch,
2037                          destx, desty, destw, desth,
2038                          flag);
2039
2040     if (IS_GEN6(i965->intel.device_id))
2041         gen6_render_put_surface(ctx, surface,
2042                                 srcx, srcy, srcw, srch,
2043                                 destx, desty, destw, desth,
2044                                 flag);
2045     else
2046         i965_render_put_surface(ctx, surface,
2047                                 srcx, srcy, srcw, srch,
2048                                 destx, desty, destw, desth,
2049                                 flag);
2050 }
2051
2052 void
2053 intel_render_put_subpicture(VADriverContextP ctx,
2054                            VASurfaceID surface,
2055                            short srcx,
2056                            short srcy,
2057                            unsigned short srcw,
2058                            unsigned short srch,
2059                            short destx,
2060                            short desty,
2061                            unsigned short destw,
2062                            unsigned short desth)
2063 {
2064     struct i965_driver_data *i965 = i965_driver_data(ctx);
2065
2066     if (IS_GEN6(i965->intel.device_id))
2067         gen6_render_put_subpicture(ctx, surface,
2068                                    srcx, srcy, srcw, srch,
2069                                    destx, desty, destw, desth);
2070     else
2071         i965_render_put_subpicture(ctx, surface,
2072                                    srcx, srcy, srcw, srch,
2073                                    destx, desty, destw, desth);
2074 }
2075
2076 Bool 
2077 i965_render_init(VADriverContextP ctx)
2078 {
2079     struct i965_driver_data *i965 = i965_driver_data(ctx);
2080     struct i965_render_state *render_state = &i965->render_state;
2081     int i;
2082
2083     /* kernel */
2084     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2085                                  sizeof(render_kernels_gen5[0])));
2086     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2087                                  sizeof(render_kernels_gen6[0])));
2088
2089     if (IS_GEN6(i965->intel.device_id))
2090         render_kernels = render_kernels_gen6;
2091     else if (IS_IRONLAKE(i965->intel.device_id))
2092         render_kernels = render_kernels_gen5;
2093     else
2094         render_kernels = render_kernels_gen4;
2095
2096     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2097         struct render_kernel *kernel = &render_kernels[i];
2098
2099         if (!kernel->size)
2100             continue;
2101
2102         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2103                                   kernel->name, 
2104                                   kernel->size, 0x1000);
2105         assert(kernel->bo);
2106         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2107     }
2108
2109     /* constant buffer */
2110     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2111                       "constant buffer",
2112                       4096, 64);
2113     assert(render_state->curbe.bo);
2114     render_state->curbe.upload = 0;
2115
2116     return True;
2117 }
2118
2119 Bool 
2120 i965_render_terminate(VADriverContextP ctx)
2121 {
2122     int i;
2123     struct i965_driver_data *i965 = i965_driver_data(ctx);
2124     struct i965_render_state *render_state = &i965->render_state;
2125
2126     dri_bo_unreference(render_state->curbe.bo);
2127     render_state->curbe.bo = NULL;
2128
2129     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2130         struct render_kernel *kernel = &render_kernels[i];
2131         
2132         dri_bo_unreference(kernel->bo);
2133         kernel->bo = NULL;
2134     }
2135
2136     dri_bo_unreference(render_state->vb.vertex_buffer);
2137     render_state->vb.vertex_buffer = NULL;
2138     dri_bo_unreference(render_state->vs.state);
2139     render_state->vs.state = NULL;
2140     dri_bo_unreference(render_state->sf.state);
2141     render_state->sf.state = NULL;
2142     dri_bo_unreference(render_state->wm.sampler);
2143     render_state->wm.sampler = NULL;
2144     dri_bo_unreference(render_state->wm.state);
2145     render_state->wm.state = NULL;
2146     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2147     dri_bo_unreference(render_state->cc.viewport);
2148     render_state->cc.viewport = NULL;
2149     dri_bo_unreference(render_state->cc.state);
2150     render_state->cc.state = NULL;
2151     dri_bo_unreference(render_state->cc.blend);
2152     render_state->cc.blend = NULL;
2153     dri_bo_unreference(render_state->cc.depth_stencil);
2154     render_state->cc.depth_stencil = NULL;
2155
2156     if (render_state->draw_region) {
2157         dri_bo_unreference(render_state->draw_region->bo);
2158         free(render_state->draw_region);
2159         render_state->draw_region = NULL;
2160     }
2161
2162     return True;
2163 }
2164