/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_backend.h>
#include "va/x11/va_dricommon.h"

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

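/*
 * The .g4b/.g6b files included below are precompiled GEN shader binaries
 * built from the assembly sources in shaders/render; each row is one
 * 128-bit EU instruction expressed as four dwords, hence the
 * uint32_t[][4] declarations.
 */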
static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
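/*
 * Note: the gen6 SF kernel array is intentionally empty: on Sandybridge
 * the SF stage is programmed through fixed-function state rather than an
 * SF thread, and the gen6 pixel shaders no longer include the exa_wm_xy
 * step.
 */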
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
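
/*
 * Surface states and the binding table share a single bo, laid out as
 * MAX_RENDER_SURFACES padded surface-state blocks followed by the binding
 * table itself:
 *
 *   bo: [ss 0][ss 1]...[ss MAX_RENDER_SURFACES-1][bt dwords 0..MAX_RENDER_SURFACES-1]
 *
 * Binding table entry i simply holds SURFACE_STATE_OFFSET(i), the offset
 * of surface state i from the surface state base address.
 */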

/*
 * Reinterpret the bits of an IEEE-754 float as a dword so color constants
 * can be written into the command stream; the union avoids the
 * strict-aliasing problems of a pointer cast.
 */
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1

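/*
 * The URB is statically partitioned among the fixed-function stages:
 * i965_render_urb_layout() lays the VS, GS, CLIP, SF and CS regions out
 * back to back using the entry counts and sizes above, with each stage's
 * fence pointing at the end of its region.  GS and CLIP get no entries
 * because both stages are disabled for this pipeline.
 */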
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

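/*
 * Program one sampler per bound source surface: bilinear min/mag
 * filtering with all texture coordinates clamped to the edge, which is
 * what video scaling wants.
 */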
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

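/*
 * CC unit for subpicture rendering: the subpicture is composited onto the
 * rendered video with standard "source over" alpha blending,
 *
 *   dst = src * src_alpha + dst * (1 - src_alpha)
 *
 * hence SRC_ALPHA/INV_SRC_ALPHA below with the blend function set to ADD.
 */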
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference value */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    unsigned int tiling;
    unsigned int swizzle;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(region, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      region);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

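/*
 * Source surface layout for an NV12/I420 frame of pitch w and height h
 * (both already aligned by the allocator):
 *
 *   Y  plane: offset 0,                pitch w
 *   UV plane: offset w * h,            pitch w      (NV12, interleaved)
 *   U  plane: offset w * h,            pitch w / 2  (I420)
 *   V  plane: offset w * h + w * h / 4, pitch w / 2 (I420)
 *
 * Each plane is programmed into two binding-table slots (1/2 for Y,
 * 3..6 for chroma), matching what the planar sampling kernels expect.
 */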
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (!render_state->inited) {
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (render_state->interleaved_uv) {
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    dri_bo *subpic_region;

    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = dest_region->bo->offset;

    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

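/*
 * The destination rectangle is drawn as a RECTLIST of three vertices
 * (bottom-right, bottom-left, top-left); the rasterizer derives the
 * fourth corner itself.  Each vertex is four floats: the texture
 * coordinate pair followed by the screen-space position.
 */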
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);

    const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
    const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    VARectangle dst_rect;
    dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
    dst_rect.y      = output_rect->y + sy * (float)obj_subpic->dst_rect.y;
    dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
    dst_rect.height = sy * (float)obj_subpic->dst_rect.height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_render_upload_vertex(VADriverContextP ctx,
                          VASurfaceID surface,
                          short srcx,
                          short srcy,
                          unsigned short srcw,
                          unsigned short srch,
                          short destx,
                          short desty,
                          unsigned short destw,
                          unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + destx;
    int box_y1 = dest_region->y + desty;
    int box_x2 = box_x1 + destw;
    int box_y2 = box_y1 + desth;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->orig_width;
    height = obj_surface->orig_height;

    u1 = (float)srcx / width;
    v1 = (float)srcy / height;
    u2 = (float)(srcx + srcw) / width;
    v2 = (float)(srcy + srch) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

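/*
 * The CURBE holds a single constant for the planar PS kernel: a flag
 * telling it whether the chroma planes are interleaved (NV12) or stored
 * separately (I420), so one kernel binary can service both layouts.
 */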
static void
i965_render_upload_constants(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;

    if (render_state->curbe.upload)
        return;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (render_state->interleaved_uv)
        *constant_buffer = 1;
    else
        *constant_buffer = 0;

    dri_bo_unmap(render_state->curbe.bo);
    render_state->curbe.upload = 1;
}

static void
i965_surface_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}

static void
i965_subpic_render_state_setup(VADriverContextP ctx,
                        VASurfaceID surface,
                        short srcx,
                        short srcy,
                        unsigned short srcw,
                        unsigned short srch,
                        short destx,
                        short desty,
                        unsigned short destw,
                        unsigned short desth)
{
    VARectangle output_rect;

    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0..23 hold the color, bits 24..31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

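/*
 * Emit the vertex buffer state and the final 3DPRIMITIVE.  Only three
 * vertices are dispatched: with a RECTLIST topology the hardware
 * reconstructs the fourth corner of the rectangle itself.
 */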
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

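/*
 * Clear the destination with the 2D blitter before drawing.  br13 packs
 * the raster op (0xf0 = PATCOPY, i.e. fill with the solid pattern color),
 * the color depth and the destination pitch; for tiled targets the pitch
 * is given in dwords rather than bytes.
 */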
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

1450 static void
1451 i965_render_put_surface(VADriverContextP ctx,
1452                         VASurfaceID surface,
1453                         short srcx,
1454                         short srcy,
1455                         unsigned short srcw,
1456                         unsigned short srch,
1457                         short destx,
1458                         short desty,
1459                         unsigned short destw,
1460                         unsigned short desth,
1461                         unsigned int flag)
1462 {
1463     struct i965_driver_data *i965 = i965_driver_data(ctx);
1464     struct intel_batchbuffer *batch = i965->batch;
1465
1466     i965_render_initialize(ctx);
1467     i965_surface_render_state_setup(ctx, surface,
1468                             srcx, srcy, srcw, srch,
1469                             destx, desty, destw, desth);
1470     i965_surface_render_pipeline_setup(ctx);
1471     intel_batchbuffer_flush(batch);
1472 }
1473
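/*
 * Subpicture variant of the GEN4/GEN5 path: samples the subpicture
 * image attached to the surface and uploads its palette (the 0xff
 * argument presumably fills in opaque alpha) before flushing the
 * batch.
 */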
1474 static void
1475 i965_render_put_subpicture(VADriverContextP ctx,
1476                            VASurfaceID surface,
1477                            short srcx,
1478                            short srcy,
1479                            unsigned short srcw,
1480                            unsigned short srch,
1481                            short destx,
1482                            short desty,
1483                            unsigned short destw,
1484                            unsigned short desth)
1485 {
1486     struct i965_driver_data *i965 = i965_driver_data(ctx);
1487     struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
1492
1493     i965_render_initialize(ctx);
1494     i965_subpic_render_state_setup(ctx, surface,
1495                                    srcx, srcy, srcw, srch,
1496                                    destx, desty, destw, desth);
1497     i965_subpic_render_pipeline_setup(ctx);
1498     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1499     intel_batchbuffer_flush(batch);
1500 }
1501
1502 /*
1503  * for GEN6+
1504  */
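/*
 * GEN6 needs fewer indirect state buffers than GEN4/GEN5: the VS, GS,
 * CLIP and SF stages are programmed inline with 3DSTATE_* commands,
 * so only the vertex buffer, the surface state & binding table, the
 * samplers and the CC trio (color calc, blend, depth/stencil) are
 * allocated here.
 */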
1505 static void 
1506 gen6_render_initialize(VADriverContextP ctx)
1507 {
1508     struct i965_driver_data *i965 = i965_driver_data(ctx);
1509     struct i965_render_state *render_state = &i965->render_state;
1510     dri_bo *bo;
1511
1512     /* VERTEX BUFFER */
1513     dri_bo_unreference(render_state->vb.vertex_buffer);
1514     bo = dri_bo_alloc(i965->intel.bufmgr,
1515                       "vertex buffer",
1516                       4096,
1517                       4096);
1518     assert(bo);
1519     render_state->vb.vertex_buffer = bo;
1520
1521     /* WM */
1522     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1523     bo = dri_bo_alloc(i965->intel.bufmgr,
1524                       "surface state & binding table",
1525                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1526                       4096);
1527     assert(bo);
1528     render_state->wm.surface_state_binding_table_bo = bo;
1529
1530     dri_bo_unreference(render_state->wm.sampler);
1531     bo = dri_bo_alloc(i965->intel.bufmgr,
1532                       "sampler state",
1533                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1534                       4096);
1535     assert(bo);
1536     render_state->wm.sampler = bo;
1537     render_state->wm.sampler_count = 0;
1538
1539     /* COLOR CALCULATOR */
1540     dri_bo_unreference(render_state->cc.state);
1541     bo = dri_bo_alloc(i965->intel.bufmgr,
1542                       "color calc state",
1543                       sizeof(struct gen6_color_calc_state),
1544                       4096);
1545     assert(bo);
1546     render_state->cc.state = bo;
1547
1548     /* CC VIEWPORT */
1549     dri_bo_unreference(render_state->cc.viewport);
1550     bo = dri_bo_alloc(i965->intel.bufmgr,
1551                       "cc viewport",
1552                       sizeof(struct i965_cc_viewport),
1553                       4096);
1554     assert(bo);
1555     render_state->cc.viewport = bo;
1556
1557     /* BLEND STATE */
1558     dri_bo_unreference(render_state->cc.blend);
1559     bo = dri_bo_alloc(i965->intel.bufmgr,
1560                       "blend state",
1561                       sizeof(struct gen6_blend_state),
1562                       4096);
1563     assert(bo);
1564     render_state->cc.blend = bo;
1565
1566     /* DEPTH & STENCIL STATE */
1567     dri_bo_unreference(render_state->cc.depth_stencil);
1568     bo = dri_bo_alloc(i965->intel.bufmgr,
1569                       "depth & stencil state",
1570                       sizeof(struct gen6_depth_stencil_state),
1571                       4096);
1572     assert(bo);
1573     render_state->cc.depth_stencil = bo;
1574 }
1575
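/*
 * The constant color written here ({1, 0, 1, 1}, i.e. magenta) is
 * only consumed if a blend factor references the constant color;
 * neither blend setup below does, so it presumably just acts as a
 * debug marker.
 */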
1576 static void
1577 gen6_render_color_calc_state(VADriverContextP ctx)
1578 {
1579     struct i965_driver_data *i965 = i965_driver_data(ctx);
1580     struct i965_render_state *render_state = &i965->render_state;
1581     struct gen6_color_calc_state *color_calc_state;
1582     
1583     dri_bo_map(render_state->cc.state, 1);
1584     assert(render_state->cc.state->virtual);
1585     color_calc_state = render_state->cc.state->virtual;
1586     memset(color_calc_state, 0, sizeof(*color_calc_state));
1587     color_calc_state->constant_r = 1.0;
1588     color_calc_state->constant_g = 0.0;
1589     color_calc_state->constant_b = 1.0;
1590     color_calc_state->constant_a = 1.0;
1591     dri_bo_unmap(render_state->cc.state);
1592 }
1593
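/*
 * Blend state for the main video surface: logic op function 0xc is
 * COPY, i.e. the pixel shader output overwrites the destination
 * unmodified and blending is effectively disabled.
 */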
1594 static void
1595 gen6_render_blend_state(VADriverContextP ctx)
1596 {
1597     struct i965_driver_data *i965 = i965_driver_data(ctx);
1598     struct i965_render_state *render_state = &i965->render_state;
1599     struct gen6_blend_state *blend_state;
1600     
1601     dri_bo_map(render_state->cc.blend, 1);
1602     assert(render_state->cc.blend->virtual);
1603     blend_state = render_state->cc.blend->virtual;
1604     memset(blend_state, 0, sizeof(*blend_state));
1605     blend_state->blend1.logic_op_enable = 1;
1606     blend_state->blend1.logic_op_func = 0xc;
1607     dri_bo_unmap(render_state->cc.blend);
1608 }
1609
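/*
 * An all-zero depth/stencil state disables the depth and stencil
 * tests entirely; no real depth buffer is bound anyway (see
 * gen6_emit_depth_buffer_state() below).
 */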
1610 static void
1611 gen6_render_depth_stencil_state(VADriverContextP ctx)
1612 {
1613     struct i965_driver_data *i965 = i965_driver_data(ctx);
1614     struct i965_render_state *render_state = &i965->render_state;
1615     struct gen6_depth_stencil_state *depth_stencil_state;
1616     
1617     dri_bo_map(render_state->cc.depth_stencil, 1);
1618     assert(render_state->cc.depth_stencil->virtual);
1619     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1620     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1621     dri_bo_unmap(render_state->cc.depth_stencil);
1622 }
1623
1624 static void
1625 gen6_render_setup_states(VADriverContextP ctx,
1626                          VASurfaceID surface,
1627                          short srcx,
1628                          short srcy,
1629                          unsigned short srcw,
1630                          unsigned short srch,
1631                          short destx,
1632                          short desty,
1633                          unsigned short destw,
1634                          unsigned short desth)
1635 {
1636     i965_render_dest_surface_state(ctx, 0);
1637     i965_render_src_surfaces_state(ctx, surface);
1638     i965_render_sampler(ctx);
1639     i965_render_cc_viewport(ctx);
1640     gen6_render_color_calc_state(ctx);
1641     gen6_render_blend_state(ctx);
1642     gen6_render_depth_stencil_state(ctx);
1643     i965_render_upload_constants(ctx);
1644     i965_render_upload_vertex(ctx, surface,
1645                               srcx, srcy, srcw, srch,
1646                               destx, desty, destw, desth);
1647 }
1648
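/*
 * Batch-invariant state: select the 3D pipeline, force 1x
 * multisampling with a full sample mask and clear the system
 * instruction pointer.
 */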
1649 static void
gen6_emit_invariant_states(VADriverContextP ctx)
1651 {
1652     struct i965_driver_data *i965 = i965_driver_data(ctx);
1653     struct intel_batchbuffer *batch = i965->batch;
1654
1655     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1656
1657     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1658     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1659               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1660     OUT_BATCH(batch, 0);
1661
1662     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1663     OUT_BATCH(batch, 1);
1664
1665     /* Set system instruction pointer */
1666     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1667     OUT_BATCH(batch, 0);
1668 }
1669
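/*
 * Only the surface state base address points at a real buffer
 * object: binding table entries and SURFACE_STATE offsets are
 * relative to it, hence the single relocation.  Every other base is
 * programmed to zero (with BASE_ADDRESS_MODIFY) so the remaining
 * indirect state pointers can be emitted as relocated graphics
 * addresses.
 */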
1670 static void
1671 gen6_emit_state_base_address(VADriverContextP ctx)
1672 {
1673     struct i965_driver_data *i965 = i965_driver_data(ctx);
1674     struct intel_batchbuffer *batch = i965->batch;
1675     struct i965_render_state *render_state = &i965->render_state;
1676
1677     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1678     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1679     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1680     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1681     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1682     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1683     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1684     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1685     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1686     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1687 }
1688
1689 static void
1690 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1691 {
1692     struct i965_driver_data *i965 = i965_driver_data(ctx);
1693     struct intel_batchbuffer *batch = i965->batch;
1694     struct i965_render_state *render_state = &i965->render_state;
1695
1696     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1697               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1698               (4 - 2));
1699     OUT_BATCH(batch, 0);
1700     OUT_BATCH(batch, 0);
1701     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1702 }
1703
1704 static void
1705 gen6_emit_urb(VADriverContextP ctx)
1706 {
1707     struct i965_driver_data *i965 = i965_driver_data(ctx);
1708     struct intel_batchbuffer *batch = i965->batch;
1709
1710     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1711     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1712               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1713     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1714               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1715 }
1716
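/*
 * The '1' in the low bit of each relocation delta below is not part
 * of the address; it is the pointer-valid bit of
 * 3DSTATE_CC_STATE_POINTERS that tells the hardware to load the new
 * blend, depth/stencil and color-calc pointers.
 */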
1717 static void
1718 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1719 {
1720     struct i965_driver_data *i965 = i965_driver_data(ctx);
1721     struct intel_batchbuffer *batch = i965->batch;
1722     struct i965_render_state *render_state = &i965->render_state;
1723
1724     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1725     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1726     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1727     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1728 }
1729
1730 static void
1731 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1732 {
1733     struct i965_driver_data *i965 = i965_driver_data(ctx);
1734     struct intel_batchbuffer *batch = i965->batch;
1735     struct i965_render_state *render_state = &i965->render_state;
1736
1737     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1738               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1739               (4 - 2));
1740     OUT_BATCH(batch, 0); /* VS */
1741     OUT_BATCH(batch, 0); /* GS */
    OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1743 }
1744
1745 static void
1746 gen6_emit_binding_table(VADriverContextP ctx)
1747 {
1748     struct i965_driver_data *i965 = i965_driver_data(ctx);
1749     struct intel_batchbuffer *batch = i965->batch;
1750
1751     /* Binding table pointers */
1752     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1753               GEN6_BINDING_TABLE_MODIFY_PS |
1754               (4 - 2));
1755     OUT_BATCH(batch, 0);                /* vs */
1756     OUT_BATCH(batch, 0);                /* gs */
1757     /* Only the PS uses the binding table */
1758     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1759 }
1760
1761 static void
1762 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct intel_batchbuffer *batch = i965->batch;
1766
1767     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1768     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1769               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1770     OUT_BATCH(batch, 0);
1771     OUT_BATCH(batch, 0);
1772     OUT_BATCH(batch, 0);
1773     OUT_BATCH(batch, 0);
1774     OUT_BATCH(batch, 0);
1775
1776     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1777     OUT_BATCH(batch, 0);
1778 }
1779
1780 static void
1781 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1782 {
1783     i965_render_drawing_rectangle(ctx);
1784 }
1785
1786 static void 
1787 gen6_emit_vs_state(VADriverContextP ctx)
1788 {
1789     struct i965_driver_data *i965 = i965_driver_data(ctx);
1790     struct intel_batchbuffer *batch = i965->batch;
1791
1792     /* disable VS constant buffer */
1793     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1794     OUT_BATCH(batch, 0);
1795     OUT_BATCH(batch, 0);
1796     OUT_BATCH(batch, 0);
1797     OUT_BATCH(batch, 0);
1798         
1799     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1800     OUT_BATCH(batch, 0); /* without VS kernel */
1801     OUT_BATCH(batch, 0);
1802     OUT_BATCH(batch, 0);
1803     OUT_BATCH(batch, 0);
1804     OUT_BATCH(batch, 0); /* pass-through */
1805 }
1806
1807 static void 
1808 gen6_emit_gs_state(VADriverContextP ctx)
1809 {
1810     struct i965_driver_data *i965 = i965_driver_data(ctx);
1811     struct intel_batchbuffer *batch = i965->batch;
1812
1813     /* disable GS constant buffer */
1814     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
1815     OUT_BATCH(batch, 0);
1816     OUT_BATCH(batch, 0);
1817     OUT_BATCH(batch, 0);
1818     OUT_BATCH(batch, 0);
1819         
1820     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
1821     OUT_BATCH(batch, 0); /* without GS kernel */
1822     OUT_BATCH(batch, 0);
1823     OUT_BATCH(batch, 0);
1824     OUT_BATCH(batch, 0);
1825     OUT_BATCH(batch, 0);
1826     OUT_BATCH(batch, 0); /* pass-through */
1827 }
1828
1829 static void 
1830 gen6_emit_clip_state(VADriverContextP ctx)
1831 {
1832     struct i965_driver_data *i965 = i965_driver_data(ctx);
1833     struct intel_batchbuffer *batch = i965->batch;
1834
1835     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1836     OUT_BATCH(batch, 0);
1837     OUT_BATCH(batch, 0); /* pass-through */
1838     OUT_BATCH(batch, 0);
1839 }
1840
1841 static void 
1842 gen6_emit_sf_state(VADriverContextP ctx)
1843 {
1844     struct i965_driver_data *i965 = i965_driver_data(ctx);
1845     struct intel_batchbuffer *batch = i965->batch;
1846
1847     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
1848     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
1849               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
1850               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
1851     OUT_BATCH(batch, 0);
1852     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
1853     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
1854     OUT_BATCH(batch, 0);
1855     OUT_BATCH(batch, 0);
1856     OUT_BATCH(batch, 0);
1857     OUT_BATCH(batch, 0);
1858     OUT_BATCH(batch, 0); /* DW9 */
1859     OUT_BATCH(batch, 0);
1860     OUT_BATCH(batch, 0);
1861     OUT_BATCH(batch, 0);
1862     OUT_BATCH(batch, 0);
1863     OUT_BATCH(batch, 0); /* DW14 */
1864     OUT_BATCH(batch, 0);
1865     OUT_BATCH(batch, 0);
1866     OUT_BATCH(batch, 0);
1867     OUT_BATCH(batch, 0);
1868     OUT_BATCH(batch, 0); /* DW19 */
1869 }
1870
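/*
 * Pixel shader setup: the push constants uploaded into the CURBE
 * buffer are bound via 3DSTATE_CONSTANT_PS, and 3DSTATE_WM points
 * straight at the selected PS kernel.  Dispatch starts at GRF 6 (the
 * register layout the exa_wm_* kernels expect), the thread count is
 * capped at 40 (apparently the SNB GT1 limit) and only SIMD16
 * dispatch is enabled.
 */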
1871 static void 
1872 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
1873 {
1874     struct i965_driver_data *i965 = i965_driver_data(ctx);
1875     struct intel_batchbuffer *batch = i965->batch;
1876     struct i965_render_state *render_state = &i965->render_state;
1877
1878     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
1879               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
1880               (5 - 2));
1881     OUT_RELOC(batch, 
1882               render_state->curbe.bo,
1883               I915_GEM_DOMAIN_INSTRUCTION, 0,
1884               0);
1885     OUT_BATCH(batch, 0);
1886     OUT_BATCH(batch, 0);
1887     OUT_BATCH(batch, 0);
1888
1889     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
1890     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
1891               I915_GEM_DOMAIN_INSTRUCTION, 0,
1892               0);
1893     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
1894               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
1895     OUT_BATCH(batch, 0);
1896     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
1897     OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
1898               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
1899               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
1900     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
1901               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
1902     OUT_BATCH(batch, 0);
1903     OUT_BATCH(batch, 0);
1904 }
1905
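/*
 * Two vertex elements are fetched from the single vertex buffer: an
 * X/Y position at offset 0 and an S/T texture coordinate at offset
 * 8, each padded out to {x, y, 1.0, 1.0}.
 */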
1906 static void
1907 gen6_emit_vertex_element_state(VADriverContextP ctx)
1908 {
1909     struct i965_driver_data *i965 = i965_driver_data(ctx);
1910     struct intel_batchbuffer *batch = i965->batch;
1911
1912     /* Set up our vertex elements, sourced from the single vertex buffer. */
1913     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
1914     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1915     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1916               GEN6_VE0_VALID |
1917               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1918               (0 << VE0_OFFSET_SHIFT));
1919     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1920               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1921               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1922               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1923     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1924     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1925               GEN6_VE0_VALID |
1926               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1927               (8 << VE0_OFFSET_SHIFT));
1928     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
1929               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1930               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1931               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1932 }
1933
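/*
 * Bind the vertex buffer (16 bytes per vertex; the second relocation
 * is the buffer end address, 3 vertices * 16 bytes = 48) and draw one
 * RECTLIST primitive of 3 vertices, the usual trick for emitting a
 * screen-aligned rectangle.
 */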
1934 static void
1935 gen6_emit_vertices(VADriverContextP ctx)
1936 {
1937     struct i965_driver_data *i965 = i965_driver_data(ctx);
1938     struct intel_batchbuffer *batch = i965->batch;
1939     struct i965_render_state *render_state = &i965->render_state;
1940
1941     BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
1943     OUT_BATCH(batch, 
1944               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
1945               GEN6_VB0_VERTEXDATA |
1946               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1947     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1948     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1949     OUT_BATCH(batch, 0);
1950
1951     OUT_BATCH(batch, 
1952               CMD_3DPRIMITIVE |
1953               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1954               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1955               (0 << 9) |
1956               4);
1957     OUT_BATCH(batch, 3); /* vertex count per instance */
1958     OUT_BATCH(batch, 0); /* start vertex offset */
1959     OUT_BATCH(batch, 1); /* single instance */
1960     OUT_BATCH(batch, 0); /* start instance location */
1961     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1962     ADVANCE_BATCH(batch);
1963 }
1964
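/*
 * Emit the whole 3D pipeline for one blit, following the usual SNB
 * programming order: invariant state and base addresses first, then
 * the indirect state pointers, the per-stage state (VS/GS/CLIP as
 * pass-through, SF, WM), and finally vertex setup and the draw call.
 * The atomic section keeps the sequence from being split across
 * batchbuffers.
 */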
1965 static void
1966 gen6_render_emit_states(VADriverContextP ctx, int kernel)
1967 {
1968     struct i965_driver_data *i965 = i965_driver_data(ctx);
1969     struct intel_batchbuffer *batch = i965->batch;
1970
1971     intel_batchbuffer_start_atomic(batch, 0x1000);
1972     intel_batchbuffer_emit_mi_flush(batch);
    gen6_emit_invariant_states(ctx);
1974     gen6_emit_state_base_address(ctx);
1975     gen6_emit_viewport_state_pointers(ctx);
1976     gen6_emit_urb(ctx);
1977     gen6_emit_cc_state_pointers(ctx);
1978     gen6_emit_sampler_state_pointers(ctx);
1979     gen6_emit_vs_state(ctx);
1980     gen6_emit_gs_state(ctx);
1981     gen6_emit_clip_state(ctx);
1982     gen6_emit_sf_state(ctx);
1983     gen6_emit_wm_state(ctx, kernel);
1984     gen6_emit_binding_table(ctx);
1985     gen6_emit_depth_buffer_state(ctx);
1986     gen6_emit_drawing_rectangle(ctx);
1987     gen6_emit_vertex_element_state(ctx);
1988     gen6_emit_vertices(ctx);
1989     intel_batchbuffer_end_atomic(batch);
1990 }
1991
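/*
 * GEN6 counterpart of i965_render_put_surface(); it additionally
 * clears the destination region before emitting the pipeline.
 */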
1992 static void
1993 gen6_render_put_surface(VADriverContextP ctx,
1994                         VASurfaceID surface,
1995                         short srcx,
1996                         short srcy,
1997                         unsigned short srcw,
1998                         unsigned short srch,
1999                         short destx,
2000                         short desty,
2001                         unsigned short destw,
2002                         unsigned short desth,
2003                         unsigned int flag)
2004 {
2005     struct i965_driver_data *i965 = i965_driver_data(ctx);
2006     struct intel_batchbuffer *batch = i965->batch;
2007
2008     gen6_render_initialize(ctx);
2009     gen6_render_setup_states(ctx, surface,
2010                              srcx, srcy, srcw, srch,
2011                              destx, desty, destw, desth);
2012     i965_clear_dest_region(ctx);
2013     gen6_render_emit_states(ctx, PS_KERNEL);
2014     intel_batchbuffer_flush(batch);
2015 }
2016
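/*
 * Subpictures are composited with standard src-over alpha blending,
 * src * alpha + dst * (1 - alpha), instead of the COPY logic op used
 * for the main surface, with pre- and post-blend clamping to [0, 1].
 */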
2017 static void
2018 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2019 {
2020     struct i965_driver_data *i965 = i965_driver_data(ctx);
2021     struct i965_render_state *render_state = &i965->render_state;
2022     struct gen6_blend_state *blend_state;
2023
2025     dri_bo_map(render_state->cc.blend, 1);
2026     assert(render_state->cc.blend->virtual);
2027     blend_state = render_state->cc.blend->virtual;
2028     memset(blend_state, 0, sizeof(*blend_state));
2029     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2030     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2031     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2032     blend_state->blend0.blend_enable = 1;
2033     blend_state->blend1.post_blend_clamp_enable = 1;
2034     blend_state->blend1.pre_blend_clamp_enable = 1;
2035     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2036     dri_bo_unmap(render_state->cc.blend);
2037 }
2038
2039 static void
2040 gen6_subpicture_render_setup_states(VADriverContextP ctx,
2041                                     VASurfaceID surface,
2042                                     short srcx,
2043                                     short srcy,
2044                                     unsigned short srcw,
2045                                     unsigned short srch,
2046                                     short destx,
2047                                     short desty,
2048                                     unsigned short destw,
2049                                     unsigned short desth)
2050 {
2051     VARectangle output_rect;
2052
2053     output_rect.x      = destx;
2054     output_rect.y      = desty;
2055     output_rect.width  = destw;
2056     output_rect.height = desth;
2057
2058     i965_render_dest_surface_state(ctx, 0);
2059     i965_subpic_render_src_surfaces_state(ctx, surface);
2060     i965_render_sampler(ctx);
2061     i965_render_cc_viewport(ctx);
2062     gen6_render_color_calc_state(ctx);
2063     gen6_subpicture_render_blend_state(ctx);
2064     gen6_render_depth_stencil_state(ctx);
2065     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
2066 }
2067
2068 static void
2069 gen6_render_put_subpicture(VADriverContextP ctx,
2070                            VASurfaceID surface,
2071                            short srcx,
2072                            short srcy,
2073                            unsigned short srcw,
2074                            unsigned short srch,
2075                            short destx,
2076                            short desty,
2077                            unsigned short destw,
2078                            unsigned short desth)
2079 {
2080     struct i965_driver_data *i965 = i965_driver_data(ctx);
2081     struct intel_batchbuffer *batch = i965->batch;
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;

    assert(obj_surface);
    obj_subpic = SUBPIC(obj_surface->subpic);
    assert(obj_subpic);
2086     gen6_render_initialize(ctx);
2087     gen6_subpicture_render_setup_states(ctx, surface,
2088                                         srcx, srcy, srcw, srch,
2089                                         destx, desty, destw, desth);
2090     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2091     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2092     intel_batchbuffer_flush(batch);
2093 }
2094
2095 /*
2096  * global functions
2097  */
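/*
 * Run the optional post-processing pass first, then dispatch to the
 * GEN6 or pre-GEN6 render path based on the device id.
 */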
2098 void
2099 intel_render_put_surface(VADriverContextP ctx,
2100                         VASurfaceID surface,
2101                         short srcx,
2102                         short srcy,
2103                         unsigned short srcw,
2104                         unsigned short srch,
2105                         short destx,
2106                         short desty,
2107                         unsigned short destw,
2108                         unsigned short desth,
2109                         unsigned int flag)
2110 {
2111     struct i965_driver_data *i965 = i965_driver_data(ctx);
2112
2113     i965_post_processing(ctx, surface,
2114                          srcx, srcy, srcw, srch,
2115                          destx, desty, destw, desth,
2116                          flag);
2117
2118     if (IS_GEN6(i965->intel.device_id))
2119         gen6_render_put_surface(ctx, surface,
2120                                 srcx, srcy, srcw, srch,
2121                                 destx, desty, destw, desth,
2122                                 flag);
2123     else
2124         i965_render_put_surface(ctx, surface,
2125                                 srcx, srcy, srcw, srch,
2126                                 destx, desty, destw, desth,
2127                                 flag);
2128 }
2129
2130 void
2131 intel_render_put_subpicture(VADriverContextP ctx,
2132                            VASurfaceID surface,
2133                            short srcx,
2134                            short srcy,
2135                            unsigned short srcw,
2136                            unsigned short srch,
2137                            short destx,
2138                            short desty,
2139                            unsigned short destw,
2140                            unsigned short desth)
2141 {
2142     struct i965_driver_data *i965 = i965_driver_data(ctx);
2143
2144     if (IS_GEN6(i965->intel.device_id))
2145         gen6_render_put_subpicture(ctx, surface,
2146                                    srcx, srcy, srcw, srch,
2147                                    destx, desty, destw, desth);
2148     else
2149         i965_render_put_subpicture(ctx, surface,
2150                                    srcx, srcy, srcw, srch,
2151                                    destx, desty, destw, desth);
2152 }
2153
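/*
 * Pick the render kernel set for the detected device (GEN6,
 * IRONLAKE/GEN5, or the original GEN4 binaries), upload each kernel
 * into its own buffer object and allocate the shared CURBE (constant)
 * buffer.
 */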
2154 Bool 
2155 i965_render_init(VADriverContextP ctx)
2156 {
2157     struct i965_driver_data *i965 = i965_driver_data(ctx);
2158     struct i965_render_state *render_state = &i965->render_state;
2159     int i;
2160
2161     /* kernel */
2162     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2163                                  sizeof(render_kernels_gen5[0])));
2164     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2165                                  sizeof(render_kernels_gen6[0])));
2166
2167     if (IS_GEN6(i965->intel.device_id))
2168         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
2169     else if (IS_IRONLAKE(i965->intel.device_id))
2170         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
2171     else
2172         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
2173
2174     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2175         struct i965_kernel *kernel = &render_state->render_kernels[i];
2176
2177         if (!kernel->size)
2178             continue;
2179
2180         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2181                                   kernel->name, 
2182                                   kernel->size, 0x1000);
2183         assert(kernel->bo);
2184         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2185     }
2186
2187     /* constant buffer */
2188     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2189                       "constant buffer",
2190                       4096, 64);
2191     assert(render_state->curbe.bo);
2192     render_state->curbe.upload = 0;
2193
2194     return True;
2195 }
2196
2197 Bool 
2198 i965_render_terminate(VADriverContextP ctx)
2199 {
2200     int i;
2201     struct i965_driver_data *i965 = i965_driver_data(ctx);
2202     struct i965_render_state *render_state = &i965->render_state;
2203
2204     dri_bo_unreference(render_state->curbe.bo);
2205     render_state->curbe.bo = NULL;
2206
2207     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2208         struct i965_kernel *kernel = &render_state->render_kernels[i];
2209         
2210         dri_bo_unreference(kernel->bo);
2211         kernel->bo = NULL;
2212     }
2213
2214     dri_bo_unreference(render_state->vb.vertex_buffer);
2215     render_state->vb.vertex_buffer = NULL;
2216     dri_bo_unreference(render_state->vs.state);
2217     render_state->vs.state = NULL;
2218     dri_bo_unreference(render_state->sf.state);
2219     render_state->sf.state = NULL;
2220     dri_bo_unreference(render_state->wm.sampler);
2221     render_state->wm.sampler = NULL;
2222     dri_bo_unreference(render_state->wm.state);
2223     render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
2225     dri_bo_unreference(render_state->cc.viewport);
2226     render_state->cc.viewport = NULL;
2227     dri_bo_unreference(render_state->cc.state);
2228     render_state->cc.state = NULL;
2229     dri_bo_unreference(render_state->cc.blend);
2230     render_state->cc.blend = NULL;
2231     dri_bo_unreference(render_state->cc.depth_stencil);
2232     render_state->cc.depth_stencil = NULL;
2233
2234     if (render_state->draw_region) {
2235         dri_bo_unreference(render_state->draw_region->bo);
2236         free(render_state->draw_region);
2237         render_state->draw_region = NULL;
2238     }
2239
2240     return True;
2241 }
2242