/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_backend.h>
#include "va/x11/va_dricommon.h"

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

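/*
 * Surface states and the binding table share one bo: MAX_RENDER_SURFACES
 * surface states, each padded to a 32-byte boundary, followed by the binding
 * table whose dword entries hold the byte offsets of those surface states.
 */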
#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

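/*
 * Reinterpret the bits of a float as a uint32_t (type punning through a
 * union) so float values can be emitted into the batch buffer, e.g. by
 * i965_render_constant_color() below.
 */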
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

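/*
 * Static URB partition for the fixed-function pipeline.  Entry counts and
 * sizes are in hardware URB allocation units; GS and CLIP get no entries
 * since both stages are disabled for this textured-rectangle pipeline.
 */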
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; /* Ironlake apparently counts URB entries in units of 4 */
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;            /* pass-through: no VS thread is dispatched */
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

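/*
 * Program every sampler identically: bilinear min/mag filtering with
 * clamp-to-edge addressing on all three texture coordinates.
 */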
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* scratch space is not used */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* scratch space is not used */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

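/*
 * The CC viewport only supplies the depth clamp range; open it wide
 * since depth is unused in this pipeline.
 */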
static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

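/*
 * CC state for subpictures: color blending enabled with the usual
 * src_alpha/inv_src_alpha "over" operator so the subpicture is composited
 * on top of the video frame.
 */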
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_src_surface_state(VADriverContextP ctx,
                              int index,
                              dri_bo *region,
                              unsigned long offset,
                              int w, int h,
                              int pitch, int format)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    unsigned int tiling;
    unsigned int swizzle;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = region->offset + offset;

    ss->ss2.width = w - 1;
    ss->ss2.height = h - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(region, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      region);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

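/*
 * Set up the source surface states for a planar YUV surface.  The Y plane
 * sits at offset 0 with the chroma data following at w * h (either an
 * interleaved UV plane for NV12 or separate U and V planes); each plane is
 * bound twice, presumably because the sampling kernels read it through two
 * binding-table slots.
 */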
static void
i965_render_src_surfaces_state(VADriverContextP ctx,
                               VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface *obj_surface;
    int w, h;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    if (obj_surface->pp_out_bo) {
        w = obj_surface->pp_out_width;
        h = obj_surface->pp_out_height;
        rw = obj_surface->orig_pp_out_width;
        rh = obj_surface->orig_pp_out_height;
        region = obj_surface->pp_out_bo;
    } else {
        w = obj_surface->width;
        h = obj_surface->height;
        rw = obj_surface->orig_width;
        rh = obj_surface->orig_height;
        region = obj_surface->bo;
    }

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);

    if (!render_state->inited) {
        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;

        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
    } else {
        if (render_state->interleaved_uv) {
            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
        } else {
            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;

            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
        }
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    int w, h;
    dri_bo *region;
    dri_bo *subpic_region;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);

    w = obj_surface->width;
    h = obj_surface->height;
    region = obj_surface->bo;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct i965_surface_state *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.data_return_format = I965_SURFACERETURNFORMAT_FLOAT32;

    if (dest_region->cpp == 2) {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        ss->ss0.surface_format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    ss->ss0.writedisable_alpha = 0;
    ss->ss0.writedisable_red = 0;
    ss->ss0.writedisable_green = 0;
    ss->ss0.writedisable_blue = 0;
    ss->ss0.color_blend = 1;
    ss->ss0.vert_line_stride = 0;
    ss->ss0.vert_line_stride_ofs = 0;
    ss->ss0.mipmap_layout_mode = 0;
    ss->ss0.render_cache_read_mode = 0;

    ss->ss1.base_addr = dest_region->bo->offset;

    ss->ss2.width = dest_region->width - 1;
    ss->ss2.height = dest_region->height - 1;
    ss->ss2.mip_count = 0;
    ss->ss2.render_target_rotation = 0;
    ss->ss3.pitch = dest_region->pitch - 1;
    i965_render_set_surface_tiling(ss, dest_region->tiling);

    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

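/*
 * The vertex-upload helpers below write three vertices of a RECTLIST (the
 * hardware infers the fourth corner).  Each vertex is four floats,
 * (tx, ty, x, y): a normalized texture coordinate followed by the screen
 * position in pixels, matching the two R32G32_FLOAT vertex elements at
 * offsets 0 and 8 set up in i965_render_vertex_elements().
 */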
static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);

    const float sx = (float)output_rect->width  / (float)obj_surface->orig_width;
    const float sy = (float)output_rect->height / (float)obj_surface->orig_height;
    float *vb, tx1, tx2, ty1, ty2, x1, x2, y1, y2;
    int i = 0;

    VARectangle dst_rect;
    dst_rect.x      = output_rect->x + sx * (float)obj_subpic->dst_rect.x;
    dst_rect.y      = output_rect->y + sy * (float)obj_subpic->dst_rect.y; /* scale y by sy, not sx */
    dst_rect.width  = sx * (float)obj_subpic->dst_rect.width;
    dst_rect.height = sy * (float)obj_subpic->dst_rect.height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    tx1 = (float)obj_subpic->src_rect.x / (float)obj_subpic->width;
    ty1 = (float)obj_subpic->src_rect.y / (float)obj_subpic->height;
    tx2 = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / (float)obj_subpic->width;
    ty2 = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / (float)obj_subpic->height;

    x1 = (float)dst_rect.x;
    y1 = (float)dst_rect.y;
    x2 = (float)(dst_rect.x + dst_rect.width);
    y2 = (float)(dst_rect.y + dst_rect.height);

    vb[i++] = tx2;
    vb[i++] = ty2;
    vb[i++] = x2;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty2;
    vb[i++] = x1;
    vb[i++] = y2;

    vb[i++] = tx1;
    vb[i++] = ty1;
    vb[i++] = x1;
    vb[i++] = y1;
    dri_bo_unmap(render_state->vb.vertex_buffer);
}

static void
i965_render_upload_vertex(VADriverContextP ctx,
                          VASurfaceID surface,
                          short srcx,
                          short srcy,
                          unsigned short srcw,
                          unsigned short srch,
                          short destx,
                          short desty,
                          unsigned short destw,
                          unsigned short desth)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float *vb;

    float u1, v1, u2, v2;
    int i, width, height;
    int box_x1 = dest_region->x + destx;
    int box_y1 = dest_region->y + desty;
    int box_x2 = box_x1 + destw;
    int box_y2 = box_y1 + desth;

    obj_surface = SURFACE(surface);
    assert(obj_surface);
    width = obj_surface->orig_width;
    height = obj_surface->orig_height;

    u1 = (float)srcx / width;
    v1 = (float)srcy / height;
    u2 = (float)(srcx + srcw) / width;
    v2 = (float)(srcy + srch) / height;

    dri_bo_map(render_state->vb.vertex_buffer, 1);
    assert(render_state->vb.vertex_buffer->virtual);
    vb = render_state->vb.vertex_buffer->virtual;

    i = 0;
    vb[i++] = u2;
    vb[i++] = v2;
    vb[i++] = (float)box_x2;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v2;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y2;

    vb[i++] = u1;
    vb[i++] = v1;
    vb[i++] = (float)box_x1;
    vb[i++] = (float)box_y1;

    dri_bo_unmap(render_state->vb.vertex_buffer);
}

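/*
 * CURBE constants: a single flag, presumably consumed by the planar
 * sampling kernel, telling it whether the U/V data is interleaved (NV12)
 * or stored in separate planes.  Uploaded once, then guarded by
 * render_state->curbe.upload.
 */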
static void
i965_render_upload_constants(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;

    if (render_state->curbe.upload)
        return;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (render_state->interleaved_uv)
        *constant_buffer = 1;
    else
        *constant_buffer = 0;

    dri_bo_unmap(render_state->curbe.bo);
    render_state->curbe.upload = 1;
}

static void
i965_surface_render_state_setup(VADriverContextP ctx,
                                VASurfaceID surface,
                                short srcx,
                                short srcy,
                                unsigned short srcw,
                                unsigned short srch,
                                short destx,
                                short desty,
                                unsigned short destw,
                                unsigned short desth)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface,
                              srcx, srcy, srcw, srch,
                              destx, desty, destw, desth);
    i965_render_upload_constants(ctx);
}

static void
i965_subpic_render_state_setup(VADriverContextP ctx,
                               VASurfaceID surface,
                               short srcx,
                               short srcy,
                               unsigned short srcw,
                               unsigned short srch,
                               short destx,
                               short desty,
                               unsigned short destw,
                               unsigned short desth)
{
    VARectangle output_rect;

    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);

    output_rect.x      = destx;
    output_rect.y      = desty;
    output_rect.width  = destw;
    output_rect.height = desth;
    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

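/*
 * STATE_BASE_ADDRESS: only the surface state base is relocated (to the
 * surface-state/binding-table bo); all other bases stay at zero with
 * BASE_ADDRESS_MODIFY set.  Ironlake takes two extra dwords for the
 * instruction base address and its upper bound.
 */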
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

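/*
 * URB_FENCE: partition the URB linearly (VS, GS, CLIP, SF, CS) from the
 * entry counts and sizes defined above; each fence value is the end
 * offset of the corresponding section.
 */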
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

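/*
 * Two vertex elements per vertex, each R32G32_FLOAT read from vertex
 * buffer 0 at offsets 0 and 8; the missing Z/W components are filled
 * with 1.0.
 */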
static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 31:24 hold alpha, bits 23:0 hold the color */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

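/*
 * Emit the vertex buffer packet and kick off the rectangle draw: a
 * RECTLIST primitive with three sequential vertices and a single instance.
 */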
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* buffer end address on Ironlake */
    else
        OUT_BATCH(batch, 3); /* max index on Gen4 */

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

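/*
 * Clear the destination region to zero (black) with a solid-color blit
 * before rendering, so areas not covered by the scaled video are well
 * defined.
 */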
static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* raster operation: PATCOPY (solid fill) */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* the pitch of a tiled destination is specified in dwords */
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}

1451 static void
1452 i965_render_put_surface(VADriverContextP ctx,
1453                         VASurfaceID surface,
1454                         short srcx,
1455                         short srcy,
1456                         unsigned short srcw,
1457                         unsigned short srch,
1458                         short destx,
1459                         short desty,
1460                         unsigned short destw,
1461                         unsigned short desth,
1462                         unsigned int flag)
1463 {
1464     struct i965_driver_data *i965 = i965_driver_data(ctx);
1465     struct intel_batchbuffer *batch = i965->batch;
1466
1467     i965_render_initialize(ctx);
1468     i965_surface_render_state_setup(ctx, surface,
1469                             srcx, srcy, srcw, srch,
1470                             destx, desty, destw, desth);
1471     i965_surface_render_pipeline_setup(ctx);
1472     intel_batchbuffer_flush(batch);
1473 }
1474
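/*
 * Blend the subpicture attached to a surface on pre-GEN6 hardware; in
 * addition to the regular pipeline setup this uploads the subpicture's
 * image palette.
 */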
1475 static void
1476 i965_render_put_subpicture(VADriverContextP ctx,
1477                            VASurfaceID surface,
1478                            short srcx,
1479                            short srcy,
1480                            unsigned short srcw,
1481                            unsigned short srch,
1482                            short destx,
1483                            short desty,
1484                            unsigned short destw,
1485                            unsigned short desth)
1486 {
1487     struct i965_driver_data *i965 = i965_driver_data(ctx);
1488     struct intel_batchbuffer *batch = i965->batch;
1489     struct object_surface *obj_surface = SURFACE(surface);
1490     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1491
1492     assert(obj_subpic);
1493
1494     i965_render_initialize(ctx);
1495     i965_subpic_render_state_setup(ctx, surface,
1496                                    srcx, srcy, srcw, srch,
1497                                    destx, desty, destw, desth);
1498     i965_subpic_render_pipeline_setup(ctx);
1499     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1500     intel_batchbuffer_flush(batch);
1501 }
1502
1503 /*
1504  * Rendering code for GEN6+ (Sandy Bridge / Ivy Bridge)
1505  */
1506 static void 
1507 gen6_render_initialize(VADriverContextP ctx)
1508 {
1509     struct i965_driver_data *i965 = i965_driver_data(ctx);
1510     struct i965_render_state *render_state = &i965->render_state;
1511     dri_bo *bo;
1512
1513     /* VERTEX BUFFER */
1514     dri_bo_unreference(render_state->vb.vertex_buffer);
1515     bo = dri_bo_alloc(i965->intel.bufmgr,
1516                       "vertex buffer",
1517                       4096,
1518                       4096);
1519     assert(bo);
1520     render_state->vb.vertex_buffer = bo;
1521
1522     /* WM */
1523     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1524     bo = dri_bo_alloc(i965->intel.bufmgr,
1525                       "surface state & binding table",
1526                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1527                       4096);
1528     assert(bo);
1529     render_state->wm.surface_state_binding_table_bo = bo;
1530
1531     dri_bo_unreference(render_state->wm.sampler);
1532     bo = dri_bo_alloc(i965->intel.bufmgr,
1533                       "sampler state",
1534                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1535                       4096);
1536     assert(bo);
1537     render_state->wm.sampler = bo;
1538     render_state->wm.sampler_count = 0;
1539
1540     /* COLOR CALCULATOR */
1541     dri_bo_unreference(render_state->cc.state);
1542     bo = dri_bo_alloc(i965->intel.bufmgr,
1543                       "color calc state",
1544                       sizeof(struct gen6_color_calc_state),
1545                       4096);
1546     assert(bo);
1547     render_state->cc.state = bo;
1548
1549     /* CC VIEWPORT */
1550     dri_bo_unreference(render_state->cc.viewport);
1551     bo = dri_bo_alloc(i965->intel.bufmgr,
1552                       "cc viewport",
1553                       sizeof(struct i965_cc_viewport),
1554                       4096);
1555     assert(bo);
1556     render_state->cc.viewport = bo;
1557
1558     /* BLEND STATE */
1559     dri_bo_unreference(render_state->cc.blend);
1560     bo = dri_bo_alloc(i965->intel.bufmgr,
1561                       "blend state",
1562                       sizeof(struct gen6_blend_state),
1563                       4096);
1564     assert(bo);
1565     render_state->cc.blend = bo;
1566
1567     /* DEPTH & STENCIL STATE */
1568     dri_bo_unreference(render_state->cc.depth_stencil);
1569     bo = dri_bo_alloc(i965->intel.bufmgr,
1570                       "depth & stencil state",
1571                       sizeof(struct gen6_depth_stencil_state),
1572                       4096);
1573     assert(bo);
1574     render_state->cc.depth_stencil = bo;
1575 }
1576
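/*
 * On GEN6+ the color calculator, blend and depth/stencil states are
 * three separate heap objects referenced by 3DSTATE_CC_STATE_POINTERS
 * instead of one CC unit, so each gets its own small helper below.
 */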
1577 static void
1578 gen6_render_color_calc_state(VADriverContextP ctx)
1579 {
1580     struct i965_driver_data *i965 = i965_driver_data(ctx);
1581     struct i965_render_state *render_state = &i965->render_state;
1582     struct gen6_color_calc_state *color_calc_state;
1583     
1584     dri_bo_map(render_state->cc.state, 1);
1585     assert(render_state->cc.state->virtual);
1586     color_calc_state = render_state->cc.state->virtual;
1587     memset(color_calc_state, 0, sizeof(*color_calc_state));
1588     color_calc_state->constant_r = 1.0;
1589     color_calc_state->constant_g = 0.0;
1590     color_calc_state->constant_b = 1.0;
1591     color_calc_state->constant_a = 1.0;
1592     dri_bo_unmap(render_state->cc.state);
1593 }
1594
1595 static void
1596 gen6_render_blend_state(VADriverContextP ctx)
1597 {
1598     struct i965_driver_data *i965 = i965_driver_data(ctx);
1599     struct i965_render_state *render_state = &i965->render_state;
1600     struct gen6_blend_state *blend_state;
1601     
1602     dri_bo_map(render_state->cc.blend, 1);
1603     assert(render_state->cc.blend->virtual);
1604     blend_state = render_state->cc.blend->virtual;
1605     memset(blend_state, 0, sizeof(*blend_state));
1606     blend_state->blend1.logic_op_enable = 1;
1607     blend_state->blend1.logic_op_func = 0xc; /* COPY */
1608     dri_bo_unmap(render_state->cc.blend);
1609 }
1610
1611 static void
1612 gen6_render_depth_stencil_state(VADriverContextP ctx)
1613 {
1614     struct i965_driver_data *i965 = i965_driver_data(ctx);
1615     struct i965_render_state *render_state = &i965->render_state;
1616     struct gen6_depth_stencil_state *depth_stencil_state;
1617     
1618     dri_bo_map(render_state->cc.depth_stencil, 1);
1619     assert(render_state->cc.depth_stencil->virtual);
1620     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1621     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1622     dri_bo_unmap(render_state->cc.depth_stencil);
1623 }
1624
1625 static void
1626 gen6_render_setup_states(VADriverContextP ctx,
1627                          VASurfaceID surface,
1628                          short srcx,
1629                          short srcy,
1630                          unsigned short srcw,
1631                          unsigned short srch,
1632                          short destx,
1633                          short desty,
1634                          unsigned short destw,
1635                          unsigned short desth)
1636 {
1637     i965_render_dest_surface_state(ctx, 0);
1638     i965_render_src_surfaces_state(ctx, surface);
1639     i965_render_sampler(ctx);
1640     i965_render_cc_viewport(ctx);
1641     gen6_render_color_calc_state(ctx);
1642     gen6_render_blend_state(ctx);
1643     gen6_render_depth_stencil_state(ctx);
1644     i965_render_upload_constants(ctx);
1645     i965_render_upload_vertex(ctx, surface,
1646                               srcx, srcy, srcw, srch,
1647                               destx, desty, destw, desth);
1648 }
1649
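/*
 * Once-per-batch setup: select the 3D pipeline, program single-sample
 * multisampling and leave the system instruction pointer at 0.
 */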
1650 static void
1651 gen6_emit_invariant_states(VADriverContextP ctx)
1652 {
1653     struct i965_driver_data *i965 = i965_driver_data(ctx);
1654     struct intel_batchbuffer *batch = i965->batch;
1655
1656     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1657
1658     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1659     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1660               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1661     OUT_BATCH(batch, 0);
1662
1663     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1664     OUT_BATCH(batch, 1);
1665
1666     /* Set system instruction pointer */
1667     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1668     OUT_BATCH(batch, 0);
1669 }
1670
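/*
 * Only the surface state base address points at a real buffer object,
 * so the binding-table and surface-state offsets emitted later resolve
 * relative to wm.surface_state_binding_table_bo; every other base is
 * simply re-armed with a zero address via BASE_ADDRESS_MODIFY.
 */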
1671 static void
1672 gen6_emit_state_base_address(VADriverContextP ctx)
1673 {
1674     struct i965_driver_data *i965 = i965_driver_data(ctx);
1675     struct intel_batchbuffer *batch = i965->batch;
1676     struct i965_render_state *render_state = &i965->render_state;
1677
1678     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1679     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1680     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1681     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1682     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1683     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1684     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1685     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1686     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1687     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1688 }
1689
1690 static void
1691 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1692 {
1693     struct i965_driver_data *i965 = i965_driver_data(ctx);
1694     struct intel_batchbuffer *batch = i965->batch;
1695     struct i965_render_state *render_state = &i965->render_state;
1696
1697     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1698               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1699               (4 - 2));
1700     OUT_BATCH(batch, 0);
1701     OUT_BATCH(batch, 0);
1702     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1703 }
1704
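/*
 * Minimal URB split: 24 single-row VS entries (the GEN6 minimum, see
 * the inline comment) and no GS allocation at all.
 */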
1705 static void
1706 gen6_emit_urb(VADriverContextP ctx)
1707 {
1708     struct i965_driver_data *i965 = i965_driver_data(ctx);
1709     struct intel_batchbuffer *batch = i965->batch;
1710
1711     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1712     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1713               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1714     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1715               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1716 }
1717
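/*
 * Bit 0 of each pointer below acts as the "modify enable" that makes
 * the hardware actually load the blend, depth/stencil and color-calc
 * pointers; the relocation delta of 1 sets that bit.
 */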
1718 static void
1719 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1720 {
1721     struct i965_driver_data *i965 = i965_driver_data(ctx);
1722     struct intel_batchbuffer *batch = i965->batch;
1723     struct i965_render_state *render_state = &i965->render_state;
1724
1725     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1726     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1727     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1728     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1729 }
1730
1731 static void
1732 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1733 {
1734     struct i965_driver_data *i965 = i965_driver_data(ctx);
1735     struct intel_batchbuffer *batch = i965->batch;
1736     struct i965_render_state *render_state = &i965->render_state;
1737
1738     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1739               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1740               (4 - 2));
1741     OUT_BATCH(batch, 0); /* VS */
1742     OUT_BATCH(batch, 0); /* GS */
1743     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1744 }
1745
1746 static void
1747 gen6_emit_binding_table(VADriverContextP ctx)
1748 {
1749     struct i965_driver_data *i965 = i965_driver_data(ctx);
1750     struct intel_batchbuffer *batch = i965->batch;
1751
1752     /* Binding table pointers */
1753     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1754               GEN6_BINDING_TABLE_MODIFY_PS |
1755               (4 - 2));
1756     OUT_BATCH(batch, 0);                /* vs */
1757     OUT_BATCH(batch, 0);                /* gs */
1758     /* Only the PS uses the binding table */
1759     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1760 }
1761
1762 static void
1763 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1764 {
1765     struct i965_driver_data *i965 = i965_driver_data(ctx);
1766     struct intel_batchbuffer *batch = i965->batch;
1767
1768     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1769     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1770               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1771     OUT_BATCH(batch, 0);
1772     OUT_BATCH(batch, 0);
1773     OUT_BATCH(batch, 0);
1774     OUT_BATCH(batch, 0);
1775     OUT_BATCH(batch, 0);
1776
1777     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1778     OUT_BATCH(batch, 0);
1779 }
1780
1781 static void
1782 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1783 {
1784     i965_render_drawing_rectangle(ctx);
1785 }
1786
1787 static void 
1788 gen6_emit_vs_state(VADriverContextP ctx)
1789 {
1790     struct i965_driver_data *i965 = i965_driver_data(ctx);
1791     struct intel_batchbuffer *batch = i965->batch;
1792
1793     /* disable VS constant buffer */
1794     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1795     OUT_BATCH(batch, 0);
1796     OUT_BATCH(batch, 0);
1797     OUT_BATCH(batch, 0);
1798     OUT_BATCH(batch, 0);
1799         
1800     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1801     OUT_BATCH(batch, 0); /* without VS kernel */
1802     OUT_BATCH(batch, 0);
1803     OUT_BATCH(batch, 0);
1804     OUT_BATCH(batch, 0);
1805     OUT_BATCH(batch, 0); /* pass-through */
1806 }
1807
1808 static void 
1809 gen6_emit_gs_state(VADriverContextP ctx)
1810 {
1811     struct i965_driver_data *i965 = i965_driver_data(ctx);
1812     struct intel_batchbuffer *batch = i965->batch;
1813
1814     /* disable GS constant buffer */
1815     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
1816     OUT_BATCH(batch, 0);
1817     OUT_BATCH(batch, 0);
1818     OUT_BATCH(batch, 0);
1819     OUT_BATCH(batch, 0);
1820         
1821     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
1822     OUT_BATCH(batch, 0); /* without GS kernel */
1823     OUT_BATCH(batch, 0);
1824     OUT_BATCH(batch, 0);
1825     OUT_BATCH(batch, 0);
1826     OUT_BATCH(batch, 0);
1827     OUT_BATCH(batch, 0); /* pass-through */
1828 }
1829
1830 static void 
1831 gen6_emit_clip_state(VADriverContextP ctx)
1832 {
1833     struct i965_driver_data *i965 = i965_driver_data(ctx);
1834     struct intel_batchbuffer *batch = i965->batch;
1835
1836     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1837     OUT_BATCH(batch, 0);
1838     OUT_BATCH(batch, 0); /* pass-through */
1839     OUT_BATCH(batch, 0);
1840 }
1841
1842 static void 
1843 gen6_emit_sf_state(VADriverContextP ctx)
1844 {
1845     struct i965_driver_data *i965 = i965_driver_data(ctx);
1846     struct intel_batchbuffer *batch = i965->batch;
1847
1848     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
1849     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
1850               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
1851               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
1852     OUT_BATCH(batch, 0);
1853     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
1854     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
1855     OUT_BATCH(batch, 0);
1856     OUT_BATCH(batch, 0);
1857     OUT_BATCH(batch, 0);
1858     OUT_BATCH(batch, 0);
1859     OUT_BATCH(batch, 0); /* DW9 */
1860     OUT_BATCH(batch, 0);
1861     OUT_BATCH(batch, 0);
1862     OUT_BATCH(batch, 0);
1863     OUT_BATCH(batch, 0);
1864     OUT_BATCH(batch, 0); /* DW14 */
1865     OUT_BATCH(batch, 0);
1866     OUT_BATCH(batch, 0);
1867     OUT_BATCH(batch, 0);
1868     OUT_BATCH(batch, 0);
1869     OUT_BATCH(batch, 0); /* DW19 */
1870 }
1871
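/*
 * Program the pixel shader: bind the CURBE bo as PS push-constant
 * buffer 0, select the requested render kernel and enable SIMD16
 * dispatch with up to 40 threads.
 */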
1872 static void 
1873 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
1874 {
1875     struct i965_driver_data *i965 = i965_driver_data(ctx);
1876     struct intel_batchbuffer *batch = i965->batch;
1877     struct i965_render_state *render_state = &i965->render_state;
1878
1879     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
1880               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
1881               (5 - 2));
1882     OUT_RELOC(batch, 
1883               render_state->curbe.bo,
1884               I915_GEM_DOMAIN_INSTRUCTION, 0,
1885               0);
1886     OUT_BATCH(batch, 0);
1887     OUT_BATCH(batch, 0);
1888     OUT_BATCH(batch, 0);
1889
1890     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
1891     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
1892               I915_GEM_DOMAIN_INSTRUCTION, 0,
1893               0);
1894     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
1895               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
1896     OUT_BATCH(batch, 0);
1897     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
1898     OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
1899               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
1900               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
1901     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
1902               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
1903     OUT_BATCH(batch, 0);
1904     OUT_BATCH(batch, 0);
1905 }
1906
1907 static void
1908 gen6_emit_vertex_element_state(VADriverContextP ctx)
1909 {
1910     struct i965_driver_data *i965 = i965_driver_data(ctx);
1911     struct intel_batchbuffer *batch = i965->batch;
1912
1913     /* Set up our vertex elements, sourced from the single vertex buffer. */
1914     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
1915     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
1916     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1917               GEN6_VE0_VALID |
1918               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1919               (0 << VE0_OFFSET_SHIFT));
1920     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1921               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1922               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1923               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1924     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
1925     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1926               GEN6_VE0_VALID |
1927               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1928               (8 << VE0_OFFSET_SHIFT));
1929     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
1930               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1931               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1932               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1933 }
1934
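/*
 * Point the hardware at the vertex buffer (three 4-float vertices laid
 * out as {x, y, s, t}, 16 bytes apart, end address at offset 12 * 4)
 * and kick off a RECTLIST primitive drawn from them.
 */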
1935 static void
1936 gen6_emit_vertices(VADriverContextP ctx)
1937 {
1938     struct i965_driver_data *i965 = i965_driver_data(ctx);
1939     struct intel_batchbuffer *batch = i965->batch;
1940     struct i965_render_state *render_state = &i965->render_state;
1941
1942     BEGIN_BATCH(batch, 11);
1943     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
1944     OUT_BATCH(batch, 
1945               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
1946               GEN6_VB0_VERTEXDATA |
1947               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
1948     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
1949     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
1950     OUT_BATCH(batch, 0);
1951
1952     OUT_BATCH(batch, 
1953               CMD_3DPRIMITIVE |
1954               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
1955               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
1956               (0 << 9) |
1957               4);
1958     OUT_BATCH(batch, 3); /* vertex count per instance */
1959     OUT_BATCH(batch, 0); /* start vertex offset */
1960     OUT_BATCH(batch, 1); /* single instance */
1961     OUT_BATCH(batch, 0); /* start instance location */
1962     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
1963     ADVANCE_BATCH(batch);
1964 }
1965
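/*
 * Emit the whole GEN6 pipeline in one atomic stretch of the batch:
 * invariant state, base addresses, the unit states set up above, and
 * finally the vertices that trigger the draw.
 */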
1966 static void
1967 gen6_render_emit_states(VADriverContextP ctx, int kernel)
1968 {
1969     struct i965_driver_data *i965 = i965_driver_data(ctx);
1970     struct intel_batchbuffer *batch = i965->batch;
1971
1972     intel_batchbuffer_start_atomic(batch, 0x1000);
1973     intel_batchbuffer_emit_mi_flush(batch);
1974     gen6_emit_invariant_states(ctx);
1975     gen6_emit_state_base_address(ctx);
1976     gen6_emit_viewport_state_pointers(ctx);
1977     gen6_emit_urb(ctx);
1978     gen6_emit_cc_state_pointers(ctx);
1979     gen6_emit_sampler_state_pointers(ctx);
1980     gen6_emit_vs_state(ctx);
1981     gen6_emit_gs_state(ctx);
1982     gen6_emit_clip_state(ctx);
1983     gen6_emit_sf_state(ctx);
1984     gen6_emit_wm_state(ctx, kernel);
1985     gen6_emit_binding_table(ctx);
1986     gen6_emit_depth_buffer_state(ctx);
1987     gen6_emit_drawing_rectangle(ctx);
1988     gen6_emit_vertex_element_state(ctx);
1989     gen6_emit_vertices(ctx);
1990     intel_batchbuffer_end_atomic(batch);
1991 }
1992
1993 static void
1994 gen6_render_put_surface(VADriverContextP ctx,
1995                         VASurfaceID surface,
1996                         short srcx,
1997                         short srcy,
1998                         unsigned short srcw,
1999                         unsigned short srch,
2000                         short destx,
2001                         short desty,
2002                         unsigned short destw,
2003                         unsigned short desth,
2004                         unsigned int flag)
2005 {
2006     struct i965_driver_data *i965 = i965_driver_data(ctx);
2007     struct intel_batchbuffer *batch = i965->batch;
2008
2009     gen6_render_initialize(ctx);
2010     gen6_render_setup_states(ctx, surface,
2011                              srcx, srcy, srcw, srch,
2012                              destx, desty, destw, desth);
2013     i965_clear_dest_region(ctx);
2014     gen6_render_emit_states(ctx, PS_KERNEL);
2015     intel_batchbuffer_flush(batch);
2016 }
2017
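/*
 * Unlike the plain surface path, subpictures are alpha-blended over
 * the destination: src-alpha / inv-src-alpha with ADD, with pre- and
 * post-blend clamping to [0, 1].
 */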
2018 static void
2019 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2020 {
2021     struct i965_driver_data *i965 = i965_driver_data(ctx);
2022     struct i965_render_state *render_state = &i965->render_state;
2023     struct gen6_blend_state *blend_state;
2024
2026     dri_bo_map(render_state->cc.blend, 1);
2027     assert(render_state->cc.blend->virtual);
2028     blend_state = render_state->cc.blend->virtual;
2029     memset(blend_state, 0, sizeof(*blend_state));
2030     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2031     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2032     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2033     blend_state->blend0.blend_enable = 1;
2034     blend_state->blend1.post_blend_clamp_enable = 1;
2035     blend_state->blend1.pre_blend_clamp_enable = 1;
2036     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2037     dri_bo_unmap(render_state->cc.blend);
2038 }
2039
2040 static void
2041 gen6_subpicture_render_setup_states(VADriverContextP ctx,
2042                                     VASurfaceID surface,
2043                                     short srcx,
2044                                     short srcy,
2045                                     unsigned short srcw,
2046                                     unsigned short srch,
2047                                     short destx,
2048                                     short desty,
2049                                     unsigned short destw,
2050                                     unsigned short desth)
2051 {
2052     VARectangle output_rect;
2053
2054     output_rect.x      = destx;
2055     output_rect.y      = desty;
2056     output_rect.width  = destw;
2057     output_rect.height = desth;
2058
2059     i965_render_dest_surface_state(ctx, 0);
2060     i965_subpic_render_src_surfaces_state(ctx, surface);
2061     i965_render_sampler(ctx);
2062     i965_render_cc_viewport(ctx);
2063     gen6_render_color_calc_state(ctx);
2064     gen6_subpicture_render_blend_state(ctx);
2065     gen6_render_depth_stencil_state(ctx);
2066     i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
2067 }
2068
2069 static void
2070 gen6_render_put_subpicture(VADriverContextP ctx,
2071                            VASurfaceID surface,
2072                            short srcx,
2073                            short srcy,
2074                            unsigned short srcw,
2075                            unsigned short srch,
2076                            short destx,
2077                            short desty,
2078                            unsigned short destw,
2079                            unsigned short desth)
2080 {
2081     struct i965_driver_data *i965 = i965_driver_data(ctx);
2082     struct intel_batchbuffer *batch = i965->batch;
2083     struct object_surface *obj_surface = SURFACE(surface);
2084     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2085
2086     assert(obj_subpic);
2087     gen6_render_initialize(ctx);
2088     gen6_subpicture_render_setup_states(ctx, surface,
2089                                         srcx, srcy, srcw, srch,
2090                                         destx, desty, destw, desth);
2091     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2092     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2093     intel_batchbuffer_flush(batch);
2094 }
2095
2096 /*
2097  * global functions
2098  */
2099 void
2100 intel_render_put_surface(VADriverContextP ctx,
2101                         VASurfaceID surface,
2102                         short srcx,
2103                         short srcy,
2104                         unsigned short srcw,
2105                         unsigned short srch,
2106                         short destx,
2107                         short desty,
2108                         unsigned short destw,
2109                         unsigned short desth,
2110                         unsigned int flag)
2111 {
2112     struct i965_driver_data *i965 = i965_driver_data(ctx);
2113
2114     i965_post_processing(ctx, surface,
2115                          srcx, srcy, srcw, srch,
2116                          destx, desty, destw, desth,
2117                          flag);
2118
2119     if (IS_GEN6(i965->intel.device_id) ||
2120         IS_GEN7(i965->intel.device_id))
2121         gen6_render_put_surface(ctx, surface,
2122                                 srcx, srcy, srcw, srch,
2123                                 destx, desty, destw, desth,
2124                                 flag);
2125     else
2126         i965_render_put_surface(ctx, surface,
2127                                 srcx, srcy, srcw, srch,
2128                                 destx, desty, destw, desth,
2129                                 flag);
2130 }
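
/*
 * Usage sketch (illustrative only, not part of the driver): a caller
 * such as the driver's vaPutSurface() hook would be expected to invoke
 * the entry point above roughly as follows; `driver_ctx', `va_surface'
 * and the width/height variables are hypothetical names, not defined
 * in this file.
 *
 *     intel_render_put_surface(driver_ctx, va_surface,
 *                              0, 0, src_width, src_height,
 *                              0, 0, dst_width, dst_height,
 *                              0);
 */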
2131
2132 void
2133 intel_render_put_subpicture(VADriverContextP ctx,
2134                            VASurfaceID surface,
2135                            short srcx,
2136                            short srcy,
2137                            unsigned short srcw,
2138                            unsigned short srch,
2139                            short destx,
2140                            short desty,
2141                            unsigned short destw,
2142                            unsigned short desth)
2143 {
2144     struct i965_driver_data *i965 = i965_driver_data(ctx);
2145
2146     if (IS_GEN6(i965->intel.device_id) ||
2147         IS_GEN7(i965->intel.device_id))
2148         gen6_render_put_subpicture(ctx, surface,
2149                                    srcx, srcy, srcw, srch,
2150                                    destx, desty, destw, desth);
2151     else
2152         i965_render_put_subpicture(ctx, surface,
2153                                    srcx, srcy, srcw, srch,
2154                                    destx, desty, destw, desth);
2155 }
2156
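/*
 * Driver-load time setup: pick the shader kernel set matching the GPU
 * generation (GEN6/GEN7, Ironlake, or original GEN4), upload each
 * kernel binary into its own buffer object, and allocate the 4KB CURBE
 * constant buffer.
 */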
2157 Bool 
2158 i965_render_init(VADriverContextP ctx)
2159 {
2160     struct i965_driver_data *i965 = i965_driver_data(ctx);
2161     struct i965_render_state *render_state = &i965->render_state;
2162     int i;
2163
2164     /* kernel */
2165     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
2166                                  sizeof(render_kernels_gen5[0])));
2167     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
2168                                  sizeof(render_kernels_gen6[0])));
2169
2170     if (IS_GEN6(i965->intel.device_id) ||
2171         IS_GEN7(i965->intel.device_id))
2172         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
2173     else if (IS_IRONLAKE(i965->intel.device_id))
2174         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
2175     else
2176         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
2177
2178     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2179         struct i965_kernel *kernel = &render_state->render_kernels[i];
2180
2181         if (!kernel->size)
2182             continue;
2183
2184         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
2185                                   kernel->name, 
2186                                   kernel->size, 0x1000);
2187         assert(kernel->bo);
2188         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
2189     }
2190
2191     /* constant buffer */
2192     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
2193                       "constant buffer",
2194                       4096, 64);
2195     assert(render_state->curbe.bo);
2196     render_state->curbe.upload = 0;
2197
2198     return True;
2199 }
2200
2201 Bool 
2202 i965_render_terminate(VADriverContextP ctx)
2203 {
2204     int i;
2205     struct i965_driver_data *i965 = i965_driver_data(ctx);
2206     struct i965_render_state *render_state = &i965->render_state;
2207
2208     dri_bo_unreference(render_state->curbe.bo);
2209     render_state->curbe.bo = NULL;
2210
2211     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
2212         struct i965_kernel *kernel = &render_state->render_kernels[i];
2213         
2214         dri_bo_unreference(kernel->bo);
2215         kernel->bo = NULL;
2216     }
2217
2218     dri_bo_unreference(render_state->vb.vertex_buffer);
2219     render_state->vb.vertex_buffer = NULL;
2220     dri_bo_unreference(render_state->vs.state);
2221     render_state->vs.state = NULL;
2222     dri_bo_unreference(render_state->sf.state);
2223     render_state->sf.state = NULL;
2224     dri_bo_unreference(render_state->wm.sampler);
2225     render_state->wm.sampler = NULL;
2226     dri_bo_unreference(render_state->wm.state);
2227     render_state->wm.state = NULL;
2228     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
         render_state->wm.surface_state_binding_table_bo = NULL;
2229     dri_bo_unreference(render_state->cc.viewport);
2230     render_state->cc.viewport = NULL;
2231     dri_bo_unreference(render_state->cc.state);
2232     render_state->cc.state = NULL;
2233     dri_bo_unreference(render_state->cc.blend);
2234     render_state->cc.blend = NULL;
2235     dri_bo_unreference(render_state->cc.depth_stencil);
2236     render_state->cc.depth_stencil = NULL;
2237
2238     if (render_state->draw_region) {
2239         dri_bo_unreference(render_state->draw_region->bo);
2240         free(render_state->draw_region);
2241         render_state->draw_region = NULL;
2242     }
2243
2244     return True;
2245 }
2246
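/*
 * Lifetime sketch (illustrative only; the surrounding driver code is
 * assumed, not shown in this file): i965_render_init() is expected to
 * run once when the driver instance comes up, i965_render_terminate()
 * once on teardown, e.g.
 *
 *     if (!i965_render_init(ctx))
 *         return False;
 *     ...render surfaces and subpictures...
 *     i965_render_terminate(ctx);
 */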