/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       32
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
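
/*
 * Worked examples for the macro above (pure arithmetic from the formula;
 * per its users below, the thread-state field counts 16-register blocks,
 * stored minus one):
 *
 *     I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF) = (16 + 15) / 16 - 1 = 0
 *     I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF) = (32 + 15) / 16 - 1 = 1
 */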

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE_I965  ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7  ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
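
/*
 * Resulting layout of wm.surface_state_binding_table_bo (a sketch derived
 * from the macros above, not from hardware documentation):
 *
 *     SURFACE_STATE_OFFSET(0)                        surface state 0
 *     SURFACE_STATE_OFFSET(1)                        surface state 1
 *     ...
 *     SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES - 1)  last surface state
 *     BINDING_TABLE_OFFSET                           binding table: one
 *                                                    32-bit entry per
 *                                                    surface, each holding
 *                                                    SURFACE_STATE_OFFSET(i)
 */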

static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}
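
/*
 * The union avoids the undefined behaviour a cast like *(uint32_t *)&f
 * would have under C strict-aliasing rules.  Assuming IEEE-754 single
 * precision, float_to_uint(1.0f) == 0x3f800000, which is what
 * i965_render_constant_color() feeds into CMD_CONSTANT_COLOR.
 */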

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        1
#define URB_CS_ENTRY_SIZE     1
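
/*
 * With the counts above, only VS, SF and CS occupy the URB (GS and CLIP
 * have zero entries):
 *
 *     VS: 8 entries * 1 row  = 8 rows
 *     SF: 1 entry  * 2 rows  = 2 rows
 *     CS: 1 entry  * 1 row   = 1 row
 *
 * i965_render_urb_layout() below packs these sections back to back and
 * programs the URB fences accordingly.
 */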

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_id))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}

static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 3; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 0;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_id))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 1;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
        wm_state->wm5.max_threads = 12 * 6 - 1;
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
        wm_state->wm5.max_threads = 10 * 5 - 1;
    }

    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}
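
/*
 * Hypothetical usage (names and sizes are illustrative only): binding a
 * 720x480 linear BGRA destination at the start of a buffer object would be
 *
 *     i965_render_set_surface_state(ss, bo, 0,
 *                                   720, 480, 720 * 4,
 *                                   I965_SURFACEFORMAT_B8G8R8A8_UNORM, 0);
 *
 * Note the minus-one encoding above: width, height and pitch are all
 * programmed as value - 1.
 */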

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    VASurfaceID      surface,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}
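
/*
 * Binding-table slots populated by the calls above (slot 0 is the
 * destination, set up in i965_render_dest_surface_state()):
 *
 *     1, 2   Y plane
 *     3, 4   interleaved CbCr plane (NV12) or Cb plane (other planar)
 *     5, 6   Cr plane (non-NV12 formats only)
 *
 * Each plane is bound twice to match the fixed surface/sampler indices the
 * PS kernels under shaders/render/ expect.
 */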

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                              VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(surface);
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;
    int w, h;
    dri_bo *region;
    dri_bo *subpic_region;

    /* validate the surface before dereferencing it for its subpicture */
    assert(obj_surface);
    assert(obj_surface->bo);

    obj_subpic = SUBPIC(obj_surface->subpic);
    obj_image = IMAGE(obj_subpic->image);
    w = obj_surface->width;
    h = obj_surface->height;
    region = obj_surface->bo;
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_id)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}
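
/*
 * The buffer written above holds one RECTLIST rectangle: three vertices of
 * four floats each, {s, t, x, y} in the fill order above, matching the
 * 16-byte pitch programmed in i965_render_startup() and the two
 * R32G32_FLOAT elements in i965_render_vertex_elements().  Only three
 * corners are uploaded; the hardware derives the fourth for RECTLIST
 * primitives.  A hypothetical unrotated full-surface quad (sizes are
 * illustrative only):
 *
 *     float tex[4] = { 0.0f, 0.0f, 1.0f, 1.0f };    (u1, v1, u2, v2)
 *     float vid[4] = { 0.0f, 0.0f, 640.0f, 480.0f };
 *     i965_fill_vertex_buffer(ctx, tex, vid);
 */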

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 VASurfaceID surface,
                                 const VARectangle *output_rect)
{
    struct i965_driver_data  *i965         = i965_driver_data(ctx);
    struct object_surface    *obj_surface  = SURFACE(surface);
    struct object_subpic     *obj_subpic   = SUBPIC(obj_surface->subpic);
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    struct object_surface *obj_surface;
    float tex_coords[4], vid_coords[4];
    int width, height;

    obj_surface = SURFACE(surface);
    assert(obj_surface);

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_constants(VADriverContextP ctx,
                             VASurfaceID surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    struct object_surface *obj_surface = SURFACE(surface);

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
               obj_surface->fourcc == VA_FOURCC('I', 'M', 'C', '3'));
        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    dri_bo_unmap(render_state->curbe.bo);
}
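
/*
 * The single CURBE word appears to select the sampling path in the planar
 * PS kernel (exa_wm_src_sample_planar.g*b): 0 for three-plane YUV, 1 for
 * NV12's interleaved CbCr, 2 for the grey-only YUV400 case asserted above.
 */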

static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, surface);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}
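
/*
 * With the URB_*_ENTRIES/SIZE values defined earlier, the fence offsets
 * computed above work out to (in URB rows):
 *
 *     vs: [0, 8)    gs: [8, 8)    clip: [8, 8)
 *     sf: [8, 10)   cs: [10, 11)
 *
 * so the two fence DWords carry 8/8/8 and 10/11 respectively.
 */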

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_id)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    VAImageID        image_id,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    struct object_image *obj_image = IMAGE(image_id);
    assert(obj_image);

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0..23 hold the color, bits 24..31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_id))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}
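
/*
 * In short: one vertex buffer at index 0 with a 16-byte (4 float) pitch,
 * then a 3DPRIMITIVE that draws a single RECTLIST instance from the three
 * vertices uploaded by i965_fill_vertex_buffer().
 */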

static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_id) ||
        IS_GEN7(i965->intel.device_id)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}
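
/*
 * BR13 packs (raster operation << 16) | destination pitch; 0xf0 is the
 * PATCOPY rop, so the rectangle is filled with the solid color in the last
 * DWord (0x0 here).  For tiled destinations the pitch field is in DWords,
 * hence the divide by 4 above.
 */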
1446
1447 static void
1448 i965_surface_render_pipeline_setup(VADriverContextP ctx)
1449 {
1450     struct i965_driver_data *i965 = i965_driver_data(ctx);
1451     struct intel_batchbuffer *batch = i965->batch;
1452
1453     i965_clear_dest_region(ctx);
1454     intel_batchbuffer_start_atomic(batch, 0x1000);
1455     intel_batchbuffer_emit_mi_flush(batch);
1456     i965_render_pipeline_select(ctx);
1457     i965_render_state_sip(ctx);
1458     i965_render_state_base_address(ctx);
1459     i965_render_binding_table_pointers(ctx);
1460     i965_render_constant_color(ctx);
1461     i965_render_pipelined_pointers(ctx);
1462     i965_render_urb_layout(ctx);
1463     i965_render_cs_urb_layout(ctx);
1464     i965_render_constant_buffer(ctx);
1465     i965_render_drawing_rectangle(ctx);
1466     i965_render_vertex_elements(ctx);
1467     i965_render_startup(ctx);
1468     intel_batchbuffer_end_atomic(batch);
1469 }
1470
1471 static void
1472 i965_subpic_render_pipeline_setup(VADriverContextP ctx)
1473 {
1474     struct i965_driver_data *i965 = i965_driver_data(ctx);
1475     struct intel_batchbuffer *batch = i965->batch;
1476
1477     intel_batchbuffer_start_atomic(batch, 0x1000);
1478     intel_batchbuffer_emit_mi_flush(batch);
1479     i965_render_pipeline_select(ctx);
1480     i965_render_state_sip(ctx);
1481     i965_render_state_base_address(ctx);
1482     i965_render_binding_table_pointers(ctx);
1483     i965_render_constant_color(ctx);
1484     i965_render_pipelined_pointers(ctx);
1485     i965_render_urb_layout(ctx);
1486     i965_render_cs_urb_layout(ctx);
1487     i965_render_drawing_rectangle(ctx);
1488     i965_render_vertex_elements(ctx);
1489     i965_render_startup(ctx);
1490     intel_batchbuffer_end_atomic(batch);
1491 }
1492
1493
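/*
 * (Re)allocate the buffer objects backing the fixed-function unit
 * state.  Old BOs are unreferenced rather than rewritten, so state
 * still in flight from a previous frame keeps its own copy alive
 * until the GPU is done with it.
 */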
1494 static void 
1495 i965_render_initialize(VADriverContextP ctx)
1496 {
1497     struct i965_driver_data *i965 = i965_driver_data(ctx);
1498     struct i965_render_state *render_state = &i965->render_state;
1499     dri_bo *bo;
1500
1501     /* VERTEX BUFFER */
1502     dri_bo_unreference(render_state->vb.vertex_buffer);
1503     bo = dri_bo_alloc(i965->intel.bufmgr,
1504                       "vertex buffer",
1505                       4096,
1506                       4096);
1507     assert(bo);
1508     render_state->vb.vertex_buffer = bo;
1509
1510     /* VS */
1511     dri_bo_unreference(render_state->vs.state);
1512     bo = dri_bo_alloc(i965->intel.bufmgr,
1513                       "vs state",
1514                       sizeof(struct i965_vs_unit_state),
1515                       64);
1516     assert(bo);
1517     render_state->vs.state = bo;
1518
1519     /* GS - no state needed, the unit is disabled */
1520     /* CLIP - no state needed, the unit is disabled */
1521     /* SF */
1522     dri_bo_unreference(render_state->sf.state);
1523     bo = dri_bo_alloc(i965->intel.bufmgr,
1524                       "sf state",
1525                       sizeof(struct i965_sf_unit_state),
1526                       64);
1527     assert(bo);
1528     render_state->sf.state = bo;
1529
1530     /* WM */
1531     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1532     bo = dri_bo_alloc(i965->intel.bufmgr,
1533                       "surface state & binding table",
1534                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1535                       4096);
1536     assert(bo);
1537     render_state->wm.surface_state_binding_table_bo = bo;
1538
1539     dri_bo_unreference(render_state->wm.sampler);
1540     bo = dri_bo_alloc(i965->intel.bufmgr,
1541                       "sampler state",
1542                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1543                       64);
1544     assert(bo);
1545     render_state->wm.sampler = bo;
1546     render_state->wm.sampler_count = 0;
1547
1548     dri_bo_unreference(render_state->wm.state);
1549     bo = dri_bo_alloc(i965->intel.bufmgr,
1550                       "wm state",
1551                       sizeof(struct i965_wm_unit_state),
1552                       64);
1553     assert(bo);
1554     render_state->wm.state = bo;
1555
1556     /* COLOR CALCULATOR */
1557     dri_bo_unreference(render_state->cc.state);
1558     bo = dri_bo_alloc(i965->intel.bufmgr,
1559                       "color calc state",
1560                       sizeof(struct i965_cc_unit_state),
1561                       64);
1562     assert(bo);
1563     render_state->cc.state = bo;
1564
1565     dri_bo_unreference(render_state->cc.viewport);
1566     bo = dri_bo_alloc(i965->intel.bufmgr,
1567                       "cc viewport",
1568                       sizeof(struct i965_cc_viewport),
1569                       64);
1570     assert(bo);
1571     render_state->cc.viewport = bo;
1572 }
1573
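/*
 * Top-level PutSurface for 965/G4x/Ironlake: allocate fresh state,
 * fill in the surface/sampler/vertex state, emit the pipeline and
 * kick the batch off to the GPU.
 */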
1574 static void
1575 i965_render_put_surface(
1576     VADriverContextP   ctx,
1577     VASurfaceID        surface,
1578     const VARectangle *src_rect,
1579     const VARectangle *dst_rect,
1580     unsigned int       flags
1581 )
1582 {
1583     struct i965_driver_data *i965 = i965_driver_data(ctx);
1584     struct intel_batchbuffer *batch = i965->batch;
1585
1586     i965_render_initialize(ctx);
1587     i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags);
1588     i965_surface_render_pipeline_setup(ctx);
1589     intel_batchbuffer_flush(batch);
1590 }
1591
1592 static void
1593 i965_render_put_subpicture(
1594     VADriverContextP   ctx,
1595     VASurfaceID        surface,
1596     const VARectangle *src_rect,
1597     const VARectangle *dst_rect
1598 )
1599 {
1600     struct i965_driver_data *i965 = i965_driver_data(ctx);
1601     struct intel_batchbuffer *batch = i965->batch;
1602     struct object_surface *obj_surface = SURFACE(surface);
1603     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
1604
1605     assert(obj_subpic);
1606
1607     i965_render_initialize(ctx);
1608     i965_subpic_render_state_setup(ctx, surface, src_rect, dst_rect);
1609     i965_subpic_render_pipeline_setup(ctx);
1610     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
1611     intel_batchbuffer_flush(batch);
1612 }
1613
1614 /*
1615  * for GEN6 (Sandy Bridge)
1616  */
1617 static void 
1618 gen6_render_initialize(VADriverContextP ctx)
1619 {
1620     struct i965_driver_data *i965 = i965_driver_data(ctx);
1621     struct i965_render_state *render_state = &i965->render_state;
1622     dri_bo *bo;
1623
1624     /* VERTEX BUFFER */
1625     dri_bo_unreference(render_state->vb.vertex_buffer);
1626     bo = dri_bo_alloc(i965->intel.bufmgr,
1627                       "vertex buffer",
1628                       4096,
1629                       4096);
1630     assert(bo);
1631     render_state->vb.vertex_buffer = bo;
1632
1633     /* WM */
1634     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1635     bo = dri_bo_alloc(i965->intel.bufmgr,
1636                       "surface state & binding table",
1637                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1638                       4096);
1639     assert(bo);
1640     render_state->wm.surface_state_binding_table_bo = bo;
1641
1642     dri_bo_unreference(render_state->wm.sampler);
1643     bo = dri_bo_alloc(i965->intel.bufmgr,
1644                       "sampler state",
1645                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1646                       4096);
1647     assert(bo);
1648     render_state->wm.sampler = bo;
1649     render_state->wm.sampler_count = 0;
1650
1651     /* COLOR CALCULATOR */
1652     dri_bo_unreference(render_state->cc.state);
1653     bo = dri_bo_alloc(i965->intel.bufmgr,
1654                       "color calc state",
1655                       sizeof(struct gen6_color_calc_state),
1656                       4096);
1657     assert(bo);
1658     render_state->cc.state = bo;
1659
1660     /* CC VIEWPORT */
1661     dri_bo_unreference(render_state->cc.viewport);
1662     bo = dri_bo_alloc(i965->intel.bufmgr,
1663                       "cc viewport",
1664                       sizeof(struct i965_cc_viewport),
1665                       4096);
1666     assert(bo);
1667     render_state->cc.viewport = bo;
1668
1669     /* BLEND STATE */
1670     dri_bo_unreference(render_state->cc.blend);
1671     bo = dri_bo_alloc(i965->intel.bufmgr,
1672                       "blend state",
1673                       sizeof(struct gen6_blend_state),
1674                       4096);
1675     assert(bo);
1676     render_state->cc.blend = bo;
1677
1678     /* DEPTH & STENCIL STATE */
1679     dri_bo_unreference(render_state->cc.depth_stencil);
1680     bo = dri_bo_alloc(i965->intel.bufmgr,
1681                       "depth & stencil state",
1682                       sizeof(struct gen6_depth_stencil_state),
1683                       4096);
1684     assert(bo);
1685     render_state->cc.depth_stencil = bo;
1686 }
1687
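/*
 * The constant blend color is set to opaque magenta (1, 0, 1, 1).
 * None of the blend functions programmed below actually reference the
 * constant color, so this presumably only matters for debugging.
 */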
1688 static void
1689 gen6_render_color_calc_state(VADriverContextP ctx)
1690 {
1691     struct i965_driver_data *i965 = i965_driver_data(ctx);
1692     struct i965_render_state *render_state = &i965->render_state;
1693     struct gen6_color_calc_state *color_calc_state;
1694     
1695     dri_bo_map(render_state->cc.state, 1); /* 1 = write enable */
1696     assert(render_state->cc.state->virtual);
1697     color_calc_state = render_state->cc.state->virtual;
1698     memset(color_calc_state, 0, sizeof(*color_calc_state));
1699     color_calc_state->constant_r = 1.0;
1700     color_calc_state->constant_g = 0.0;
1701     color_calc_state->constant_b = 1.0;
1702     color_calc_state->constant_a = 1.0;
1703     dri_bo_unmap(render_state->cc.state);
1704 }
1705
1706 static void
1707 gen6_render_blend_state(VADriverContextP ctx)
1708 {
1709     struct i965_driver_data *i965 = i965_driver_data(ctx);
1710     struct i965_render_state *render_state = &i965->render_state;
1711     struct gen6_blend_state *blend_state;
1712     
1713     dri_bo_map(render_state->cc.blend, 1);
1714     assert(render_state->cc.blend->virtual);
1715     blend_state = render_state->cc.blend->virtual;
1716     memset(blend_state, 0, sizeof(*blend_state));
1717     blend_state->blend1.logic_op_enable = 1;
1718     blend_state->blend1.logic_op_func = 0xc; /* COPY: output = source */
1719     dri_bo_unmap(render_state->cc.blend);
1720 }
1721
1722 static void
1723 gen6_render_depth_stencil_state(VADriverContextP ctx)
1724 {
1725     struct i965_driver_data *i965 = i965_driver_data(ctx);
1726     struct i965_render_state *render_state = &i965->render_state;
1727     struct gen6_depth_stencil_state *depth_stencil_state;
1728     
1729     dri_bo_map(render_state->cc.depth_stencil, 1);
1730     assert(render_state->cc.depth_stencil->virtual);
1731     depth_stencil_state = render_state->cc.depth_stencil->virtual;
1732     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state)); /* depth and stencil tests disabled */
1733     dri_bo_unmap(render_state->cc.depth_stencil);
1734 }
1735
1736 static void
1737 gen6_render_setup_states(
1738     VADriverContextP   ctx,
1739     VASurfaceID        surface,
1740     const VARectangle *src_rect,
1741     const VARectangle *dst_rect,
1742     unsigned int       flags
1743 )
1744 {
1745     i965_render_dest_surface_state(ctx, 0);
1746     i965_render_src_surfaces_state(ctx, surface, flags);
1747     i965_render_sampler(ctx);
1748     i965_render_cc_viewport(ctx);
1749     gen6_render_color_calc_state(ctx);
1750     gen6_render_blend_state(ctx);
1751     gen6_render_depth_stencil_state(ctx);
1752     i965_render_upload_constants(ctx, surface);
1753     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
1754 }
1755
1756 static void
1757 gen6_emit_invariant_states(VADriverContextP ctx)
1758 {
1759     struct i965_driver_data *i965 = i965_driver_data(ctx);
1760     struct intel_batchbuffer *batch = i965->batch;
1761
1762     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1763
1764     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1765     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1766               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1767     OUT_BATCH(batch, 0);
1768
1769     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1770     OUT_BATCH(batch, 1); /* sample mask: enable sample 0 only */
1771
1772     /* Set system instruction pointer */
1773     OUT_BATCH(batch, CMD_STATE_SIP | 0);
1774     OUT_BATCH(batch, 0);
1775 }
1776
1777 static void
1778 gen6_emit_state_base_address(VADriverContextP ctx)
1779 {
1780     struct i965_driver_data *i965 = i965_driver_data(ctx);
1781     struct intel_batchbuffer *batch = i965->batch;
1782     struct i965_render_state *render_state = &i965->render_state;
1783
1784     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1785     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1786     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1787     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1788     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1789     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1790     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1791     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1792     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1793     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1794 }
1795
1796 static void
1797 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1798 {
1799     struct i965_driver_data *i965 = i965_driver_data(ctx);
1800     struct intel_batchbuffer *batch = i965->batch;
1801     struct i965_render_state *render_state = &i965->render_state;
1802
1803     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1804               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1805               (4 - 2));
1806     OUT_BATCH(batch, 0);
1807     OUT_BATCH(batch, 0);
1808     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1809 }
1810
1811 static void
1812 gen6_emit_urb(VADriverContextP ctx)
1813 {
1814     struct i965_driver_data *i965 = i965_driver_data(ctx);
1815     struct intel_batchbuffer *batch = i965->batch;
1816
1817     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1818     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1819               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1820     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1821               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1822 }
1823
1824 static void
1825 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1826 {
1827     struct i965_driver_data *i965 = i965_driver_data(ctx);
1828     struct intel_batchbuffer *batch = i965->batch;
1829     struct i965_render_state *render_state = &i965->render_state;
1830
1831     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1832     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* delta 1 sets the pointer's 'modify enable' bit */
1833     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1834     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1835 }
1836
1837 static void
1838 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1839 {
1840     struct i965_driver_data *i965 = i965_driver_data(ctx);
1841     struct intel_batchbuffer *batch = i965->batch;
1842     struct i965_render_state *render_state = &i965->render_state;
1843
1844     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1845               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1846               (4 - 2));
1847     OUT_BATCH(batch, 0); /* VS */
1848     OUT_BATCH(batch, 0); /* GS */
1849     OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* PS */
1850 }
1851
1852 static void
1853 gen6_emit_binding_table(VADriverContextP ctx)
1854 {
1855     struct i965_driver_data *i965 = i965_driver_data(ctx);
1856     struct intel_batchbuffer *batch = i965->batch;
1857
1858     /* Binding table pointers */
1859     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1860               GEN6_BINDING_TABLE_MODIFY_PS |
1861               (4 - 2));
1862     OUT_BATCH(batch, 0);                /* vs */
1863     OUT_BATCH(batch, 0);                /* gs */
1864     /* Only the PS uses the binding table */
1865     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
1866 }
1867
1868 static void
1869 gen6_emit_depth_buffer_state(VADriverContextP ctx)
1870 {
1871     struct i965_driver_data *i965 = i965_driver_data(ctx);
1872     struct intel_batchbuffer *batch = i965->batch;
1873
1874     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
1875     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
1876               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
1877     OUT_BATCH(batch, 0);
1878     OUT_BATCH(batch, 0);
1879     OUT_BATCH(batch, 0);
1880     OUT_BATCH(batch, 0);
1881     OUT_BATCH(batch, 0);
1882
1883     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
1884     OUT_BATCH(batch, 0);
1885 }
1886
1887 static void
1888 gen6_emit_drawing_rectangle(VADriverContextP ctx)
1889 {
1890     i965_render_drawing_rectangle(ctx);
1891 }
1892
1893 static void 
1894 gen6_emit_vs_state(VADriverContextP ctx)
1895 {
1896     struct i965_driver_data *i965 = i965_driver_data(ctx);
1897     struct intel_batchbuffer *batch = i965->batch;
1898
1899     /* disable VS constant buffer */
1900     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
1901     OUT_BATCH(batch, 0);
1902     OUT_BATCH(batch, 0);
1903     OUT_BATCH(batch, 0);
1904     OUT_BATCH(batch, 0);
1905         
1906     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
1907     OUT_BATCH(batch, 0); /* without VS kernel */
1908     OUT_BATCH(batch, 0);
1909     OUT_BATCH(batch, 0);
1910     OUT_BATCH(batch, 0);
1911     OUT_BATCH(batch, 0); /* pass-through */
1912 }
1913
1914 static void 
1915 gen6_emit_gs_state(VADriverContextP ctx)
1916 {
1917     struct i965_driver_data *i965 = i965_driver_data(ctx);
1918     struct intel_batchbuffer *batch = i965->batch;
1919
1920     /* disable GS constant buffer */
1921     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
1922     OUT_BATCH(batch, 0);
1923     OUT_BATCH(batch, 0);
1924     OUT_BATCH(batch, 0);
1925     OUT_BATCH(batch, 0);
1926         
1927     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
1928     OUT_BATCH(batch, 0); /* without GS kernel */
1929     OUT_BATCH(batch, 0);
1930     OUT_BATCH(batch, 0);
1931     OUT_BATCH(batch, 0);
1932     OUT_BATCH(batch, 0);
1933     OUT_BATCH(batch, 0); /* pass-through */
1934 }
1935
1936 static void 
1937 gen6_emit_clip_state(VADriverContextP ctx)
1938 {
1939     struct i965_driver_data *i965 = i965_driver_data(ctx);
1940     struct intel_batchbuffer *batch = i965->batch;
1941
1942     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1943     OUT_BATCH(batch, 0);
1944     OUT_BATCH(batch, 0); /* pass-through */
1945     OUT_BATCH(batch, 0);
1946 }
1947
1948 static void 
1949 gen6_emit_sf_state(VADriverContextP ctx)
1950 {
1951     struct i965_driver_data *i965 = i965_driver_data(ctx);
1952     struct intel_batchbuffer *batch = i965->batch;
1953
1954     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
1955     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
1956               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
1957               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
1958     OUT_BATCH(batch, 0);
1959     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
1960     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
1961     OUT_BATCH(batch, 0);
1962     OUT_BATCH(batch, 0);
1963     OUT_BATCH(batch, 0);
1964     OUT_BATCH(batch, 0);
1965     OUT_BATCH(batch, 0); /* DW9 */
1966     OUT_BATCH(batch, 0);
1967     OUT_BATCH(batch, 0);
1968     OUT_BATCH(batch, 0);
1969     OUT_BATCH(batch, 0);
1970     OUT_BATCH(batch, 0); /* DW14 */
1971     OUT_BATCH(batch, 0);
1972     OUT_BATCH(batch, 0);
1973     OUT_BATCH(batch, 0);
1974     OUT_BATCH(batch, 0);
1975     OUT_BATCH(batch, 0); /* DW19 */
1976 }
1977
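/*
 * WM/PS setup for SNB: point PS constant buffer 0 at the CURBE BO,
 * bind the selected pixel-shader kernel and enable SIMD16 dispatch
 * (40 threads, kernel inputs starting at GRF 6).
 */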
1978 static void 
1979 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
1980 {
1981     struct i965_driver_data *i965 = i965_driver_data(ctx);
1982     struct intel_batchbuffer *batch = i965->batch;
1983     struct i965_render_state *render_state = &i965->render_state;
1984
1985     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
1986               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
1987               (5 - 2));
1988     OUT_RELOC(batch, 
1989               render_state->curbe.bo,
1990               I915_GEM_DOMAIN_INSTRUCTION, 0,
1991               0);
1992     OUT_BATCH(batch, 0);
1993     OUT_BATCH(batch, 0);
1994     OUT_BATCH(batch, 0);
1995
1996     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
1997     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
1998               I915_GEM_DOMAIN_INSTRUCTION, 0,
1999               0);
2000     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2001               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2002     OUT_BATCH(batch, 0);
2003     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2004     OUT_BATCH(batch, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2005               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2006               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2007     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2008               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2009     OUT_BATCH(batch, 0);
2010     OUT_BATCH(batch, 0);
2011 }
2012
2013 static void
2014 gen6_emit_vertex_element_state(VADriverContextP ctx)
2015 {
2016     struct i965_driver_data *i965 = i965_driver_data(ctx);
2017     struct intel_batchbuffer *batch = i965->batch;
2018
2019     /* Set up our vertex elements, sourced from the single vertex buffer. */
2020     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2021     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2022     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2023               GEN6_VE0_VALID |
2024               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2025               (0 << VE0_OFFSET_SHIFT));
2026     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2027               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2028               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2029               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2030     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2031     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2032               GEN6_VE0_VALID |
2033               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2034               (8 << VE0_OFFSET_SHIFT));
2035     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2036               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2037               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2038               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2039 }
2040
2041 static void
2042 gen6_emit_vertices(VADriverContextP ctx)
2043 {
2044     struct i965_driver_data *i965 = i965_driver_data(ctx);
2045     struct intel_batchbuffer *batch = i965->batch;
2046     struct i965_render_state *render_state = &i965->render_state;
2047
2048     BEGIN_BATCH(batch, 11);
2049     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2050     OUT_BATCH(batch, 
2051               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2052               GEN6_VB0_VERTEXDATA |
2053               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2054     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2055     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2056     OUT_BATCH(batch, 0);
2057
2058     OUT_BATCH(batch, 
2059               CMD_3DPRIMITIVE |
2060               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2061               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2062               (0 << 9) |
2063               4);
2064     OUT_BATCH(batch, 3); /* vertex count per instance */
2065     OUT_BATCH(batch, 0); /* start vertex offset */
2066     OUT_BATCH(batch, 1); /* single instance */
2067     OUT_BATCH(batch, 0); /* start instance location */
2068     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2069     ADVANCE_BATCH(batch);
2070 }
2071
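/*
 * Emit all SNB render state for one RECTLIST draw.  As on older
 * generations, the whole sequence is wrapped in one atomic batch
 * window so it reaches the ring buffer in a single piece.
 */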
2072 static void
2073 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2074 {
2075     struct i965_driver_data *i965 = i965_driver_data(ctx);
2076     struct intel_batchbuffer *batch = i965->batch;
2077
2078     intel_batchbuffer_start_atomic(batch, 0x1000);
2079     intel_batchbuffer_emit_mi_flush(batch);
2080     gen6_emit_invariant_states(ctx);
2081     gen6_emit_state_base_address(ctx);
2082     gen6_emit_viewport_state_pointers(ctx);
2083     gen6_emit_urb(ctx);
2084     gen6_emit_cc_state_pointers(ctx);
2085     gen6_emit_sampler_state_pointers(ctx);
2086     gen6_emit_vs_state(ctx);
2087     gen6_emit_gs_state(ctx);
2088     gen6_emit_clip_state(ctx);
2089     gen6_emit_sf_state(ctx);
2090     gen6_emit_wm_state(ctx, kernel);
2091     gen6_emit_binding_table(ctx);
2092     gen6_emit_depth_buffer_state(ctx);
2093     gen6_emit_drawing_rectangle(ctx);
2094     gen6_emit_vertex_element_state(ctx);
2095     gen6_emit_vertices(ctx);
2096     intel_batchbuffer_end_atomic(batch);
2097 }
2098
2099 static void
2100 gen6_render_put_surface(
2101     VADriverContextP   ctx,
2102     VASurfaceID        surface,
2103     const VARectangle *src_rect,
2104     const VARectangle *dst_rect,
2105     unsigned int       flags
2106 )
2107 {
2108     struct i965_driver_data *i965 = i965_driver_data(ctx);
2109     struct intel_batchbuffer *batch = i965->batch;
2110
2111     gen6_render_initialize(ctx);
2112     gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
2113     i965_clear_dest_region(ctx);
2114     gen6_render_emit_states(ctx, PS_KERNEL);
2115     intel_batchbuffer_flush(batch);
2116 }
2117
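/*
 * Subpictures are blended over the destination with standard
 * source-over alpha (src * alpha + dst * (1 - alpha)) instead of the
 * logic-op COPY used for the primary surface.
 */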
2118 static void
2119 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2120 {
2121     struct i965_driver_data *i965 = i965_driver_data(ctx);
2122     struct i965_render_state *render_state = &i965->render_state;
2123     struct gen6_blend_state *blend_state;
2124
2126     dri_bo_map(render_state->cc.blend, 1);
2127     assert(render_state->cc.blend->virtual);
2128     blend_state = render_state->cc.blend->virtual;
2129     memset(blend_state, 0, sizeof(*blend_state));
2130     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2131     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2132     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2133     blend_state->blend0.blend_enable = 1;
2134     blend_state->blend1.post_blend_clamp_enable = 1;
2135     blend_state->blend1.pre_blend_clamp_enable = 1;
2136     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2137     dri_bo_unmap(render_state->cc.blend);
2138 }
2139
2140 static void
2141 gen6_subpicture_render_setup_states(
2142     VADriverContextP   ctx,
2143     VASurfaceID        surface,
2144     const VARectangle *src_rect,
2145     const VARectangle *dst_rect
2146 )
2147 {
2148     i965_render_dest_surface_state(ctx, 0);
2149     i965_subpic_render_src_surfaces_state(ctx, surface);
2150     i965_render_sampler(ctx);
2151     i965_render_cc_viewport(ctx);
2152     gen6_render_color_calc_state(ctx);
2153     gen6_subpicture_render_blend_state(ctx);
2154     gen6_render_depth_stencil_state(ctx);
2155     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
2156 }
2157
2158 static void
2159 gen6_render_put_subpicture(
2160     VADriverContextP   ctx,
2161     VASurfaceID        surface,
2162     const VARectangle *src_rect,
2163     const VARectangle *dst_rect
2164 )
2165 {
2166     struct i965_driver_data *i965 = i965_driver_data(ctx);
2167     struct intel_batchbuffer *batch = i965->batch;
2168     struct object_surface *obj_surface = SURFACE(surface);
2169     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2170
2171     assert(obj_subpic);
2172     gen6_render_initialize(ctx);
2173     gen6_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2174     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2175     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2176     intel_batchbuffer_flush(batch);
2177 }
2178
2179 /*
2180  * for GEN7
2181  */
2182 static void 
2183 gen7_render_initialize(VADriverContextP ctx)
2184 {
2185     struct i965_driver_data *i965 = i965_driver_data(ctx);
2186     struct i965_render_state *render_state = &i965->render_state;
2187     dri_bo *bo;
2188
2189     /* VERTEX BUFFER */
2190     dri_bo_unreference(render_state->vb.vertex_buffer);
2191     bo = dri_bo_alloc(i965->intel.bufmgr,
2192                       "vertex buffer",
2193                       4096,
2194                       4096);
2195     assert(bo);
2196     render_state->vb.vertex_buffer = bo;
2197
2198     /* WM */
2199     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2200     bo = dri_bo_alloc(i965->intel.bufmgr,
2201                       "surface state & binding table",
2202                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2203                       4096);
2204     assert(bo);
2205     render_state->wm.surface_state_binding_table_bo = bo;
2206
2207     dri_bo_unreference(render_state->wm.sampler);
2208     bo = dri_bo_alloc(i965->intel.bufmgr,
2209                       "sampler state",
2210                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2211                       4096);
2212     assert(bo);
2213     render_state->wm.sampler = bo;
2214     render_state->wm.sampler_count = 0;
2215
2216     /* COLOR CALCULATOR */
2217     dri_bo_unreference(render_state->cc.state);
2218     bo = dri_bo_alloc(i965->intel.bufmgr,
2219                       "color calc state",
2220                       sizeof(struct gen6_color_calc_state),
2221                       4096);
2222     assert(bo);
2223     render_state->cc.state = bo;
2224
2225     /* CC VIEWPORT */
2226     dri_bo_unreference(render_state->cc.viewport);
2227     bo = dri_bo_alloc(i965->intel.bufmgr,
2228                       "cc viewport",
2229                       sizeof(struct i965_cc_viewport),
2230                       4096);
2231     assert(bo);
2232     render_state->cc.viewport = bo;
2233
2234     /* BLEND STATE */
2235     dri_bo_unreference(render_state->cc.blend);
2236     bo = dri_bo_alloc(i965->intel.bufmgr,
2237                       "blend state",
2238                       sizeof(struct gen6_blend_state),
2239                       4096);
2240     assert(bo);
2241     render_state->cc.blend = bo;
2242
2243     /* DEPTH & STENCIL STATE */
2244     dri_bo_unreference(render_state->cc.depth_stencil);
2245     bo = dri_bo_alloc(i965->intel.bufmgr,
2246                       "depth & stencil state",
2247                       sizeof(struct gen6_depth_stencil_state),
2248                       4096);
2249     assert(bo);
2250     render_state->cc.depth_stencil = bo;
2251 }
2252
2253 static void
2254 gen7_render_color_calc_state(VADriverContextP ctx)
2255 {
2256     struct i965_driver_data *i965 = i965_driver_data(ctx);
2257     struct i965_render_state *render_state = &i965->render_state;
2258     struct gen6_color_calc_state *color_calc_state;
2259     
2260     dri_bo_map(render_state->cc.state, 1);
2261     assert(render_state->cc.state->virtual);
2262     color_calc_state = render_state->cc.state->virtual;
2263     memset(color_calc_state, 0, sizeof(*color_calc_state));
2264     color_calc_state->constant_r = 1.0;
2265     color_calc_state->constant_g = 0.0;
2266     color_calc_state->constant_b = 1.0;
2267     color_calc_state->constant_a = 1.0;
2268     dri_bo_unmap(render_state->cc.state);
2269 }
2270
2271 static void
2272 gen7_render_blend_state(VADriverContextP ctx)
2273 {
2274     struct i965_driver_data *i965 = i965_driver_data(ctx);
2275     struct i965_render_state *render_state = &i965->render_state;
2276     struct gen6_blend_state *blend_state;
2277     
2278     dri_bo_map(render_state->cc.blend, 1);
2279     assert(render_state->cc.blend->virtual);
2280     blend_state = render_state->cc.blend->virtual;
2281     memset(blend_state, 0, sizeof(*blend_state));
2282     blend_state->blend1.logic_op_enable = 1;
2283     blend_state->blend1.logic_op_func = 0xc;
2284     blend_state->blend1.pre_blend_clamp_enable = 1;
2285     dri_bo_unmap(render_state->cc.blend);
2286 }
2287
2288 static void
2289 gen7_render_depth_stencil_state(VADriverContextP ctx)
2290 {
2291     struct i965_driver_data *i965 = i965_driver_data(ctx);
2292     struct i965_render_state *render_state = &i965->render_state;
2293     struct gen6_depth_stencil_state *depth_stencil_state;
2294     
2295     dri_bo_map(render_state->cc.depth_stencil, 1);
2296     assert(render_state->cc.depth_stencil->virtual);
2297     depth_stencil_state = render_state->cc.depth_stencil->virtual;
2298     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2299     dri_bo_unmap(render_state->cc.depth_stencil);
2300 }
2301
2302 static void 
2303 gen7_render_sampler(VADriverContextP ctx)
2304 {
2305     struct i965_driver_data *i965 = i965_driver_data(ctx);
2306     struct i965_render_state *render_state = &i965->render_state;
2307     struct gen7_sampler_state *sampler_state;
2308     int i;
2309     
2310     assert(render_state->wm.sampler_count > 0);
2311     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2312
2313     dri_bo_map(render_state->wm.sampler, 1);
2314     assert(render_state->wm.sampler->virtual);
2315     sampler_state = render_state->wm.sampler->virtual;
2316     for (i = 0; i < render_state->wm.sampler_count; i++) {
2317         memset(sampler_state, 0, sizeof(*sampler_state));
2318         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2319         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2320         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2321         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2322         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2323         sampler_state++;
2324     }
2325
2326     dri_bo_unmap(render_state->wm.sampler);
2327 }
2328
2329 static void
2330 gen7_render_setup_states(
2331     VADriverContextP   ctx,
2332     VASurfaceID        surface,
2333     const VARectangle *src_rect,
2334     const VARectangle *dst_rect,
2335     unsigned int       flags
2336 )
2337 {
2338     i965_render_dest_surface_state(ctx, 0);
2339     i965_render_src_surfaces_state(ctx, surface, flags);
2340     gen7_render_sampler(ctx);
2341     i965_render_cc_viewport(ctx);
2342     gen7_render_color_calc_state(ctx);
2343     gen7_render_blend_state(ctx);
2344     gen7_render_depth_stencil_state(ctx);
2345     i965_render_upload_constants(ctx, surface);
2346     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
2347 }
2348
2349 static void
2350 gen7_emit_invariant_states(VADriverContextP ctx)
2351 {
2352     struct i965_driver_data *i965 = i965_driver_data(ctx);
2353     struct intel_batchbuffer *batch = i965->batch;
2354
2355     BEGIN_BATCH(batch, 1);
2356     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2357     ADVANCE_BATCH(batch);
2358
2359     BEGIN_BATCH(batch, 4);
2360     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2361     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2362               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2363     OUT_BATCH(batch, 0);
2364     OUT_BATCH(batch, 0);
2365     ADVANCE_BATCH(batch);
2366
2367     BEGIN_BATCH(batch, 2);
2368     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2369     OUT_BATCH(batch, 1);
2370     ADVANCE_BATCH(batch);
2371
2372     /* Set system instruction pointer */
2373     BEGIN_BATCH(batch, 2);
2374     OUT_BATCH(batch, CMD_STATE_SIP | 0);
2375     OUT_BATCH(batch, 0);
2376     ADVANCE_BATCH(batch);
2377 }
2378
2379 static void
2380 gen7_emit_state_base_address(VADriverContextP ctx)
2381 {
2382     struct i965_driver_data *i965 = i965_driver_data(ctx);
2383     struct intel_batchbuffer *batch = i965->batch;
2384     struct i965_render_state *render_state = &i965->render_state;
2385
2386     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2387     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2388     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2389     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2390     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2391     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2392     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2393     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2394     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2395     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2396 }
2397
2398 static void
2399 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2400 {
2401     struct i965_driver_data *i965 = i965_driver_data(ctx);
2402     struct intel_batchbuffer *batch = i965->batch;
2403     struct i965_render_state *render_state = &i965->render_state;
2404
2405     BEGIN_BATCH(batch, 2);
2406     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2407     OUT_RELOC(batch,
2408               render_state->cc.viewport,
2409               I915_GEM_DOMAIN_INSTRUCTION, 0,
2410               0);
2411     ADVANCE_BATCH(batch);
2412
2413     BEGIN_BATCH(batch, 2);
2414     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2415     OUT_BATCH(batch, 0);
2416     ADVANCE_BATCH(batch);
2417 }
2418
2419 /*
2420  * URB layout on GEN7 
2421  * ----------------------------------------
2422  * | PS Push Constants (8KB) | VS entries |
2423  * ----------------------------------------
2424  */
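/*
 * The PS push-constant space is carved out first, so each stage's URB
 * allocation starts behind it: GEN7_URB_STARTING_ADDRESS is apparently
 * programmed in 8KB units, hence the offset of 1 below (and 2 for the
 * unused HS/DS stages).
 */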
2425 static void
2426 gen7_emit_urb(VADriverContextP ctx)
2427 {
2428     struct i965_driver_data *i965 = i965_driver_data(ctx);
2429     struct intel_batchbuffer *batch = i965->batch;
2430
2431     BEGIN_BATCH(batch, 2);
2432     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2433     OUT_BATCH(batch, 8); /* 8KB, programmed in 1KB units */
2434     ADVANCE_BATCH(batch);
2435
2436     BEGIN_BATCH(batch, 2);
2437     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2438     OUT_BATCH(batch, 
2439               (32 << GEN7_URB_ENTRY_NUMBER_SHIFT) | /* at least 32 */
2440               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2441               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2442     ADVANCE_BATCH(batch);
2443
2444     BEGIN_BATCH(batch, 2);
2445     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2446     OUT_BATCH(batch,
2447               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2448               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2449     ADVANCE_BATCH(batch);
2450
2451     BEGIN_BATCH(batch, 2);
2452     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2453     OUT_BATCH(batch,
2454               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2455               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2456     ADVANCE_BATCH(batch);
2457
2458     BEGIN_BATCH(batch, 2);
2459     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2460     OUT_BATCH(batch,
2461               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2462               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2463     ADVANCE_BATCH(batch);
2464 }
2465
2466 static void
2467 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2468 {
2469     struct i965_driver_data *i965 = i965_driver_data(ctx);
2470     struct intel_batchbuffer *batch = i965->batch;
2471     struct i965_render_state *render_state = &i965->render_state;
2472
2473     BEGIN_BATCH(batch, 2);
2474     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2475     OUT_RELOC(batch,
2476               render_state->cc.state,
2477               I915_GEM_DOMAIN_INSTRUCTION, 0,
2478               1);
2479     ADVANCE_BATCH(batch);
2480
2481     BEGIN_BATCH(batch, 2);
2482     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2483     OUT_RELOC(batch,
2484               render_state->cc.blend,
2485               I915_GEM_DOMAIN_INSTRUCTION, 0,
2486               1);
2487     ADVANCE_BATCH(batch);
2488
2489     BEGIN_BATCH(batch, 2);
2490     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2491     OUT_RELOC(batch,
2492               render_state->cc.depth_stencil,
2493               I915_GEM_DOMAIN_INSTRUCTION, 0, 
2494               1);
2495     ADVANCE_BATCH(batch);
2496 }
2497
2498 static void
2499 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2500 {
2501     struct i965_driver_data *i965 = i965_driver_data(ctx);
2502     struct intel_batchbuffer *batch = i965->batch;
2503     struct i965_render_state *render_state = &i965->render_state;
2504
2505     BEGIN_BATCH(batch, 2);
2506     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2507     OUT_RELOC(batch,
2508               render_state->wm.sampler,
2509               I915_GEM_DOMAIN_INSTRUCTION, 0,
2510               0);
2511     ADVANCE_BATCH(batch);
2512 }
2513
2514 static void
2515 gen7_emit_binding_table(VADriverContextP ctx)
2516 {
2517     struct i965_driver_data *i965 = i965_driver_data(ctx);
2518     struct intel_batchbuffer *batch = i965->batch;
2519
2520     BEGIN_BATCH(batch, 2);
2521     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2522     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2523     ADVANCE_BATCH(batch);
2524 }
2525
2526 static void
2527 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2528 {
2529     struct i965_driver_data *i965 = i965_driver_data(ctx);
2530     struct intel_batchbuffer *batch = i965->batch;
2531
2532     BEGIN_BATCH(batch, 7);
2533     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2534     OUT_BATCH(batch,
2535               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2536               (I965_SURFACE_NULL << 29));
2537     OUT_BATCH(batch, 0);
2538     OUT_BATCH(batch, 0);
2539     OUT_BATCH(batch, 0);
2540     OUT_BATCH(batch, 0);
2541     OUT_BATCH(batch, 0);
2542     ADVANCE_BATCH(batch);
2543
2544     BEGIN_BATCH(batch, 3);
2545     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2546     OUT_BATCH(batch, 0);
2547     OUT_BATCH(batch, 0);
2548     ADVANCE_BATCH(batch);
2549 }
2550
2551 static void
2552 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2553 {
2554     i965_render_drawing_rectangle(ctx);
2555 }
2556
2557 static void 
2558 gen7_emit_vs_state(VADriverContextP ctx)
2559 {
2560     struct i965_driver_data *i965 = i965_driver_data(ctx);
2561     struct intel_batchbuffer *batch = i965->batch;
2562
2563     /* disable VS constant buffer */
2564     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2565     OUT_BATCH(batch, 0);
2566     OUT_BATCH(batch, 0);
2567     OUT_BATCH(batch, 0);
2568     OUT_BATCH(batch, 0);
2569     OUT_BATCH(batch, 0);
2570     OUT_BATCH(batch, 0);
2571         
2572     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2573     OUT_BATCH(batch, 0); /* without VS kernel */
2574     OUT_BATCH(batch, 0);
2575     OUT_BATCH(batch, 0);
2576     OUT_BATCH(batch, 0);
2577     OUT_BATCH(batch, 0); /* pass-through */
2578 }
2579
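/*
 * On IVB every unused stage has to be disabled explicitly: program
 * GS, HS, TE, DS and STREAMOUT to all-zero (pass-through) state so
 * the geometry flows straight from the vertex fetch to SF/WM.
 */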
2580 static void 
2581 gen7_emit_bypass_state(VADriverContextP ctx)
2582 {
2583     struct i965_driver_data *i965 = i965_driver_data(ctx);
2584     struct intel_batchbuffer *batch = i965->batch;
2585
2586     /* bypass GS */
2587     BEGIN_BATCH(batch, 7);
2588     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2589     OUT_BATCH(batch, 0);
2590     OUT_BATCH(batch, 0);
2591     OUT_BATCH(batch, 0);
2592     OUT_BATCH(batch, 0);
2593     OUT_BATCH(batch, 0);
2594     OUT_BATCH(batch, 0);
2595     ADVANCE_BATCH(batch);
2596
2597     BEGIN_BATCH(batch, 7);
2598     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2599     OUT_BATCH(batch, 0); /* without GS kernel */
2600     OUT_BATCH(batch, 0);
2601     OUT_BATCH(batch, 0);
2602     OUT_BATCH(batch, 0);
2603     OUT_BATCH(batch, 0);
2604     OUT_BATCH(batch, 0); /* pass-through */
2605     ADVANCE_BATCH(batch);
2606
2607     BEGIN_BATCH(batch, 2);
2608     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2609     OUT_BATCH(batch, 0);
2610     ADVANCE_BATCH(batch);
2611
2612     /* disable HS */
2613     BEGIN_BATCH(batch, 7);
2614     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2615     OUT_BATCH(batch, 0);
2616     OUT_BATCH(batch, 0);
2617     OUT_BATCH(batch, 0);
2618     OUT_BATCH(batch, 0);
2619     OUT_BATCH(batch, 0);
2620     OUT_BATCH(batch, 0);
2621     ADVANCE_BATCH(batch);
2622
2623     BEGIN_BATCH(batch, 7);
2624     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2625     OUT_BATCH(batch, 0);
2626     OUT_BATCH(batch, 0);
2627     OUT_BATCH(batch, 0);
2628     OUT_BATCH(batch, 0);
2629     OUT_BATCH(batch, 0);
2630     OUT_BATCH(batch, 0);
2631     ADVANCE_BATCH(batch);
2632
2633     BEGIN_BATCH(batch, 2);
2634     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2635     OUT_BATCH(batch, 0);
2636     ADVANCE_BATCH(batch);
2637
2638     /* Disable TE */
2639     BEGIN_BATCH(batch, 4);
2640     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2641     OUT_BATCH(batch, 0);
2642     OUT_BATCH(batch, 0);
2643     OUT_BATCH(batch, 0);
2644     ADVANCE_BATCH(batch);
2645
2646     /* Disable DS */
2647     BEGIN_BATCH(batch, 7);
2648     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2649     OUT_BATCH(batch, 0);
2650     OUT_BATCH(batch, 0);
2651     OUT_BATCH(batch, 0);
2652     OUT_BATCH(batch, 0);
2653     OUT_BATCH(batch, 0);
2654     OUT_BATCH(batch, 0);
2655     ADVANCE_BATCH(batch);
2656
2657     BEGIN_BATCH(batch, 6);
2658     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2659     OUT_BATCH(batch, 0);
2660     OUT_BATCH(batch, 0);
2661     OUT_BATCH(batch, 0);
2662     OUT_BATCH(batch, 0);
2663     OUT_BATCH(batch, 0);
2664     ADVANCE_BATCH(batch);
2665
2666     BEGIN_BATCH(batch, 2);
2667     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2668     OUT_BATCH(batch, 0);
2669     ADVANCE_BATCH(batch);
2670
2671     /* Disable STREAMOUT */
2672     BEGIN_BATCH(batch, 3);
2673     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2674     OUT_BATCH(batch, 0);
2675     OUT_BATCH(batch, 0);
2676     ADVANCE_BATCH(batch);
2677 }
2678
2679 static void 
2680 gen7_emit_clip_state(VADriverContextP ctx)
2681 {
2682     struct i965_driver_data *i965 = i965_driver_data(ctx);
2683     struct intel_batchbuffer *batch = i965->batch;
2684
2685     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2686     OUT_BATCH(batch, 0);
2687     OUT_BATCH(batch, 0); /* pass-through */
2688     OUT_BATCH(batch, 0);
2689 }
2690
2691 static void 
2692 gen7_emit_sf_state(VADriverContextP ctx)
2693 {
2694     struct i965_driver_data *i965 = i965_driver_data(ctx);
2695     struct intel_batchbuffer *batch = i965->batch;
2696
2697     BEGIN_BATCH(batch, 14);
2698     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2699     OUT_BATCH(batch,
2700               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2701               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2702               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2703     OUT_BATCH(batch, 0);
2704     OUT_BATCH(batch, 0);
2705     OUT_BATCH(batch, 0); /* DW4 */
2706     OUT_BATCH(batch, 0);
2707     OUT_BATCH(batch, 0);
2708     OUT_BATCH(batch, 0);
2709     OUT_BATCH(batch, 0);
2710     OUT_BATCH(batch, 0); /* DW9 */
2711     OUT_BATCH(batch, 0);
2712     OUT_BATCH(batch, 0);
2713     OUT_BATCH(batch, 0);
2714     OUT_BATCH(batch, 0);
2715     ADVANCE_BATCH(batch);
2716
2717     BEGIN_BATCH(batch, 7);
2718     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2719     OUT_BATCH(batch, 0);
2720     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2721     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2722     OUT_BATCH(batch, 0);
2723     OUT_BATCH(batch, 0);
2724     OUT_BATCH(batch, 0);
2725     ADVANCE_BATCH(batch);
2726 }
2727
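/*
 * WM/PS setup for IVB: 3DSTATE_WM itself only enables dispatch, while
 * the kernel pointer, push constants (CURBE BO) and thread count (86)
 * move into the separate 3DSTATE_PS packet on this generation.
 */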
2728 static void 
2729 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2730 {
2731     struct i965_driver_data *i965 = i965_driver_data(ctx);
2732     struct intel_batchbuffer *batch = i965->batch;
2733     struct i965_render_state *render_state = &i965->render_state;
2734
2735     BEGIN_BATCH(batch, 3);
2736     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2737     OUT_BATCH(batch,
2738               GEN7_WM_DISPATCH_ENABLE |
2739               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2740     OUT_BATCH(batch, 0);
2741     ADVANCE_BATCH(batch);
2742
2743     BEGIN_BATCH(batch, 7);
2744     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2745     OUT_BATCH(batch, 1); /* constant buffer 0 read length */
2746     OUT_BATCH(batch, 0);
2747     OUT_RELOC(batch, 
2748               render_state->curbe.bo,
2749               I915_GEM_DOMAIN_INSTRUCTION, 0,
2750               0);
2751     OUT_BATCH(batch, 0);
2752     OUT_BATCH(batch, 0);
2753     OUT_BATCH(batch, 0);
2754     ADVANCE_BATCH(batch);
2755
2756     BEGIN_BATCH(batch, 8);
2757     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2758     OUT_RELOC(batch, 
2759               render_state->render_kernels[kernel].bo,
2760               I915_GEM_DOMAIN_INSTRUCTION, 0,
2761               0);
2762     OUT_BATCH(batch, 
2763               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2764               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2765     OUT_BATCH(batch, 0); /* scratch space base offset */
2766     OUT_BATCH(batch, 
2767               ((86 - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
2768               GEN7_PS_PUSH_CONSTANT_ENABLE |
2769               GEN7_PS_ATTRIBUTE_ENABLE |
2770               GEN7_PS_16_DISPATCH_ENABLE);
2771     OUT_BATCH(batch, 
2772               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2773     OUT_BATCH(batch, 0); /* kernel 1 pointer */
2774     OUT_BATCH(batch, 0); /* kernel 2 pointer */
2775     ADVANCE_BATCH(batch);
2776 }
2777
2778 static void
2779 gen7_emit_vertex_element_state(VADriverContextP ctx)
2780 {
2781     struct i965_driver_data *i965 = i965_driver_data(ctx);
2782     struct intel_batchbuffer *batch = i965->batch;
2783
2784     /* Set up our vertex elements, sourced from the single vertex buffer. */
2785     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2786     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2787     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2788               GEN6_VE0_VALID |
2789               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2790               (0 << VE0_OFFSET_SHIFT));
2791     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2792               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2793               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2794               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2795     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2796     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2797               GEN6_VE0_VALID |
2798               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2799               (8 << VE0_OFFSET_SHIFT));
2800     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
2801               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2802               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2803               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2804 }
2805
2806 static void
2807 gen7_emit_vertices(VADriverContextP ctx)
2808 {
2809     struct i965_driver_data *i965 = i965_driver_data(ctx);
2810     struct intel_batchbuffer *batch = i965->batch;
2811     struct i965_render_state *render_state = &i965->render_state;
2812
2813     BEGIN_BATCH(batch, 5);
2814     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2815     OUT_BATCH(batch, 
2816               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2817               GEN6_VB0_VERTEXDATA |
2818               GEN7_VB0_ADDRESS_MODIFYENABLE |
2819               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2820     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2821     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2822     OUT_BATCH(batch, 0);
2823     ADVANCE_BATCH(batch);
2824
2825     BEGIN_BATCH(batch, 7);
2826     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2827     OUT_BATCH(batch,
2828               _3DPRIM_RECTLIST |
2829               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2830     OUT_BATCH(batch, 3); /* vertex count per instance */
2831     OUT_BATCH(batch, 0); /* start vertex offset */
2832     OUT_BATCH(batch, 1); /* single instance */
2833     OUT_BATCH(batch, 0); /* start instance location */
2834     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2835     ADVANCE_BATCH(batch);
2836 }
2837
2838 static void
2839 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2840 {
2841     struct i965_driver_data *i965 = i965_driver_data(ctx);
2842     struct intel_batchbuffer *batch = i965->batch;
2843
2844     intel_batchbuffer_start_atomic(batch, 0x1000);
2845     intel_batchbuffer_emit_mi_flush(batch);
2846     gen7_emit_invariant_states(ctx);
2847     gen7_emit_state_base_address(ctx);
2848     gen7_emit_viewport_state_pointers(ctx);
2849     gen7_emit_urb(ctx);
2850     gen7_emit_cc_state_pointers(ctx);
2851     gen7_emit_sampler_state_pointers(ctx);
2852     gen7_emit_bypass_state(ctx);
2853     gen7_emit_vs_state(ctx);
2854     gen7_emit_clip_state(ctx);
2855     gen7_emit_sf_state(ctx);
2856     gen7_emit_wm_state(ctx, kernel);
2857     gen7_emit_binding_table(ctx);
2858     gen7_emit_depth_buffer_state(ctx);
2859     gen7_emit_drawing_rectangle(ctx);
2860     gen7_emit_vertex_element_state(ctx);
2861     gen7_emit_vertices(ctx);
2862     intel_batchbuffer_end_atomic(batch);
2863 }
2864
2865 static void
2866 gen7_render_put_surface(
2867     VADriverContextP   ctx,
2868     VASurfaceID        surface,
2869     const VARectangle *src_rect,
2870     const VARectangle *dst_rect,
2871     unsigned int       flags
2872 )
2873 {
2874     struct i965_driver_data *i965 = i965_driver_data(ctx);
2875     struct intel_batchbuffer *batch = i965->batch;
2876
2877     gen7_render_initialize(ctx);
2878     gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
2879     i965_clear_dest_region(ctx);
2880     gen7_render_emit_states(ctx, PS_KERNEL);
2881     intel_batchbuffer_flush(batch);
2882 }
2883
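/* Same source-over alpha blend as the GEN6 subpicture path above. */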
2884 static void
2885 gen7_subpicture_render_blend_state(VADriverContextP ctx)
2886 {
2887     struct i965_driver_data *i965 = i965_driver_data(ctx);
2888     struct i965_render_state *render_state = &i965->render_state;
2889     struct gen6_blend_state *blend_state;
2890
2892     dri_bo_map(render_state->cc.blend, 1);
2893     assert(render_state->cc.blend->virtual);
2894     blend_state = render_state->cc.blend->virtual;
2895     memset(blend_state, 0, sizeof(*blend_state));
2896     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2897     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2898     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2899     blend_state->blend0.blend_enable = 1;
2900     blend_state->blend1.post_blend_clamp_enable = 1;
2901     blend_state->blend1.pre_blend_clamp_enable = 1;
2902     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2903     dri_bo_unmap(render_state->cc.blend);
2904 }
2905
2906 static void
2907 gen7_subpicture_render_setup_states(
2908     VADriverContextP   ctx,
2909     VASurfaceID        surface,
2910     const VARectangle *src_rect,
2911     const VARectangle *dst_rect
2912 )
2913 {
2914     i965_render_dest_surface_state(ctx, 0);
2915     i965_subpic_render_src_surfaces_state(ctx, surface);
2916     i965_render_sampler(ctx);
2917     i965_render_cc_viewport(ctx);
2918     gen7_render_color_calc_state(ctx);
2919     gen7_subpicture_render_blend_state(ctx);
2920     gen7_render_depth_stencil_state(ctx);
2921     i965_subpic_render_upload_vertex(ctx, surface, dst_rect);
2922 }
2923
2924 static void
2925 gen7_render_put_subpicture(
2926     VADriverContextP   ctx,
2927     VASurfaceID        surface,
2928     const VARectangle *src_rect,
2929     const VARectangle *dst_rect
2930 )
2931 {
2932     struct i965_driver_data *i965 = i965_driver_data(ctx);
2933     struct intel_batchbuffer *batch = i965->batch;
2934     struct object_surface *obj_surface = SURFACE(surface);
2935     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
2936
2937     assert(obj_subpic);
2938     gen7_render_initialize(ctx);
2939     gen7_subpicture_render_setup_states(ctx, surface, src_rect, dst_rect);
2940     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2941     i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
2942     intel_batchbuffer_flush(batch);
2943 }
2944

/*
 * global functions
 */
/* Forward declaration to avoid pulling in extra headers; defined elsewhere
 * in the driver. */
VAStatus
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);

void
intel_render_put_surface(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int has_done_scaling = 0;
    VASurfaceID in_surface_id = surface;
    VASurfaceID out_surface_id = i965_post_processing(ctx, surface, src_rect, dst_rect, flags, &has_done_scaling);

    assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));

    if (out_surface_id != VA_INVALID_ID)
        in_surface_id = out_surface_id;

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);
    else
        i965_render_put_surface(ctx, in_surface_id, has_done_scaling ? dst_rect : src_rect, dst_rect, flags);

    if (in_surface_id != surface)
        i965_DestroySurfaces(ctx, &in_surface_id, 1);
}

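/*
 * A minimal sketch of the rectangle hand-off rule used above, assuming the
 * has_done_scaling contract of i965_post_processing() (the helper name here
 * is hypothetical, for illustration only):
 *
 *     static const VARectangle *
 *     effective_src_rect(int has_done_scaling,
 *                        const VARectangle *src_rect,
 *                        const VARectangle *dst_rect)
 *     {
 *         // once post-processing has scaled, the intermediate surface is
 *         // already at dst_rect size, so the render pass must not scale again
 *         return has_done_scaling ? dst_rect : src_rect;
 *     }
 */
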
void
intel_render_put_subpicture(
    VADriverContextP   ctx,
    VASurfaceID        surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_GEN7(i965->intel.device_id))
        gen7_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else if (IS_GEN6(i965->intel.device_id))
        gen6_render_put_subpicture(ctx, surface, src_rect, dst_rect);
    else
        i965_render_put_subpicture(ctx, surface, src_rect, dst_rect);
}

Bool
i965_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i;

    /* kernel: every per-generation table must fill the whole kernel array */
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen4) /
                                 sizeof(render_kernels_gen4[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
                                 sizeof(render_kernels_gen5[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
                                 sizeof(render_kernels_gen6[0])));
    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen7) /
                                 sizeof(render_kernels_gen7[0])));

    if (IS_GEN7(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen7, sizeof(render_state->render_kernels));
    else if (IS_GEN6(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
    else if (IS_IRONLAKE(i965->intel.device_id))
        memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
    else
        memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        /* upload each compiled kernel blob into its own 4KiB-aligned bo */
        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size, 0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    /* constant buffer */
    render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
                                          "constant buffer",
                                          4096, 64);
    assert(render_state->curbe.bo);

    return True;
}

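/*
 * For reference, a sketch of one entry in the kernel tables copied above,
 * assuming the struct i965_kernel layout implied by the upload loop
 * (name / interface / bin / size / bo); the table name is hypothetical:
 *
 *     static struct i965_kernel example_render_kernels[] = {
 *         {
 *             "PS",                        // name, reused as the bo label
 *             PS_KERNEL,                   // interface index
 *             ps_kernel_static,            // compiled .g4b shader blob
 *             sizeof(ps_kernel_static),    // blob size in bytes
 *             NULL,                        // bo, filled in by i965_render_init()
 *         },
 *     };
 */
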
Bool
i965_render_terminate(VADriverContextP ctx)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->curbe.bo);
    render_state->curbe.bo = NULL;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        struct i965_kernel *kernel = &render_state->render_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;
    dri_bo_unreference(render_state->vs.state);
    render_state->vs.state = NULL;
    dri_bo_unreference(render_state->sf.state);
    render_state->sf.state = NULL;
    dri_bo_unreference(render_state->wm.sampler);
    render_state->wm.sampler = NULL;
    dri_bo_unreference(render_state->wm.state);
    render_state->wm.state = NULL;
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;
    dri_bo_unreference(render_state->cc.viewport);
    render_state->cc.viewport = NULL;
    dri_bo_unreference(render_state->cc.state);
    render_state->cc.state = NULL;
    dri_bo_unreference(render_state->cc.blend);
    render_state->cc.blend = NULL;
    dri_bo_unreference(render_state->cc.depth_stencil);
    render_state->cc.depth_stencil = NULL;

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }

    return True;
}

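/*
 * The unreference-then-clear pairs above are deliberate: NULLing each
 * pointer keeps a later init/terminate cycle from touching a stale bo.
 * The pattern could be factored into a helper; a sketch (hypothetical
 * macro, not part of this file):
 *
 *     #define UNREF_AND_CLEAR(bo)     \
 *         do {                        \
 *             dri_bo_unreference(bo); \
 *             (bo) = NULL;            \
 *         } while (0)
 *
 *     // usage: UNREF_AND_CLEAR(render_state->vs.state);
 */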